[dragonfly.git] / sys / dev / netif / ixgbe / ixgbe.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2012, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD: src/sys/dev/ixgbe/ixgbe.c,v 1.70 2012/07/05 20:51:44 jfv Exp $*/
34
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37
38 #include "ixgbe.h"
39
40 /*********************************************************************
41  *  Set this to one to display debug statistics
42  *********************************************************************/
43 int             ixgbe_display_debug_stats = 0;
44
45 /*********************************************************************
46  *  Driver version
47  *********************************************************************/
48 char ixgbe_driver_version[] = "2.4.8";
49
50 /*********************************************************************
51  *  PCI Device ID Table
52  *
53  *  Used by probe to select devices to load on
54  *  Last field stores an index into ixgbe_strings
55  *  Last entry must be all 0s
56  *
57  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
58  *********************************************************************/
59
60 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
61 {
62         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
63         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
64         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
65         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
66         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
67         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
68         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
69         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
70         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
71         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
72         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
73         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
74         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
75         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
76         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
77         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
78         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
79         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
80         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
81         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
82         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
83         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
84         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, 0, 0, 0},
85         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
86         /* required last entry */
87         {0, 0, 0, 0, 0}
88 };
89
90 /*********************************************************************
91  *  Table of branding strings
92  *********************************************************************/
93
94 static char    *ixgbe_strings[] = {
95         "Intel(R) PRO/10GbE PCI-Express Network Driver"
96 };
97
98 /*********************************************************************
99  *  Function prototypes
100  *********************************************************************/
101 static int      ixgbe_probe(device_t);
102 static int      ixgbe_attach(device_t);
103 static int      ixgbe_detach(device_t);
104 static int      ixgbe_shutdown(device_t);
105 static void     ixgbe_start(struct ifnet *);
106 static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
107 #if 0 /* __FreeBSD_version >= 800000 */
108 static int      ixgbe_mq_start(struct ifnet *, struct mbuf *);
109 static int      ixgbe_mq_start_locked(struct ifnet *,
110                     struct tx_ring *, struct mbuf *);
111 static void     ixgbe_qflush(struct ifnet *);
112 #endif
113 static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
114 static void     ixgbe_init(void *);
115 static void     ixgbe_init_locked(struct adapter *);
116 static void     ixgbe_stop(void *);
117 static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
118 static int      ixgbe_media_change(struct ifnet *);
119 static void     ixgbe_identify_hardware(struct adapter *);
120 static int      ixgbe_allocate_pci_resources(struct adapter *);
121 static int      ixgbe_allocate_msix(struct adapter *);
122 static int      ixgbe_allocate_legacy(struct adapter *);
123 static int      ixgbe_allocate_queues(struct adapter *);
124 static int      ixgbe_setup_msix(struct adapter *);
125 static void     ixgbe_free_pci_resources(struct adapter *);
126 static void     ixgbe_local_timer(void *);
127 static int      ixgbe_setup_interface(device_t, struct adapter *);
128 static void     ixgbe_config_link(struct adapter *);
129
130 static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
131 static int      ixgbe_setup_transmit_structures(struct adapter *);
132 static void     ixgbe_setup_transmit_ring(struct tx_ring *);
133 static void     ixgbe_initialize_transmit_units(struct adapter *);
134 static void     ixgbe_free_transmit_structures(struct adapter *);
135 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
136
137 static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
138 static int      ixgbe_setup_receive_structures(struct adapter *);
139 static int      ixgbe_setup_receive_ring(struct rx_ring *);
140 static void     ixgbe_initialize_receive_units(struct adapter *);
141 static void     ixgbe_free_receive_structures(struct adapter *);
142 static void     ixgbe_free_receive_buffers(struct rx_ring *);
143 #if 0   /* NET_LRO */
144 static void     ixgbe_setup_hw_rsc(struct rx_ring *);
145 #endif
146
147 static void     ixgbe_enable_intr(struct adapter *);
148 static void     ixgbe_disable_intr(struct adapter *);
149 static void     ixgbe_update_stats_counters(struct adapter *);
150 static bool     ixgbe_txeof(struct tx_ring *);
151 static bool     ixgbe_rxeof(struct ix_queue *, int);
152 static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
153 static void     ixgbe_set_promisc(struct adapter *);
154 static void     ixgbe_set_multi(struct adapter *);
155 static void     ixgbe_update_link_status(struct adapter *);
156 static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
157 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
158 static int      ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
159 static int      ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
160 static int      ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
161 static int      ixgbe_dma_malloc(struct adapter *, bus_size_t,
162                     struct ixgbe_dma_alloc *, int);
163 static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
164 static void     ixgbe_add_rx_process_limit(struct adapter *, const char *,
165                     const char *, int *, int);
166 static bool     ixgbe_tx_ctx_setup(struct tx_ring *, struct mbuf *);
167 static bool     ixgbe_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
168 static int      ixgbe_tso_pullup(struct tx_ring *, struct mbuf **);
169 static void     ixgbe_set_ivar(struct adapter *, u8, u8, s8);
170 static void     ixgbe_configure_ivars(struct adapter *);
171 static u8 *     ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
172
173 static void     ixgbe_setup_vlan_hw_support(struct adapter *);
174 static void     ixgbe_register_vlan(void *, struct ifnet *, u16);
175 static void     ixgbe_unregister_vlan(void *, struct ifnet *, u16);
176
177 static void     ixgbe_add_hw_stats(struct adapter *adapter);
178
179 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
180 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
181                     struct mbuf *, u32);
182
183 /* Support for pluggable optic modules */
184 static bool     ixgbe_sfp_probe(struct adapter *);
185 static void     ixgbe_setup_optics(struct adapter *);
186
187 /* Legacy (single vector) interrupt handler */
188 static void     ixgbe_legacy_irq(void *);
189
190 /* The MSI/X Interrupt handlers */
191 static void     ixgbe_msix_que(void *);
192 static void     ixgbe_msix_link(void *);
193
194 /* Deferred interrupt tasklets */
195 static void     ixgbe_handle_que(void *, int);
196 static void     ixgbe_handle_link(void *, int);
197 static void     ixgbe_handle_msf(void *, int);
198 static void     ixgbe_handle_mod(void *, int);
199
200 #ifdef IXGBE_FDIR
201 static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
202 static void     ixgbe_reinit_fdir(void *, int);
203 #endif
204
205 /*********************************************************************
206  *  FreeBSD Device Interface Entry Points
207  *********************************************************************/
208
209 static device_method_t ixgbe_methods[] = {
210         /* Device interface */
211         DEVMETHOD(device_probe, ixgbe_probe),
212         DEVMETHOD(device_attach, ixgbe_attach),
213         DEVMETHOD(device_detach, ixgbe_detach),
214         DEVMETHOD(device_shutdown, ixgbe_shutdown),
215         {0, 0}
216 };
217
218 static driver_t ixgbe_driver = {
219         "ix", ixgbe_methods, sizeof(struct adapter),
220 };
221
222 devclass_t ixgbe_devclass;
223 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
224
225 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
226 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
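
/*
 * Note: the driver_t above is named "ix", so interfaces attach as
 * ix0, ix1, ... even though the module itself is called ixgbe.
 */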
227
228 /*
229 ** TUNEABLE PARAMETERS:
230 */
231
232 /*
233 ** AIM: Adaptive Interrupt Moderation
234 ** which means that the interrupt rate
235 ** is varied over time based on the
236 ** traffic for that interrupt vector
237 */
238 static int ixgbe_enable_aim = TRUE;
239 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
240
241 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
242 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
243
244 /* How many packets rxeof tries to clean at a time */
245 static int ixgbe_rx_process_limit = 128;
246 TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
247
248 /*
249 ** Smart speed setting, default is on.
250 ** This only works as a compile-time option
251 ** right now, since it is used during attach;
252 ** set this to 'ixgbe_smart_speed_off'
253 ** to disable it.
254 */
255 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
256
257 static int ixgbe_msi_enable = 1;
258 TUNABLE_INT("hw.ixgbe.msi.enable", &ixgbe_msi_enable);
259
260 /*
261  * MSIX should be the default for best performance,
262  * but this allows it to be forced off for testing.
263  */
264 static int ixgbe_enable_msix = 1;
265 TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
266
267 /*
268  * Header split: this causes the hardware to DMA
269  * the header into a separate mbuf from the payload.
270  * It can be a performance win in some workloads, but
271  * in others it actually hurts; it is off by default.
272  */
273 static int ixgbe_header_split = FALSE;
274 TUNABLE_INT("hw.ixgbe.hdr_split", &ixgbe_header_split);
275
276 /*
277  * Number of queues: if set to 0 it
278  * autoconfigures based on the
279  * number of CPUs, with a max of 8. This
280  * can be overridden manually here.
281  */
282 static int ixgbe_num_queues = 0;
283 TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
284
285 /*
286 ** Number of TX descriptors per ring,
287 ** set higher than RX as this seems to be
288 ** the better-performing choice.
289 */
290 static int ixgbe_txd = PERFORM_TXD;
291 TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
292
293 /* Number of RX descriptors per ring */
294 static int ixgbe_rxd = PERFORM_RXD;
295 TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
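
/*
 * The TUNABLE_INT knobs above are read from the kernel environment at
 * module load time, so they are normally set in /boot/loader.conf
 * before the driver loads, e.g. (a sketch, values purely illustrative):
 *
 *   hw.ixgbe.enable_aim="1"
 *   hw.ixgbe.num_queues="4"
 *   hw.ixgbe.txd="2048"
 *   hw.ixgbe.rxd="2048"
 */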
296
297 /* Keep running tab on them for sanity check */
298 static int ixgbe_total_ports;
299
300 #ifdef IXGBE_FDIR
301 /*
302 ** For Flow Director: this is the
303 ** number of TX packets we sample
304 ** for the filter pool; with the default
305 ** of 20, every 20th packet is probed.
306 **
307 ** This feature can be disabled by
308 ** setting this to 0.
309 */
310 static int atr_sample_rate = 20;
311 /*
312 ** Flow Director actually 'steals'
313 ** part of the packet buffer as its
314 ** filter pool; this variable controls
315 ** how much it uses:
316 **  0 = 64K, 1 = 128K, 2 = 256K
317 */
318 static int fdir_pballoc = 1;
319 #endif
320
321 #ifdef DEV_NETMAP
322 /*
323  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
324  * be a reference on how to implement netmap support in a driver.
325  * Additional comments are in ixgbe_netmap.h .
326  *
327  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
328  * that extend the standard driver.
329  */
330 #include <dev/netmap/ixgbe_netmap.h>
331 #endif /* DEV_NETMAP */
332
333 /*********************************************************************
334  *  Device identification routine
335  *
336  *  ixgbe_probe determines if the driver should be loaded on the
337  *  adapter based on the PCI vendor/device id of the adapter.
338  *
339  *  return BUS_PROBE_DEFAULT on success, positive on failure
340  *********************************************************************/
341
342 static int
343 ixgbe_probe(device_t dev)
344 {
345         ixgbe_vendor_info_t *ent;
346
347         u16     pci_vendor_id = 0;
348         u16     pci_device_id = 0;
349         u16     pci_subvendor_id = 0;
350         u16     pci_subdevice_id = 0;
351         char    adapter_name[256];
352
353         INIT_DEBUGOUT("ixgbe_probe: begin");
354
355         pci_vendor_id = pci_get_vendor(dev);
356         if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
357                 return (ENXIO);
358
359         pci_device_id = pci_get_device(dev);
360         pci_subvendor_id = pci_get_subvendor(dev);
361         pci_subdevice_id = pci_get_subdevice(dev);
362
363         ent = ixgbe_vendor_info_array;
364         while (ent->vendor_id != 0) {
365                 if ((pci_vendor_id == ent->vendor_id) &&
366                     (pci_device_id == ent->device_id) &&
367
368                     ((pci_subvendor_id == ent->subvendor_id) ||
369                      (ent->subvendor_id == 0)) &&
370
371                     ((pci_subdevice_id == ent->subdevice_id) ||
372                      (ent->subdevice_id == 0))) {
373                         ksprintf(adapter_name, "%s, Version - %s",
374                                 ixgbe_strings[ent->index],
375                                 ixgbe_driver_version);
376                         device_set_desc_copy(dev, adapter_name);
377                         ++ixgbe_total_ports;
378                         return (BUS_PROBE_DEFAULT);
379                 }
380                 ent++;
381         }
382         return (ENXIO);
383 }
384
385 /*********************************************************************
386  *  Device initialization routine
387  *
388  *  The attach entry point is called when the driver is being loaded.
389  *  This routine identifies the type of hardware, allocates all resources
390  *  and initializes the hardware.
391  *
392  *  return 0 on success, positive on failure
393  *********************************************************************/
394
395 static int
396 ixgbe_attach(device_t dev)
397 {
398         struct adapter *adapter;
399         struct ixgbe_hw *hw;
400         int             error = 0;
401         u16             csum;
402         u32             ctrl_ext;
403
404         INIT_DEBUGOUT("ixgbe_attach: begin");
405
406         if (resource_disabled("ixgbe", device_get_unit(dev))) {
407                 device_printf(dev, "Disabled by device hint\n");
408                 return (ENXIO);
409         }
410
411         /* Allocate, clear, and link in our adapter structure */
412         adapter = device_get_softc(dev);
413         adapter->dev = adapter->osdep.dev = dev;
414         hw = &adapter->hw;
415
416         /* Core Lock Init*/
417         IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
418
419         /* SYSCTL APIs */
420
421         sysctl_ctx_init(&adapter->sysctl_ctx);
422         adapter->sysctl_tree = SYSCTL_ADD_NODE(&adapter->sysctl_ctx,
423             SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
424             device_get_nameunit(adapter->dev), CTLFLAG_RD, 0, "");
425         if (adapter->sysctl_tree == NULL) {
426                 device_printf(adapter->dev, "can't add sysctl node\n");
427                 return (EINVAL);
428         }
429         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
430                         SYSCTL_CHILDREN(adapter->sysctl_tree),
431                         OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW,
432                         adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control");
433
434         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
435                         SYSCTL_CHILDREN(adapter->sysctl_tree),
436                         OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
437                         &ixgbe_enable_aim, 1, "Interrupt Moderation");
438
439         /*
440         ** Allow a kind of speed control by forcing the autoneg
441         ** advertised speed list to only a certain value; this
442         ** supports 1G on 82599 devices, and 100Mb on X540.
443         */
444         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
445                         SYSCTL_CHILDREN(adapter->sysctl_tree),
446                         OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW,
447                         adapter, 0, ixgbe_set_advertise, "I", "Link Speed");
448
449         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
450                         SYSCTL_CHILDREN(adapter->sysctl_tree),
451                         OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter,
452                         0, ixgbe_set_thermal_test, "I", "Thermal Test");
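
        /*
         * The sysctls added above live under the per-device node, e.g.
         * for the first port (a sketch; see the handlers above for the
         * accepted values):
         *   sysctl hw.ix0.fc               # flow control mode
         *   sysctl hw.ix0.enable_aim       # adaptive interrupt moderation
         *   sysctl hw.ix0.advertise_speed  # restrict autoneg advertisement
         *   sysctl hw.ix0.ts               # thermal test
         */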
453
454         /* Set up the timer callout */
455         callout_init_mp(&adapter->timer);
456
457         /* Determine hardware revision */
458         ixgbe_identify_hardware(adapter);
459
460         /* Enable bus mastering */
461         pci_enable_busmaster(dev);
462
463         /* Do base PCI setup - map BAR0 */
464         if (ixgbe_allocate_pci_resources(adapter)) {
465                 device_printf(dev, "Allocation of PCI resources failed\n");
466                 error = ENXIO;
467                 goto err_out;
468         }
469
470         /* Do descriptor calc and sanity checks */
471         if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
472             ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
473                 device_printf(dev, "TXD config issue, using default!\n");
474                 adapter->num_tx_desc = DEFAULT_TXD;
475         } else
476                 adapter->num_tx_desc = ixgbe_txd;
477
478         /*
479         ** With many RX rings it is easy to exceed the
480         ** system mbuf allocation. Tuning nmbclusters
481         ** can alleviate this.
482         */
483         if (nmbclusters > 0 ) {
484                 int s;
485                 s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
486                 if (s > nmbclusters) {
487                         device_printf(dev, "RX Descriptors exceed "
488                             "system mbuf max, using default instead!\n");
489                         ixgbe_rxd = DEFAULT_RXD;
490                 }
491         }
492
493         if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
494             ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
495                 device_printf(dev, "RXD config issue, using default!\n");
496                 adapter->num_rx_desc = DEFAULT_RXD;
497         } else
498                 adapter->num_rx_desc = ixgbe_rxd;
499
500         /* Allocate our TX/RX Queues */
501         if (ixgbe_allocate_queues(adapter)) {
502                 error = ENOMEM;
503                 goto err_out;
504         }
505
506         /* Allocate multicast array memory. */
507         adapter->mta = kmalloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
508             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
509         if (adapter->mta == NULL) {
510                 device_printf(dev, "Can not allocate multicast setup array\n");
511                 error = ENOMEM;
512                 goto err_late;
513         }
514
515         /* Initialize the shared code */
516         error = ixgbe_init_shared_code(hw);
517         if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
518                 /*
519                 ** No optics in this port, set up
520                 ** so the timer routine will probe 
521                 ** for later insertion.
522                 */
523                 adapter->sfp_probe = TRUE;
524                 error = 0;
525         } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
526                 device_printf(dev,"Unsupported SFP+ module detected!\n");
527                 error = EIO;
528                 goto err_late;
529         } else if (error) {
530                 device_printf(dev,"Unable to initialize the shared code\n");
531                 error = EIO;
532                 goto err_late;
533         }
534
535         /* Make sure we have a good EEPROM before we read from it */
536         if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
537                 device_printf(dev,"The EEPROM Checksum Is Not Valid\n");
538                 error = EIO;
539                 goto err_late;
540         }
541
542         error = ixgbe_init_hw(hw);
543         switch (error) {
544         case IXGBE_ERR_EEPROM_VERSION:
545                 device_printf(dev, "This device is a pre-production adapter/"
546                     "LOM.  Please be aware there may be issues associated "
547                     "with your hardware.\n If you are experiencing problems "
548                     "please contact your Intel or hardware representative "
549                     "who provided you with this hardware.\n");
550                 break;
551         case IXGBE_ERR_SFP_NOT_SUPPORTED:
552                 device_printf(dev,"Unsupported SFP+ Module\n");
553                 error = EIO;
554                 device_printf(dev,"Hardware Initialization Failure\n");
555                 goto err_late;
556         case IXGBE_ERR_SFP_NOT_PRESENT:
557                 device_printf(dev,"No SFP+ Module found\n");
558                 /* falls thru */
559         default:
560                 break;
561         }
562
563         /* Detect and set physical type */
564         ixgbe_setup_optics(adapter);
565
566         if ((adapter->msix > 1) && (ixgbe_enable_msix))
567                 error = ixgbe_allocate_msix(adapter); 
568         else
569                 error = ixgbe_allocate_legacy(adapter); 
570         if (error) 
571                 goto err_late;
572
573         /* Setup OS specific network interface */
574         if (ixgbe_setup_interface(dev, adapter) != 0)
575                 goto err_late;
576
577         /* Sysctl for limiting the amount of work done in the taskqueue */
578         ixgbe_add_rx_process_limit(adapter, "rx_processing_limit",
579             "max number of rx packets to process", &adapter->rx_process_limit,
580             ixgbe_rx_process_limit);
581
582         /* Initialize statistics */
583         ixgbe_update_stats_counters(adapter);
584
585         /* Register for VLAN events */
586         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
587             ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
588         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
589             ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
590
591         /* Print PCIE bus type/speed/width info */
592         ixgbe_get_bus_info(hw);
593         device_printf(dev,"PCI Express Bus: Speed %s %s\n",
594             ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
595             (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
596             (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
597             (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
598             (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
599             ("Unknown"));
600
601         if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
602             (hw->bus.speed == ixgbe_bus_speed_2500)) {
603                 device_printf(dev, "PCI-Express bandwidth available"
604                     " for this card\n     is not sufficient for"
605                     " optimal performance.\n");
606                 device_printf(dev, "For optimal performance a x8 "
607                     "PCIE, or x4 PCIE 2 slot is required.\n");
608         }
609
610         /* let hardware know driver is loaded */
611         ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
612         ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
613         IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
614
615         ixgbe_add_hw_stats(adapter);
616
617 #ifdef DEV_NETMAP
618         ixgbe_netmap_attach(adapter);
619 #endif /* DEV_NETMAP */
620         INIT_DEBUGOUT("ixgbe_attach: end");
621         return (0);
622 err_late:
623         ixgbe_free_transmit_structures(adapter);
624         ixgbe_free_receive_structures(adapter);
625 err_out:
626         if (adapter->ifp != NULL)
627                 if_free(adapter->ifp);
628         ixgbe_free_pci_resources(adapter);
629         kfree(adapter->mta, M_DEVBUF);
630         return (error);
631
632 }
633
634 /*********************************************************************
635  *  Device removal routine
636  *
637  *  The detach entry point is called when the driver is being removed.
638  *  This routine stops the adapter and deallocates all the resources
639  *  that were allocated for driver operation.
640  *
641  *  return 0 on success, positive on failure
642  *********************************************************************/
643
644 static int
645 ixgbe_detach(device_t dev)
646 {
647         struct adapter *adapter = device_get_softc(dev);
648         struct ix_queue *que = adapter->queues;
649         u32     ctrl_ext;
650
651         INIT_DEBUGOUT("ixgbe_detach: begin");
652
653         /* Make sure VLANS are not using driver */
654         if (adapter->ifp->if_vlantrunks != NULL) {
655                 device_printf(dev,"Vlan in use, detach first\n");
656                 return (EBUSY);
657         }
658
659         IXGBE_CORE_LOCK(adapter);
660         ixgbe_stop(adapter);
661         IXGBE_CORE_UNLOCK(adapter);
662
663         for (int i = 0; i < adapter->num_queues; i++, que++) {
664                 if (que->tq) {
665                         taskqueue_drain(que->tq, &que->que_task);
666                         taskqueue_free(que->tq);
667                 }
668         }
669
670         /* Drain the Link queue */
671         if (adapter->tq) {
672                 taskqueue_drain(adapter->tq, &adapter->link_task);
673                 taskqueue_drain(adapter->tq, &adapter->mod_task);
674                 taskqueue_drain(adapter->tq, &adapter->msf_task);
675 #ifdef IXGBE_FDIR
676                 taskqueue_drain(adapter->tq, &adapter->fdir_task);
677 #endif
678                 taskqueue_free(adapter->tq);
679         }
680
681         /* let hardware know driver is unloading */
682         ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
683         ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
684         IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
685
686         /* Unregister VLAN events */
687         if (adapter->vlan_attach != NULL)
688                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
689         if (adapter->vlan_detach != NULL)
690                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
691
692         ether_ifdetach(adapter->ifp);
693         callout_stop(&adapter->timer);
694 #ifdef DEV_NETMAP
695         netmap_detach(adapter->ifp);
696 #endif /* DEV_NETMAP */
697         ixgbe_free_pci_resources(adapter);
698         bus_generic_detach(dev);
699         if_free(adapter->ifp);
700
701         ixgbe_free_transmit_structures(adapter);
702         ixgbe_free_receive_structures(adapter);
703         kfree(adapter->mta, M_DEVBUF);
704         sysctl_ctx_free(&adapter->sysctl_ctx);
705         
706         IXGBE_CORE_LOCK_DESTROY(adapter);
707         return (0);
708 }
709
710 /*********************************************************************
711  *
712  *  Shutdown entry point
713  *
714  **********************************************************************/
715
716 static int
717 ixgbe_shutdown(device_t dev)
718 {
719         struct adapter *adapter = device_get_softc(dev);
720         IXGBE_CORE_LOCK(adapter);
721         ixgbe_stop(adapter);
722         IXGBE_CORE_UNLOCK(adapter);
723         return (0);
724 }
725
726
727 /*********************************************************************
728  *  Transmit entry point
729  *
730  *  ixgbe_start is called by the stack to initiate a transmit.
731  *  The driver will remain in this routine as long as there are
732  *  packets to transmit and transmit resources are available.
733  *  In case resources are not available stack is notified and
734  *  In case resources are not available, the stack is notified and
735  **********************************************************************/
736
737 static void
738 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
739 {
740         struct mbuf    *m_head;
741         struct adapter *adapter = txr->adapter;
742
743         IXGBE_TX_LOCK_ASSERT(txr);
744
745         if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
746                 return;
747         if (!adapter->link_active)
748                 return;
749
750         while (!ifq_is_empty(&ifp->if_snd)) {
751                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) {
752                         txr->queue_status |= IXGBE_QUEUE_DEPLETED;
753                         break;
754                 }
755
756                 m_head = ifq_dequeue(&ifp->if_snd, NULL);
757                 if (m_head == NULL)
758                         break;
759
760                 if (ixgbe_xmit(txr, &m_head)) {
761 #if 0 /* XXX: prepend to an ALTQ queue ? */
762                         if (m_head != NULL)
763                                 IF_PREPEND(&ifp->if_snd, m_head);
764 #endif
765                         if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
766                                 txr->queue_status |= IXGBE_QUEUE_DEPLETED;
767                         break;
768                 }
769                 /* Send a copy of the frame to the BPF listener */
770                 ETHER_BPF_MTAP(ifp, m_head);
771
772                 /* Set watchdog on */
773                 txr->watchdog_time = ticks;
774                 txr->queue_status = IXGBE_QUEUE_WORKING;
775
776         }
777         return;
778 }
779
780 /*
781  * Legacy TX start - called by the stack, this
782  * always uses the first tx ring, and should
783  * not be used with multiqueue tx enabled.
784  */
785 static void
786 ixgbe_start(struct ifnet *ifp)
787 {
788         struct adapter *adapter = ifp->if_softc;
789         struct tx_ring  *txr = adapter->tx_rings;
790
791         if (ifp->if_flags & IFF_RUNNING) {
792                 IXGBE_TX_LOCK(txr);
793                 ixgbe_start_locked(txr, ifp);
794                 IXGBE_TX_UNLOCK(txr);
795         }
796         return;
797 }
798
799 #if 0 /* __FreeBSD_version >= 800000 */
800 /*
801 ** Multiqueue Transmit driver
802 **
803 */
804 static int
805 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
806 {
807         struct adapter  *adapter = ifp->if_softc;
808         struct ix_queue *que;
809         struct tx_ring  *txr;
810         int             i = 0, err = 0;
811
812         /* Which queue to use */
813         if ((m->m_flags & M_FLOWID) != 0)
814                 i = m->m_pkthdr.flowid % adapter->num_queues;
815         else
816                 i = curcpu % adapter->num_queues;
817
818         txr = &adapter->tx_rings[i];
819         que = &adapter->queues[i];
820
821         if (((txr->queue_status & IXGBE_QUEUE_DEPLETED) == 0) &&
822             IXGBE_TX_TRYLOCK(txr)) {
823                 err = ixgbe_mq_start_locked(ifp, txr, m);
824                 IXGBE_TX_UNLOCK(txr);
825         } else {
826                 err = drbr_enqueue(ifp, txr->br, m);
827                 taskqueue_enqueue(que->tq, &que->que_task);
828         }
829
830         return (err);
831 }
832
833 static int
834 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
835 {
836         struct adapter  *adapter = txr->adapter;
837         struct mbuf     *next;
838         int             enqueued, err = 0;
839
840         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
841             (txr->queue_status == IXGBE_QUEUE_DEPLETED) ||
842             adapter->link_active == 0) {
843                 if (m != NULL)
844                         err = drbr_enqueue(ifp, txr->br, m);
845                 return (err);
846         }
847
848         enqueued = 0;
849         if (m == NULL) {
850                 next = drbr_dequeue(ifp, txr->br);
851         } else if (drbr_needs_enqueue(ifp, txr->br)) {
852                 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
853                         return (err);
854                 next = drbr_dequeue(ifp, txr->br);
855         } else
856                 next = m;
857
858         /* Process the queue */
859         while (next != NULL) {
860                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
861                         if (next != NULL)
862                                 err = drbr_enqueue(ifp, txr->br, next);
863                         break;
864                 }
865                 enqueued++;
866                 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
867                 /* Send a copy of the frame to the BPF listener */
868                 ETHER_BPF_MTAP(ifp, next);
869                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
870                         break;
871                 if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
872                         ixgbe_txeof(txr);
873                 if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD) {
874                         txr->queue_status |= IXGBE_QUEUE_DEPLETED;
875                         break;
876                 }
877                 next = drbr_dequeue(ifp, txr->br);
878         }
879
880         if (enqueued > 0) {
881                 /* Set watchdog on */
882                 txr->queue_status |= IXGBE_QUEUE_WORKING;
883                 txr->watchdog_time = ticks;
884         }
885
886         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
887                 ixgbe_txeof(txr);
888
889         return (err);
890 }
891
892 /*
893 ** Flush all ring buffers
894 */
895 static void
896 ixgbe_qflush(struct ifnet *ifp)
897 {
898         struct adapter  *adapter = ifp->if_softc;
899         struct tx_ring  *txr = adapter->tx_rings;
900         struct mbuf     *m;
901
902         for (int i = 0; i < adapter->num_queues; i++, txr++) {
903                 IXGBE_TX_LOCK(txr);
904                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
905                         m_freem(m);
906                 IXGBE_TX_UNLOCK(txr);
907         }
908         if_qflush(ifp);
909 }
910 #endif /* __FreeBSD_version >= 800000 */
911
912 /*********************************************************************
913  *  Ioctl entry point
914  *
915  *  ixgbe_ioctl is called when the user wants to configure the
916  *  interface.
917  *
918  *  return 0 on success, positive on failure
919  **********************************************************************/
920
921 static int
922 ixgbe_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
923 {
924         struct adapter  *adapter = ifp->if_softc;
925         struct ifreq    *ifr = (struct ifreq *) data;
926 #if defined(INET) || defined(INET6)
927         struct ifaddr *ifa = (struct ifaddr *)data;
928         bool            avoid_reset = FALSE;
929 #endif
930         int             error = 0;
931
932         switch (command) {
933
934         case SIOCSIFADDR:
935 #ifdef INET
936                 if (ifa->ifa_addr->sa_family == AF_INET)
937                         avoid_reset = TRUE;
938 #endif
939 #ifdef INET6
940                 if (ifa->ifa_addr->sa_family == AF_INET6)
941                         avoid_reset = TRUE;
942 #endif
943 #if defined(INET) || defined(INET6)
944                 /*
945                 ** Calling init results in link renegotiation,
946                 ** so we avoid doing it when possible.
947                 */
948                 if (avoid_reset) {
949                         ifp->if_flags |= IFF_UP;
950                         if (!(ifp->if_flags & IFF_RUNNING))
951                                 ixgbe_init(adapter);
952                         if (!(ifp->if_flags & IFF_NOARP))
953                                 arp_ifinit(ifp, ifa);
954                 } else
955                         error = ether_ioctl(ifp, command, data);
956 #endif
957                 break;
958         case SIOCSIFMTU:
959                 IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
960                 if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
961                         error = EINVAL;
962                 } else {
963                         IXGBE_CORE_LOCK(adapter);
964                         ifp->if_mtu = ifr->ifr_mtu;
965                         adapter->max_frame_size =
966                                 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
967                         ixgbe_init_locked(adapter);
968                         IXGBE_CORE_UNLOCK(adapter);
969                 }
970                 break;
971         case SIOCSIFFLAGS:
972                 IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
973                 IXGBE_CORE_LOCK(adapter);
974                 if (ifp->if_flags & IFF_UP) {
975                         if ((ifp->if_flags & IFF_RUNNING)) {
976                                 if ((ifp->if_flags ^ adapter->if_flags) &
977                                     (IFF_PROMISC | IFF_ALLMULTI)) {
978                                         ixgbe_set_promisc(adapter);
979                                 }
980                         } else
981                                 ixgbe_init_locked(adapter);
982                 } else
983                         if (ifp->if_flags & IFF_RUNNING)
984                                 ixgbe_stop(adapter);
985                 adapter->if_flags = ifp->if_flags;
986                 IXGBE_CORE_UNLOCK(adapter);
987                 break;
988         case SIOCADDMULTI:
989         case SIOCDELMULTI:
990                 IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
991                 if (ifp->if_flags & IFF_RUNNING) {
992                         IXGBE_CORE_LOCK(adapter);
993                         ixgbe_disable_intr(adapter);
994                         ixgbe_set_multi(adapter);
995                         ixgbe_enable_intr(adapter);
996                         IXGBE_CORE_UNLOCK(adapter);
997                 }
998                 break;
999         case SIOCSIFMEDIA:
1000         case SIOCGIFMEDIA:
1001                 IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
1002                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1003                 break;
1004         case SIOCSIFCAP:
1005         {
1006                 int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1007                 IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
1008                 if (mask & IFCAP_HWCSUM)
1009                         ifp->if_capenable ^= IFCAP_HWCSUM;
1010                 if (mask & IFCAP_TSO4)
1011                         ifp->if_capenable ^= IFCAP_TSO4;
1012                 if (mask & IFCAP_TSO6)
1013                         ifp->if_capenable ^= IFCAP_TSO6;
1014 #if 0 /* NET_LRO */
1015                 if (mask & IFCAP_LRO)
1016                         ifp->if_capenable ^= IFCAP_LRO;
1017 #endif
1018                 if (mask & IFCAP_VLAN_HWTAGGING)
1019                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1020                 if (mask & IFCAP_VLAN_HWFILTER)
1021                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1022 #if 0 /* NET_TSO */
1023                 if (mask & IFCAP_VLAN_HWTSO)
1024                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1025 #endif
1026                 if (ifp->if_flags & IFF_RUNNING) {
1027                         IXGBE_CORE_LOCK(adapter);
1028                         ixgbe_init_locked(adapter);
1029                         IXGBE_CORE_UNLOCK(adapter);
1030                 }
1031 #if 0
1032                 VLAN_CAPABILITIES(ifp);
1033 #endif
1034                 break;
1035         }
1036
1037         default:
1038                 IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
1039                 error = ether_ioctl(ifp, command, data);
1040                 break;
1041         }
1042
1043         return (error);
1044 }
1045
1046 /*********************************************************************
1047  *  Init entry point
1048  *
1049  *  This routine is used in two ways. It is used by the stack as
1050  *  the init entry point in the network interface structure. It is also used
1051  *  by the driver as a hw/sw initialization routine to get to a
1052  *  consistent state.
1053  *
1054  *  return 0 on success, positive on failure
1055  **********************************************************************/
1056 #define IXGBE_MHADD_MFS_SHIFT 16
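/*
 * The maximum frame size (MFS) occupies the upper 16 bits of the MHADD
 * register, hence the shift used when programming the MTU below.
 */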
1057
1058 static void
1059 ixgbe_init_locked(struct adapter *adapter)
1060 {
1061         struct ifnet   *ifp = adapter->ifp;
1062         device_t        dev = adapter->dev;
1063         struct ixgbe_hw *hw = &adapter->hw;
1064         u32             k, txdctl, mhadd, gpie;
1065         u32             rxdctl, rxctrl;
1066
1067         KKASSERT(lockstatus(&adapter->core_lock, curthread) != 0);
1068         INIT_DEBUGOUT("ixgbe_init: begin");
1069         hw->adapter_stopped = FALSE;
1070         ixgbe_stop_adapter(hw);
1071         callout_stop(&adapter->timer);
1072
1073         /* reprogram the RAR[0] in case user changed it. */
1074         ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
1075
1076         /* Get the latest mac address, User can use a LAA */
1077         bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
1078               IXGBE_ETH_LENGTH_OF_ADDRESS);
1079         ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
1080         hw->addr_ctrl.rar_used_count = 1;
1081
1082         /* Set the various hardware offload abilities */
1083         ifp->if_hwassist = 0;
1084         if (ifp->if_capenable & IFCAP_TSO)
1085                 ifp->if_hwassist |= CSUM_TSO;
1086         if (ifp->if_capenable & IFCAP_TXCSUM) {
1087                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1088 #if 0
1089                 if (hw->mac.type != ixgbe_mac_82598EB)
1090                         ifp->if_hwassist |= CSUM_SCTP;
1091 #endif
1092         }
1093
1094         /* Prepare transmit descriptors and buffers */
1095         if (ixgbe_setup_transmit_structures(adapter)) {
1096                 device_printf(dev,"Could not setup transmit structures\n");
1097                 ixgbe_stop(adapter);
1098                 return;
1099         }
1100
1101         ixgbe_init_hw(hw);
1102         ixgbe_initialize_transmit_units(adapter);
1103
1104         /* Setup Multicast table */
1105         ixgbe_set_multi(adapter);
1106
1107         /*
1108         ** Determine the correct mbuf pool
1109         ** for doing jumbo/headersplit
1110         */
1111         if (adapter->max_frame_size <= 2048)
1112                 adapter->rx_mbuf_sz = MCLBYTES;
1113         else if (adapter->max_frame_size <= 4096)
1114                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1115         else if (adapter->max_frame_size <= 9216)
1116                 adapter->rx_mbuf_sz = MJUM9BYTES;
1117         else
1118                 adapter->rx_mbuf_sz = MJUM16BYTES;
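        /* i.e. pick the smallest mbuf cluster size that still holds a full frame */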
1119
1120         /* Prepare receive descriptors and buffers */
1121         if (ixgbe_setup_receive_structures(adapter)) {
1122                 device_printf(dev,"Could not setup receive structures\n");
1123                 ixgbe_stop(adapter);
1124                 return;
1125         }
1126
1127         /* Configure RX settings */
1128         ixgbe_initialize_receive_units(adapter);
1129
1130         gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
1131
1132         /* Enable Fan Failure Interrupt */
1133         gpie |= IXGBE_SDP1_GPIEN;
1134
1135         /* Add for Module detection */
1136         if (hw->mac.type == ixgbe_mac_82599EB)
1137                 gpie |= IXGBE_SDP2_GPIEN;
1138
1139         /* Thermal Failure Detection */
1140         if (hw->mac.type == ixgbe_mac_X540)
1141                 gpie |= IXGBE_SDP0_GPIEN;
1142
1143         if (adapter->msix > 1) {
1144                 /* Enable Enhanced MSIX mode */
1145                 gpie |= IXGBE_GPIE_MSIX_MODE;
1146                 gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
1147                     IXGBE_GPIE_OCD;
1148         }
1149         IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
1150
1151         /* Set MTU size */
1152         if (ifp->if_mtu > ETHERMTU) {
1153                 mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
1154                 mhadd &= ~IXGBE_MHADD_MFS_MASK;
1155                 mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
1156                 IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
1157         }
1158         
1159         /* Now enable all the queues */
1160
1161         for (int i = 0; i < adapter->num_queues; i++) {
1162                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
1163                 txdctl |= IXGBE_TXDCTL_ENABLE;
1164                 /* Set WTHRESH to 8, burst writeback */
1165                 txdctl |= (8 << 16);
1166                 /*
1167                  * When the internal queue falls below PTHRESH (32),
1168                  * start prefetching as long as there are at least
1169                  * HTHRESH (1) buffers ready. The values are taken
1170                  * from the Intel linux driver 3.8.21.
1171                  * Prefetching enables tx line rate even with 1 queue.
1172                  */
1173                 txdctl |= (32 << 0) | (1 << 8);
1174                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
1175         }
1176
1177         for (int i = 0; i < adapter->num_queues; i++) {
1178                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
1179                 if (hw->mac.type == ixgbe_mac_82598EB) {
1180                         /*
1181                         ** PTHRESH = 32
1182                         ** HTHRESH = 4
1183                         ** WTHRESH = 8
1184                         */
1185                         rxdctl &= ~0x3FFFFF;
1186                         rxdctl |= 0x080420;
1187                 }
1188                 rxdctl |= IXGBE_RXDCTL_ENABLE;
1189                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
1190                 for (k = 0; k < 10; k++) {
1191                         if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
1192                             IXGBE_RXDCTL_ENABLE)
1193                                 break;
1194                         else
1195                                 msec_delay(1);
1196                 }
1197                 wmb();
1198 #ifdef DEV_NETMAP
1199                 /*
1200                  * In netmap mode, we must preserve the buffers made
1201                  * available to userspace before the if_init()
1202                  * (this is true by default on the TX side, because
1203                  * init makes all buffers available to userspace).
1204                  *
1205                  * netmap_reset() and the device specific routines
1206                  * (e.g. ixgbe_setup_receive_rings()) map these
1207                  * buffers at the end of the NIC ring, so here we
1208                  * must set the RDT (tail) register to make sure
1209                  * they are not overwritten.
1210                  *
1211                  * In this driver the NIC ring starts at RDH = 0,
1212                  * RDT points to the last slot available for reception (?),
1213                  * so RDT = num_rx_desc - 1 means the whole ring is available.
1214                  */
1215                 if (ifp->if_capenable & IFCAP_NETMAP) {
1216                         struct netmap_adapter *na = NA(adapter->ifp);
1217                         struct netmap_kring *kring = &na->rx_rings[i];
1218                         int t = na->num_rx_desc - 1 - kring->nr_hwavail;
1219
1220                         IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
1221                 } else
1222 #endif /* DEV_NETMAP */
1223                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
1224         }
1225
1226         /* Set up VLAN support and filter */
1227         ixgbe_setup_vlan_hw_support(adapter);
1228
1229         /* Enable Receive engine */
1230         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1231         if (hw->mac.type == ixgbe_mac_82598EB)
1232                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
1233         rxctrl |= IXGBE_RXCTRL_RXEN;
1234         ixgbe_enable_rx_dma(hw, rxctrl);
1235
1236         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
1237
1238         /* Set up MSI/X routing */
1239         if (ixgbe_enable_msix)  {
1240                 ixgbe_configure_ivars(adapter);
1241                 /* Set up auto-mask */
1242                 if (hw->mac.type == ixgbe_mac_82598EB)
1243                         IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1244                 else {
1245                         IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
1246                         IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
1247                 }
1248         } else {  /* Simple settings for Legacy/MSI */
1249                 ixgbe_set_ivar(adapter, 0, 0, 0);
1250                 ixgbe_set_ivar(adapter, 0, 0, 1);
1251                 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1252         }
1253
1254 #ifdef IXGBE_FDIR
1255         /* Init Flow director */
1256         if (hw->mac.type != ixgbe_mac_82598EB) {
1257                 u32 hdrm = 32 << fdir_pballoc;
1258
1259                 hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
1260                 ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
1261         }
1262 #endif
1263
1264         /*
1265         ** Check on any SFP devices that
1266         ** need to be kick-started
1267         */
1268         if (hw->phy.type == ixgbe_phy_none) {
1269                 int err = hw->phy.ops.identify(hw);
1270                 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1271                         device_printf(dev,
1272                             "Unsupported SFP+ module type was detected.\n");
1273                         return;
1274                 }
1275         }
1276
1277         /* Set moderation on the Link interrupt */
1278         IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
1279
1280         /* Config/Enable Link */
1281         ixgbe_config_link(adapter);
1282
1283         /* Hardware Packet Buffer & Flow Control setup */
1284         {
1285                 u32 rxpb, frame, size, tmp;
1286
1287                 frame = adapter->max_frame_size;
1288
1289                 /* Calculate High Water */
1290                 if (hw->mac.type == ixgbe_mac_X540)
1291                         tmp = IXGBE_DV_X540(frame, frame);
1292                 else
1293                         tmp = IXGBE_DV(frame, frame);
1294                 size = IXGBE_BT2KB(tmp);
1295                 rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
1296                 hw->fc.high_water[0] = rxpb - size;
1297
1298                 /* Now calculate Low Water */
1299                 if (hw->mac.type == ixgbe_mac_X540)
1300                         tmp = IXGBE_LOW_DV_X540(frame);
1301                 else
1302                         tmp = IXGBE_LOW_DV(frame);
1303                 hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1304                 
1305                 adapter->fc = hw->fc.requested_mode = ixgbe_fc_full;
1306                 hw->fc.pause_time = IXGBE_FC_PAUSE;
1307                 hw->fc.send_xon = TRUE;
1308         }
1309         /* Initialize the FC settings */
1310         ixgbe_start_hw(hw);
1311
1312         /* And now turn on interrupts */
1313         ixgbe_enable_intr(adapter);
1314
1315         /* Now inform the stack we're ready */
1316         ifp->if_flags |= IFF_RUNNING;
1317         ifp->if_flags &= ~IFF_OACTIVE;
1318
1319         return;
1320 }
1321
1322 static void
1323 ixgbe_init(void *arg)
1324 {
1325         struct adapter *adapter = arg;
1326
1327         IXGBE_CORE_LOCK(adapter);
1328         ixgbe_init_locked(adapter);
1329         IXGBE_CORE_UNLOCK(adapter);
1330         return;
1331 }
1332
1333
1334 /*
1335 **
1336 ** MSIX Interrupt Handlers and Tasklets
1337 **
1338 */
1339
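/*
 * Note: on 82599/X540 class hardware the per-vector interrupt mask is
 * 64 bits wide and split across two 32-bit registers (the *_EX(0)
 * register covers vectors 0-31, *_EX(1) covers vectors 32-63), while
 * the 82598 uses a single 32-bit register; the helpers below write
 * whichever half the vector falls in.
 */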
1340 static inline void
1341 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
1342 {
1343         struct ixgbe_hw *hw = &adapter->hw;
1344         u64     queue = (u64)1 << vector;
1345         u32     mask;
1346
1347         if (hw->mac.type == ixgbe_mac_82598EB) {
1348                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1349                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
1350         } else {
1351                 mask = (queue & 0xFFFFFFFF);
1352                 if (mask)
1353                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
1354                 mask = (queue >> 32);
1355                 if (mask)
1356                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
1357         }
1358 }
1359
1360 static inline void
1361 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1362 {
1363         struct ixgbe_hw *hw = &adapter->hw;
1364         u64     queue = (u64)1 << vector;
1365         u32     mask;
1366
1367         if (hw->mac.type == ixgbe_mac_82598EB) {
1368                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1369                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1370         } else {
1371                 mask = (queue & 0xFFFFFFFF);
1372                 if (mask)
1373                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1374                 mask = (queue >> 32);
1375                 if (mask)
1376                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1377         }
1378 }
1379
1380 static inline void
1381 ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
1382 {
1383         u32 mask;
1384
1385         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
1386                 mask = (IXGBE_EIMS_RTX_QUEUE & queues);
1387                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
1388         } else {
1389                 mask = (queues & 0xFFFFFFFF);
1390                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
1391                 mask = (queues >> 32);
1392                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
1393         }
1394 }
1395
1396
1397 static void
1398 ixgbe_handle_que(void *context, int pending)
1399 {
1400         struct ix_queue *que = context;
1401         struct adapter  *adapter = que->adapter;
1402         struct tx_ring  *txr = que->txr;
1403         struct ifnet    *ifp = adapter->ifp;
1404         bool            more;
1405
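             /*
             ** Taskqueue half of the queue interrupt: clean RX and TX,
             ** restart the transmit path, and only re-enable the MSIX
             ** vector once no RX work remains.
             */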
1406         if (ifp->if_flags & IFF_RUNNING) {
1407                 more = ixgbe_rxeof(que, adapter->rx_process_limit);
1408                 IXGBE_TX_LOCK(txr);
1409                 ixgbe_txeof(txr);
1410 #if 0 /*__FreeBSD_version >= 800000*/
1411                 if (!drbr_empty(ifp, txr->br))
1412                         ixgbe_mq_start_locked(ifp, txr, NULL);
1413 #else
1414                 if (!ifq_is_empty(&ifp->if_snd))
1415                         ixgbe_start_locked(txr, ifp);
1416 #endif
1417                 IXGBE_TX_UNLOCK(txr);
1418                 if (more) {
1419                         taskqueue_enqueue(que->tq, &que->que_task);
1420                         return;
1421                 }
1422         }
1423
1424         /* Reenable this interrupt */
1425         ixgbe_enable_queue(adapter, que->msix);
1426         return;
1427 }
1428
1429
1430 /*********************************************************************
1431  *
1432  *  Legacy Interrupt Service routine
1433  *
1434  **********************************************************************/
1435
1436 static void
1437 ixgbe_legacy_irq(void *arg)
1438 {
1439         struct ix_queue *que = arg;
1440         struct adapter  *adapter = que->adapter;
1441         struct ixgbe_hw *hw = &adapter->hw;
1442         struct          tx_ring *txr = adapter->tx_rings;
1443         bool            more_tx, more_rx;
1444         u32             reg_eicr, loop = MAX_LOOP;
1445
1446
1447         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1448
1449         ++que->irqs;
1450         if (reg_eicr == 0) {
1451                 ixgbe_enable_intr(adapter);
1452                 return;
1453         }
1454
1455         more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
1456
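             /*
             ** Bound TX cleanup to MAX_LOOP passes; any leftover work
             ** is deferred to the que_task enqueued below.
             */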
1457         IXGBE_TX_LOCK(txr);
1458         do {
1459                 more_tx = ixgbe_txeof(txr);
1460         } while (loop-- && more_tx);
1461         IXGBE_TX_UNLOCK(txr);
1462
1463         if (more_rx || more_tx)
1464                 taskqueue_enqueue(que->tq, &que->que_task);
1465
1466         /* Check for fan failure */
1467         if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1468             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1469                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1470                     "REPLACE IMMEDIATELY!!\n");
1471                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1472         }
1473
1474         /* Link status change */
1475         if (reg_eicr & IXGBE_EICR_LSC)
1476                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1477
1478         ixgbe_enable_intr(adapter);
1479         return;
1480 }
1481
1482
1483 /*********************************************************************
1484  *
1485  *  MSIX Queue Interrupt Service routine
1486  *
1487  **********************************************************************/
1488 void
1489 ixgbe_msix_que(void *arg)
1490 {
1491         struct ix_queue *que = arg;
1492         struct adapter  *adapter = que->adapter;
1493         struct tx_ring  *txr = que->txr;
1494         struct rx_ring  *rxr = que->rxr;
1495         bool            more_tx, more_rx;
1496         u32             newitr = 0;
1497
1498         ixgbe_disable_queue(adapter, que->msix);
1499         ++que->irqs;
1500
1501         more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
1502
1503         IXGBE_TX_LOCK(txr);
1504         more_tx = ixgbe_txeof(txr);
1505         /*
1506         ** Make certain that if the stack 
1507         ** has anything queued the task gets
1508         ** scheduled to handle it.
1509         */
1510 #if 0
1511 #if __FreeBSD_version < 800000
1512         if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd))
1513 #else
1514         if (!drbr_empty(adapter->ifp, txr->br))
1515 #endif
1516 #endif
1517         if (!ifq_is_empty(&adapter->ifp->if_snd))
1518                 more_tx = 1;
1519         IXGBE_TX_UNLOCK(txr);
1520
1521         /* Do AIM now? */
1522
1523         if (ixgbe_enable_aim == FALSE)
1524                 goto no_calc;
1525         /*
1526         ** Do Adaptive Interrupt Moderation:
1527         **  - Write out last calculated setting
1528         **  - Calculate based on average size over
1529         **    the last interval.
1530         */
1531         if (que->eitr_setting)
1532                 IXGBE_WRITE_REG(&adapter->hw,
1533                     IXGBE_EITR(que->msix), que->eitr_setting);
1534  
1535         que->eitr_setting = 0;
1536
1537         /* Idle, do nothing */
1538         if ((txr->bytes == 0) && (rxr->bytes == 0))
1539                 goto no_calc;
1540                                 
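             /*
             ** newitr approximates the average bytes per packet seen
             ** since the last interrupt; it is scaled below and written
             ** back as the EITR interval on the next pass.
             */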
1541         if ((txr->bytes) && (txr->packets))
1542                 newitr = txr->bytes/txr->packets;
1543         if ((rxr->bytes) && (rxr->packets))
1544                 newitr = max(newitr,
1545                     (rxr->bytes / rxr->packets));
1546         newitr += 24; /* account for hardware frame, crc */
1547
1548         /* set an upper boundary */
1549         newitr = min(newitr, 3000);
1550
1551         /* Be nice to the mid range */
1552         if ((newitr > 300) && (newitr < 1200))
1553                 newitr = (newitr / 3);
1554         else
1555                 newitr = (newitr / 2);
1556
1557         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1558                 newitr |= newitr << 16;
1559         else
1560                 newitr |= IXGBE_EITR_CNT_WDIS;
1561                  
1562         /* save for next interrupt */
1563         que->eitr_setting = newitr;
1564
1565         /* Reset state */
1566         txr->bytes = 0;
1567         txr->packets = 0;
1568         rxr->bytes = 0;
1569         rxr->packets = 0;
1570
1571 no_calc:
1572         if (more_tx || more_rx)
1573                 taskqueue_enqueue(que->tq, &que->que_task);
1574         else /* Reenable this interrupt */
1575                 ixgbe_enable_queue(adapter, que->msix);
1576         return;
1577 }
1578
1579
1580 static void
1581 ixgbe_msix_link(void *arg)
1582 {
1583         struct adapter  *adapter = arg;
1584         struct ixgbe_hw *hw = &adapter->hw;
1585         u32             reg_eicr;
1586
1587         ++adapter->link_irq;
1588
1589         /* First get the cause */
1590         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1591         /* Clear interrupt with write */
1592         IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1593
1594         /* Link status change */
1595         if (reg_eicr & IXGBE_EICR_LSC)
1596                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1597
1598         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1599 #ifdef IXGBE_FDIR
1600                 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1601                         /* This is probably overkill :) */
1602                         if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1603                                 return;
1604                         /* Disable the interrupt */
1605                         IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1606                         taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
1607                 } else
1608 #endif
1609                 if (reg_eicr & IXGBE_EICR_ECC) {
1610                         device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1611                             "Please Reboot!!\n");
1612                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1613                 } else
1614
1615                 if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1616                         /* Clear the interrupt */
1617                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1618                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
1619                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1620                         /* Clear the interrupt */
1621                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1622                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
1623                 }
1624         } 
1625
1626         /* Check for fan failure */
1627         if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1628             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1629                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1630                     "REPLACE IMMEDIATELY!!\n");
1631                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1632         }
1633
1634         /* Check for over temp condition */
1635         if ((hw->mac.type == ixgbe_mac_X540) &&
1636             (reg_eicr & IXGBE_EICR_GPI_SDP0)) {
1637                 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1638                     "PHY IS SHUT DOWN!!\n");
1639                 device_printf(adapter->dev, "System shutdown required\n");
1640                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0);
1641         }
1642
1643         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1644         return;
1645 }
1646
1647 /*********************************************************************
1648  *
1649  *  Media Ioctl callback
1650  *
1651  *  This routine is called whenever the user queries the status of
1652  *  the interface using ifconfig.
1653  *
1654  **********************************************************************/
1655 static void
1656 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1657 {
1658         struct adapter *adapter = ifp->if_softc;
1659
1660         INIT_DEBUGOUT("ixgbe_media_status: begin");
1661         IXGBE_CORE_LOCK(adapter);
1662         ixgbe_update_link_status(adapter);
1663
1664         ifmr->ifm_status = IFM_AVALID;
1665         ifmr->ifm_active = IFM_ETHER;
1666
1667         if (!adapter->link_active) {
1668                 IXGBE_CORE_UNLOCK(adapter);
1669                 return;
1670         }
1671
1672         ifmr->ifm_status |= IFM_ACTIVE;
1673
1674         switch (adapter->link_speed) {
1675                 case IXGBE_LINK_SPEED_100_FULL:
1676                         ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1677                         break;
1678                 case IXGBE_LINK_SPEED_1GB_FULL:
1679                         ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1680                         break;
1681                 case IXGBE_LINK_SPEED_10GB_FULL:
1682                         ifmr->ifm_active |= adapter->optics | IFM_FDX;
1683                         break;
1684         }
1685
1686         IXGBE_CORE_UNLOCK(adapter);
1687
1688         return;
1689 }
1690
1691 /*********************************************************************
1692  *
1693  *  Media Ioctl callback
1694  *
1695  *  This routine is called when the user changes speed/duplex using
1696  *  media/mediaopt option with ifconfig.
1697  *
1698  **********************************************************************/
1699 static int
1700 ixgbe_media_change(struct ifnet * ifp)
1701 {
1702         struct adapter *adapter = ifp->if_softc;
1703         struct ifmedia *ifm = &adapter->media;
1704
1705         INIT_DEBUGOUT("ixgbe_media_change: begin");
1706
1707         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1708                 return (EINVAL);
1709
1710         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1711         case IFM_AUTO:
1712                 adapter->hw.phy.autoneg_advertised =
1713                     IXGBE_LINK_SPEED_100_FULL |
1714                     IXGBE_LINK_SPEED_1GB_FULL |
1715                     IXGBE_LINK_SPEED_10GB_FULL;
1716                 break;
1717         default:
1718                 device_printf(adapter->dev, "Only auto media type\n");
1719                 return (EINVAL);
1720         }
1721
1722         return (0);
1723 }
1724
1725 /*********************************************************************
1726  *
1727  *  This routine maps the mbufs to tx descriptors, allowing the
1728  *  TX engine to transmit the packets. 
1729  *      - return 0 on success, positive on failure
1730  *
1731  **********************************************************************/
1732
1733 static int
1734 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1735 {
1736         struct adapter  *adapter = txr->adapter;
1737         u32             olinfo_status = 0, cmd_type_len;
1738         u32             paylen = 0;
1739         int             i, j, error, nsegs, maxsegs;
1740         int             first, last = 0;
1741         struct mbuf     *m_head;
1742         bus_dma_segment_t segs[adapter->num_segs];
1743         bus_dmamap_t    map;
1744         struct ixgbe_tx_buf *txbuf;
1745         union ixgbe_adv_tx_desc *txd = NULL;
1746
1747         m_head = *m_headp;
1748
1749         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1750                 error = ixgbe_tso_pullup(txr, m_headp);
1751                 if (error)
1752                         return error;
1753                 m_head = *m_headp;
1754         }
1755
1756         /* Basic descriptor defines */
1757         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1758             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1759
1760         if (m_head->m_flags & M_VLANTAG)
1761                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1762
1763         /*
1764          * Important to capture the first descriptor
1765          * used because it will contain the index of
1766          * the one we tell the hardware to report back
1767          */
1768         first = txr->next_avail_desc;
1769         txbuf = &txr->tx_buffers[first];
1770         map = txbuf->map;
1771
1772         /*
1773          * Map the packet for DMA.
1774          */
1775         maxsegs = txr->tx_avail - IXGBE_TX_RESERVED;
1776         if (maxsegs > adapter->num_segs)
1777                 maxsegs = adapter->num_segs;
1778
1779         error = bus_dmamap_load_mbuf_defrag(txr->txtag, map, m_headp,
1780             segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1781         if (error) {
1782                 if (error == ENOBUFS)
1783                         adapter->mbuf_defrag_failed++;
1784                 else
1785                         adapter->no_tx_dma_setup++;
1786
1787                 m_freem(*m_headp);
1788                 *m_headp = NULL;
1789                 return (error);
1790         }
1791
1792         /* Make certain there are enough descriptors */
1793         if (nsegs > txr->tx_avail - 2) {
1794                 txr->no_desc_avail++;
1795                 error = ENOBUFS;
1796                 goto xmit_fail;
1797         }
1798         m_head = *m_headp;
1799
1800         /*
1801         ** Set up the appropriate offload context
1802         ** this becomes the first descriptor of 
1803         ** a packet.
1804         */
1805         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1806                 if (ixgbe_tso_setup(txr, m_head, &paylen, &olinfo_status)) {
1807                         cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1808                         olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1809                         olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1810                         ++adapter->tso_tx;
1811                 } else
1812                         return (ENXIO);
1813         } else if (ixgbe_tx_ctx_setup(txr, m_head))
1814                 olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1815
1816 #ifdef IXGBE_IEEE1588
1817         /* This is changing soon to an mtag detection */
1818         if (we detect this mbuf has a TSTAMP mtag)
1819                 cmd_type_len |= IXGBE_ADVTXD_MAC_TSTAMP;
1820 #endif
1821
1822 #ifdef IXGBE_FDIR
1823         /* Do the flow director magic */
1824         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
1825                 ++txr->atr_count;
1826                 if (txr->atr_count >= atr_sample_rate) {
1827                         ixgbe_atr(txr, m_head);
1828                         txr->atr_count = 0;
1829                 }
1830         }
1831 #endif
1832         /* Record payload length */
1833         if (paylen == 0)
1834                 olinfo_status |= m_head->m_pkthdr.len <<
1835                     IXGBE_ADVTXD_PAYLEN_SHIFT;
1836
1837         i = txr->next_avail_desc;
1838         for (j = 0; j < nsegs; j++) {
1839                 bus_size_t seglen;
1840                 bus_addr_t segaddr;
1841
1842                 txbuf = &txr->tx_buffers[i];
1843                 txd = &txr->tx_base[i];
1844                 seglen = segs[j].ds_len;
1845                 segaddr = htole64(segs[j].ds_addr);
1846
1847                 txd->read.buffer_addr = segaddr;
1848                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
1849             cmd_type_len | seglen);
1850                 txd->read.olinfo_status = htole32(olinfo_status);
1851                 last = i; /* descriptor that will get completion IRQ */
1852
1853                 if (++i == adapter->num_tx_desc)
1854                         i = 0;
1855
1856                 txbuf->m_head = NULL;
1857                 txbuf->eop_index = -1;
1858         }
1859
1860         txd->read.cmd_type_len |=
1861             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
1862         txr->tx_avail -= nsegs;
1863         txr->next_avail_desc = i;
1864
1865         txbuf->m_head = m_head;
1866         /* Swap the dma map between the first and last descriptor */
1867         txr->tx_buffers[first].map = txbuf->map;
1868         txbuf->map = map;
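             /*
             ** The loaded map now travels with the EOP buffer holding
             ** the mbuf, so txeof can unload the map and free the mbuf
             ** together when this descriptor completes.
             */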
1869         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1870
1871         /* Set the index of the descriptor that will be marked done */
1872         txbuf = &txr->tx_buffers[first];
1873         txbuf->eop_index = last;
1874
1875         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1876             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1877         /*
1878          * Advance the Transmit Descriptor Tail (TDT); this tells the
1879          * hardware that this frame is available to transmit.
1880          */
1881         ++txr->total_packets;
1882         IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
1883
1884         return (0);
1885
1886 xmit_fail:
1887         bus_dmamap_unload(txr->txtag, txbuf->map);
1888         return (error);
1889
1890 }
1891
1892 static void
1893 ixgbe_set_promisc(struct adapter *adapter)
1894 {
1895         u_int32_t       reg_rctl;
1896         struct ifnet   *ifp = adapter->ifp;
1897
1898         reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1899         reg_rctl &= (~IXGBE_FCTRL_UPE);
1900         reg_rctl &= (~IXGBE_FCTRL_MPE);
1901         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1902
1903         if (ifp->if_flags & IFF_PROMISC) {
1904                 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1905                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1906         } else if (ifp->if_flags & IFF_ALLMULTI) {
1907                 reg_rctl |= IXGBE_FCTRL_MPE;
1908                 reg_rctl &= ~IXGBE_FCTRL_UPE;
1909                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1910         }
1911         return;
1912 }
1913
1914
1915 /*********************************************************************
1916  *  Multicast Update
1917  *
1918  *  This routine is called whenever multicast address list is updated.
1919  *
1920  **********************************************************************/
1921 #define IXGBE_RAR_ENTRIES 16
1922
1923 static void
1924 ixgbe_set_multi(struct adapter *adapter)
1925 {
1926         u32     fctrl;
1927         u8      *mta;
1928         u8      *update_ptr;
1929         struct  ifmultiaddr *ifma;
1930         int     mcnt = 0;
1931         struct ifnet   *ifp = adapter->ifp;
1932
1933         IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
1934
1935         mta = adapter->mta;
1936         bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1937             MAX_NUM_MULTICAST_ADDRESSES);
1938
1939         fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1940         fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1941         if (ifp->if_flags & IFF_PROMISC)
1942                 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1943         else if (ifp->if_flags & IFF_ALLMULTI) {
1944                 fctrl |= IXGBE_FCTRL_MPE;
1945                 fctrl &= ~IXGBE_FCTRL_UPE;
1946         } else
1947                 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1948         
1949         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
1950
1951         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1952                 if (ifma->ifma_addr->sa_family != AF_LINK)
1953                         continue;
1954                 bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
1955                     &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1956                     IXGBE_ETH_LENGTH_OF_ADDRESS);
1957                 mcnt++;
1958         }
1959
1960         update_ptr = mta;
1961         ixgbe_update_mc_addr_list(&adapter->hw,
1962             update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
1963
1964         return;
1965 }
1966
1967 /*
1968  * This is an iterator function needed by the multicast shared
1969  * code. It feeds that routine the addresses collected in the
1970  * mta array by ixgbe_set_multi(), one entry at a time.
1971  */
1972 static u8 *
1973 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
1974 {
1975         u8 *addr = *update_ptr;
1976         u8 *newptr;
1977         *vmdq = 0;
1978
1979         newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1980         *update_ptr = newptr;
1981         return addr;
1982 }
1983
1984
1985 /*********************************************************************
1986  *  Timer routine
1987  *
1988  *  This routine checks for link status, updates statistics,
1989  *  and runs the watchdog check.
1990  *
1991  **********************************************************************/
1992
1993 static void
1994 ixgbe_local_timer(void *arg)
1995 {
1996         struct adapter  *adapter = arg;
1997         device_t        dev = adapter->dev;
1998         struct ifnet    *ifp = adapter->ifp;
1999         struct ix_queue *que = adapter->queues;
2000         struct tx_ring  *txr = adapter->tx_rings;
2001         int             hung, busy, paused;
2002
2003         IXGBE_CORE_LOCK(adapter);
2004         hung = busy = paused = 0;
2005
2006         /* Check for pluggable optics */
2007         if (adapter->sfp_probe)
2008                 if (!ixgbe_sfp_probe(adapter))
2009                         goto out; /* Nothing to do */
2010
2011         ixgbe_update_link_status(adapter);
2012         ixgbe_update_stats_counters(adapter);
2013
2014         /*
2015          * If the interface has been paused
2016          * then don't do the watchdog check
2017          */
2018         if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
2019                 paused = 1;
2020
2021         /*
2022         ** Check the TX queues status
2023         **      - central locked handling of OACTIVE
2024         **      - watchdog only if all queues show hung
2025         */          
2026         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2027                 if ((txr->queue_status & IXGBE_QUEUE_HUNG) &&
2028                     (paused == 0))
2029                         ++hung;
2030                 if (txr->queue_status & IXGBE_QUEUE_DEPLETED)
2031                         ++busy;
2032                 if ((txr->queue_status & IXGBE_QUEUE_IDLE) == 0)
2033                         taskqueue_enqueue(que->tq, &que->que_task);
2034         }
2035         /* Only truly watchdog if all queues show hung */
2036         if (hung == adapter->num_queues)
2037                 goto watchdog;
2038         /* Only turn off the stack flow when ALL are depleted */
2039         if (busy == adapter->num_queues)
2040                 ifp->if_flags |= IFF_OACTIVE;
2041         else if ((ifp->if_flags & IFF_OACTIVE) &&
2042             (busy < adapter->num_queues))
2043                 ifp->if_flags &= ~IFF_OACTIVE;
2044
2045 out:
2046         ixgbe_rearm_queues(adapter, adapter->que_mask);
2047         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2048         IXGBE_CORE_UNLOCK(adapter);
2049         return;
2050
2051 watchdog:
2052         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2053         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2054             IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2055             IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2056         device_printf(dev,"TX(%d) desc avail = %d, "
2057             "Next TX to Clean = %d\n",
2058             txr->me, txr->tx_avail, txr->next_to_clean);
2059         adapter->ifp->if_flags &= ~IFF_RUNNING;
2060         adapter->watchdog_events++;
2061         ixgbe_init_locked(adapter);
2062
2063         IXGBE_CORE_UNLOCK(adapter);
2064 }
2065
2066 /*
2067 ** Note: this routine updates the OS on the link state;
2068 **      the real check of the hardware only happens with
2069 **      a link interrupt.
2070 */
2071 static void
2072 ixgbe_update_link_status(struct adapter *adapter)
2073 {
2074         struct ifnet    *ifp = adapter->ifp;
2075         struct tx_ring *txr = adapter->tx_rings;
2076         device_t dev = adapter->dev;
2077
2078
2079         if (adapter->link_up){ 
2080                 if (adapter->link_active == FALSE) {
2081                         if (bootverbose)
2082                                 device_printf(dev,"Link is up %d Gbps %s \n",
2083                                     ((adapter->link_speed == 128)? 10:1),
2084                                     "Full Duplex");
2085                         adapter->link_active = TRUE;
2086                         /* Update any Flow Control changes */
2087                         ixgbe_fc_enable(&adapter->hw);
2088                         ifp->if_link_state = LINK_STATE_UP;
2089                         if_link_state_change(ifp);
2090                 }
2091         } else { /* Link down */
2092                 if (adapter->link_active == TRUE) {
2093                         if (bootverbose)
2094                                 device_printf(dev,"Link is Down\n");
2095                         ifp->if_link_state = LINK_STATE_DOWN;
2096                         if_link_state_change(ifp);
2097                         adapter->link_active = FALSE;
2098                         for (int i = 0; i < adapter->num_queues;
2099                             i++, txr++)
2100                                 txr->queue_status = IXGBE_QUEUE_IDLE;
2101                 }
2102         }
2103
2104         return;
2105 }
2106
2107
2108 /*********************************************************************
2109  *
2110  *  This routine disables all traffic on the adapter by issuing a
2111  *  global reset on the MAC and deallocates TX/RX buffers.
2112  *
2113  **********************************************************************/
2114
2115 static void
2116 ixgbe_stop(void *arg)
2117 {
2118         struct ifnet   *ifp;
2119         struct adapter *adapter = arg;
2120         struct ixgbe_hw *hw = &adapter->hw;
2121         ifp = adapter->ifp;
2122
2123         KKASSERT(lockstatus(&adapter->core_lock, curthread) != 0);
2124
2125         INIT_DEBUGOUT("ixgbe_stop: begin\n");
2126         ixgbe_disable_intr(adapter);
2127         callout_stop(&adapter->timer);
2128
2129         /* Let the stack know...*/
2130         ifp->if_flags &= ~IFF_RUNNING;
2131         ifp->if_flags |= IFF_OACTIVE;
2132
2133         ixgbe_reset_hw(hw);
2134         hw->adapter_stopped = FALSE;
2135         ixgbe_stop_adapter(hw);
2136         /* Turn off the laser */
2137         if (hw->phy.multispeed_fiber)
2138                 ixgbe_disable_tx_laser(hw);
2139
2140         /* reprogram the RAR[0] in case user changed it. */
2141         ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2142
2143         return;
2144 }
2145
2146
2147 /*********************************************************************
2148  *
2149  *  Determine hardware revision.
2150  *
2151  **********************************************************************/
2152 static void
2153 ixgbe_identify_hardware(struct adapter *adapter)
2154 {
2155         device_t        dev = adapter->dev;
2156         struct ixgbe_hw *hw = &adapter->hw;
2157
2158         /* Save off the information about this board */
2159         hw->vendor_id = pci_get_vendor(dev);
2160         hw->device_id = pci_get_device(dev);
2161         hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
2162         hw->subsystem_vendor_id =
2163             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2164         hw->subsystem_device_id =
2165             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2166
2167         /* We need this here to set the num_segs below */
2168         ixgbe_set_mac_type(hw);
2169
2170         /* Pick up the 82599 and VF settings */
2171         if (hw->mac.type != ixgbe_mac_82598EB) {
2172                 hw->phy.smart_speed = ixgbe_smart_speed;
2173                 adapter->num_segs = IXGBE_82599_SCATTER;
2174         } else
2175                 adapter->num_segs = IXGBE_82598_SCATTER;
2176
2177         return;
2178 }
2179
2180 /*********************************************************************
2181  *
2182  *  Determine optic type
2183  *
2184  **********************************************************************/
2185 static void
2186 ixgbe_setup_optics(struct adapter *adapter)
2187 {
2188         struct ixgbe_hw *hw = &adapter->hw;
2189         int             layer;
2190         
2191         layer = ixgbe_get_supported_physical_layer(hw);
2192
2193         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2194                 adapter->optics = IFM_10G_T;
2195                 return;
2196         }
2197
2198         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2199                 adapter->optics = IFM_1000_T;
2200                 return;
2201         }
2202
2203         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2204             IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2205                 adapter->optics = IFM_10G_LR;
2206                 return;
2207         }
2208
2209         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2210                 adapter->optics = IFM_10G_SR;
2211                 return;
2212         }
2213
2214         if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2215                 adapter->optics = IFM_10G_TWINAX;
2216                 return;
2217         }
2218
2219         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2220             IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2221                 adapter->optics = IFM_10G_CX4;
2222                 return;
2223         }
2224
2225         /* If we get here just set the default */
2226         adapter->optics = IFM_ETHER | IFM_AUTO;
2227         return;
2228 }
2229
2230 /*********************************************************************
2231  *
2232  *  Setup the Legacy or MSI Interrupt handler
2233  *
2234  **********************************************************************/
2235 static int
2236 ixgbe_allocate_legacy(struct adapter *adapter)
2237 {
2238         device_t dev = adapter->dev;
2239         struct          ix_queue *que = adapter->queues;
2240         int error, rid = 0;
2241         unsigned int intr_flags;
2242
2243         /* MSI RID at 1 */
2244         if (adapter->msix == 1)
2245                 rid = 1;
2246
2247         /* Try allocating a MSI interrupt first */
2248         adapter->intr_type = pci_alloc_1intr(dev, ixgbe_msi_enable,
2249                 &rid, &intr_flags);
2250
2251         /* We allocate a single interrupt resource */
2252         adapter->res = bus_alloc_resource_any(dev,
2253             SYS_RES_IRQ, &rid, intr_flags);
2254         if (adapter->res == NULL) {
2255                 device_printf(dev, "Unable to allocate bus resource: "
2256                     "interrupt\n");
2257                 return (ENXIO);
2258         }
2259
2260         /*
2261          * Try allocating a fast interrupt and the associated deferred
2262          * processing contexts.
2263          */
2264         TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2265         que->tq = taskqueue_create("ixgbe_que", M_NOWAIT,
2266             taskqueue_thread_enqueue, &que->tq);
2267         taskqueue_start_threads(&que->tq, 1, PI_NET, -1, "%s ixq",
2268             device_get_nameunit(adapter->dev));
2269
2270         /* Tasklets for Link, SFP and Multispeed Fiber */
2271         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2272         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2273         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2274 #ifdef IXGBE_FDIR
2275         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2276 #endif
2277         adapter->tq = taskqueue_create("ixgbe_link", M_NOWAIT,
2278             taskqueue_thread_enqueue, &adapter->tq);
2279         taskqueue_start_threads(&adapter->tq, 1, PI_NET, -1, "%s linkq",
2280             device_get_nameunit(adapter->dev));
2281
2282         if ((error = bus_setup_intr(dev, adapter->res, INTR_MPSAFE,
2283             ixgbe_legacy_irq, que, &adapter->tag, &adapter->serializer)) != 0) {
2284                 device_printf(dev, "Failed to register fast interrupt "
2285                     "handler: %d\n", error);
2286                 taskqueue_free(que->tq);
2287                 taskqueue_free(adapter->tq);
2288                 que->tq = NULL;
2289                 adapter->tq = NULL;
2290                 return (error);
2291         }
2292         /* For simplicity in the handlers */
2293         adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2294
2295         return (0);
2296 }
2297
2298
2299 /*********************************************************************
2300  *
2301  *  Setup MSIX Interrupt resources and handlers 
2302  *
2303  **********************************************************************/
2304 static int
2305 ixgbe_allocate_msix(struct adapter *adapter)
2306 {
2307         device_t        dev = adapter->dev;
2308         struct          ix_queue *que = adapter->queues;
2309         int             error, rid, vector = 0;
2310         char            desc[16];
2311
2312         error = pci_setup_msix(dev);
2313         if (error) {
2314                 device_printf(dev, "MSI-X setup failed\n");
2315                 return (error);
2316         }
2317
2318         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2319                 rid = vector + 1;
2320
2321                 /*
2322                 ** Bind the msix vector, and thus the
2323                 ** ring to the corresponding cpu.
2324                 */
2325                 error = pci_alloc_msix_vector(dev, vector, &rid, i);
2326                 if (error) {
2327                         device_printf(dev, "pci_alloc_msix_vector failed\n");
2328                         return (error);
2329                 }
2330
2331                 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2332                     RF_SHAREABLE | RF_ACTIVE);
2333                 if (que->res == NULL) {
2334                         device_printf(dev,"Unable to allocate"
2335                             " bus resource: que interrupt [%d]\n", vector);
2336                         return (ENXIO);
2337                 }
2338                 /* Set the handler function */
2339                 ksnprintf(desc, sizeof(desc), "%s que %d",
2340                     device_get_nameunit(dev), i);
2341                 error = bus_setup_intr_descr(dev, que->res, INTR_MPSAFE,
2342                     ixgbe_msix_que, que, &que->tag, &que->serializer, desc);
2343                 if (error) {
2344                         que->res = NULL;
2345                         device_printf(dev, "Failed to register QUE handler");
2346                         return (error);
2347                 }
2348                 que->msix = vector;
2349                 adapter->que_mask |= (u64)1 << que->msix;
2350
2351                 TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2352                 que->tq = taskqueue_create("ixgbe_que", M_NOWAIT,
2353                     taskqueue_thread_enqueue, &que->tq);
2354                 taskqueue_start_threads(&que->tq, 1, PI_NET, -1, "%s que",
2355                     device_get_nameunit(adapter->dev));
2356         }
2357
2358         /* and Link, bind vector to cpu #0 */
2359         rid = vector + 1;
2360         error = pci_alloc_msix_vector(dev, vector, &rid, 0);
2361         if (error) {
2362                 device_printf(dev, "pci_alloc_msix_vector failed\n");
2363                 return (error);
2364         }
2365         adapter->res = bus_alloc_resource_any(dev,
2366             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2367         if (!adapter->res) {
2368                 device_printf(dev,"Unable to allocate"
2369                     " bus resource: Link interrupt [%d]\n", rid);
2370                 return (ENXIO);
2371         }
2372         /* Set the link handler function */
2373         error = bus_setup_intr_descr(dev, adapter->res, INTR_MPSAFE,
2374             ixgbe_msix_link, adapter, &adapter->tag, &adapter->serializer,
2375             "link");
2376         if (error) {
2377                 adapter->res = NULL;
2378                 device_printf(dev, "Failed to register LINK handler");
2379                 return (error);
2380         }
2381         pci_enable_msix(dev);
2382
2383         adapter->linkvec = vector;
2384         /* Tasklets for Link, SFP and Multispeed Fiber */
2385         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2386         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2387         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2388 #ifdef IXGBE_FDIR
2389         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2390 #endif
2391         adapter->tq = taskqueue_create("ixgbe_link", M_NOWAIT,
2392             taskqueue_thread_enqueue, &adapter->tq);
2393         taskqueue_start_threads(&adapter->tq, 1, PI_NET, -1, "%s linkq",
2394             device_get_nameunit(adapter->dev));
2395
2396         return (0);
2397 }
2398
2399 /*
2400  * Set up either MSI/X or MSI
2401  */
2402 static int
2403 ixgbe_setup_msix(struct adapter *adapter)
2404 {
2405         device_t dev = adapter->dev;
2406         int rid, want, queues, msgs;
2407
2408         /* Override by tuneable */
2409         if (ixgbe_enable_msix == 0)
2410                 goto msi;
2411
2412         /* First try MSI/X */
2413         rid = PCIR_BAR(MSIX_82598_BAR);
2414         adapter->msix_mem = bus_alloc_resource_any(dev,
2415             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2416         if (!adapter->msix_mem) {
2417                 rid += 4;       /* 82599 maps in higher BAR */
2418                 adapter->msix_mem = bus_alloc_resource_any(dev,
2419                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2420         }
2421         if (!adapter->msix_mem) {
2422                 /* May not be enabled */
2423                 device_printf(adapter->dev,
2424                     "Unable to map MSIX table\n");
2425                 goto msi;
2426         }
2427
2428         msgs = pci_msix_count(dev); 
2429         if (msgs == 0) { /* system has msix disabled */
2430                 bus_release_resource(dev, SYS_RES_MEMORY,
2431                     rid, adapter->msix_mem);
2432                 adapter->msix_mem = NULL;
2433                 goto msi;
2434         }
2435
2436         /* Figure out a reasonable auto config value */
2437         queues = (ncpus > (msgs-1)) ? (msgs-1) : ncpus;
2438
2439         if (ixgbe_num_queues != 0)
2440                 queues = ixgbe_num_queues;
2441         /* Set max queues to 8 when autoconfiguring */
2442         else if ((ixgbe_num_queues == 0) && (queues > 8))
2443                 queues = 8;
2444
2445         /*
2446         ** Want one vector (RX/TX pair) per queue
2447         ** plus an additional for Link.
2448         */
2449         want = queues + 1;
2450         if (msgs >= want)
2451                 msgs = want;
2452         else {
2453                 device_printf(adapter->dev,
2454                     "MSIX Configuration Problem, "
2455                     "%d vectors but %d queues wanted!\n",
2456                     msgs, want);
2457                 return (0); /* Will go to Legacy setup */
2458         }
2459         if (msgs) {
2460                 device_printf(adapter->dev,
2461                     "Using MSIX interrupts with %d vectors\n", msgs);
2462                 adapter->num_queues = queues;
2463                 return (msgs);
2464         }
2465 msi:
2466         msgs = pci_msi_count(dev);
2467         return (msgs);
2468 }
2469
2470
2471 static int
2472 ixgbe_allocate_pci_resources(struct adapter *adapter)
2473 {
2474         int             rid;
2475         device_t        dev = adapter->dev;
2476
2477         rid = PCIR_BAR(0);
2478         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2479             &rid, RF_ACTIVE);
2480
2481         if (!(adapter->pci_mem)) {
2482                 device_printf(dev,"Unable to allocate bus resource: memory\n");
2483                 return (ENXIO);
2484         }
2485
2486         adapter->osdep.mem_bus_space_tag =
2487                 rman_get_bustag(adapter->pci_mem);
2488         adapter->osdep.mem_bus_space_handle =
2489                 rman_get_bushandle(adapter->pci_mem);
2490         adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
2491
2492         /* Legacy defaults */
2493         adapter->num_queues = 1;
2494         adapter->hw.back = &adapter->osdep;
2495
2496         /*
2497         ** Now set up MSI or MSI/X; this should
2498         ** return the number of supported
2499         ** vectors (will be 1 for MSI).
2500         */
2501         adapter->msix = ixgbe_setup_msix(adapter);
2502         return (0);
2503 }
2504
2505 static void
2506 ixgbe_free_pci_resources(struct adapter * adapter)
2507 {
2508         struct          ix_queue *que = adapter->queues;
2509         device_t        dev = adapter->dev;
2510         int             rid, memrid;
2511
2512         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2513                 memrid = PCIR_BAR(MSIX_82598_BAR);
2514         else
2515                 memrid = PCIR_BAR(MSIX_82599_BAR);
2516
2517         /*
2518         ** There is a slight possibility of a failure mode
2519         ** in attach that will result in entering this function
2520         ** before interrupt resources have been initialized, and
2521         ** in that case we do not want to execute the loops below.
2522         ** We can detect this reliably by the state of the adapter
2523         ** res pointer.
2524         */
2525         if (adapter->res == NULL)
2526                 goto mem;
2527
2528         /*
2529         **  Release all msix queue resources:
2530         */
2531         for (int i = 0; i < adapter->num_queues; i++, que++) {
2532                 rid = que->msix + 1;
2533                 if (que->tag != NULL) {
2534                         bus_teardown_intr(dev, que->res, que->tag);
2535                         que->tag = NULL;
2536                 }
2537                 if (que->res != NULL)
2538                         bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2539         }
2540
2541
2542         /* Clean the Legacy or Link interrupt last */
2543         if (adapter->linkvec) /* we are doing MSIX */
2544                 rid = adapter->linkvec + 1;
2545         else
2546                 rid = (adapter->msix != 0) ? 1 : 0;
2547
2548         if (adapter->tag != NULL) {
2549                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2550                 adapter->tag = NULL;
2551         }
2552         if (adapter->res != NULL)
2553                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2554         if (adapter->intr_type == PCI_INTR_TYPE_MSI)
2555                 pci_release_msi(adapter->dev);
2556
2557 mem:
2558         if (adapter->msix)
2559                 pci_release_msi(dev);
2560
2561         if (adapter->msix_mem != NULL)
2562                 bus_release_resource(dev, SYS_RES_MEMORY,
2563                     memrid, adapter->msix_mem);
2564
2565         if (adapter->pci_mem != NULL)
2566                 bus_release_resource(dev, SYS_RES_MEMORY,
2567                     PCIR_BAR(0), adapter->pci_mem);
2568
2569         return;
2570 }
2571
2572 /*********************************************************************
2573  *
2574  *  Setup networking device structure and register an interface.
2575  *
2576  **********************************************************************/
2577 static int
2578 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2579 {
2580         struct ixgbe_hw *hw = &adapter->hw;
2581         struct ifnet   *ifp;
2582
2583         INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2584
2585         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2586         if (ifp == NULL) {
2587                 device_printf(dev, "can not allocate ifnet structure\n");
2588                 return (-1);
2589         }
2590         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2591         ifp->if_baudrate = 1000000000;
2592         ifp->if_init = ixgbe_init;
2593         ifp->if_softc = adapter;
2594         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2595         ifp->if_ioctl = ixgbe_ioctl;
2596         ifp->if_start = ixgbe_start;
2597 #if 0 /* __FreeBSD_version >= 800000 */
2598         ifp->if_transmit = ixgbe_mq_start;
2599         ifp->if_qflush = ixgbe_qflush;
2600 #endif
2601         ifp->if_snd.ifq_maxlen = adapter->num_tx_desc - 2;
2602
2603         ether_ifattach(ifp, adapter->hw.mac.addr, NULL);
2604
2605         adapter->max_frame_size =
2606             ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
2607
2608         /*
2609          * Tell the upper layer(s) we support long frames.
2610          */
2611         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2612
2613         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM;
2614         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2615         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2616 #if 0 /* NET_TSO */
2617                              |  IFCAP_VLAN_HWTSO
2618 #endif
2619                              |  IFCAP_VLAN_MTU;
2620         ifp->if_capenable = ifp->if_capabilities;
2621
2622         /* Don't enable LRO by default */
2623 #if 0 /* NET_LRO */
2624         ifp->if_capabilities |= IFCAP_LRO;
2625 #endif
2626
2627         /*
2628         ** Don't turn this on by default: if vlans are
2629         ** created on another pseudo device (e.g. lagg),
2630         ** vlan events are not passed through and operation
2631         ** breaks, but with HW FILTER off it works. If you
2632         ** use vlans directly on the ixgbe driver you can
2633         ** enable this and get full hardware tag filtering.
2634         */
2635         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2636
2637         /*
2638          * Specify the media types supported by this adapter and register
2639          * callbacks to update media and link information
2640          */
2641         ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2642                      ixgbe_media_status);
2643         ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2644         ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2645         if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2646                 ifmedia_add(&adapter->media,
2647                     IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2648                 ifmedia_add(&adapter->media,
2649                     IFM_ETHER | IFM_1000_T, 0, NULL);
2650         }
2651         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2652         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2653
2654         return (0);
2655 }
2656
2657 static void
2658 ixgbe_config_link(struct adapter *adapter)
2659 {
2660         struct ixgbe_hw *hw = &adapter->hw;
2661         u32     autoneg, err = 0;
2662         bool    sfp, negotiate;
2663
2664         sfp = ixgbe_is_sfp(hw);
2665
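             /*
             ** SFP+ modules are handed to the mod/msf tasklets so module
             ** identification and multispeed setup run in task context;
             ** other media are checked and negotiated directly here.
             */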
2666         if (sfp) { 
2667                 if (hw->phy.multispeed_fiber) {
2668                         hw->mac.ops.setup_sfp(hw);
2669                         ixgbe_enable_tx_laser(hw);
2670                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
2671                 } else
2672                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
2673         } else {
2674                 if (hw->mac.ops.check_link)
2675                         err = ixgbe_check_link(hw, &autoneg,
2676                             &adapter->link_up, FALSE);
2677                 if (err)
2678                         goto out;
2679                 autoneg = hw->phy.autoneg_advertised;
2680                 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2681                         err  = hw->mac.ops.get_link_capabilities(hw,
2682                             &autoneg, &negotiate);
2683                 if (err)
2684                         goto out;
2685                 if (hw->mac.ops.setup_link)
2686                         err = hw->mac.ops.setup_link(hw, autoneg,
2687                             negotiate, adapter->link_up);
2688         }
2689 out:
2690         return;
2691 }
2692
2693 /********************************************************************
2694  * Manage DMA'able memory.
2695  *******************************************************************/
2696 static void
2697 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2698 {
2699         if (error)
2700                 return;
2701         *(bus_addr_t *) arg = segs->ds_addr;
2702         return;
2703 }
2704
2705 static int
2706 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2707                 struct ixgbe_dma_alloc *dma, int mapflags)
2708 {
2709         device_t dev = adapter->dev;
2710         int             r;
2711
2712         r = bus_dma_tag_create(NULL,    /* parent */
2713                                DBA_ALIGN, 0,    /* alignment, bounds */
2714                                BUS_SPACE_MAXADDR,       /* lowaddr */
2715                                BUS_SPACE_MAXADDR,       /* highaddr */
2716                                NULL, NULL,      /* filter, filterarg */
2717                                size,    /* maxsize */
2718                                1,       /* nsegments */
2719                                size,    /* maxsegsize */
2720                                BUS_DMA_ALLOCNOW,        /* flags */
2721                                &dma->dma_tag);
2722         if (r != 0) {
2723                 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2724                        "error %u\n", r);
2725                 goto fail_0;
2726         }
2727         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2728                              BUS_DMA_NOWAIT, &dma->dma_map);
2729         if (r != 0) {
2730                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2731                        "error %u\n", r);
2732                 goto fail_1;
2733         }
2734         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2735                             size,
2736                             ixgbe_dmamap_cb,
2737                             &dma->dma_paddr,
2738                             mapflags | BUS_DMA_NOWAIT);
2739         if (r != 0) {
2740                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2741                        "error %u\n", r);
2742                 goto fail_2;
2743         }
2744         dma->dma_size = size;
2745         return (0);
2746 fail_2:
2747         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2748 fail_1:
2749         bus_dma_tag_destroy(dma->dma_tag);
2750 fail_0:
2751         dma->dma_map = NULL;
2752         dma->dma_tag = NULL;
2753         return (r);
2754 }
2755
2756 static void
2757 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2758 {
2759         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2760             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2761         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2762         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2763         bus_dma_tag_destroy(dma->dma_tag);
2764 }
2765
2766
2767 /*********************************************************************
2768  *
2769  *  Allocate memory for the transmit and receive rings, and then
2770  *  the descriptors associated with each, called only once at attach.
2771  *
2772  **********************************************************************/
2773 static int
2774 ixgbe_allocate_queues(struct adapter *adapter)
2775 {
2776         device_t        dev = adapter->dev;
2777         struct ix_queue *que;
2778         struct tx_ring  *txr;
2779         struct rx_ring  *rxr;
2780         int rsize, tsize, error = IXGBE_SUCCESS;
2781         int txconf = 0, rxconf = 0;
2782
2783         /* First allocate the top level queue structs */
2784         if (!(adapter->queues =
2785             (struct ix_queue *) kmalloc(sizeof(struct ix_queue) *
2786             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2787                 device_printf(dev, "Unable to allocate queue memory\n");
2788                 error = ENOMEM;
2789                 goto fail;
2790         }
2791
2792         /* First allocate the TX ring struct memory */
2793         if (!(adapter->tx_rings =
2794             (struct tx_ring *) kmalloc(sizeof(struct tx_ring) *
2795             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2796                 device_printf(dev, "Unable to allocate TX ring memory\n");
2797                 error = ENOMEM;
2798                 goto tx_fail;
2799         }
2800
2801         /* Next allocate the RX */
2802         if (!(adapter->rx_rings =
2803             (struct rx_ring *) kmalloc(sizeof(struct rx_ring) *
2804             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2805                 device_printf(dev, "Unable to allocate RX ring memory\n");
2806                 error = ENOMEM;
2807                 goto rx_fail;
2808         }
2809
2810         /* For the ring itself */
2811         tsize = roundup2(adapter->num_tx_desc *
2812             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2813
2814         /*
2815          * Now set up the TX queues; txconf is needed to handle the
2816          * possibility that things fail midcourse and we need to
2817          * undo memory gracefully
2818          */ 
2819         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2820                 /* Set up some basics */
2821                 txr = &adapter->tx_rings[i];
2822                 txr->adapter = adapter;
2823                 txr->me = i;
2824
2825                 /* Initialize the TX side lock */
2826                 ksnprintf(txr->lock_name, sizeof(txr->lock_name), "%s:tx(%d)",
2827                     device_get_nameunit(dev), txr->me);
2828                 lockinit(&txr->tx_lock, txr->lock_name, 0, LK_CANRECURSE);
2829
2830                 if (ixgbe_dma_malloc(adapter, tsize,
2831                         &txr->txdma, BUS_DMA_NOWAIT)) {
2832                         device_printf(dev,
2833                             "Unable to allocate TX Descriptor memory\n");
2834                         error = ENOMEM;
2835                         goto err_tx_desc;
2836                 }
2837                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2838                 bzero((void *)txr->tx_base, tsize);
2839
2840                 /* Now allocate transmit buffers for the ring */
2841                 if (ixgbe_allocate_transmit_buffers(txr)) {
2842                         device_printf(dev,
2843                             "Critical Failure setting up transmit buffers\n");
2844                         error = ENOMEM;
2845                         goto err_tx_desc;
2846                 }
2847 #if 0 /* __FreeBSD_version >= 800000 */
2848                 /* Allocate a buf ring */
2849                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2850                     M_WAITOK, &txr->tx_mtx);
2851                 if (txr->br == NULL) {
2852                         device_printf(dev,
2853                             "Critical Failure setting up buf ring\n");
2854                         error = ENOMEM;
2855                         goto err_tx_desc;
2856                 }
2857 #endif
2858         }
2859
2860         /*
2861          * Next the RX queues...
2862          */ 
2863         rsize = roundup2(adapter->num_rx_desc *
2864             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2865         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2866                 rxr = &adapter->rx_rings[i];
2867                 /* Set up some basics */
2868                 rxr->adapter = adapter;
2869                 rxr->me = i;
2870
2871                 /* Initialize the RX side lock */
2872                 ksnprintf(rxr->lock_name, sizeof(rxr->lock_name), "%s:rx(%d)",
2873                     device_get_nameunit(dev), rxr->me);
2874                 lockinit(&rxr->rx_lock, rxr->lock_name, 0, LK_CANRECURSE);
2875
2876                 if (ixgbe_dma_malloc(adapter, rsize,
2877                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2878                         device_printf(dev,
2879                             "Unable to allocate RX Descriptor memory\n");
2880                         error = ENOMEM;
2881                         goto err_rx_desc;
2882                 }
2883                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2884                 bzero((void *)rxr->rx_base, rsize);
2885
2886                 /* Allocate receive buffers for the ring */
2887                 if (ixgbe_allocate_receive_buffers(rxr)) {
2888                         device_printf(dev,
2889                             "Critical Failure setting up receive buffers\n");
2890                         error = ENOMEM;
2891                         goto err_rx_desc;
2892                 }
2893         }
2894
2895         /*
2896         ** Finally set up the queue holding structs
2897         */
2898         for (int i = 0; i < adapter->num_queues; i++) {
2899                 que = &adapter->queues[i];
2900                 que->adapter = adapter;
2901                 que->txr = &adapter->tx_rings[i];
2902                 que->rxr = &adapter->rx_rings[i];
2903         }
2904
2905         return (0);
2906
2907 err_rx_desc:
2908         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2909                 ixgbe_dma_free(adapter, &rxr->rxdma);
2910 err_tx_desc:
2911         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2912                 ixgbe_dma_free(adapter, &txr->txdma);
2913         kfree(adapter->rx_rings, M_DEVBUF);
2914 rx_fail:
2915         kfree(adapter->tx_rings, M_DEVBUF);
2916 tx_fail:
2917         kfree(adapter->queues, M_DEVBUF);
2918 fail:
2919         return (error);
2920 }
2921
2922 /*********************************************************************
2923  *
2924  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2925  *  the information needed to transmit a packet on the wire. This is
2926  *  called only once at attach; setup is done on every reset.
2927  *
2928  **********************************************************************/
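/*
 * (Each tx_buffer below pairs the outgoing mbuf chain (m_head), its
 * bus_dma map (map) and the index of the packet's EOP descriptor
 * (eop_index); ixgbe_txeof() later uses eop_index to find where each
 * packet ends when cleaning the ring.)
 */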
2929 static int
2930 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
2931 {
2932         struct adapter *adapter = txr->adapter;
2933         device_t dev = adapter->dev;
2934         struct ixgbe_tx_buf *txbuf;
2935         int error, i;
2936
2937         /*
2938          * Setup DMA descriptor areas.
2939          */
2940         if ((error = bus_dma_tag_create(
2941                                NULL,    /* parent */
2942                                1, 0,            /* alignment, bounds */
2943                                BUS_SPACE_MAXADDR,       /* lowaddr */
2944                                BUS_SPACE_MAXADDR,       /* highaddr */
2945                                NULL, NULL,              /* filter, filterarg */
2946                                IXGBE_TSO_SIZE,          /* maxsize */
2947                                adapter->num_segs,       /* nsegments */
2948                                PAGE_SIZE,               /* maxsegsize */
2949                                0,                       /* flags */
2950                                &txr->txtag))) {
2951                 device_printf(dev, "Unable to allocate TX DMA tag\n");
2952                 goto fail;
2953         }
2954
2955         if (!(txr->tx_buffers =
2956             (struct ixgbe_tx_buf *) kmalloc(sizeof(struct ixgbe_tx_buf) *
2957             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2958                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2959                 error = ENOMEM;
2960                 goto fail;
2961         }
2962
2963         /* Create the descriptor buffer dma maps */
2964         txbuf = txr->tx_buffers;
2965         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2966                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2967                 if (error != 0) {
2968                         device_printf(dev, "Unable to create TX DMA map\n");
2969                         goto fail;
2970                 }
2971         }
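#if 0	/* Illustrative sketch only, never compiled */
        /*
         * A minimal sketch of how one of the maps created above is
         * typically driven, reusing the bus_dma calls that appear
         * elsewhere in this file.  "m" stands for a hypothetical
         * outgoing mbuf chain; the real transmit and cleanup paths
         * add error and multi-segment handling.
         */
        bus_dma_segment_t seg[1];
        struct mbuf *m;		/* hypothetical outgoing chain */
        int nsegs;

        /* Transmit: bind the mbuf to the map and flush it for the NIC */
        bus_dmamap_load_mbuf_segment(txr->txtag, txbuf->map, m,
            seg, 1, &nsegs, BUS_DMA_NOWAIT);
        bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_PREWRITE);

        /* Completion (cf. ixgbe_txeof below): tear the binding down */
        bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE);
        bus_dmamap_unload(txr->txtag, txbuf->map);
        m_freem(m);
#endif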
2972
2973         return 0;
2974 fail:
2975         /* We free all, it handles case where we are in the middle */
2976         ixgbe_free_transmit_structures(adapter);
2977         return (error);
2978 }
2979
2980 /*********************************************************************
2981  *
2982  *  Initialize a transmit ring.
2983  *
2984  **********************************************************************/
2985 static void
2986 ixgbe_setup_transmit_ring(struct tx_ring *txr)
2987 {
2988         struct adapter *adapter = txr->adapter;
2989         struct ixgbe_tx_buf *txbuf;
2990         int i;
2991 #ifdef DEV_NETMAP
2992         struct netmap_adapter *na = NA(adapter->ifp);
2993         struct netmap_slot *slot;
2994 #endif /* DEV_NETMAP */
2995
2996         /* Clear the old ring contents */
2997         IXGBE_TX_LOCK(txr);
2998 #ifdef DEV_NETMAP
2999         /*
3000          * (under lock): if in netmap mode, do some consistency
3001          * checks and set slot to entry 0 of the netmap ring.
3002          */
3003         slot = netmap_reset(na, NR_TX, txr->me, 0);
3004 #endif /* DEV_NETMAP */
3005         bzero((void *)txr->tx_base,
3006               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
3007         /* Reset indices */
3008         txr->next_avail_desc = 0;
3009         txr->next_to_clean = 0;
3010
3011         /* Free any existing tx buffers. */
3012         txbuf = txr->tx_buffers;
3013         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3014                 if (txbuf->m_head != NULL) {
3015                         bus_dmamap_sync(txr->txtag, txbuf->map,
3016                             BUS_DMASYNC_POSTWRITE);
3017                         bus_dmamap_unload(txr->txtag, txbuf->map);
3018                         m_freem(txbuf->m_head);
3019                         txbuf->m_head = NULL;
3020                 }
3021 #ifdef DEV_NETMAP
3022                 /*
3023                  * In netmap mode, set the map for the packet buffer.
3024                  * NOTE: Some drivers (not this one) also need to set
3025                  * the physical buffer address in the NIC ring.
3026                  * Slots in the netmap ring (indexed by "si") are
3027                  * kring->nkr_hwofs positions "ahead" wrt the
3028                  * corresponding slot in the NIC ring. In some drivers
3029                  * (not here) nkr_hwofs can be negative. Function
3030                  * netmap_idx_n2k() handles wraparounds properly.
3031                  */
3032                 if (slot) {
3033                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3034                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3035                 }
3036 #endif /* DEV_NETMAP */
3037                 /* Clear the EOP index */
3038                 txbuf->eop_index = -1;
3039         }
3040
3041 #ifdef IXGBE_FDIR
3042         /* Set the rate at which we sample packets */
3043         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3044                 txr->atr_sample = atr_sample_rate;
3045 #endif
3046
3047         /* Set number of descriptors available */
3048         txr->tx_avail = adapter->num_tx_desc;
3049
3050         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3051             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3052         IXGBE_TX_UNLOCK(txr);
3053 }
3054
3055 /*********************************************************************
3056  *
3057  *  Initialize all transmit rings.
3058  *
3059  **********************************************************************/
3060 static int
3061 ixgbe_setup_transmit_structures(struct adapter *adapter)
3062 {
3063         struct tx_ring *txr = adapter->tx_rings;
3064
3065         for (int i = 0; i < adapter->num_queues; i++, txr++)
3066                 ixgbe_setup_transmit_ring(txr);
3067
3068         return (0);
3069 }
3070
3071 /*********************************************************************
3072  *
3073  *  Enable transmit unit.
3074  *
3075  **********************************************************************/
3076 static void
3077 ixgbe_initialize_transmit_units(struct adapter *adapter)
3078 {
3079         struct tx_ring  *txr = adapter->tx_rings;
3080         struct ixgbe_hw *hw = &adapter->hw;
3081
3082         /* Setup the Base and Length of the Tx Descriptor Ring */
3083
3084         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3085                 u64     tdba = txr->txdma.dma_paddr;
3086                 u32     txctrl;
3087
3088                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3089                        (tdba & 0x00000000ffffffffULL));
3090                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3091                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3092                     adapter->num_tx_desc * sizeof(struct ixgbe_legacy_tx_desc));
3093
3094                 /* Setup the HW Tx Head and Tail descriptor pointers */
3095                 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3096                 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3097
3098                 /* Setup Transmit Descriptor Cmd Settings */
3099                 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3100                 txr->queue_status = IXGBE_QUEUE_IDLE;
3101
3102                 /* Disable Head Writeback */
3103                 switch (hw->mac.type) {
3104                 case ixgbe_mac_82598EB:
3105                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3106                         break;
3107                 case ixgbe_mac_82599EB:
3108                 case ixgbe_mac_X540:
3109                 default:
3110                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3111                         break;
3112                 }
3113                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3114                 switch (hw->mac.type) {
3115                 case ixgbe_mac_82598EB:
3116                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3117                         break;
3118                 case ixgbe_mac_82599EB:
3119                 case ixgbe_mac_X540:
3120                 default:
3121                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3122                         break;
3123                 }
3124
3125         }
3126
3127         if (hw->mac.type != ixgbe_mac_82598EB) {
3128                 u32 dmatxctl, rttdcs;
3129                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3130                 dmatxctl |= IXGBE_DMATXCTL_TE;
3131                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3132                 /* Disable arbiter to set MTQC */
3133                 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3134                 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3135                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3136                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3137                 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3138                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3139         }
3140
3141         return;
3142 }
3143
3144 /*********************************************************************
3145  *
3146  *  Free all transmit rings.
3147  *
3148  **********************************************************************/
3149 static void
3150 ixgbe_free_transmit_structures(struct adapter *adapter)
3151 {
3152         struct tx_ring *txr = adapter->tx_rings;
3153
3154         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3155                 IXGBE_TX_LOCK(txr);
3156                 ixgbe_free_transmit_buffers(txr);
3157                 ixgbe_dma_free(adapter, &txr->txdma);
3158                 IXGBE_TX_UNLOCK(txr);
3159                 IXGBE_TX_LOCK_DESTROY(txr);
3160         }
3161         kfree(adapter->tx_rings, M_DEVBUF);
3162 }
3163
3164 /*********************************************************************
3165  *
3166  *  Free transmit ring related data structures.
3167  *
3168  **********************************************************************/
3169 static void
3170 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3171 {
3172         struct adapter *adapter = txr->adapter;
3173         struct ixgbe_tx_buf *tx_buffer;
3174         int             i;
3175
3176         INIT_DEBUGOUT("free_transmit_ring: begin");
3177
3178         if (txr->tx_buffers == NULL)
3179                 return;
3180
3181         tx_buffer = txr->tx_buffers;
3182         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3183                 if (tx_buffer->m_head != NULL) {
3184                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3185                             BUS_DMASYNC_POSTWRITE);
3186                         bus_dmamap_unload(txr->txtag,
3187                             tx_buffer->map);
3188                         m_freem(tx_buffer->m_head);
3189                         tx_buffer->m_head = NULL;
3190                         if (tx_buffer->map != NULL) {
3191                                 bus_dmamap_destroy(txr->txtag,
3192                                     tx_buffer->map);
3193                                 tx_buffer->map = NULL;
3194                         }
3195                 } else if (tx_buffer->map != NULL) {
3196                         bus_dmamap_unload(txr->txtag,
3197                             tx_buffer->map);
3198                         bus_dmamap_destroy(txr->txtag,
3199                             tx_buffer->map);
3200                         tx_buffer->map = NULL;
3201                 }
3202         }
3203 #if 0 /* __FreeBSD_version >= 800000 */
3204         if (txr->br != NULL)
3205                 buf_ring_free(txr->br, M_DEVBUF);
3206 #endif
3207         if (txr->tx_buffers != NULL) {
3208                 kfree(txr->tx_buffers, M_DEVBUF);
3209                 txr->tx_buffers = NULL;
3210         }
3211         if (txr->txtag != NULL) {
3212                 bus_dma_tag_destroy(txr->txtag);
3213                 txr->txtag = NULL;
3214         }
3215         return;
3216 }
3217
3218 /*********************************************************************
3219  *
3220  *  Advanced Context Descriptor setup for VLAN or CSUM
3221  *
3222  **********************************************************************/
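/*
 * (Rough sketch of what the routine below builds: vlan_macip_lens packs
 * the VLAN tag, the ethernet header length and the IP header length into
 * a single 32-bit word.  For a plain untagged IPv4/TCP frame that is just
 * (ETHER_HDR_LEN << IXGBE_ADVTXD_MACLEN_SHIFT) | 20, i.e. a 14-byte MAC
 * header and a 20-byte IP header with no options.)
 */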
3223
3224 static bool
3225 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3226 {
3227         struct adapter *adapter = txr->adapter;
3228         struct ixgbe_adv_tx_context_desc *TXD;
3229         struct ixgbe_tx_buf        *tx_buffer;
3230         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3231         struct ether_vlan_header *eh;
3232         struct ip *ip;
3233         struct ip6_hdr *ip6;
3234         int  ehdrlen, ip_hlen = 0;
3235         u16     etype;
3236         u8      ipproto = 0;
3237         bool    offload = TRUE;
3238         int ctxd = txr->next_avail_desc;
3239         u16 vtag = 0;
3240
3241
3242         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3243                 offload = FALSE;
3244
3245         tx_buffer = &txr->tx_buffers[ctxd];
3246         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3247
3248         /*
3249         ** In advanced descriptors the vlan tag must 
3250         ** be placed into the descriptor itself.
3251         */
3252         if (mp->m_flags & M_VLANTAG) {
3253                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3254                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3255         } else if (offload == FALSE)
3256                 return FALSE;
3257
3258         /*
3259          * Determine where frame payload starts.
3260          * Jump over vlan headers if already present,
3261          * helpful for QinQ too.
3262          */
3263         eh = mtod(mp, struct ether_vlan_header *);
3264         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3265                 etype = ntohs(eh->evl_proto);
3266                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3267         } else {
3268                 etype = ntohs(eh->evl_encap_proto);
3269                 ehdrlen = ETHER_HDR_LEN;
3270         }
3271
3272         /* Set the ether header length */
3273         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3274
3275         switch (etype) {
3276                 case ETHERTYPE_IP:
3277                         ip = (struct ip *)(mp->m_data + ehdrlen);
3278                         ip_hlen = ip->ip_hl << 2;
3279                         ipproto = ip->ip_p;
3280                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3281                         break;
3282                 case ETHERTYPE_IPV6:
3283                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3284                         ip_hlen = sizeof(struct ip6_hdr);
3285                         /* XXX-BZ this will go badly in case of ext hdrs. */
3286                         ipproto = ip6->ip6_nxt;
3287                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3288                         break;
3289                 default:
3290                         offload = FALSE;
3291                         break;
3292         }
3293
3294         vlan_macip_lens |= ip_hlen;
3295         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3296
3297         switch (ipproto) {
3298                 case IPPROTO_TCP:
3299                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3300                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3301                         break;
3302
3303                 case IPPROTO_UDP:
3304                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3305                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3306                         break;
3307
3308 #if 0
3309                 case IPPROTO_SCTP:
3310                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3311                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
3312                         break;
3313 #endif
3314                 default:
3315                         offload = FALSE;
3316                         break;
3317         }
3318
3319         /* Now copy bits into descriptor */
3320         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3321         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3322         TXD->seqnum_seed = htole32(0);
3323         TXD->mss_l4len_idx = htole32(0);
3324
3325         tx_buffer->m_head = NULL;
3326         tx_buffer->eop_index = -1;
3327
3328         /* We've consumed the first desc, adjust counters */
3329         if (++ctxd == adapter->num_tx_desc)
3330                 ctxd = 0;
3331         txr->next_avail_desc = ctxd;
3332         --txr->tx_avail;
3333
3334         return (offload);
3335 }
3336
3337 /**********************************************************************
3338  *
3339  *  Setup work for hardware segmentation offload (TSO) on
3340  *  adapters using advanced tx descriptors
3341  *
3342  **********************************************************************/
3343 static bool
3344 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *paylen,
3345     u32 *olinfo_status)
3346 {
3347         struct adapter *adapter = txr->adapter;
3348         struct ixgbe_adv_tx_context_desc *TXD;
3349         struct ixgbe_tx_buf        *tx_buffer;
3350         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3351         u16 vtag = 0, eh_type;
3352         u32 mss_l4len_idx = 0, len;
3353         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3354         struct ether_vlan_header *eh;
3355 #if 0 /* IPv6 TSO */
3356 #ifdef INET6
3357         struct ip6_hdr *ip6;
3358 #endif
3359 #endif
3360 #ifdef INET
3361         struct ip *ip;
3362 #endif
3363         struct tcphdr *th;
3364
3365
3366         /*
3367          * Determine where frame payload starts.
3368          * Jump over vlan headers if already present
3369          */
3370         eh = mtod(mp, struct ether_vlan_header *);
3371         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3372                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3373                 eh_type = eh->evl_proto;
3374         } else {
3375                 ehdrlen = ETHER_HDR_LEN;
3376                 eh_type = eh->evl_encap_proto;
3377         }
3378
3379         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3380         len = ehdrlen + sizeof(struct tcphdr);
3381         switch (ntohs(eh_type)) {
3382 #if 0 /* IPv6 TSO */
3383 #ifdef INET6
3384         case ETHERTYPE_IPV6:
3385                 if (mp->m_len < len + sizeof(struct ip6_hdr))
3386                         return FALSE;
3387                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3388                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3389                 if (ip6->ip6_nxt != IPPROTO_TCP)
3390                         return FALSE;
3391                 ip_hlen = sizeof(struct ip6_hdr);
3392                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3393                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3394                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3395                 break;
3396 #endif
3397 #endif
3398 #ifdef INET
3399         case ETHERTYPE_IP:
3400                 if (mp->m_len < len + sizeof(struct ip))
3401                         return FALSE;
3402                 ip = (struct ip *)(mp->m_data + ehdrlen);
3403                 if (ip->ip_p != IPPROTO_TCP)
3404                         return FALSE;
3405                 ip->ip_sum = 0;
3406                 ip_hlen = ip->ip_hl << 2;
3407                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3408                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3409                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3410                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3411                 /* Tell transmit desc to also do IPv4 checksum. */
3412                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3413                 break;
3414 #endif
3415         default:
3416                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3417                     __func__, ntohs(eh_type));
3418                 break;
3419         }
3420
3421         ctxd = txr->next_avail_desc;
3422         tx_buffer = &txr->tx_buffers[ctxd];
3423         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3424
3425         tcp_hlen = th->th_off << 2;
3426
3427         /* This is used in the transmit desc in encap */
3428         *paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
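        /*
         * e.g. for an untagged IPv4/TCP frame with no IP or TCP options
         * this subtracts 14 + 20 + 20 = 54 bytes of headers, leaving the
         * TCP payload length that the hardware will segment.
         */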
3429
3430         /* VLAN MACLEN IPLEN */
3431         if (mp->m_flags & M_VLANTAG) {
3432                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3433                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3434         }
3435
3436         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3437         vlan_macip_lens |= ip_hlen;
3438         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3439
3440         /* ADV DTYPE TUCMD */
3441         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3442         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3443         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3444
3445         /* MSS L4LEN IDX */
3446         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
3447         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3448         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3449
3450         TXD->seqnum_seed = htole32(0);
3451         tx_buffer->m_head = NULL;
3452         tx_buffer->eop_index = -1;
3453
3454         if (++ctxd == adapter->num_tx_desc)
3455                 ctxd = 0;
3456
3457         txr->tx_avail--;
3458         txr->next_avail_desc = ctxd;
3459         return TRUE;
3460 }
3461
3462 #ifdef IXGBE_FDIR
3463 /*
3464 ** This routine parses packet headers so that Flow
3465 ** Director can make a hashed filter table entry 
3466 ** allowing traffic flows to be identified and kept
3467 ** on the same cpu.  This would be a performance
3468 ** hit, but we only do it for one packet out of
3469 ** every IXGBE_FDIR_RATE packets.
3470 */
3471 static void
3472 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3473 {
3474         struct adapter                  *adapter = txr->adapter;
3475         struct ix_queue                 *que;
3476         struct ip                       *ip;
3477         struct tcphdr                   *th;
3478         struct udphdr                   *uh;
3479         struct ether_vlan_header        *eh;
3480         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
3481         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
3482         int                             ehdrlen, ip_hlen;
3483         u16                             etype;
3484
3485         eh = mtod(mp, struct ether_vlan_header *);
3486         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3487                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3488                 etype = eh->evl_proto;
3489         } else {
3490                 ehdrlen = ETHER_HDR_LEN;
3491                 etype = eh->evl_encap_proto;
3492         }
3493
3494         /* Only handling IPv4 */
3495         if (etype != htons(ETHERTYPE_IP))
3496                 return;
3497
3498         ip = (struct ip *)(mp->m_data + ehdrlen);
3499         ip_hlen = ip->ip_hl << 2;
3500
3501         /* check if we're UDP or TCP */
3502         switch (ip->ip_p) {
3503         case IPPROTO_TCP:
3504                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3505                 /* src and dst are inverted */
3506                 common.port.dst ^= th->th_sport;
3507                 common.port.src ^= th->th_dport;
3508                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3509                 break;
3510         case IPPROTO_UDP:
3511                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
3512                 /* src and dst are inverted */
3513                 common.port.dst ^= uh->uh_sport;
3514                 common.port.src ^= uh->uh_dport;
3515                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3516                 break;
3517         default:
3518                 return;
3519         }
3520
3521         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vlantag);
3522         if (mp->m_pkthdr.ether_vlantag)
3523                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3524         else
3525                 common.flex_bytes ^= etype;
3526         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3527
3528         que = &adapter->queues[txr->me];
3529         /*
3530         ** This assumes the Rx queue and Tx
3531         ** queue are bound to the same CPU
3532         */
3533         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3534             input, common, que->msix);
3535 }
3536 #endif /* IXGBE_FDIR */
3537
3538 /**********************************************************************
3539  *
3540  *  Examine each tx_buffer in the used queue. If the hardware is done
3541  *  processing the packet then free associated resources. The
3542  *  tx_buffer is put back on the free queue.
3543  *
3544  **********************************************************************/
3545 static bool
3546 ixgbe_txeof(struct tx_ring *txr)
3547 {
3548         struct adapter  *adapter = txr->adapter;
3549         struct ifnet    *ifp = adapter->ifp;
3550         u32     first, last, done, processed;
3551         struct ixgbe_tx_buf *tx_buffer;
3552         struct ixgbe_legacy_tx_desc *tx_desc, *eop_desc;
3553
3554         KKASSERT(lockstatus(&txr->tx_lock, curthread) != 0);
3555
3556 #ifdef DEV_NETMAP
3557         if (ifp->if_capenable & IFCAP_NETMAP) {
3558                 struct netmap_adapter *na = NA(ifp);
3559                 struct netmap_kring *kring = &na->tx_rings[txr->me];
3560
3561                 tx_desc = (struct ixgbe_legacy_tx_desc *)txr->tx_base;
3562
3563                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3564                     BUS_DMASYNC_POSTREAD);
3565                 /*
3566                  * In netmap mode, all the work is done in the context
3567                  * of the client thread. Interrupt handlers only wake up
3568                  * clients, which may be sleeping on individual rings
3569                  * or on a global resource for all rings.
3570                  * To implement tx interrupt mitigation, we wake up the client
3571                  * thread roughly every half ring, even if the NIC interrupts
3572                  * more frequently. This is implemented as follows:
3573                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
3574                  *   the slot that should wake up the thread (nkr_num_slots
3575                  *   means the user thread should not be woken up);
3576                  * - the driver ignores tx interrupts unless netmap_mitigate=0
3577                  *   or the slot has the DD bit set.
3578                  *
3579                  * When the driver has separate locks, we need to
3580                  * release and re-acquire txlock to avoid deadlocks.
3581                  * XXX see if we can find a better way.
3582                  */
3583                 if (!netmap_mitigate ||
3584                     (kring->nr_kflags < kring->nkr_num_slots &&
3585                      tx_desc[kring->nr_kflags].upper.fields.status & IXGBE_TXD_STAT_DD)) {
3586                         kring->nr_kflags = kring->nkr_num_slots;
3587                         selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3588                         IXGBE_TX_UNLOCK(txr);
3589                         IXGBE_CORE_LOCK(adapter);
3590                         selwakeuppri(&na->tx_si, PI_NET);
3591                         IXGBE_CORE_UNLOCK(adapter);
3592                         IXGBE_TX_LOCK(txr);
3593                 }
3594                 return FALSE;
3595         }
3596 #endif /* DEV_NETMAP */
3597
3598         if (txr->tx_avail == adapter->num_tx_desc) {
3599                 txr->queue_status = IXGBE_QUEUE_IDLE;
3600                 return FALSE;
3601         }
3602
3603         processed = 0;
3604         first = txr->next_to_clean;
3605         tx_buffer = &txr->tx_buffers[first];
3606         /* For cleanup we just use legacy struct */
3607         tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
3608         last = tx_buffer->eop_index;
3609         if (last == -1)
3610                 return FALSE;
3611         eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
3612
3613         /*
3614         ** Get the index of the first descriptor
3615         ** BEYOND the EOP and call that 'done'.
3616         ** I do this so the comparison in the
3617         ** inner while loop below can be simple
3618         */
3619         if (++last == adapter->num_tx_desc) last = 0;
3620         done = last;
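        /*
         * e.g. with a 1024-descriptor ring and the EOP in slot 1023,
         * 'done' wraps to 0; the cleanup loop below then advances 'first'
         * (with the same wrap) until it has stepped past the EOP.
         */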
3621
3622         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3623             BUS_DMASYNC_POSTREAD);
3624         /*
3625         ** Only the EOP descriptor of a packet now has the DD
3626         ** bit set, this is what we look for...
3627         */
3628         while (eop_desc->upper.fields.status & IXGBE_TXD_STAT_DD) {
3629                 /* We clean the range of the packet */
3630                 while (first != done) {
3631                         tx_desc->upper.data = 0;
3632                         tx_desc->lower.data = 0;
3633                         tx_desc->buffer_addr = 0;
3634                         ++txr->tx_avail;
3635                         ++processed;
3636
3637                         if (tx_buffer->m_head) {
3638                                 txr->bytes +=
3639                                     tx_buffer->m_head->m_pkthdr.len;
3640                                 bus_dmamap_sync(txr->txtag,
3641                                     tx_buffer->map,
3642                                     BUS_DMASYNC_POSTWRITE);
3643                                 bus_dmamap_unload(txr->txtag,
3644                                     tx_buffer->map);
3645                                 m_freem(tx_buffer->m_head);
3646                                 tx_buffer->m_head = NULL;
3647                                 tx_buffer->map = NULL;
3648                         }
3649                         tx_buffer->eop_index = -1;
3650                         txr->watchdog_time = ticks;
3651
3652                         if (++first == adapter->num_tx_desc)
3653                                 first = 0;
3654
3655                         tx_buffer = &txr->tx_buffers[first];
3656                         tx_desc =
3657                             (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
3658                 }
3659                 ++txr->packets;
3660                 ++ifp->if_opackets;
3661                 /* See if there is more work now */
3662                 last = tx_buffer->eop_index;
3663                 if (last != -1) {
3664                         eop_desc =
3665                             (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
3666                         /* Get next done point */
3667                         if (++last == adapter->num_tx_desc) last = 0;
3668                         done = last;
3669                 } else
3670                         break;
3671         }
3672         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3673             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3674
3675         txr->next_to_clean = first;
3676
3677         /*
3678         ** Watchdog calculation: we know there's
3679         ** work outstanding or the first return
3680         ** would have been taken, so none processed
3681         ** for too long indicates a hang.
3682         */
3683         if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
3684                 txr->queue_status = IXGBE_QUEUE_HUNG;
3685
3686         /* With a minimum free clear the depleted state bit.  */
3687         if (txr->tx_avail > IXGBE_TX_CLEANUP_THRESHOLD)
3688                 txr->queue_status &= ~IXGBE_QUEUE_DEPLETED;
3689
3690         if (txr->tx_avail == adapter->num_tx_desc) {
3691                 txr->queue_status = IXGBE_QUEUE_IDLE;
3692                 return (FALSE);
3693         }
3694
3695         return TRUE;
3696 }
3697
3698 /*********************************************************************
3699  *
3700  *  Refresh mbuf buffers for RX descriptor rings
3701  *   - now keeps its own state so discards due to resource
3702  *     exhaustion are unnecessary; if an mbuf cannot be obtained
3703  *     it just returns, keeping its placeholder, so it can simply
3704  *     be called again later to retry.
3705  *
3706  **********************************************************************/
3707 static void
3708 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3709 {
3710         struct adapter          *adapter = rxr->adapter;
3711         bus_dma_segment_t       hseg[1];
3712         bus_dma_segment_t       pseg[1];
3713         struct ixgbe_rx_buf     *rxbuf;
3714         struct mbuf             *mh, *mp;
3715         int                     i, j, nsegs, error;
3716         bool                    refreshed = FALSE;
3717
3718         i = j = rxr->next_to_refresh;
3719         /* Control the loop with one beyond */
3720         if (++j == adapter->num_rx_desc)
3721                 j = 0;
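        /*
         * e.g. if next_to_refresh is 255 in a 256-descriptor ring, i
         * starts at 255 and j at 0; each pass refreshes slot i and then
         * advances both, so next_to_refresh always trails one slot behind
         * j, and the loop stops once j catches up with 'limit'.
         */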
3722
3723         while (j != limit) {
3724                 rxbuf = &rxr->rx_buffers[i];
3725                 if (rxr->hdr_split == FALSE)
3726                         goto no_split;
3727
3728                 if (rxbuf->m_head == NULL) {
3729                         mh = m_gethdr(MB_DONTWAIT, MT_DATA);
3730                         if (mh == NULL)
3731                                 goto update;
3732                 } else
3733                         mh = rxbuf->m_head;
3734
3735                 mh->m_pkthdr.len = mh->m_len = MHLEN;
3737                 mh->m_flags |= M_PKTHDR;
3738                 /* Get the memory mapping */
3739                 error = bus_dmamap_load_mbuf_segment(rxr->htag,
3740                     rxbuf->hmap, mh, hseg, 1, &nsegs, BUS_DMA_NOWAIT);
3741                 if (error != 0) {
3742                         kprintf("Refresh mbufs: hdr dmamap load"
3743                             " failure - %d\n", error);
3744                         m_free(mh);
3745                         rxbuf->m_head = NULL;
3746                         goto update;
3747                 }
3748                 rxbuf->m_head = mh;
3749                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3750                     BUS_DMASYNC_PREREAD);
3751                 rxr->rx_base[i].read.hdr_addr =
3752                     htole64(hseg[0].ds_addr);
3753
3754 no_split:
3755                 if (rxbuf->m_pack == NULL) {
3756                         mp = m_getjcl(MB_DONTWAIT, MT_DATA,
3757                             M_PKTHDR, adapter->rx_mbuf_sz);
3758                         if (mp == NULL)
3759                                 goto update;
3760                 } else
3761                         mp = rxbuf->m_pack;
3762
3763                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3764                 /* Get the memory mapping */
3765                 error = bus_dmamap_load_mbuf_segment(rxr->ptag,
3766                     rxbuf->pmap, mp, pseg, 1, &nsegs, BUS_DMA_NOWAIT);
3767                 if (error != 0) {
3768                         kprintf("Refresh mbufs: payload dmamap load"
3769                             " failure - %d\n", error);
3770                         m_free(mp);
3771                         rxbuf->m_pack = NULL;
3772                         goto update;
3773                 }
3774                 rxbuf->m_pack = mp;
3775                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3776                     BUS_DMASYNC_PREREAD);
3777                 rxr->rx_base[i].read.pkt_addr =
3778                     htole64(pseg[0].ds_addr);
3779
3780                 refreshed = TRUE;
3781                 /* Next is precalculated */
3782                 i = j;
3783                 rxr->next_to_refresh = i;
3784                 if (++j == adapter->num_rx_desc)
3785                         j = 0;
3786         }
3787 update:
3788         if (refreshed) /* Update hardware tail index */
3789                 IXGBE_WRITE_REG(&adapter->hw,
3790                     IXGBE_RDT(rxr->me), rxr->next_to_refresh);
3791         return;
3792 }
3793
3794 /*********************************************************************
3795  *
3796  *  Allocate memory for rx_buffer structures. Since we use one
3797  *  rx_buffer per received packet, the maximum number of rx_buffer's
3798  *  that we'll need is equal to the number of receive descriptors
3799  *  that we've allocated.
3800  *
3801  **********************************************************************/
3802 static int
3803 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
3804 {
3805         struct  adapter         *adapter = rxr->adapter;
3806         device_t                dev = adapter->dev;
3807         struct ixgbe_rx_buf     *rxbuf;
3808         int                     i, bsize, error;
3809
3810         bsize = sizeof(struct ixgbe_rx_buf) * adapter->num_rx_desc;
3811         if (!(rxr->rx_buffers =
3812             (struct ixgbe_rx_buf *) kmalloc(bsize,
3813             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3814                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3815                 error = ENOMEM;
3816                 goto fail;
3817         }
3818
3819         if ((error = bus_dma_tag_create(NULL,   /* parent */
3820                                    1, 0,        /* alignment, bounds */
3821                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3822                                    BUS_SPACE_MAXADDR,   /* highaddr */
3823                                    NULL, NULL,          /* filter, filterarg */
3824                                    MSIZE,               /* maxsize */
3825                                    1,                   /* nsegments */
3826                                    MSIZE,               /* maxsegsize */
3827                                    0,                   /* flags */
3828                                    &rxr->htag))) {
3829                 device_printf(dev, "Unable to create RX DMA tag\n");
3830                 goto fail;
3831         }
3832
3833         if ((error = bus_dma_tag_create(NULL,   /* parent */
3834                                    1, 0,        /* alignment, bounds */
3835                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3836                                    BUS_SPACE_MAXADDR,   /* highaddr */
3837                                    NULL, NULL,          /* filter, filterarg */
3838                                    MJUM16BYTES,         /* maxsize */
3839                                    1,                   /* nsegments */
3840                                    MJUM16BYTES,         /* maxsegsize */
3841                                    0,                   /* flags */
3842                                    &rxr->ptag))) {
3843                 device_printf(dev, "Unable to create RX DMA tag\n");
3844                 goto fail;
3845         }
3846
3847         for (i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3848                 rxbuf = &rxr->rx_buffers[i];
3849                 error = bus_dmamap_create(rxr->htag,
3850                     BUS_DMA_NOWAIT, &rxbuf->hmap);
3851                 if (error) {
3852                         device_printf(dev, "Unable to create RX head map\n");
3853                         goto fail;
3854                 }
3855                 error = bus_dmamap_create(rxr->ptag,
3856                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3857                 if (error) {
3858                         device_printf(dev, "Unable to create RX pkt map\n");
3859                         goto fail;
3860                 }
3861         }
3862
3863         return (0);
3864
3865 fail:
3866         /* Frees all, but can handle partial completion */
3867         ixgbe_free_receive_structures(adapter);
3868         return (error);
3869 }
3870
3871 /*
3872 ** Used to detect a descriptor that has
3873 ** been merged by Hardware RSC.
3874 */
3875 static inline u32
3876 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
3877 {
3878         return (le32toh(rx->wb.lower.lo_dword.data) &
3879             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
3880 }
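/*
 * (A nonzero count here means the hardware merged additional receive
 * buffers into this completion; zero means no RSC merging took place.)
 */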
3881
3882 /*********************************************************************
3883  *
3884  *  Initialize Hardware RSC (LRO) feature on 82599
3885  *  for an RX ring; this is toggled by the LRO capability
3886  *  even though it is transparent to the stack.
3887  *
3888  **********************************************************************/
3889 #if 0   /* NET_LRO */
3890 static void
3891 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
3892 {
3893         struct  adapter         *adapter = rxr->adapter;
3894         struct  ixgbe_hw        *hw = &adapter->hw;
3895         u32                     rscctrl, rdrxctl;
3896
3897         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3898         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3899 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
3900         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
3901 #endif /* DEV_NETMAP */
3902         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3903         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
3904         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3905
3906         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3907         rscctrl |= IXGBE_RSCCTL_RSCEN;
3908         /*
3909         ** Limit the total number of descriptors that
3910         ** can be combined, so it does not exceed 64K
3911         */
3912         if (adapter->rx_mbuf_sz == MCLBYTES)
3913                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
3914         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
3915                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
3916         else if (adapter->rx_mbuf_sz == MJUM9BYTES)
3917                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
3918         else  /* Using 16K cluster */
3919                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
3920
3921         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3922
3923         /* Enable TCP header recognition */
3924         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
3925             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
3926             IXGBE_PSRTYPE_TCPHDR));
3927
3928         /* Disable RSC for ACK packets */
3929         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
3930             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
3931
3932         rxr->hw_rsc = TRUE;
3933 }
3934 #endif
3935
3936 static void     
3937 ixgbe_free_receive_ring(struct rx_ring *rxr)
3938 {
3939         struct  adapter         *adapter;
3940         struct ixgbe_rx_buf       *rxbuf;
3941         int i;
3942
3943         adapter = rxr->adapter;
3944         for (i = 0; i < adapter->num_rx_desc; i++) {
3945                 rxbuf = &rxr->rx_buffers[i];
3946                 if (rxbuf->m_head != NULL) {
3947                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3948                             BUS_DMASYNC_POSTREAD);
3949                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3950                         rxbuf->m_head->m_flags |= M_PKTHDR;
3951                         m_freem(rxbuf->m_head);
3952                 }
3953                 if (rxbuf->m_pack != NULL) {
3954                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3955                             BUS_DMASYNC_POSTREAD);
3956                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3957                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3958                         m_freem(rxbuf->m_pack);
3959                 }
3960                 rxbuf->m_head = NULL;
3961                 rxbuf->m_pack = NULL;
3962         }
3963 }
3964
3965
3966 /*********************************************************************
3967  *
3968  *  Initialize a receive ring and its buffers.
3969  *
3970  **********************************************************************/
3971 static int
3972 ixgbe_setup_receive_ring(struct rx_ring *rxr)
3973 {
3974         struct  adapter         *adapter;
3975         struct ifnet            *ifp;
3976         device_t                dev;
3977         struct ixgbe_rx_buf     *rxbuf;
3978         bus_dma_segment_t       pseg[1], hseg[1];
3979 #if 0   /* NET_LRO */
3980         struct lro_ctrl         *lro = &rxr->lro;
3981 #endif
3982         int                     rsize, nsegs, error = 0;
3983 #ifdef DEV_NETMAP
3984         struct netmap_adapter *na = NA(rxr->adapter->ifp);
3985         struct netmap_slot *slot;
3986 #endif /* DEV_NETMAP */
3987
3988         adapter = rxr->adapter;
3989         ifp = adapter->ifp;
3990         dev = adapter->dev;
3991
3992         /* Clear the ring contents */
3993         IXGBE_RX_LOCK(rxr);
3994 #ifdef DEV_NETMAP
3995         /* same as in ixgbe_setup_transmit_ring() */
3996         slot = netmap_reset(na, NR_RX, rxr->me, 0);
3997 #endif /* DEV_NETMAP */
3998         rsize = roundup2(adapter->num_rx_desc *
3999             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
4000         bzero((void *)rxr->rx_base, rsize);
4001
4002         /* Free current RX buffer structs and their mbufs */
4003         ixgbe_free_receive_ring(rxr);
4004
4005         /* Configure header split? */
4006         if (ixgbe_header_split)
4007                 rxr->hdr_split = TRUE;
4008
4009         /* Now replenish the mbufs */
4010         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4011                 struct mbuf     *mh, *mp;
4012
4013                 rxbuf = &rxr->rx_buffers[j];
4014 #ifdef DEV_NETMAP
4015                 /*
4016                  * In netmap mode, fill the map and set the buffer
4017                  * address in the NIC ring, considering the offset
4018                  * between the netmap and NIC rings (see comment in
4019                  * ixgbe_setup_transmit_ring() ). No need to allocate
4020                  * an mbuf, so end the block with a continue;
4021                  */
4022                 if (slot) {
4023                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4024                         uint64_t paddr;
4025                         void *addr;
4026
4027                         addr = PNMB(slot + sj, &paddr);
4028                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4029                         /* Update descriptor */
4030                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4031                         continue;
4032                 }
4033 #endif /* DEV_NETMAP */
4034                 /*
4035                 ** Don't allocate mbufs if not
4036                 ** doing header split, it's wasteful
4037                 */ 
4038                 if (rxr->hdr_split == FALSE)
4039                         goto skip_head;
4040
4041                 /* First the header */
4042                 rxbuf->m_head = m_gethdr(MB_DONTWAIT, MT_DATA);
4043                 if (rxbuf->m_head == NULL) {
4044                         error = ENOBUFS;
4045                         goto fail;
4046                 }
4047                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4048                 mh = rxbuf->m_head;
4049                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4050                 mh->m_flags |= M_PKTHDR;
4051                 /* Get the memory mapping */
4052                 error = bus_dmamap_load_mbuf_segment(rxr->htag,
4053                     rxbuf->hmap, rxbuf->m_head, hseg, 1,
4054                     &nsegs, BUS_DMA_NOWAIT);
4055
4056                 if (error != 0) /* Nothing elegant to do here */
4057                         goto fail;
4058                 bus_dmamap_sync(rxr->htag,
4059                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4060                 /* Update descriptor */
4061                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4062
4063 skip_head:
4064                 /* Now the payload cluster */
4065                 rxbuf->m_pack = m_getjcl(MB_DONTWAIT, MT_DATA,
4066                     M_PKTHDR, adapter->rx_mbuf_sz);
4067                 if (rxbuf->m_pack == NULL) {
4068                         error = ENOBUFS;
4069                         goto fail;
4070                 }
4071                 mp = rxbuf->m_pack;
4072                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4073                 /* Get the memory mapping */
4074                 error = bus_dmamap_load_mbuf_segment(rxr->ptag,
4075                     rxbuf->pmap, mp, pseg, 1,
4076                     &nsegs, BUS_DMA_NOWAIT);
4077                 if (error != 0)
4078                         goto fail;
4079                 bus_dmamap_sync(rxr->ptag,
4080                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4081                 /* Update descriptor */
4082                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4083         }
4084
4085
4086         /* Setup our descriptor indices */
4087         rxr->next_to_check = 0;
4088         rxr->next_to_refresh = 0;
4089         rxr->lro_enabled = FALSE;
4090         rxr->rx_split_packets = 0;
4091         rxr->rx_bytes = 0;
4092         rxr->discard = FALSE;
4093         rxr->vtag_strip = FALSE;
4094
4095         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4096             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4097
4098         /*
4099         ** Now set up the LRO interface:
4100         ** 82598 uses software LRO, the
4101         ** 82599 and X540 use a hardware assist.
4102         */
4103 #if 0 /* NET_LRO */
4104         if ((adapter->hw.mac.type != ixgbe_mac_82598EB) &&
4105             (ifp->if_capenable & IFCAP_RXCSUM) &&
4106             (ifp->if_capenable & IFCAP_LRO))
4107                 ixgbe_setup_hw_rsc(rxr);
4108         else if (ifp->if_capenable & IFCAP_LRO) {
4109                 int err = tcp_lro_init(lro);
4110                 if (err) {
4111                         device_printf(dev, "LRO Initialization failed!\n");
4112                         goto fail;
4113                 }
4114                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
4115                 rxr->lro_enabled = TRUE;
4116                 lro->ifp = adapter->ifp;
4117         }
4118 #endif
4119
4120         IXGBE_RX_UNLOCK(rxr);
4121         return (0);
4122
4123 fail:
4124         ixgbe_free_receive_ring(rxr);
4125         IXGBE_RX_UNLOCK(rxr);
4126         return (error);
4127 }
4128
4129 /*********************************************************************
4130  *
4131  *  Initialize all receive rings.
4132  *
4133  **********************************************************************/
4134 static int
4135 ixgbe_setup_receive_structures(struct adapter *adapter)
4136 {
4137         struct rx_ring *rxr = adapter->rx_rings;
4138         int j;
4139
4140         for (j = 0; j < adapter->num_queues; j++, rxr++)
4141                 if (ixgbe_setup_receive_ring(rxr))
4142                         goto fail;
4143
4144         return (0);
4145 fail:
4146         /*
4147          * Free the RX buffers allocated so far; we only handle
4148          * the rings that completed, since the failing ring will have
4149          * cleaned up after itself. 'j' is the ring that failed, so it is the terminus.
4150          */
4151         for (int i = 0; i < j; ++i) {
4152                 rxr = &adapter->rx_rings[i];
4153                 ixgbe_free_receive_ring(rxr);
4154         }
4155
4156         return (ENOBUFS);
4157 }
4158
4159 /*********************************************************************
4160  *
4161  *  Setup receive registers and features.
4162  *
4163  **********************************************************************/
4164 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
4165
4166 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
4167         
4168 static void
4169 ixgbe_initialize_receive_units(struct adapter *adapter)
4170 {
4171         struct  rx_ring *rxr = adapter->rx_rings;
4172         struct ixgbe_hw *hw = &adapter->hw;
4173         struct ifnet   *ifp = adapter->ifp;
4174         u32             bufsz, rxctrl, fctrl, srrctl, rxcsum;
4175         u32             reta, mrqc = 0, hlreg, random[10];
4176
4177
4178         /*
4179          * Make sure receives are disabled while
4180          * setting up the descriptor ring
4181          */
4182         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4183         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
4184             rxctrl & ~IXGBE_RXCTRL_RXEN);
4185
4186         /* Enable broadcasts */
4187         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4188         fctrl |= IXGBE_FCTRL_BAM;
4189         fctrl |= IXGBE_FCTRL_DPF;
4190         fctrl |= IXGBE_FCTRL_PMCF;
4191         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4192
4193         /* Set for Jumbo Frames? */
4194         hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4195         if (ifp->if_mtu > ETHERMTU)
4196                 hlreg |= IXGBE_HLREG0_JUMBOEN;
4197         else
4198                 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4199 #ifdef DEV_NETMAP
4200         /* crcstrip is conditional in netmap (in RDRXCTL too ?) */
4201         if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4202                 hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4203         else
4204                 hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4205 #endif /* DEV_NETMAP */
4206         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4207
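        /*
        ** Round the mbuf buffer size up to the next multiple of
        ** (1 << IXGBE_SRRCTL_BSIZEPKT_SHIFT) and express it in those
        ** units, which is what the SRRCTL BSIZEPKT field expects.
        */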
4208         bufsz = (adapter->rx_mbuf_sz +
4209             BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
4210
4211         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4212                 u64 rdba = rxr->rxdma.dma_paddr;
4213
4214                 /* Setup the Base and Length of the Rx Descriptor Ring */
4215                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
4216                                (rdba & 0x00000000ffffffffULL));
4217                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
4218                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
4219                     adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4220
4221                 /* Set up the SRRCTL register */
4222                 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
4223                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4224                 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
4225                 srrctl |= bufsz;
4226                 if (rxr->hdr_split) {
4227                         /* Use a standard mbuf for the header */
4228                         srrctl |= ((IXGBE_RX_HDR <<
4229                             IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT)
4230                             & IXGBE_SRRCTL_BSIZEHDR_MASK);
4231                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4232                 } else
4233                         srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4234                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
4235
4236                 /* Setup the HW Rx Head and Tail Descriptor Pointers */
4237                 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
4238                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
4239         }
4240
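        /*
        ** On MACs newer than the 82598 the PSRTYPE register selects
        ** which header types the hardware will consider for packet split.
        */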
4241         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
4242                 u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
4243                               IXGBE_PSRTYPE_UDPHDR |
4244                               IXGBE_PSRTYPE_IPV4HDR |
4245                               IXGBE_PSRTYPE_IPV6HDR;
4246                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
4247         }
4248
4249         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4250
4251         /* Setup RSS */
4252         if (adapter->num_queues > 1) {
4253                 int i, j;
4254                 reta = 0;
4255
4256                 /* set up random bits */
4257                 karc4rand(&random, sizeof(random));
4258
4259                 /* Set up the redirection table */
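                /*
                ** Each 32-bit RETA register holds four 8-bit entries; they
                ** are accumulated in 'reta' and written out every fourth
                ** iteration.  (j * 0x11) places the queue index in both
                ** nibbles of an entry.
                */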
4260                 for (i = 0, j = 0; i < 128; i++, j++) {
4261                         if (j == adapter->num_queues) j = 0;
4262                         reta = (reta << 8) | (j * 0x11);
4263                         if ((i & 3) == 3)
4264                                 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
4265                 }
4266
4267                 /* Now fill our hash function seeds */
4268                 for (int i = 0; i < 10; i++)
4269                         IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random[i]);
4270
4271                 /* Perform hash on these packet types */
4272                 mrqc = IXGBE_MRQC_RSSEN
4273                      | IXGBE_MRQC_RSS_FIELD_IPV4
4274                      | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
4275                      | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
4276                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
4277                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX
4278                      | IXGBE_MRQC_RSS_FIELD_IPV6
4279                      | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
4280                      | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
4281                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
4282                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4283
4284                 /* RSS and RX IPP Checksum are mutually exclusive */
4285                 rxcsum |= IXGBE_RXCSUM_PCSD;
4286         }
4287
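        /*
        ** The RSS path above sets PCSD; PCSD and the legacy IP payload
        ** checksum (IPPCSE) are mutually exclusive, so IPPCSE is enabled
        ** only when PCSD ends up clear.
        */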
4288         if (ifp->if_capenable & IFCAP_RXCSUM)
4289                 rxcsum |= IXGBE_RXCSUM_PCSD;
4290
4291         if (!(rxcsum & IXGBE_RXCSUM_PCSD))
4292                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4293
4294         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4295
4296         return;
4297 }
4298
4299 /*********************************************************************
4300  *
4301  *  Free all receive rings.
4302  *
4303  **********************************************************************/
4304 static void
4305 ixgbe_free_receive_structures(struct adapter *adapter)
4306 {
4307         struct rx_ring *rxr = adapter->rx_rings;
4308
4309         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4310 #if 0   /* NET_LRO */
4311                 struct lro_ctrl         *lro = &rxr->lro;
4312 #endif
4313                 ixgbe_free_receive_buffers(rxr);
4314                 /* Free LRO memory */
4315 #if 0   /* NET_LRO */
4316                 tcp_lro_free(lro);
4317 #endif
4318                 /* Free the ring memory as well */
4319                 ixgbe_dma_free(adapter, &rxr->rxdma);
4320         }
4321
4322         kfree(adapter->rx_rings, M_DEVBUF);
4323 }
4324
4325
4326 /*********************************************************************
4327  *
4328  *  Free receive ring data structures
4329  *
4330  **********************************************************************/
4331 static void
4332 ixgbe_free_receive_buffers(struct rx_ring *rxr)
4333 {
4334         struct adapter          *adapter = rxr->adapter;
4335         struct ixgbe_rx_buf     *rxbuf;
4336
4337         INIT_DEBUGOUT("free_receive_structures: begin");
4338
4339         /* Cleanup any existing buffers */
4340         if (rxr->rx_buffers != NULL) {
4341                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4342                         rxbuf = &rxr->rx_buffers[i];
4343                         if (rxbuf->m_head != NULL) {
4344                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4345                                     BUS_DMASYNC_POSTREAD);
4346                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4347                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4348                                 m_freem(rxbuf->m_head);
4349                         }
4350                         if (rxbuf->m_pack != NULL) {
4351                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4352                                     BUS_DMASYNC_POSTREAD);
4353                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4354                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4355                                 m_freem(rxbuf->m_pack);
4356                         }
4357                         rxbuf->m_head = NULL;
4358                         rxbuf->m_pack = NULL;
4359                         if (rxbuf->hmap != NULL) {
4360                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4361                                 rxbuf->hmap = NULL;
4362                         }
4363                         if (rxbuf->pmap != NULL) {
4364                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4365                                 rxbuf->pmap = NULL;
4366                         }
4367                 }
4368                 if (rxr->rx_buffers != NULL) {
4369                         kfree(rxr->rx_buffers, M_DEVBUF);
4370                         rxr->rx_buffers = NULL;
4371                 }
4372         }
4373
4374         if (rxr->htag != NULL) {
4375                 bus_dma_tag_destroy(rxr->htag);
4376                 rxr->htag = NULL;
4377         }
4378         if (rxr->ptag != NULL) {
4379                 bus_dma_tag_destroy(rxr->ptag);
4380                 rxr->ptag = NULL;
4381         }
4382
4383         return;
4384 }
4385
4386 static __inline void
4387 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4388 {
4389                  
4390         /*
4391          * At the moment LRO is only for IP/TCP packets whose TCP checksum
4392          * has been computed by hardware.  The packet must also not have a
4393          * VLAN tag in the ethernet header.  For IPv6 we do not yet support ext. hdrs.
4394          */
4395 #if 0   /* NET_LRO */
4396         if (rxr->lro_enabled &&
4397             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4398             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4399             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4400             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
4401             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4402             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
4403             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4404             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4405                 /*
4406                  * Send to the stack if:
4407                  **  - LRO not enabled, or
4408                  **  - no LRO resources, or
4409                  **  - lro enqueue fails
4410                  */
4411                 if (rxr->lro.lro_cnt != 0)
4412                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4413                                 return;
4414         }
4415 #endif
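        /*
        ** Hand the frame to the stack with the RX ring unlocked, then
        ** re-take the lock before resuming the clean-up loop.
        */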
4416         IXGBE_RX_UNLOCK(rxr);
4417         (*ifp->if_input)(ifp, m);
4418         IXGBE_RX_LOCK(rxr);
4419 }
4420
4421 static __inline void
4422 ixgbe_rx_discard(struct rx_ring *rxr, int i)
4423 {
4424         struct ixgbe_rx_buf     *rbuf;
4425
4426         rbuf = &rxr->rx_buffers[i];
4427
4428         if (rbuf->fmp != NULL) {/* Partial chain ? */
4429                 rbuf->fmp->m_flags |= M_PKTHDR;
4430                 m_freem(rbuf->fmp);
4431                 rbuf->fmp = NULL;
4432         }
4433
4434         /*
4435         ** With advanced descriptors the writeback
4436         ** clobbers the buffer addrs, so it's easier
4437         ** to just free the existing mbufs and take
4438         ** the normal refresh path to get new buffers
4439         ** and mapping.
4440         */
4441         if (rbuf->m_head) {
4442                 m_free(rbuf->m_head);
4443                 rbuf->m_head = NULL;
4444         }
4445  
4446         if (rbuf->m_pack) {
4447                 m_free(rbuf->m_pack);
4448                 rbuf->m_pack = NULL;
4449         }
4450
4451         return;
4452 }
4453
4454
4455 /*********************************************************************
4456  *
4457  *  This routine executes in interrupt context. It replenishes
4458  *  the mbufs in the descriptor ring and sends data which has been
4459  *  DMA'd into host memory to the upper layer.
4460  *
4461  *  We loop at most count times if count is > 0, or until done if
4462  *  count < 0.
4463  *
4464  *  Return TRUE for more work, FALSE for all clean.
4465  *********************************************************************/
4466 static bool
4467 ixgbe_rxeof(struct ix_queue *que, int count)
4468 {
4469         struct adapter          *adapter = que->adapter;
4470         struct rx_ring          *rxr = que->rxr;
4471         struct ifnet            *ifp = adapter->ifp;
4472 #if 0   /* NET_LRO */
4473         struct lro_ctrl         *lro = &rxr->lro;
4474         struct lro_entry        *queued;
4475 #endif
4476         int                     i, nextp, processed = 0;
4477         u32                     staterr = 0;
4478         union ixgbe_adv_rx_desc *cur;
4479         struct ixgbe_rx_buf     *rbuf, *nbuf;
4480
4481         IXGBE_RX_LOCK(rxr);
4482
4483 #ifdef DEV_NETMAP
4484         if (ifp->if_capenable & IFCAP_NETMAP) {
4485                 /*
4486                  * Same as the txeof routine: only wakeup clients on intr.
4487                  * NKR_PENDINTR in nr_kflags is used to implement interrupt
4488                  * mitigation (ixgbe_rxsync() will not look for new packets
4489                  * unless NKR_PENDINTR is set).
4490                  */
4491                 struct netmap_adapter *na = NA(ifp);
4492
4493                 na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4494                 selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4495                 IXGBE_RX_UNLOCK(rxr);
4496                 IXGBE_CORE_LOCK(adapter);
4497                 selwakeuppri(&na->rx_si, PI_NET);
4498                 IXGBE_CORE_UNLOCK(adapter);
4499                 return (FALSE);
4500         }
4501 #endif /* DEV_NETMAP */
4502         for (i = rxr->next_to_check; count != 0;) {
4503                 struct mbuf     *sendmp, *mh, *mp;
4504                 u32             rsc, ptype;
4505                 u16             hlen, plen, hdr;
4506                 u16             vtag = 0;
4507                 bool            eop;
4508  
4509                 /* Sync the ring. */
4510                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4511                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4512
4513                 cur = &rxr->rx_base[i];
4514                 staterr = le32toh(cur->wb.upper.status_error);
4515
4516                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
4517                         break;
4518                 if ((ifp->if_flags & IFF_RUNNING) == 0)
4519                         break;
4520
4521                 count--;
4522                 sendmp = NULL;
4523                 nbuf = NULL;
4524                 rsc = 0;
4525                 cur->wb.upper.status_error = 0;
4526                 rbuf = &rxr->rx_buffers[i];
4527                 mh = rbuf->m_head;
4528                 mp = rbuf->m_pack;
4529
4530                 plen = le16toh(cur->wb.upper.length);
4531                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
4532                     IXGBE_RXDADV_PKTTYPE_MASK;
4533                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4534                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
4535
4536                 /* Process vlan info */
4537                 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
4538                         vtag = le16toh(cur->wb.upper.vlan);
4539
4540                 /* Make sure bad packets are discarded */
4541                 if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
4542                     (rxr->discard)) {
4543                         ifp->if_ierrors++;
4544                         rxr->rx_discarded++;
4545                         if (eop)
4546                                 rxr->discard = FALSE;
4547                         else
4548                                 rxr->discard = TRUE;
4549                         ixgbe_rx_discard(rxr, i);
4550                         goto next_desc;
4551                 }
4552
4553                 /*
4554                 ** On the 82599, which supports a hardware
4555                 ** LRO (called HW RSC), packets need not be
4556                 ** fragmented across sequential descriptors;
4557                 ** instead the next descriptor is indicated
4558                 ** in bits of the current descriptor.  This
4559                 ** also means that we might process more than
4560                 ** one packet at a time, something that was
4561                 ** never true before; it required eliminating
4562                 ** the global chain pointers in favor of what
4563                 ** we are doing here.  -jfv
4564                 */
4565                 if (!eop) {
4566                         /*
4567                         ** Figure out the next descriptor
4568                         ** of this frame.
4569                         */
4570                         if (rxr->hw_rsc == TRUE) {
4571                                 rsc = ixgbe_rsc_count(cur);
4572                                 rxr->rsc_num += (rsc - 1);
4573                         }
4574                         if (rsc) { /* Get hardware index */
4575                                 nextp = ((staterr &
4576                                     IXGBE_RXDADV_NEXTP_MASK) >>
4577                                     IXGBE_RXDADV_NEXTP_SHIFT);
4578                         } else { /* Just sequential */
4579                                 nextp = i + 1;
4580                                 if (nextp == adapter->num_rx_desc)
4581                                         nextp = 0;
4582                         }
4583                         nbuf = &rxr->rx_buffers[nextp];
4584                         prefetch(nbuf);
4585                 }
4586                 /*
4587                 ** The header mbuf is ONLY used when header 
4588                 ** split is enabled, otherwise we get normal 
4589                 ** behavior, i.e., both header and payload
4590                 ** are DMA'd into the payload buffer.
4591                 **
4592                 ** Rather than using the fmp/lmp global pointers
4593                 ** we now keep the head of a packet chain in the
4594                 ** buffer struct and pass this along from one
4595                 ** descriptor to the next, until we get EOP.
4596                 */
4597                 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
4598                         /* This must be an initial descriptor */
4599                         hlen = (hdr & IXGBE_RXDADV_HDRBUFLEN_MASK) >>
4600                             IXGBE_RXDADV_HDRBUFLEN_SHIFT;
4601                         if (hlen > IXGBE_RX_HDR)
4602                                 hlen = IXGBE_RX_HDR;
4603                         mh->m_len = hlen;
4604                         mh->m_flags |= M_PKTHDR;
4605                         mh->m_next = NULL;
4606                         mh->m_pkthdr.len = mh->m_len;
4607                         /* Null buf pointer so it is refreshed */
4608                         rbuf->m_head = NULL;
4609                         /*
4610                         ** Check the payload length; this
4611                         ** could be zero if it's a small
4612                         ** packet.
4613                         */
4614                         if (plen > 0) {
4615                                 mp->m_len = plen;
4616                                 mp->m_next = NULL;
4617                                 mp->m_flags &= ~M_PKTHDR;
4618                                 mh->m_next = mp;
4619                                 mh->m_pkthdr.len += mp->m_len;
4620                                 /* Null buf pointer so it is refreshed */
4621                                 rbuf->m_pack = NULL;
4622                                 rxr->rx_split_packets++;
4623                         }
4624                         /*
4625                         ** Now create the forward
4626                         ** chain so when complete 
4627                         ** we won't have to.
4628                         */
4629                         if (eop == 0) {
4630                                 /* stash the chain head */
4631                                 nbuf->fmp = mh;
4632                                 /* Make forward chain */
4633                                 if (plen)
4634                                         mp->m_next = nbuf->m_pack;
4635                                 else
4636                                         mh->m_next = nbuf->m_pack;
4637                         } else {
4638                                 /* Singlet, prepare to send */
4639                                 sendmp = mh;
4640                                 /* If hardware handled vtag */
4641                                 if (vtag) {
4642                                         sendmp->m_pkthdr.ether_vlantag = vtag;
4643                                         sendmp->m_flags |= M_VLANTAG;
4644                                 }
4645                         }
4646                 } else {
4647                         /*
4648                         ** Either no header split, or a
4649                         ** secondary piece of a fragmented
4650                         ** split packet.
4651                         */
4652                         mp->m_len = plen;
4653                         /*
4654                         ** See if there is a stored head
4655                         ** that determines what we are dealing with.
4656                         */
4657                         sendmp = rbuf->fmp;
4658                         rbuf->m_pack = rbuf->fmp = NULL;
4659
4660                         if (sendmp != NULL) {  /* secondary frag */
4661                                 mp->m_flags &= ~M_PKTHDR;
4662                                 sendmp->m_pkthdr.len += mp->m_len;
4663                         } else {
4664                                 /* first desc of a non-ps chain */
4665                                 sendmp = mp;
4666                                 sendmp->m_flags |= M_PKTHDR;
4667                                 sendmp->m_pkthdr.len = mp->m_len;
4668                                 if (staterr & IXGBE_RXD_STAT_VP) {
4669                                         sendmp->m_pkthdr.ether_vlantag = vtag;
4670                                         sendmp->m_flags |= M_VLANTAG;
4671                                 }
4672                         }
4673                         /* Pass the head pointer on */
4674                         if (eop == 0) {
4675                                 nbuf->fmp = sendmp;
4676                                 sendmp = NULL;
4677                                 mp->m_next = nbuf->m_pack;
4678                         }
4679                 }
4680                 ++processed;
4681                 /* Sending this frame? */
4682                 if (eop) {
4683                         sendmp->m_pkthdr.rcvif = ifp;
4684                         ifp->if_ipackets++;
4685                         rxr->rx_packets++;
4686                         /* capture data for AIM */
4687                         rxr->bytes += sendmp->m_pkthdr.len;
4688                         rxr->rx_bytes += sendmp->m_pkthdr.len;
4689                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4690                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
4691 #if 0 /* __FreeBSD_version >= 800000 */
4692                         sendmp->m_pkthdr.flowid = que->msix;
4693                         sendmp->m_flags |= M_FLOWID;
4694 #endif
4695                 }
4696 next_desc:
4697                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4698                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4699
4700                 /* Advance our pointers to the next descriptor. */
4701                 if (++i == adapter->num_rx_desc)
4702                         i = 0;
4703
4704                 /* Now send to the stack or do LRO */
4705                 if (sendmp != NULL) {
4706                         rxr->next_to_check = i;
4707                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
4708                         i = rxr->next_to_check;
4709                 }
4710
4711                 /* Every 8 descriptors we go to refresh mbufs */
4712                 if (processed == 8) {
4713                         ixgbe_refresh_mbufs(rxr, i);
4714                         processed = 0;
4715                 }
4716         }
4717
4718         /* Refresh any remaining buf structs */
4719         if (ixgbe_rx_unrefreshed(rxr))
4720                 ixgbe_refresh_mbufs(rxr, i);
4721
4722         rxr->next_to_check = i;
4723
4724         /*
4725          * Flush any outstanding LRO work
4726          */
4727 #if 0   /* NET_LRO */
4728         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4729                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4730                 tcp_lro_flush(lro, queued);
4731         }
4732 #endif
4733
4734         IXGBE_RX_UNLOCK(rxr);
4735
4736         /*
4737         ** We still have cleaning to do?
4738         ** Schedule another interrupt if so.
4739         */
4740         if ((staterr & IXGBE_RXD_STAT_DD) != 0) {
4741                 ixgbe_rearm_queues(adapter, (u64)(1 << que->msix));
4742                 return (TRUE);
4743         }
4744
4745         return (FALSE);
4746 }
4747
4748
4749 /*********************************************************************
4750  *
4751  *  Verify that the hardware indicated that the checksum is valid.
4752  *  Inform the stack about the status of the checksum so that the
4753  *  stack doesn't spend time verifying it.
4754  *
4755  *********************************************************************/
4756 static void
4757 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
4758 {
4759         u16     status = (u16) staterr;
4760         u8      errors = (u8) (staterr >> 24);
4761         bool    sctp = FALSE;
4762
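        /*
        ** The descriptor packs the status bits into the low 16 bits of
        ** staterr and the error bits into bits 31:24.  SCTP packets that
        ** are not matched by an ETQF filter are flagged so they do not
        ** get the pseudo-header csum_data below.
        */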
4763         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4764             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
4765                 sctp = TRUE;
4766
4767         if (status & IXGBE_RXD_STAT_IPCS) {
4768                 if (!(errors & IXGBE_RXD_ERR_IPE)) {
4769                         /* IP Checksum Good */
4770                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4771                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4772
4773                 } else
4774                         mp->m_pkthdr.csum_flags = 0;
4775         }
4776         if (status & IXGBE_RXD_STAT_L4CS) {
4777                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4778 #if 0
4779                 if (sctp)
4780                         type = CSUM_SCTP_VALID;
4781 #endif
4782                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
4783                         mp->m_pkthdr.csum_flags |= type;
4784                         if (!sctp)
4785                                 mp->m_pkthdr.csum_data = htons(0xffff);
4786                 } 
4787         }
4788         return;
4789 }
4790
4791
4792 /*
4793 ** This routine is run via a vlan config EVENT;
4794 ** it enables us to use the HW filter table since
4795 ** we can get the vlan id.  This just creates the
4796 ** entry in the soft version of the VFTA; init will
4797 ** repopulate the real table.
4798 */
4799 static void
4800 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4801 {
4802         struct adapter  *adapter = ifp->if_softc;
4803         u16             index, bit;
4804
4805         if (ifp->if_softc !=  arg)   /* Not our event */
4806                 return;
4807
4808         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4809                 return;
4810
4811         IXGBE_CORE_LOCK(adapter);
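        /*
        ** The soft VFTA mirrors the hardware table: one bit per VLAN ID
        ** in an array of 32-bit words, so the word index is vtag >> 5
        ** and the bit position is vtag & 0x1F.
        */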
4812         index = (vtag >> 5) & 0x7F;
4813         bit = vtag & 0x1F;
4814         adapter->shadow_vfta[index] |= (1 << bit);
4815         ++adapter->num_vlans;
4816         ixgbe_init_locked(adapter);
4817         IXGBE_CORE_UNLOCK(adapter);
4818 }
4819
4820 /*
4821 ** This routine is run via a vlan
4822 ** unconfig EVENT; it removes our entry
4823 ** from the soft VFTA.
4824 */
4825 static void
4826 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4827 {
4828         struct adapter  *adapter = ifp->if_softc;
4829         u16             index, bit;
4830
4831         if (ifp->if_softc !=  arg)
4832                 return;
4833
4834         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4835                 return;
4836
4837         IXGBE_CORE_LOCK(adapter);
4838         index = (vtag >> 5) & 0x7F;
4839         bit = vtag & 0x1F;
4840         adapter->shadow_vfta[index] &= ~(1 << bit);
4841         --adapter->num_vlans;
4842         /* Re-init to load the changes */
4843         ixgbe_init_locked(adapter);
4844         IXGBE_CORE_UNLOCK(adapter);
4845 }
4846
4847 static void
4848 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
4849 {
4850         struct ifnet    *ifp = adapter->ifp;
4851         struct ixgbe_hw *hw = &adapter->hw;
4852         struct rx_ring  *rxr;
4853         u32             ctrl;
4854
4855         /*
4856         ** We get here through init_locked, meaning
4857         ** a soft reset; this has already cleared
4858         ** the VFTA and other state, so if no vlans
4859         ** have been registered there is nothing to do.
4860         */
4861         if (adapter->num_vlans == 0)
4862                 return;
4863
4864         /*
4865         ** A soft reset zeros out the VFTA, so
4866         ** we need to repopulate it now.
4867         */
4868         for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
4869                 if (adapter->shadow_vfta[i] != 0)
4870                         IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
4871                             adapter->shadow_vfta[i]);
4872
4873         ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4874         /* Enable the Filter Table if enabled */
4875         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4876                 ctrl &= ~IXGBE_VLNCTRL_CFIEN;
4877                 ctrl |= IXGBE_VLNCTRL_VFE;
4878         }
4879         if (hw->mac.type == ixgbe_mac_82598EB)
4880                 ctrl |= IXGBE_VLNCTRL_VME;
4881         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
4882
4883         /* Setup the queues for vlans */
4884         for (int i = 0; i < adapter->num_queues; i++) {
4885                 rxr = &adapter->rx_rings[i];
4886                 /* On 82599 the VLAN enable is per-queue in RXDCTL */
4887                 if (hw->mac.type != ixgbe_mac_82598EB) {
4888                         ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
4889                         ctrl |= IXGBE_RXDCTL_VME;
4890                         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
4891                 }
4892                 rxr->vtag_strip = TRUE;
4893         }
4894 }
4895
4896 static void
4897 ixgbe_enable_intr(struct adapter *adapter)
4898 {
4899         struct ixgbe_hw *hw = &adapter->hw;
4900         struct ix_queue *que = adapter->queues;
4901         u32 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
4902
4903
4904         /* Enable Fan Failure detection */
4905         if (hw->device_id == IXGBE_DEV_ID_82598AT)
4906                 mask |= IXGBE_EIMS_GPI_SDP1;
4907         else {
4908                 mask |= IXGBE_EIMS_ECC;
4909                 mask |= IXGBE_EIMS_GPI_SDP0;
4910                 mask |= IXGBE_EIMS_GPI_SDP1;
4911                 mask |= IXGBE_EIMS_GPI_SDP2;
4912 #ifdef IXGBE_FDIR
4913                 mask |= IXGBE_EIMS_FLOW_DIR;
4914 #endif
4915         }
4916
4917         IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
4918
4919         /* With RSS we use auto clear */
4920         if (adapter->msix_mem) {
4921                 mask = IXGBE_EIMS_ENABLE_MASK;
4922                 /* Don't autoclear Link */
4923                 mask &= ~IXGBE_EIMS_OTHER;
4924                 mask &= ~IXGBE_EIMS_LSC;
4925                 IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
4926         }
4927
4928         /*
4929         ** Now enable all queues; this is done separately to
4930         ** allow for handling the extended (beyond 32) MSIX
4931         ** vectors that can be used by 82599
4932         */
4933         for (int i = 0; i < adapter->num_queues; i++, que++)
4934                 ixgbe_enable_queue(adapter, que->msix);
4935
4936         IXGBE_WRITE_FLUSH(hw);
4937
4938         return;
4939 }
4940
4941 static void
4942 ixgbe_disable_intr(struct adapter *adapter)
4943 {
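        /*
        ** Clear the auto-clear mask first, then mask all causes; on MACs
        ** newer than the 82598 the causes beyond the first 32 are masked
        ** through the extended EIMC_EX registers.
        */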
4944         if (adapter->msix_mem)
4945                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
4946         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
4947                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
4948         } else {
4949                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
4950                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
4951                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
4952         }
4953         IXGBE_WRITE_FLUSH(&adapter->hw);
4954         return;
4955 }
4956
4957 u16
4958 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
4959 {
4960         u16 value;
4961
4962         value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
4963             reg, 2);
4964
4965         return (value);
4966 }
4967
4968 void
4969 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
4970 {
4971         pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
4972             reg, value, 2);
4973
4974         return;
4975 }
4976
4977 /*
4978 ** Setup the correct IVAR register for a particular MSIX interrupt
4979 **   (yes this is all very magic and confusing :)
4980 **  - entry is the register array entry
4981 **  - vector is the MSIX vector for this queue
4982 **  - type is RX/TX/MISC
4983 */
4984 static void
4985 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
4986 {
4987         struct ixgbe_hw *hw = &adapter->hw;
4988         u32 ivar, index;
4989
4990         vector |= IXGBE_IVAR_ALLOC_VAL;
4991
4992         switch (hw->mac.type) {
4993
4994         case ixgbe_mac_82598EB:
4995                 if (type == -1)
4996                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4997                 else
4998                         entry += (type * 64);
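                /*
                ** Each 82598 IVAR register holds four 8-bit entries:
                ** (entry >> 2) selects the register and (entry & 0x3)
                ** selects the byte lane within it.
                */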
4999                 index = (entry >> 2) & 0x1F;
5000                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
5001                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
5002                 ivar |= (vector << (8 * (entry & 0x3)));
5003                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
5004                 break;
5005
5006         case ixgbe_mac_82599EB:
5007         case ixgbe_mac_X540:
5008                 if (type == -1) { /* MISC IVAR */
5009                         index = (entry & 1) * 8;
5010                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
5011                         ivar &= ~(0xFF << index);
5012                         ivar |= (vector << index);
5013                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
5014                 } else {        /* RX/TX IVARS */
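                        /*
                        ** Two queue entries share each IVAR register here:
                        ** the low bit of 'entry' selects the upper or lower
                        ** 16 bits, and 'type' (0 = RX, 1 = TX) selects the
                        ** byte within that half.
                        */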
5015                         index = (16 * (entry & 1)) + (8 * type);
5016                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
5017                         ivar &= ~(0xFF << index);
5018                         ivar |= (vector << index);
5019                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
5020                 }
5021                 break;
5022         default:
5023                 break;
5024         }
5025 }
5026
5027 static void
5028 ixgbe_configure_ivars(struct adapter *adapter)
5029 {
5030         struct  ix_queue *que = adapter->queues;
5031         u32 newitr;
5032
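        /*
        ** The EITR interval lives in bits 11:3, so 4000000 / rate turns
        ** the requested interrupts/sec into that encoding; masking with
        ** 0x0FF8 keeps only the valid interval bits, and 0 requests no
        ** moderation.
        */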
5033         if (ixgbe_max_interrupt_rate > 0)
5034                 newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
5035         else
5036                 newitr = 0;
5037
5038         for (int i = 0; i < adapter->num_queues; i++, que++) {
5039                 /* First the RX queue entry */
5040                 ixgbe_set_ivar(adapter, i, que->msix, 0);
5041                 /* ... and the TX */
5042                 ixgbe_set_ivar(adapter, i, que->msix, 1);
5043                 /* Set an Initial EITR value */
5044                 IXGBE_WRITE_REG(&adapter->hw,
5045                     IXGBE_EITR(que->msix), newitr);
5046         }
5047
5048         /* For the Link interrupt */
5049         ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
5050 }
5051
5052 /*
5053 ** ixgbe_sfp_probe - called from the local timer to
5054 ** determine if a port has had optics inserted.
5055 */  
5056 static bool ixgbe_sfp_probe(struct adapter *adapter)
5057 {
5058         struct ixgbe_hw *hw = &adapter->hw;
5059         device_t        dev = adapter->dev;
5060         bool            result = FALSE;
5061
5062         if ((hw->phy.type == ixgbe_phy_nl) &&
5063             (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
5064                 s32 ret = hw->phy.ops.identify_sfp(hw);
5065                 if (ret)
5066                         goto out;
5067                 ret = hw->phy.ops.reset(hw);
5068                 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5069                         device_printf(dev,"Unsupported SFP+ module detected!");
5070                         kprintf(" Reload driver with supported module.\n");
5071                         adapter->sfp_probe = FALSE;
5072                         goto out;
5073                 } else
5074                         device_printf(dev,"SFP+ module detected!\n");
5075                 /* We now have supported optics */
5076                 adapter->sfp_probe = FALSE;
5077                 /* Set the optics type so system reports correctly */
5078                 ixgbe_setup_optics(adapter);
5079                 result = TRUE;
5080         }
5081 out:
5082         return (result);
5083 }
5084
5085 /*
5086 ** Tasklet handler for MSIX Link interrupts
5087 **  - done outside the interrupt context since it might sleep
5088 */
5089 static void
5090 ixgbe_handle_link(void *context, int pending)
5091 {
5092         struct adapter  *adapter = context;
5093
5094         ixgbe_check_link(&adapter->hw,
5095             &adapter->link_speed, &adapter->link_up, 0);
5096         ixgbe_update_link_status(adapter);
5097 }
5098
5099 /*
5100 ** Tasklet for handling SFP module interrupts
5101 */
5102 static void
5103 ixgbe_handle_mod(void *context, int pending)
5104 {
5105         struct adapter  *adapter = context;
5106         struct ixgbe_hw *hw = &adapter->hw;
5107         device_t        dev = adapter->dev;
5108         u32 err;
5109
5110         err = hw->phy.ops.identify_sfp(hw);
5111         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5112                 device_printf(dev,
5113                     "Unsupported SFP+ module type was detected.\n");
5114                 return;
5115         }
5116         err = hw->mac.ops.setup_sfp(hw);
5117         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5118                 device_printf(dev,
5119                     "Setup failure - unsupported SFP+ module type.\n");
5120                 return;
5121         }
5122         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
5123         return;
5124 }
5125
5126
5127 /*
5128 ** Tasklet for handling MSF (multispeed fiber) interrupts
5129 */
5130 static void
5131 ixgbe_handle_msf(void *context, int pending)
5132 {
5133         struct adapter  *adapter = context;
5134         struct ixgbe_hw *hw = &adapter->hw;
5135         u32 autoneg;
5136         bool negotiate;
5137
5138         autoneg = hw->phy.autoneg_advertised;
5139         if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
5140                 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
5141         if (hw->mac.ops.setup_link)
5142                 hw->mac.ops.setup_link(hw, autoneg, negotiate, TRUE);
5143         return;
5144 }
5145
5146 #ifdef IXGBE_FDIR
5147 /*
5148 ** Tasklet for reinitializing the Flow Director filter table
5149 */
5150 static void
5151 ixgbe_reinit_fdir(void *context, int pending)
5152 {
5153         struct adapter  *adapter = context;
5154         struct ifnet   *ifp = adapter->ifp;
5155
5156         if (adapter->fdir_reinit != 1) /* Shouldn't happen */
5157                 return;
5158         ixgbe_reinit_fdir_tables_82599(&adapter->hw);
5159         adapter->fdir_reinit = 0;
5160         /* re-enable flow director interrupts */
5161         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
5162         /* Restart the interface */
5163         ifp->if_flags |= IFF_RUNNING;
5164         return;
5165 }
5166 #endif
5167
5168 /**********************************************************************
5169  *
5170  *  Update the board statistics counters.
5171  *
5172  **********************************************************************/
5173 static void
5174 ixgbe_update_stats_counters(struct adapter *adapter)
5175 {
5176         struct ifnet   *ifp = adapter->ifp;
5177         struct ixgbe_hw *hw = &adapter->hw;
5178         u32  missed_rx = 0, bprc, lxon, lxoff, total;
5179         u64  total_missed_rx = 0;
5180
5181         adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
5182         adapter->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
5183         adapter->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
5184         adapter->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
5185
5186         for (int i = 0; i < 8; i++) {
5187                 u32 mp;
5188                 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
5189                 /* missed_rx tallies misses for the gprc workaround */
5190                 missed_rx += mp;
5191                 /* global total per queue */
5192                 adapter->stats.mpc[i] += mp;
5193                 /* Running comprehensive total for stats display */
5194                 total_missed_rx += adapter->stats.mpc[i];
5195                 if (hw->mac.type == ixgbe_mac_82598EB)
5196                         adapter->stats.rnbc[i] +=
5197                             IXGBE_READ_REG(hw, IXGBE_RNBC(i));
5198                 adapter->stats.pxontxc[i] +=
5199                     IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
5200                 adapter->stats.pxonrxc[i] +=
5201                     IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5202                 adapter->stats.pxofftxc[i] +=
5203                     IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
5204                 adapter->stats.pxoffrxc[i] +=
5205                     IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5206                 adapter->stats.pxon2offc[i] +=
5207                     IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
5208         }
5209         for (int i = 0; i < 16; i++) {
5210                 adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
5211                 adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
5212                 adapter->stats.qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i));
5213                 adapter->stats.qbrc[i] += 
5214                     ((u64)IXGBE_READ_REG(hw, IXGBE_QBRC(i)) << 32);
5215                 adapter->stats.qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i));
5216                 adapter->stats.qbtc[i] +=
5217                     ((u64)IXGBE_READ_REG(hw, IXGBE_QBTC(i)) << 32);
5218                 adapter->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
5219         }
5220         adapter->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
5221         adapter->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
5222         adapter->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
5223
5224         /* Hardware workaround, gprc counts missed packets */
5225         adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
5226         adapter->stats.gprc -= missed_rx;
5227
5228         if (hw->mac.type != ixgbe_mac_82598EB) {
5229                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
5230                     ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
5231                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
5232                     ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
5233                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
5234                     ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
5235                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5236                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5237         } else {
5238                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5239                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5240                 /* 82598 only has a counter in the high register */
5241                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
5242                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
5243                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
5244         }
5245
5246         /*
5247          * Workaround: mprc hardware is incorrectly counting
5248          * broadcasts, so for now we subtract those.
5249          */
5250         bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
5251         adapter->stats.bprc += bprc;
5252         adapter->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
5253         if (hw->mac.type == ixgbe_mac_82598EB)
5254                 adapter->stats.mprc -= bprc;
5255
5256         adapter->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
5257         adapter->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
5258         adapter->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
5259         adapter->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
5260         adapter->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
5261         adapter->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
5262
5263         lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
5264         adapter->stats.lxontxc += lxon;
5265         lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
5266         adapter->stats.lxofftxc += lxoff;
5267         total = lxon + lxoff;
5268
5269         adapter->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
5270         adapter->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
5271         adapter->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
5272         adapter->stats.gptc -= total;
5273         adapter->stats.mptc -= total;
5274         adapter->stats.ptc64 -= total;
5275         adapter->stats.gotc -= total * ETHER_MIN_LEN;
5276
5277         adapter->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
5278         adapter->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
5279         adapter->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
5280         adapter->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
5281         adapter->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
5282         adapter->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
5283         adapter->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
5284         adapter->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
5285         adapter->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
5286         adapter->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
5287         adapter->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
5288         adapter->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
5289         adapter->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
5290         adapter->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
5291         adapter->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
5292         adapter->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
5293         adapter->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
5294         adapter->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
5295         /* Only read FCOE counters on the newer (non-82598) MACs */
5296         if (hw->mac.type != ixgbe_mac_82598EB) {
5297                 adapter->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
5298                 adapter->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
5299                 adapter->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
5300                 adapter->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
5301                 adapter->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
5302         }
5303
5304         /* Fill out the OS statistics structure */
5305         ifp->if_ipackets = adapter->stats.gprc;
5306         ifp->if_opackets = adapter->stats.gptc;
5307         ifp->if_ibytes = adapter->stats.gorc;
5308         ifp->if_obytes = adapter->stats.gotc;
5309         ifp->if_imcasts = adapter->stats.mprc;
5310         ifp->if_collisions = 0;
5311
5312         /* Rx Errors */
5313         ifp->if_ierrors = total_missed_rx + adapter->stats.crcerrs +
5314                 adapter->stats.rlec;
5315 }
5316
5317 /** ixgbe_sysctl_tdh_handler - Handler function
5318  *  Retrieves the TDH value from the hardware
5319  */
5320 static int 
5321 ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS)
5322 {
5323         int error;
5324
5325         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5326         if (!txr) return 0;
5327
5328         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
5329         error = sysctl_handle_int(oidp, &val, 0, req);
5330         if (error || !req->newptr)
5331                 return error;
5332         return 0;
5333 }
5334
5335 /** ixgbe_sysctl_tdt_handler - Handler function
5336  *  Retrieves the TDT value from the hardware
5337  */
5338 static int 
5339 ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS)
5340 {
5341         int error;
5342
5343         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5344         if (!txr) return 0;
5345
5346         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
5347         error = sysctl_handle_int(oidp, &val, 0, req);
5348         if (error || !req->newptr)
5349                 return error;
5350         return 0;
5351 }
5352
5353 /** ixgbe_sysctl_rdh_handler - Handler function
5354  *  Retrieves the RDH value from the hardware
5355  */
5356 static int 
5357 ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS)
5358 {
5359         int error;
5360
5361         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5362         if (!rxr) return 0;
5363
5364         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
5365         error = sysctl_handle_int(oidp, &val, 0, req);
5366         if (error || !req->newptr)
5367                 return error;
5368         return 0;
5369 }
5370
5371 /** ixgbe_sysctl_rdt_handler - Handler function
5372  *  Retrieves the RDT value from the hardware
5373  */
5374 static int 
5375 ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS)
5376 {
5377         int error;
5378
5379         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5380         if (!rxr) return 0;
5381
5382         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
5383         error = sysctl_handle_int(oidp, &val, 0, req);
5384         if (error || !req->newptr)
5385                 return error;
5386         return 0;
5387 }
5388
5389 static int
5390 ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5391 {
5392         int error;
5393         struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1);
5394         unsigned int reg, usec, rate;
5395
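        /*
        ** Read the current EITR interval and convert it to an approximate
        ** interrupts/sec figure for the sysctl read; a rate written by the
        ** user is clamped and converted back into the interval field
        ** (bits 11:3).
        */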
5396         reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
5397         usec = ((reg & 0x0FF8) >> 3);
5398         if (usec > 0)
5399                 rate = 500000 / usec;
5400         else
5401                 rate = 0;
5402         error = sysctl_handle_int(oidp, &rate, 0, req);
5403         if (error || !req->newptr)
5404                 return error;
5405         reg &= ~0xfff; /* default, no limitation */
5406         ixgbe_max_interrupt_rate = 0;
5407         if (rate > 0 && rate < 500000) {
5408                 if (rate < 1000)
5409                         rate = 1000;
5410                 ixgbe_max_interrupt_rate = rate;
5411                 reg |= ((4000000/rate) & 0xff8 );
5412         }
5413         IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
5414         return 0;
5415 }
5416
5417 /*
5418  * Add sysctl variables, one per statistic, to the system.
5419  */
5420 static void
5421 ixgbe_add_hw_stats(struct adapter *adapter)
5422 {
5423         struct tx_ring *txr = adapter->tx_rings;
5424         struct rx_ring *rxr = adapter->rx_rings;
5425
5426         struct sysctl_ctx_list *ctx = &adapter->sysctl_ctx;
5427         struct sysctl_oid *tree = adapter->sysctl_tree;
5428         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5429         struct ixgbe_hw_stats *stats = &adapter->stats;
5430
5431         struct sysctl_oid *stat_node, *queue_node;
5432         struct sysctl_oid_list *stat_list, *queue_list;
5433
5434 #define QUEUE_NAME_LEN 32
5435         char namebuf[QUEUE_NAME_LEN];
5436
5437         /* Driver Statistics */
5438         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5439                         CTLFLAG_RD, &adapter->dropped_pkts,
5440                         "Driver dropped packets");
5441         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed",
5442                         CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5443                         "m_defrag() failed");
5444         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_dma_setup",
5445                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5446                         "Driver tx dma failure in xmit");
5447         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
5448                         CTLFLAG_RD, &adapter->watchdog_events,
5449                         "Watchdog timeouts");
5450         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tso_tx",
5451                         CTLFLAG_RD, &adapter->tso_tx,
5452                         "TSO");
5453         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5454                         CTLFLAG_RD, &adapter->link_irq,
5455                         "Link MSIX IRQ Handled");
5456
5457         for (int i = 0; i < adapter->num_queues; i++, txr++) {
5458         ksnprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5459                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5460                                             CTLFLAG_RD, NULL, "Queue Name");
5461                 queue_list = SYSCTL_CHILDREN(queue_node);
5462
5463                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5464                                 CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
5465                                 sizeof(&adapter->queues[i]),
5466                                 ixgbe_sysctl_interrupt_rate_handler, "IU",
5467                                 "Interrupt Rate");
5468                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
5469                                 CTLFLAG_RD, &(adapter->queues[i].irqs), 0,
5470                                 "irqs on this queue");
5471                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5472                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5473                                 ixgbe_sysctl_tdh_handler, "IU",
5474                                 "Transmit Descriptor Head");
5475                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5476                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5477                                 ixgbe_sysctl_tdt_handler, "IU",
5478                                 "Transmit Descriptor Tail");
5479                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5480                                 CTLFLAG_RD, &txr->no_desc_avail, 0,
5481                                 "Queue No Descriptor Available");
5482                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5483                                 CTLFLAG_RD, &txr->total_packets, 0,
5484                                 "Queue Packets Transmitted");
5485         }
5486
5487         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
5488         ksnprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5489                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 
5490                                             CTLFLAG_RD, NULL, "Queue Name");
5491                 queue_list = SYSCTL_CHILDREN(queue_node);
5492
5493 #if 0   /* NET_LRO */
5494                 struct lro_ctrl *lro = &rxr->lro;
5495 #endif
5496
5497         ksnprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5498                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 
5499                                             CTLFLAG_RD, NULL, "Queue Name");
5500                 queue_list = SYSCTL_CHILDREN(queue_node);
5501
5502                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5503                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5504                                 ixgbe_sysctl_rdh_handler, "IU",
5505                                 "Receive Descriptor Head");
5506                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5507                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5508                                 ixgbe_sysctl_rdt_handler, "IU",
5509                                 "Receive Descriptor Tail");
5510                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5511                                 CTLFLAG_RD, &rxr->rx_packets, 0,
5512                                 "Queue Packets Received");
5513                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5514                                 CTLFLAG_RD, &rxr->rx_bytes, 0,
5515                                 "Queue Bytes Received");
5516 #if 0   /* NET_LRO */
5517                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5518                                 CTLFLAG_RD, &lro->lro_queued, 0,
5519                                 "LRO Queued");
5520                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5521                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5522                                 "LRO Flushed");
5523 #endif
5524         }
5525
5526         /* MAC stats get their own sub node */
5527
5528         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5529                                     CTLFLAG_RD, NULL, "MAC Statistics");
5530         stat_list = SYSCTL_CHILDREN(stat_node);
5531
5532         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5533                         CTLFLAG_RD, &stats->crcerrs, 0,
5534                         "CRC Errors");
5535         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
5536                         CTLFLAG_RD, &stats->illerrc, 0,
5537                         "Illegal Byte Errors");
5538         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
5539                         CTLFLAG_RD, &stats->errbc, 0,
5540                         "Byte Errors");
5541         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
5542                         CTLFLAG_RD, &stats->mspdc, 0,
5543                         "MAC Short Packets Discarded");
5544         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
5545                         CTLFLAG_RD, &stats->mlfc, 0,
5546                         "MAC Local Faults");
5547         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
5548                         CTLFLAG_RD, &stats->mrfc, 0,
5549                         "MAC Remote Faults");
5550         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
5551                         CTLFLAG_RD, &stats->rlec, 0,
5552                         "Receive Length Errors");
5553         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "link_xon_txd",
5554                         CTLFLAG_RD, &stats->lxontxc, 0,
5555                         "Link XON Transmitted");
5556         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "link_xon_rcvd",
5557                         CTLFLAG_RD, &stats->lxonrxc, 0,
5558                         "Link XON Received");
5559         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "link_xoff_txd",
5560                         CTLFLAG_RD, &stats->lxofftxc, 0,
5561                         "Link XOFF Transmitted");
5562         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "link_xoff_rcvd",
5563                         CTLFLAG_RD, &stats->lxoffrxc, 0,
5564                         "Link XOFF Received");
5565
5566         /* Packet Reception Stats */
5567         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
5568                         CTLFLAG_RD, &stats->tor, 0,
5569                         "Total Octets Received"); 
5570         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
5571                         CTLFLAG_RD, &stats->gorc, 0,
5572                         "Good Octets Received"); 
5573         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
5574                         CTLFLAG_RD, &stats->tpr, 0,
5575                         "Total Packets Received");
5576         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
5577                         CTLFLAG_RD, &stats->gprc, 0,
5578                         "Good Packets Received");
5579         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
5580                         CTLFLAG_RD, &stats->mprc, 0,
5581                         "Multicast Packets Received");
5582         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
5583                         CTLFLAG_RD, &stats->bprc, 0,
5584                         "Broadcast Packets Received");
5585         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5586                         CTLFLAG_RD, &stats->prc64, 0,
5587                         "64 byte frames received");
5588         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5589                         CTLFLAG_RD, &stats->prc127, 0,
5590                         "65-127 byte frames received");
5591         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5592                         CTLFLAG_RD, &stats->prc255, 0,
5593                         "128-255 byte frames received");
5594         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5595                         CTLFLAG_RD, &stats->prc511, 0,
5596                         "256-511 byte frames received");
5597         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5598                         CTLFLAG_RD, &stats->prc1023, 0,
5599                         "512-1023 byte frames received");
5600         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5601                         CTLFLAG_RD, &stats->prc1522, 0,
5602                         "1024-1522 byte frames received");
5603         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
5604                         CTLFLAG_RD, &stats->ruc, 0,
5605                         "Receive Undersized");
5606         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5607                         CTLFLAG_RD, &stats->rfc, 0,
5608                         "Fragmented Packets Received");
5609         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
5610                         CTLFLAG_RD, &stats->roc, 0,
5611                         "Oversized Packets Received");
5612         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
5613                         CTLFLAG_RD, &stats->rjc, 0,
5614                         "Received Jabber");
5615         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
5616                         CTLFLAG_RD, &stats->mngprc, 0,
5617                         "Management Packets Received");
5618         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
5619                         CTLFLAG_RD, &stats->mngpdc, 0,
5620                         "Management Packets Dropped");
5621         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
5622                         CTLFLAG_RD, &stats->xec, 0,
5623                         "Checksum Errors");
5624
5625         /* Packet Transmission Stats */
5626         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5627                         CTLFLAG_RD, &stats->gotc, 0,
5628                         "Good Octets Transmitted"); 
5629         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5630                         CTLFLAG_RD, &stats->tpt, 0,
5631                         "Total Packets Transmitted");
5632         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5633                         CTLFLAG_RD, &stats->gptc, 0,
5634                         "Good Packets Transmitted");
5635         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5636                         CTLFLAG_RD, &stats->bptc, 0,
5637                         "Broadcast Packets Transmitted");
5638         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5639                         CTLFLAG_RD, &stats->mptc, 0,
5640                         "Multicast Packets Transmitted");
5641         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
5642                         CTLFLAG_RD, &stats->mngptc, 0,
5643                         "Management Packets Transmitted");
5644         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5645                         CTLFLAG_RD, &stats->ptc64, 0,
5646                         "64 byte frames transmitted");
5647         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5648                         CTLFLAG_RD, &stats->ptc127, 0,
5649                         "65-127 byte frames transmitted");
5650         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5651                         CTLFLAG_RD, &stats->ptc255, 0,
5652                         "128-255 byte frames transmitted");
5653         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5654                         CTLFLAG_RD, &stats->ptc511, 0,
5655                         "256-511 byte frames transmitted");
5656         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5657                         CTLFLAG_RD, &stats->ptc1023, 0,
5658                         "512-1023 byte frames transmitted");
5659         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5660                         CTLFLAG_RD, &stats->ptc1522, 0,
5661                         "1024-1522 byte frames transmitted");
5662
5663         /* FC Stats */
5664         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_crc",
5665                 CTLFLAG_RD, &stats->fccrc, 0,
5666                 "FC CRC Errors");
5667         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_last",
5668                 CTLFLAG_RD, &stats->fclast, 0,
5669                 "FC Last Error");
5670         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_drpd",
5671                 CTLFLAG_RD, &stats->fcoerpdc, 0,
5672                 "FCoE Packets Dropped");
5673         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_pkts_rcvd",
5674                 CTLFLAG_RD, &stats->fcoeprc, 0,
5675                 "FCoE Packets Received");
5676         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_pkts_txd",
5677                 CTLFLAG_RD, &stats->fcoeptc, 0,
5678                 "FCoE Packets Transmitted");
5679         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_dword_rcvd",
5680                 CTLFLAG_RD, &stats->fcoedwrc, 0,
5681                 "FCoE DWords Received");
5682         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_dword_txd",
5683                 CTLFLAG_RD, &stats->fcoedwtc, 0,
5684                 "FCoE DWords Transmitted");
5685 }
5686
5687 /*
5688 ** Set flow control using sysctl:
5689 ** Flow control values:
5690 **      0 - off
5691 **      1 - rx pause
5692 **      2 - tx pause
5693 **      3 - full
5694 */
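/*
** A usage sketch (the sysctl node name and unit below are assumptions
** for illustration; the OID itself is registered elsewhere at attach
** time):
**
**      sysctl dev.ix.0.fc=3    # request full (rx + tx) pause
**      sysctl dev.ix.0.fc=0    # disable flow control
**
** The handler only reprograms the MAC when the value actually changes.
*/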
5695 static int
5696 ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS)
5697 {
5698         int error, last;
5699         struct adapter *adapter = (struct adapter *) arg1;
5700
5701         last = adapter->fc;
5702         error = sysctl_handle_int(oidp, &adapter->fc, 0, req);
5703         if ((error) || (req->newptr == NULL))
5704                 return (error);
5705
5706         /* Don't bother if it's not changed */
5707         if (adapter->fc == last)
5708                 return (0);
5709
5710         switch (adapter->fc) {
5711                 case ixgbe_fc_rx_pause:
5712                 case ixgbe_fc_tx_pause:
5713                 case ixgbe_fc_full:
5714                         adapter->hw.fc.requested_mode = adapter->fc;
5715                         break;
5716                 case ixgbe_fc_none:
5717                 default:
5718                         adapter->hw.fc.requested_mode = ixgbe_fc_none;
5719         }
5720         /* Don't autoneg if forcing a value */
5721         adapter->hw.fc.disable_fc_autoneg = TRUE;
5722         ixgbe_fc_enable(&adapter->hw);
5723         return error;
5724 }
5725
5726 static void
5727 ixgbe_add_rx_process_limit(struct adapter *adapter, const char *name,
5728         const char *description, int *limit, int value)
5729 {
5730         *limit = value;
5731         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
5732             SYSCTL_CHILDREN(adapter->sysctl_tree),
5733             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5734 }
5735
5736 /*
5737 ** Control link advertise speed:
5738 **      1 - advertise only 1G
5739 **      2 - advertise only 100Mb (X540 only)
5740 **      3 - advertise normal (1G and 10G)
5741 */
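/*
** A usage sketch (the OID name "advertise_speed" and the unit number
** are assumptions for illustration):
**
**      sysctl dev.ix.0.advertise_speed=1       # 1G only
**      sysctl dev.ix.0.advertise_speed=3       # 1G + 10G (normal)
**
** The request is honored only on copper or multispeed-fiber PHYs, and
** the 100Mb setting only on X540.
*/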
5742 static int
5743 ixgbe_set_advertise(SYSCTL_HANDLER_ARGS)
5744 {
5745         int                     error = 0;
5746         struct adapter          *adapter;
5747         device_t                dev;
5748         struct ixgbe_hw         *hw;
5749         ixgbe_link_speed        speed, last;
5750
5751         adapter = (struct adapter *) arg1;
5752         dev = adapter->dev;
5753         hw = &adapter->hw;
5754         last = adapter->advertise;
5755
5756         error = sysctl_handle_int(oidp, &adapter->advertise, 0, req);
5757         if ((error) || (adapter->advertise == -1))
5758                 return (error);
5759
5760         if (adapter->advertise == last) /* no change */
5761                 return (0);
5762
5763         if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
5764             (hw->phy.multispeed_fiber)))
5765                 return (error);
5766
5767         if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
5768                 device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
5769                 return (error);
5770         }
5771
5772         if (adapter->advertise == 1)
5773                 speed = IXGBE_LINK_SPEED_1GB_FULL;
5774         else if (adapter->advertise == 2)
5775                 speed = IXGBE_LINK_SPEED_100_FULL;
5776         else if (adapter->advertise == 3)
5777                 speed = IXGBE_LINK_SPEED_1GB_FULL |
5778                         IXGBE_LINK_SPEED_10GB_FULL;
5779         else /* bogus value */
5780                 return (error);
5781
5782         hw->mac.autotry_restart = TRUE;
5783         hw->mac.ops.setup_link(hw, speed, TRUE, TRUE);
5784
5785         return (error);
5786 }
5787
5788 /*
5789 ** Thermal Shutdown Trigger
5790 **   - cause a Thermal Overtemp IRQ
5791 */
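/*
** Writing a non-zero value sets the TS bit in EICS, so the hardware
** raises the same interrupt it would on a real overtemp event; this is
** a test hook only, and only X540 MACs are affected (others return
** immediately).
*/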
5792 static int
5793 ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS)
5794 {
5795         int             error, fire = 0;
5796         struct adapter  *adapter = (struct adapter *) arg1;
5797         struct ixgbe_hw *hw = &adapter->hw;
5798
5799
5800         if (hw->mac.type != ixgbe_mac_X540)
5801                 return (0);
5802
5803         error = sysctl_handle_int(oidp, &fire, 0, req);
5804         if ((error) || (req->newptr == NULL))
5805                 return (error);
5806
5807         if (fire) {
5808                 u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
5809                 reg |= IXGBE_EICR_TS;
5810                 IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
5811         }
5812
5813         return (0);
5814 }
5815
5816 /* rearrange mbuf chain to get contiguous bytes */
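/*
** The TSO setup path expects the Ethernet, IP and TCP headers to sit in
** the first mbuf, so pull them up if the chain starts with a shorter
** segment.  On failure m_pullup() has already freed the chain, leaving
** the caller with *mp == NULL and ENOBUFS.
*/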
5817 static int
5818 ixgbe_tso_pullup(struct tx_ring *txr, struct mbuf **mp)
5819 {
5820         int hoff, iphlen, thoff;
5821         struct mbuf *m;
5822
5823         m = *mp;
5824         KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
5825
5826         iphlen = m->m_pkthdr.csum_iphlen;
5827         thoff = m->m_pkthdr.csum_thlen;
5828         hoff = m->m_pkthdr.csum_lhlen;
5829
5830         KASSERT(iphlen > 0, ("invalid ip hlen"));
5831         KASSERT(thoff > 0, ("invalid tcp hlen"));
5832         KASSERT(hoff > 0, ("invalid ether hlen"));
5833
5834         if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
5835                 m = m_pullup(m, hoff + iphlen + thoff);
5836                 if (m == NULL) {
5837                         *mp = NULL;
5838                         return ENOBUFS;
5839                 }
5840                 *mp = m;
5841         }
5842
5843         return 0;
5844 }