ixgbe: add tso_pullup function
[dragonfly.git] / sys / dev / netif / ixgbe / ixgbe.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2012, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD: src/sys/dev/ixgbe/ixgbe.c,v 1.70 2012/07/05 20:51:44 jfv Exp $*/
34
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37
38 #include "ixgbe.h"
39
40 /*********************************************************************
41  *  Set this to one to display debug statistics
42  *********************************************************************/
43 int             ixgbe_display_debug_stats = 0;
44
45 /*********************************************************************
46  *  Driver version
47  *********************************************************************/
48 char ixgbe_driver_version[] = "2.4.8";
49
50 /*********************************************************************
51  *  PCI Device ID Table
52  *
53  *  Used by probe to select devices to load on
54  *  Last field stores an index into ixgbe_strings
55  *  Last entry must be all 0s
56  *
57  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
58  *********************************************************************/
59
60 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
61 {
62         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
63         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
64         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
65         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
66         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
67         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
68         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
69         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
70         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
71         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
72         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
73         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
74         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
75         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
76         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
77         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
78         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
79         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
80         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
81         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
82         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
83         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
84         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, 0, 0, 0},
85         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
86         /* required last entry */
87         {0, 0, 0, 0, 0}
88 };
89
90 /*********************************************************************
91  *  Table of branding strings
92  *********************************************************************/
93
94 static char    *ixgbe_strings[] = {
95         "Intel(R) PRO/10GbE PCI-Express Network Driver"
96 };
97
98 /*********************************************************************
99  *  Function prototypes
100  *********************************************************************/
101 static int      ixgbe_probe(device_t);
102 static int      ixgbe_attach(device_t);
103 static int      ixgbe_detach(device_t);
104 static int      ixgbe_shutdown(device_t);
105 static void     ixgbe_start(struct ifnet *);
106 static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
107 #if 0 /* __FreeBSD_version >= 800000 */
108 static int      ixgbe_mq_start(struct ifnet *, struct mbuf *);
109 static int      ixgbe_mq_start_locked(struct ifnet *,
110                     struct tx_ring *, struct mbuf *);
111 static void     ixgbe_qflush(struct ifnet *);
112 #endif
113 static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
114 static void     ixgbe_init(void *);
115 static void     ixgbe_init_locked(struct adapter *);
116 static void     ixgbe_stop(void *);
117 static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
118 static int      ixgbe_media_change(struct ifnet *);
119 static void     ixgbe_identify_hardware(struct adapter *);
120 static int      ixgbe_allocate_pci_resources(struct adapter *);
121 static int      ixgbe_allocate_msix(struct adapter *);
122 static int      ixgbe_allocate_legacy(struct adapter *);
123 static int      ixgbe_allocate_queues(struct adapter *);
124 #if 0   /* HAVE_MSIX */
125 static int      ixgbe_setup_msix(struct adapter *);
126 #endif
127 static void     ixgbe_free_pci_resources(struct adapter *);
128 static void     ixgbe_local_timer(void *);
129 static int      ixgbe_setup_interface(device_t, struct adapter *);
130 static void     ixgbe_config_link(struct adapter *);
131
132 static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
133 static int      ixgbe_setup_transmit_structures(struct adapter *);
134 static void     ixgbe_setup_transmit_ring(struct tx_ring *);
135 static void     ixgbe_initialize_transmit_units(struct adapter *);
136 static void     ixgbe_free_transmit_structures(struct adapter *);
137 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
138
139 static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
140 static int      ixgbe_setup_receive_structures(struct adapter *);
141 static int      ixgbe_setup_receive_ring(struct rx_ring *);
142 static void     ixgbe_initialize_receive_units(struct adapter *);
143 static void     ixgbe_free_receive_structures(struct adapter *);
144 static void     ixgbe_free_receive_buffers(struct rx_ring *);
145 #if 0   /* NET_LRO */
146 static void     ixgbe_setup_hw_rsc(struct rx_ring *);
147 #endif
148
149 static void     ixgbe_enable_intr(struct adapter *);
150 static void     ixgbe_disable_intr(struct adapter *);
151 static void     ixgbe_update_stats_counters(struct adapter *);
152 static bool     ixgbe_txeof(struct tx_ring *);
153 static bool     ixgbe_rxeof(struct ix_queue *, int);
154 static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
155 static void     ixgbe_set_promisc(struct adapter *);
156 static void     ixgbe_set_multi(struct adapter *);
157 static void     ixgbe_update_link_status(struct adapter *);
158 static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
159 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
160 static int      ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
161 static int      ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
162 static int      ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
163 static int      ixgbe_dma_malloc(struct adapter *, bus_size_t,
164                     struct ixgbe_dma_alloc *, int);
165 static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
166 static void     ixgbe_add_rx_process_limit(struct adapter *, const char *,
167                     const char *, int *, int);
168 static bool     ixgbe_tx_ctx_setup(struct tx_ring *, struct mbuf *);
169 static bool     ixgbe_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
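/*
 * ixgbe_tso_pullup() is the helper added by this commit; judging from its
 * use alongside ixgbe_tso_setup(), it ensures a TSO frame's protocol
 * headers are contiguous in the leading mbuf before the TX context
 * descriptor is built.
 */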
170 static int      ixgbe_tso_pullup(struct tx_ring *, struct mbuf **);
171 static void     ixgbe_set_ivar(struct adapter *, u8, u8, s8);
172 static void     ixgbe_configure_ivars(struct adapter *);
173 static u8 *     ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
174
175 static void     ixgbe_setup_vlan_hw_support(struct adapter *);
176 static void     ixgbe_register_vlan(void *, struct ifnet *, u16);
177 static void     ixgbe_unregister_vlan(void *, struct ifnet *, u16);
178
179 static void     ixgbe_add_hw_stats(struct adapter *adapter);
180
181 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
182 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
183                     struct mbuf *, u32);
184
185 /* Support for pluggable optic modules */
186 static bool     ixgbe_sfp_probe(struct adapter *);
187 static void     ixgbe_setup_optics(struct adapter *);
188
189 /* Legacy (single vector) interrupt handler */
190 static void     ixgbe_legacy_irq(void *);
191
192 /* The MSI/X Interrupt handlers */
193 static void     ixgbe_msix_que(void *);
194 static void     ixgbe_msix_link(void *);
195
196 /* Deferred interrupt tasklets */
197 static void     ixgbe_handle_que(void *, int);
198 static void     ixgbe_handle_link(void *, int);
199 static void     ixgbe_handle_msf(void *, int);
200 static void     ixgbe_handle_mod(void *, int);
201
202 #ifdef IXGBE_FDIR
203 static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
204 static void     ixgbe_reinit_fdir(void *, int);
205 #endif
206
207 /*********************************************************************
208  *  FreeBSD Device Interface Entry Points
209  *********************************************************************/
210
211 static device_method_t ixgbe_methods[] = {
212         /* Device interface */
213         DEVMETHOD(device_probe, ixgbe_probe),
214         DEVMETHOD(device_attach, ixgbe_attach),
215         DEVMETHOD(device_detach, ixgbe_detach),
216         DEVMETHOD(device_shutdown, ixgbe_shutdown),
217         {0, 0}
218 };
219
220 static driver_t ixgbe_driver = {
221         "ix", ixgbe_methods, sizeof(struct adapter),
222 };
223
224 devclass_t ixgbe_devclass;
225 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
226
227 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
228 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
229
230 /*
231 ** TUNEABLE PARAMETERS:
232 */
233
234 /*
235 ** AIM: Adaptive Interrupt Moderation
236 ** which means that the interrupt rate
237 ** is varied over time based on the
238 ** traffic for that interrupt vector
239 */
240 static int ixgbe_enable_aim = TRUE;
241 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
242
243 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
244 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
245
246 /* How many packets rxeof tries to clean at a time */
247 static int ixgbe_rx_process_limit = 128;
248 TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
249
250 /*
251 ** Smart speed setting, default is on.
252 ** This only works as a compile-time option
253 ** right now because it is applied during attach; set
254 ** this to 'ixgbe_smart_speed_off' to
255 ** disable.
256 */
257 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
258
259 static int ixgbe_msi_enable = 1;
260 TUNABLE_INT("hw.ixgbe.msi.enable", &ixgbe_msi_enable);
261
262 /*
263  * MSIX should be the default for best performance,
264  * but this allows it to be forced off for testing.
265  */
266 static int ixgbe_enable_msix = 1;
267 TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
268
269 /*
270  * Header split: this causes the hardware to DMA
271  * the header into a separate mbuf from the payload.
272  * It can be a performance win in some workloads, but
273  * in others it actually hurts; it is off by default.
274  */
275 static int ixgbe_header_split = FALSE;
276 TUNABLE_INT("hw.ixgbe.hdr_split", &ixgbe_header_split);
277
278 /*
279  * Number of queues: can be set to 0,
280  * in which case the driver autoconfigures based on the
281  * number of CPUs, with a maximum of 8. It
282  * can be overridden manually here.
283  */
284 static int ixgbe_num_queues = 0;
285 TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
286
287 /*
288 ** Number of TX descriptors per ring;
289 ** set higher than RX as this seems to be
290 ** the better-performing choice.
291 */
292 static int ixgbe_txd = PERFORM_TXD;
293 TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
294
295 /* Number of RX descriptors per ring */
296 static int ixgbe_rxd = PERFORM_RXD;
297 TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
298
299 /* Keep a running tab on them for sanity checks */
300 static int ixgbe_total_ports;
301
302 #ifdef IXGBE_FDIR
303 /*
304 ** For Flow Director: this is the
305 ** number of TX packets we sample
306 ** for the filter pool; this means
307 ** every 20th packet will be probed.
308 **
309 ** This feature can be disabled by
310 ** setting this to 0.
311 */
312 static int atr_sample_rate = 20;
313 /*
314 ** Flow Director actually 'steals'
315 ** part of the packet buffer as its
316 ** filter pool; this variable controls
317 ** how much it uses:
318 **  0 = 64K, 1 = 128K, 2 = 256K
319 */
320 static int fdir_pballoc = 1;
321 #endif
322
323 #ifdef DEV_NETMAP
324 /*
325  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
326  * be a reference on how to implement netmap support in a driver.
327  * Additional comments are in ixgbe_netmap.h .
328  *
329  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
330  * that extend the standard driver.
331  */
332 #include <dev/netmap/ixgbe_netmap.h>
333 #endif /* DEV_NETMAP */
334
335 /*********************************************************************
336  *  Device identification routine
337  *
338  *  ixgbe_probe determines if the driver should be loaded on
339  *  adapter based on PCI vendor/device id of the adapter.
340  *
341  *  return BUS_PROBE_DEFAULT on success, positive on failure
342  *********************************************************************/
343
344 static int
345 ixgbe_probe(device_t dev)
346 {
347         ixgbe_vendor_info_t *ent;
348
349         u16     pci_vendor_id = 0;
350         u16     pci_device_id = 0;
351         u16     pci_subvendor_id = 0;
352         u16     pci_subdevice_id = 0;
353         char    adapter_name[256];
354
355         INIT_DEBUGOUT("ixgbe_probe: begin");
356
357         pci_vendor_id = pci_get_vendor(dev);
358         if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
359                 return (ENXIO);
360
361         pci_device_id = pci_get_device(dev);
362         pci_subvendor_id = pci_get_subvendor(dev);
363         pci_subdevice_id = pci_get_subdevice(dev);
364
365         ent = ixgbe_vendor_info_array;
366         while (ent->vendor_id != 0) {
367                 if ((pci_vendor_id == ent->vendor_id) &&
368                     (pci_device_id == ent->device_id) &&
369
370                     ((pci_subvendor_id == ent->subvendor_id) ||
371                      (ent->subvendor_id == 0)) &&
372
373                     ((pci_subdevice_id == ent->subdevice_id) ||
374                      (ent->subdevice_id == 0))) {
375                         ksprintf(adapter_name, "%s, Version - %s",
376                                 ixgbe_strings[ent->index],
377                                 ixgbe_driver_version);
378                         device_set_desc_copy(dev, adapter_name);
379                         ++ixgbe_total_ports;
380                         return (BUS_PROBE_DEFAULT);
381                 }
382                 ent++;
383         }
384         return (ENXIO);
385 }
386
387 /*********************************************************************
388  *  Device initialization routine
389  *
390  *  The attach entry point is called when the driver is being loaded.
391  *  This routine identifies the type of hardware, allocates all resources
392  *  and initializes the hardware.
393  *
394  *  return 0 on success, positive on failure
395  *********************************************************************/
396
397 static int
398 ixgbe_attach(device_t dev)
399 {
400         struct adapter *adapter;
401         struct ixgbe_hw *hw;
402         int             error = 0;
403         u16             csum;
404         u32             ctrl_ext;
405
406         INIT_DEBUGOUT("ixgbe_attach: begin");
407
408         if (resource_disabled("ixgbe", device_get_unit(dev))) {
409                 device_printf(dev, "Disabled by device hint\n");
410                 return (ENXIO);
411         }
412
413         /* Allocate, clear, and link in our adapter structure */
414         adapter = device_get_softc(dev);
415         adapter->dev = adapter->osdep.dev = dev;
416         hw = &adapter->hw;
417
418         /* Core Lock Init*/
419         IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
420
421         /* SYSCTL APIs */
422
423         sysctl_ctx_init(&adapter->sysctl_ctx);
424         adapter->sysctl_tree = SYSCTL_ADD_NODE(&adapter->sysctl_ctx,
425             SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
426             device_get_nameunit(adapter->dev), CTLFLAG_RD, 0, "");
427         if (adapter->sysctl_tree == NULL) {
428                 device_printf(adapter->dev, "can't add sysctl node\n");
429                 return (EINVAL);
430         }
431         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
432                         SYSCTL_CHILDREN(adapter->sysctl_tree),
433                         OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW,
434                         adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control");
435
436         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
437                         SYSCTL_CHILDREN(adapter->sysctl_tree),
438                         OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
439                         &ixgbe_enable_aim, 1, "Interrupt Moderation");
440
441         /*
442         ** Allow a kind of speed control by forcing the autoneg
443         ** advertised speed list to a single value; this
444         ** supports 1G on 82599 devices and 100Mb on X540.
445         */
446         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
447                         SYSCTL_CHILDREN(adapter->sysctl_tree),
448                         OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW,
449                         adapter, 0, ixgbe_set_advertise, "I", "Link Speed");
450
451         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
452                         SYSCTL_CHILDREN(adapter->sysctl_tree),
453                         OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter,
454                         0, ixgbe_set_thermal_test, "I", "Thermal Test");
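        /*
         * Because the node is created under _hw with the device name/unit,
         * these show up at runtime as e.g. hw.ix0.fc, hw.ix0.advertise_speed
         * and hw.ix0.ts (assuming the first "ix" interface is unit 0).
         */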
455
456         /* Set up the timer callout */
457         callout_init_mp(&adapter->timer);
458
459         /* Determine hardware revision */
460         ixgbe_identify_hardware(adapter);
461
462         /* Do base PCI setup - map BAR0 */
463         if (ixgbe_allocate_pci_resources(adapter)) {
464                 device_printf(dev, "Allocation of PCI resources failed\n");
465                 error = ENXIO;
466                 goto err_out;
467         }
468
469         /* Do descriptor calc and sanity checks */
470         if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
471             ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
472                 device_printf(dev, "TXD config issue, using default!\n");
473                 adapter->num_tx_desc = DEFAULT_TXD;
474         } else
475                 adapter->num_tx_desc = ixgbe_txd;
476
477         /*
478         ** With many RX rings it is easy to exceed the
479         ** system mbuf allocation. Tuning nmbclusters
480         ** can alleviate this.
481         */
482         if (nmbclusters > 0 ) {
483                 int s;
484                 s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
485                 if (s > nmbclusters) {
486                         device_printf(dev, "RX Descriptors exceed "
487                             "system mbuf max, using default instead!\n");
488                         ixgbe_rxd = DEFAULT_RXD;
489                 }
490         }
491
492         if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
493             ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
494                 device_printf(dev, "RXD config issue, using default!\n");
495                 adapter->num_rx_desc = DEFAULT_RXD;
496         } else
497                 adapter->num_rx_desc = ixgbe_rxd;
498
499         /* Allocate our TX/RX Queues */
500         if (ixgbe_allocate_queues(adapter)) {
501                 error = ENOMEM;
502                 goto err_out;
503         }
504
505         /* Allocate multicast array memory. */
506         adapter->mta = kmalloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
507             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
508         if (adapter->mta == NULL) {
509                 device_printf(dev, "Can not allocate multicast setup array\n");
510                 error = ENOMEM;
511                 goto err_late;
512         }
513
514         /* Initialize the shared code */
515         error = ixgbe_init_shared_code(hw);
516         if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
517                 /*
518                 ** No optics in this port, set up
519                 ** so the timer routine will probe 
520                 ** for later insertion.
521                 */
522                 adapter->sfp_probe = TRUE;
523                 error = 0;
524         } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
525                 device_printf(dev,"Unsupported SFP+ module detected!\n");
526                 error = EIO;
527                 goto err_late;
528         } else if (error) {
529                 device_printf(dev,"Unable to initialize the shared code\n");
530                 error = EIO;
531                 goto err_late;
532         }
533
534         /* Make sure we have a good EEPROM before we read from it */
535         if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
536                 device_printf(dev,"The EEPROM Checksum Is Not Valid\n");
537                 error = EIO;
538                 goto err_late;
539         }
540
541         error = ixgbe_init_hw(hw);
542         switch (error) {
543         case IXGBE_ERR_EEPROM_VERSION:
544                 device_printf(dev, "This device is a pre-production adapter/"
545                     "LOM.  Please be aware there may be issues associated "
546                     "with your hardware.\n If you are experiencing problems "
547                     "please contact your Intel or hardware representative "
548                     "who provided you with this hardware.\n");
549                 break;
550         case IXGBE_ERR_SFP_NOT_SUPPORTED:
551                 device_printf(dev,"Unsupported SFP+ Module\n");
552                 error = EIO;
553                 device_printf(dev,"Hardware Initialization Failure\n");
554                 goto err_late;
555         case IXGBE_ERR_SFP_NOT_PRESENT:
556                 device_printf(dev,"No SFP+ Module found\n");
557                 /* falls thru */
558         default:
559                 break;
560         }
561
562         /* Detect and set physical type */
563         ixgbe_setup_optics(adapter);
564
565         if ((adapter->msix > 1) && (ixgbe_enable_msix))
566                 error = ixgbe_allocate_msix(adapter); 
567         else
568                 error = ixgbe_allocate_legacy(adapter); 
569         if (error) 
570                 goto err_late;
571
572         /* Setup OS specific network interface */
573         if (ixgbe_setup_interface(dev, adapter) != 0)
574                 goto err_late;
575
576         /* Sysctl for limiting the amount of work done in the taskqueue */
577         ixgbe_add_rx_process_limit(adapter, "rx_processing_limit",
578             "max number of rx packets to process", &adapter->rx_process_limit,
579             ixgbe_rx_process_limit);
580
581         /* Initialize statistics */
582         ixgbe_update_stats_counters(adapter);
583
584         /* Register for VLAN events */
585         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
586             ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
587         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
588             ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
589
590         /* Print PCIE bus type/speed/width info */
591         ixgbe_get_bus_info(hw);
592         device_printf(dev,"PCI Express Bus: Speed %s %s\n",
593             ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
594             (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
595             (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
596             (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
597             (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
598             ("Unknown"));
599
600         if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
601             (hw->bus.speed == ixgbe_bus_speed_2500)) {
602                 device_printf(dev, "PCI-Express bandwidth available"
603                     " for this card\n     is not sufficient for"
604                     " optimal performance.\n");
605                 device_printf(dev, "For optimal performance a x8 "
606                     "PCIE, or x4 PCIE 2 slot is required.\n");
607         }
608
609         /* let hardware know driver is loaded */
610         ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
611         ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
612         IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
613
614         ixgbe_add_hw_stats(adapter);
615
616 #ifdef DEV_NETMAP
617         ixgbe_netmap_attach(adapter);
618 #endif /* DEV_NETMAP */
619         INIT_DEBUGOUT("ixgbe_attach: end");
620         return (0);
621 err_late:
622         ixgbe_free_transmit_structures(adapter);
623         ixgbe_free_receive_structures(adapter);
624 err_out:
625         if (adapter->ifp != NULL)
626                 if_free(adapter->ifp);
627         ixgbe_free_pci_resources(adapter);
628         kfree(adapter->mta, M_DEVBUF);
629         return (error);
630
631 }
632
633 /*********************************************************************
634  *  Device removal routine
635  *
636  *  The detach entry point is called when the driver is being removed.
637  *  This routine stops the adapter and deallocates all the resources
638  *  that were allocated for driver operation.
639  *
640  *  return 0 on success, positive on failure
641  *********************************************************************/
642
643 static int
644 ixgbe_detach(device_t dev)
645 {
646         struct adapter *adapter = device_get_softc(dev);
647         struct ix_queue *que = adapter->queues;
648         u32     ctrl_ext;
649
650         INIT_DEBUGOUT("ixgbe_detach: begin");
651
652         /* Make sure VLANS are not using driver */
653         if (adapter->ifp->if_vlantrunks != NULL) {
654                 device_printf(dev,"Vlan in use, detach first\n");
655                 return (EBUSY);
656         }
657
658         IXGBE_CORE_LOCK(adapter);
659         ixgbe_stop(adapter);
660         IXGBE_CORE_UNLOCK(adapter);
661
662         for (int i = 0; i < adapter->num_queues; i++, que++) {
663                 if (que->tq) {
664                         taskqueue_drain(que->tq, &que->que_task);
665                         taskqueue_free(que->tq);
666                 }
667         }
668
669         /* Drain the Link queue */
670         if (adapter->tq) {
671                 taskqueue_drain(adapter->tq, &adapter->link_task);
672                 taskqueue_drain(adapter->tq, &adapter->mod_task);
673                 taskqueue_drain(adapter->tq, &adapter->msf_task);
674 #ifdef IXGBE_FDIR
675                 taskqueue_drain(adapter->tq, &adapter->fdir_task);
676 #endif
677                 taskqueue_free(adapter->tq);
678         }
679
680         /* let hardware know driver is unloading */
681         ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
682         ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
683         IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
684
685         /* Unregister VLAN events */
686         if (adapter->vlan_attach != NULL)
687                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
688         if (adapter->vlan_detach != NULL)
689                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
690
691         ether_ifdetach(adapter->ifp);
692         callout_stop(&adapter->timer);
693 #ifdef DEV_NETMAP
694         netmap_detach(adapter->ifp);
695 #endif /* DEV_NETMAP */
696         ixgbe_free_pci_resources(adapter);
697         bus_generic_detach(dev);
698         if_free(adapter->ifp);
699
700         ixgbe_free_transmit_structures(adapter);
701         ixgbe_free_receive_structures(adapter);
702         kfree(adapter->mta, M_DEVBUF);
703         sysctl_ctx_free(&adapter->sysctl_ctx);
704         
705         IXGBE_CORE_LOCK_DESTROY(adapter);
706         return (0);
707 }
708
709 /*********************************************************************
710  *
711  *  Shutdown entry point
712  *
713  **********************************************************************/
714
715 static int
716 ixgbe_shutdown(device_t dev)
717 {
718         struct adapter *adapter = device_get_softc(dev);
719         IXGBE_CORE_LOCK(adapter);
720         ixgbe_stop(adapter);
721         IXGBE_CORE_UNLOCK(adapter);
722         return (0);
723 }
724
725
726 /*********************************************************************
727  *  Transmit entry point
728  *
729  *  ixgbe_start is called by the stack to initiate a transmit.
730  *  The driver will remain in this routine as long as there are
731  *  packets to transmit and transmit resources are available.
732  *  In case resources are not available stack is notified and
733  *  the packet is requeued.
734  **********************************************************************/
735
736 static void
737 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
738 {
739         struct mbuf    *m_head;
740         struct adapter *adapter = txr->adapter;
741
742         IXGBE_TX_LOCK_ASSERT(txr);
743
744         if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
745                 return;
746         if (!adapter->link_active)
747                 return;
748
749         while (!ifq_is_empty(&ifp->if_snd)) {
750                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) {
751                         txr->queue_status |= IXGBE_QUEUE_DEPLETED;
752                         break;
753                 }
754
755                 m_head = ifq_dequeue(&ifp->if_snd, NULL);
756                 if (m_head == NULL)
757                         break;
758
759                 if (ixgbe_xmit(txr, &m_head)) {
760 #if 0 /* XXX: prepend to an ALTQ queue ? */
761                         if (m_head != NULL)
762                                 IF_PREPEND(&ifp->if_snd, m_head);
763 #endif
764                         if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
765                                 txr->queue_status |= IXGBE_QUEUE_DEPLETED;
766                         break;
767                 }
768                 /* Send a copy of the frame to the BPF listener */
769                 ETHER_BPF_MTAP(ifp, m_head);
770
771                 /* Set watchdog on */
772                 txr->watchdog_time = ticks;
773                 txr->queue_status = IXGBE_QUEUE_WORKING;
774
775         }
776         return;
777 }
778
779 /*
780  * Legacy TX start - called by the stack, this
781  * always uses the first tx ring, and should
782  * not be used with multiqueue tx enabled.
783  */
784 static void
785 ixgbe_start(struct ifnet *ifp)
786 {
787         struct adapter *adapter = ifp->if_softc;
788         struct tx_ring  *txr = adapter->tx_rings;
789
790         if (ifp->if_flags & IFF_RUNNING) {
791                 IXGBE_TX_LOCK(txr);
792                 ixgbe_start_locked(txr, ifp);
793                 IXGBE_TX_UNLOCK(txr);
794         }
795         return;
796 }
797
798 #if 0 /* __FreeBSD_version >= 800000 */
799 /*
800 ** Multiqueue Transmit driver
801 **
802 */
803 static int
804 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
805 {
806         struct adapter  *adapter = ifp->if_softc;
807         struct ix_queue *que;
808         struct tx_ring  *txr;
809         int             i = 0, err = 0;
810
811         /* Which queue to use */
812         if ((m->m_flags & M_FLOWID) != 0)
813                 i = m->m_pkthdr.flowid % adapter->num_queues;
814         else
815                 i = curcpu % adapter->num_queues;
816
817         txr = &adapter->tx_rings[i];
818         que = &adapter->queues[i];
819
820         if (((txr->queue_status & IXGBE_QUEUE_DEPLETED) == 0) &&
821             IXGBE_TX_TRYLOCK(txr)) {
822                 err = ixgbe_mq_start_locked(ifp, txr, m);
823                 IXGBE_TX_UNLOCK(txr);
824         } else {
825                 err = drbr_enqueue(ifp, txr->br, m);
826                 taskqueue_enqueue(que->tq, &que->que_task);
827         }
828
829         return (err);
830 }
831
832 static int
833 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
834 {
835         struct adapter  *adapter = txr->adapter;
836         struct mbuf     *next;
837         int             enqueued, err = 0;
838
839         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
840             (txr->queue_status == IXGBE_QUEUE_DEPLETED) ||
841             adapter->link_active == 0) {
842                 if (m != NULL)
843                         err = drbr_enqueue(ifp, txr->br, m);
844                 return (err);
845         }
846
847         enqueued = 0;
848         if (m == NULL) {
849                 next = drbr_dequeue(ifp, txr->br);
850         } else if (drbr_needs_enqueue(ifp, txr->br)) {
851                 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
852                         return (err);
853                 next = drbr_dequeue(ifp, txr->br);
854         } else
855                 next = m;
856
857         /* Process the queue */
858         while (next != NULL) {
859                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
860                         if (next != NULL)
861                                 err = drbr_enqueue(ifp, txr->br, next);
862                         break;
863                 }
864                 enqueued++;
865                 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
866                 /* Send a copy of the frame to the BPF listener */
867                 ETHER_BPF_MTAP(ifp, next);
868                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
869                         break;
870                 if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
871                         ixgbe_txeof(txr);
872                 if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD) {
873                         txr->queue_status |= IXGBE_QUEUE_DEPLETED;
874                         break;
875                 }
876                 next = drbr_dequeue(ifp, txr->br);
877         }
878
879         if (enqueued > 0) {
880                 /* Set watchdog on */
881                 txr->queue_status |= IXGBE_QUEUE_WORKING;
882                 txr->watchdog_time = ticks;
883         }
884
885         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
886                 ixgbe_txeof(txr);
887
888         return (err);
889 }
890
891 /*
892 ** Flush all ring buffers
893 */
894 static void
895 ixgbe_qflush(struct ifnet *ifp)
896 {
897         struct adapter  *adapter = ifp->if_softc;
898         struct tx_ring  *txr = adapter->tx_rings;
899         struct mbuf     *m;
900
901         for (int i = 0; i < adapter->num_queues; i++, txr++) {
902                 IXGBE_TX_LOCK(txr);
903                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
904                         m_freem(m);
905                 IXGBE_TX_UNLOCK(txr);
906         }
907         if_qflush(ifp);
908 }
909 #endif /* __FreeBSD_version >= 800000 */
910
911 /*********************************************************************
912  *  Ioctl entry point
913  *
914  *  ixgbe_ioctl is called when the user wants to configure the
915  *  interface.
916  *
917  *  return 0 on success, positive on failure
918  **********************************************************************/
919
920 static int
921 ixgbe_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
922 {
923         struct adapter  *adapter = ifp->if_softc;
924         struct ifreq    *ifr = (struct ifreq *) data;
925 #if defined(INET) || defined(INET6)
926         struct ifaddr *ifa = (struct ifaddr *)data;
927         bool            avoid_reset = FALSE;
928 #endif
929         int             error = 0;
930
931         switch (command) {
932
933         case SIOCSIFADDR:
934 #ifdef INET
935                 if (ifa->ifa_addr->sa_family == AF_INET)
936                         avoid_reset = TRUE;
937 #endif
938 #ifdef INET6
939                 if (ifa->ifa_addr->sa_family == AF_INET6)
940                         avoid_reset = TRUE;
941 #endif
942 #if defined(INET) || defined(INET6)
943                 /*
944                 ** Calling init results in link renegotiation,
945                 ** so we avoid doing it when possible.
946                 */
947                 if (avoid_reset) {
948                         ifp->if_flags |= IFF_UP;
949                         if (!(ifp->if_flags & IFF_RUNNING))
950                                 ixgbe_init(adapter);
951                         if (!(ifp->if_flags & IFF_NOARP))
952                                 arp_ifinit(ifp, ifa);
953                 } else
954                         error = ether_ioctl(ifp, command, data);
955 #endif
956                 break;
957         case SIOCSIFMTU:
958                 IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
959                 if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
960                         error = EINVAL;
961                 } else {
962                         IXGBE_CORE_LOCK(adapter);
963                         ifp->if_mtu = ifr->ifr_mtu;
964                         adapter->max_frame_size =
965                                 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
966                         ixgbe_init_locked(adapter);
967                         IXGBE_CORE_UNLOCK(adapter);
968                 }
969                 break;
970         case SIOCSIFFLAGS:
971                 IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
972                 IXGBE_CORE_LOCK(adapter);
973                 if (ifp->if_flags & IFF_UP) {
974                         if ((ifp->if_flags & IFF_RUNNING)) {
975                                 if ((ifp->if_flags ^ adapter->if_flags) &
976                                     (IFF_PROMISC | IFF_ALLMULTI)) {
977                                         ixgbe_set_promisc(adapter);
978                                 }
979                         } else
980                                 ixgbe_init_locked(adapter);
981                 } else
982                         if (ifp->if_flags & IFF_RUNNING)
983                                 ixgbe_stop(adapter);
984                 adapter->if_flags = ifp->if_flags;
985                 IXGBE_CORE_UNLOCK(adapter);
986                 break;
987         case SIOCADDMULTI:
988         case SIOCDELMULTI:
989                 IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
990                 if (ifp->if_flags & IFF_RUNNING) {
991                         IXGBE_CORE_LOCK(adapter);
992                         ixgbe_disable_intr(adapter);
993                         ixgbe_set_multi(adapter);
994                         ixgbe_enable_intr(adapter);
995                         IXGBE_CORE_UNLOCK(adapter);
996                 }
997                 break;
998         case SIOCSIFMEDIA:
999         case SIOCGIFMEDIA:
1000                 IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
1001                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1002                 break;
1003         case SIOCSIFCAP:
1004         {
1005                 int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1006                 IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
1007                 if (mask & IFCAP_HWCSUM)
1008                         ifp->if_capenable ^= IFCAP_HWCSUM;
1009                 if (mask & IFCAP_TSO4)
1010                         ifp->if_capenable ^= IFCAP_TSO4;
1011                 if (mask & IFCAP_TSO6)
1012                         ifp->if_capenable ^= IFCAP_TSO6;
1013 #if 0 /* NET_LRO */
1014                 if (mask & IFCAP_LRO)
1015                         ifp->if_capenable ^= IFCAP_LRO;
1016 #endif
1017                 if (mask & IFCAP_VLAN_HWTAGGING)
1018                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1019                 if (mask & IFCAP_VLAN_HWFILTER)
1020                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1021 #if 0 /* NET_TSO */
1022                 if (mask & IFCAP_VLAN_HWTSO)
1023                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1024 #endif
1025                 if (ifp->if_flags & IFF_RUNNING) {
1026                         IXGBE_CORE_LOCK(adapter);
1027                         ixgbe_init_locked(adapter);
1028                         IXGBE_CORE_UNLOCK(adapter);
1029                 }
1030 #if 0
1031                 VLAN_CAPABILITIES(ifp);
1032 #endif
1033                 break;
1034         }
1035
1036         default:
1037                 IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
1038                 error = ether_ioctl(ifp, command, data);
1039                 break;
1040         }
1041
1042         return (error);
1043 }
1044
1045 /*********************************************************************
1046  *  Init entry point
1047  *
1048  *  This routine is used in two ways. It is used by the stack as
1049  *  init entry point in network interface structure. It is also used
1050  *  by the driver as a hw/sw initialization routine to get to a
1051  *  consistent state.
1052  *
1053  *  return 0 on success, positive on failure
1054  **********************************************************************/
1055 #define IXGBE_MHADD_MFS_SHIFT 16
1056
1057 static void
1058 ixgbe_init_locked(struct adapter *adapter)
1059 {
1060         struct ifnet   *ifp = adapter->ifp;
1061         device_t        dev = adapter->dev;
1062         struct ixgbe_hw *hw = &adapter->hw;
1063         u32             k, txdctl, mhadd, gpie;
1064         u32             rxdctl, rxctrl;
1065
1066         KKASSERT(lockstatus(&adapter->core_lock, curthread) != 0);
1067         INIT_DEBUGOUT("ixgbe_init: begin");
1068         hw->adapter_stopped = FALSE;
1069         ixgbe_stop_adapter(hw);
1070         callout_stop(&adapter->timer);
1071
1072         /* reprogram the RAR[0] in case user changed it. */
1073         ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
1074
1075         /* Get the latest mac address, User can use a LAA */
1076         bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
1077               IXGBE_ETH_LENGTH_OF_ADDRESS);
1078         ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
1079         hw->addr_ctrl.rar_used_count = 1;
1080
1081         /* Set the various hardware offload abilities */
1082         ifp->if_hwassist = 0;
1083         if (ifp->if_capenable & IFCAP_TSO)
1084                 ifp->if_hwassist |= CSUM_TSO;
1085         if (ifp->if_capenable & IFCAP_TXCSUM) {
1086                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1087 #if 0
1088                 if (hw->mac.type != ixgbe_mac_82598EB)
1089                         ifp->if_hwassist |= CSUM_SCTP;
1090 #endif
1091         }
1092
1093         /* Prepare transmit descriptors and buffers */
1094         if (ixgbe_setup_transmit_structures(adapter)) {
1095                 device_printf(dev,"Could not setup transmit structures\n");
1096                 ixgbe_stop(adapter);
1097                 return;
1098         }
1099
1100         ixgbe_init_hw(hw);
1101         ixgbe_initialize_transmit_units(adapter);
1102
1103         /* Setup Multicast table */
1104         ixgbe_set_multi(adapter);
1105
1106         /*
1107         ** Determine the correct mbuf pool
1108         ** for doing jumbo/headersplit
1109         */
1110         if (adapter->max_frame_size <= 2048)
1111                 adapter->rx_mbuf_sz = MCLBYTES;
1112         else if (adapter->max_frame_size <= 4096)
1113                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1114         else if (adapter->max_frame_size <= 9216)
1115                 adapter->rx_mbuf_sz = MJUM9BYTES;
1116         else
1117                 adapter->rx_mbuf_sz = MJUM16BYTES;
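        /*
         * MCLBYTES is the standard 2K cluster, MJUMPAGESIZE is one page
         * (typically 4K), and MJUM9BYTES/MJUM16BYTES are the 9K and 16K
         * jumbo clusters.
         */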
1118
1119         /* Prepare receive descriptors and buffers */
1120         if (ixgbe_setup_receive_structures(adapter)) {
1121                 device_printf(dev,"Could not setup receive structures\n");
1122                 ixgbe_stop(adapter);
1123                 return;
1124         }
1125
1126         /* Configure RX settings */
1127         ixgbe_initialize_receive_units(adapter);
1128
1129         gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
1130
1131         /* Enable Fan Failure Interrupt */
1132         gpie |= IXGBE_SDP1_GPIEN;
1133
1134         /* Add for Module detection */
1135         if (hw->mac.type == ixgbe_mac_82599EB)
1136                 gpie |= IXGBE_SDP2_GPIEN;
1137
1138         /* Thermal Failure Detection */
1139         if (hw->mac.type == ixgbe_mac_X540)
1140                 gpie |= IXGBE_SDP0_GPIEN;
1141
1142         if (adapter->msix > 1) {
1143                 /* Enable Enhanced MSIX mode */
1144                 gpie |= IXGBE_GPIE_MSIX_MODE;
1145                 gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
1146                     IXGBE_GPIE_OCD;
1147         }
1148         IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
1149
1150         /* Set MTU size */
1151         if (ifp->if_mtu > ETHERMTU) {
1152                 mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
1153                 mhadd &= ~IXGBE_MHADD_MFS_MASK;
1154                 mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
1155                 IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
1156         }
1157         
1158         /* Now enable all the queues */
1159
1160         for (int i = 0; i < adapter->num_queues; i++) {
1161                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
1162                 txdctl |= IXGBE_TXDCTL_ENABLE;
1163                 /* Set WTHRESH to 8, burst writeback */
1164                 txdctl |= (8 << 16);
1165                 /*
1166                  * When the internal queue falls below PTHRESH (32),
1167                  * start prefetching as long as there are at least
1168                  * HTHRESH (1) buffers ready. The values are taken
1169                  * from the Intel linux driver 3.8.21.
1170                  * Prefetching enables tx line rate even with 1 queue.
1171                  */
1172                 txdctl |= (32 << 0) | (1 << 8);
1173                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
1174         }
1175
1176         for (int i = 0; i < adapter->num_queues; i++) {
1177                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
1178                 if (hw->mac.type == ixgbe_mac_82598EB) {
1179                         /*
1180                         ** PTHRESH = 21
1181                         ** HTHRESH = 4
1182                         ** WTHRESH = 8
1183                         */
1184                         rxdctl &= ~0x3FFFFF;
1185                         rxdctl |= 0x080420;
1186                 }
1187                 rxdctl |= IXGBE_RXDCTL_ENABLE;
1188                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
1189                 for (k = 0; k < 10; k++) {
1190                         if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
1191                             IXGBE_RXDCTL_ENABLE)
1192                                 break;
1193                         else
1194                                 msec_delay(1);
1195                 }
1196                 wmb();
1197 #ifdef DEV_NETMAP
1198                 /*
1199                  * In netmap mode, we must preserve the buffers made
1200                  * available to userspace before the if_init()
1201                  * (this is true by default on the TX side, because
1202                  * init makes all buffers available to userspace).
1203                  *
1204                  * netmap_reset() and the device specific routines
1205                  * (e.g. ixgbe_setup_receive_rings()) map these
1206                  * buffers at the end of the NIC ring, so here we
1207                  * must set the RDT (tail) register to make sure
1208                  * they are not overwritten.
1209                  *
1210                  * In this driver the NIC ring starts at RDH = 0,
1211                  * RDT points to the last slot available for reception (?),
1212                  * so RDT = num_rx_desc - 1 means the whole ring is available.
1213                  */
1214                 if (ifp->if_capenable & IFCAP_NETMAP) {
1215                         struct netmap_adapter *na = NA(adapter->ifp);
1216                         struct netmap_kring *kring = &na->rx_rings[i];
1217                         int t = na->num_rx_desc - 1 - kring->nr_hwavail;
1218
1219                         IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
1220                 } else
1221 #endif /* DEV_NETMAP */
1222                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
1223         }
1224
1225         /* Set up VLAN support and filter */
1226         ixgbe_setup_vlan_hw_support(adapter);
1227
1228         /* Enable Receive engine */
1229         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1230         if (hw->mac.type == ixgbe_mac_82598EB)
1231                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
1232         rxctrl |= IXGBE_RXCTRL_RXEN;
1233         ixgbe_enable_rx_dma(hw, rxctrl);
1234
1235         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
1236
1237         /* Set up MSI/X routing */
1238         if (ixgbe_enable_msix)  {
1239                 ixgbe_configure_ivars(adapter);
1240                 /* Set up auto-mask */
1241                 if (hw->mac.type == ixgbe_mac_82598EB)
1242                         IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1243                 else {
1244                         IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
1245                         IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
1246                 }
1247         } else {  /* Simple settings for Legacy/MSI */
1248                 ixgbe_set_ivar(adapter, 0, 0, 0);
1249                 ixgbe_set_ivar(adapter, 0, 0, 1);
1250                 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1251         }
1252
1253 #ifdef IXGBE_FDIR
1254         /* Init Flow director */
1255         if (hw->mac.type != ixgbe_mac_82598EB) {
1256                 u32 hdrm = 32 << fdir_pballoc;
1257
1258                 hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
1259                 ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
1260         }
1261 #endif
1262
1263         /*
1264         ** Check on any SFP devices that
1265         ** need to be kick-started
1266         */
1267         if (hw->phy.type == ixgbe_phy_none) {
1268                 int err = hw->phy.ops.identify(hw);
1269                 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1270                         device_printf(dev,
1271                             "Unsupported SFP+ module type was detected.\n");
1272                         return;
1273                 }
1274         }
1275
1276         /* Set moderation on the Link interrupt */
1277         IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
1278
1279         /* Config/Enable Link */
1280         ixgbe_config_link(adapter);
1281
1282         /* Hardware Packet Buffer & Flow Control setup */
1283         {
1284                 u32 rxpb, frame, size, tmp;
1285
1286                 frame = adapter->max_frame_size;
1287
1288                 /* Calculate High Water */
1289                 if (hw->mac.type == ixgbe_mac_X540)
1290                         tmp = IXGBE_DV_X540(frame, frame);
1291                 else
1292                         tmp = IXGBE_DV(frame, frame);
1293                 size = IXGBE_BT2KB(tmp);
1294                 rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
1295                 hw->fc.high_water[0] = rxpb - size;
1296
1297                 /* Now calculate Low Water */
1298                 if (hw->mac.type == ixgbe_mac_X540)
1299                         tmp = IXGBE_LOW_DV_X540(frame);
1300                 else
1301                         tmp = IXGBE_LOW_DV(frame);
1302                 hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1303                 
1304                 adapter->fc = hw->fc.requested_mode = ixgbe_fc_full;
1305                 hw->fc.pause_time = IXGBE_FC_PAUSE;
1306                 hw->fc.send_xon = TRUE;
1307         }
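        /*
         * hw->fc.high_water/low_water are kept in KB: the packet buffer
         * size read from RXPBSIZE is shifted down to KB and the delay
         * values from IXGBE_DV/IXGBE_LOW_DV are converted with
         * IXGBE_BT2KB; the shared code applies them when ixgbe_start_hw()
         * initializes the FC settings below.
         */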
1308         /* Initialize the FC settings */
1309         ixgbe_start_hw(hw);
1310
1311         /* And now turn on interrupts */
1312         ixgbe_enable_intr(adapter);
1313
1314         /* Now inform the stack we're ready */
1315         ifp->if_flags |= IFF_RUNNING;
1316         ifp->if_flags &= ~IFF_OACTIVE;
1317
1318         return;
1319 }
1320
1321 static void
1322 ixgbe_init(void *arg)
1323 {
1324         struct adapter *adapter = arg;
1325
1326         IXGBE_CORE_LOCK(adapter);
1327         ixgbe_init_locked(adapter);
1328         IXGBE_CORE_UNLOCK(adapter);
1329         return;
1330 }
1331
1332
1333 /*
1334 **
1335 ** MSIX Interrupt Handlers and Tasklets
1336 **
1337 */
1338
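/*
 * On 82598 the per-vector queue interrupt bits live in the single 32-bit
 * EIMS/EIMC/EICS registers, while 82599/X540 spread the 64 possible queue
 * bits across the _EX(0)/_EX(1) register pairs, hence the 32-bit mask
 * splitting in the helpers below.
 */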
1339 static inline void
1340 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
1341 {
1342         struct ixgbe_hw *hw = &adapter->hw;
1343         u64     queue = ((u64)1 << vector);
1344         u32     mask;
1345
1346         if (hw->mac.type == ixgbe_mac_82598EB) {
1347                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1348                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
1349         } else {
1350                 mask = (queue & 0xFFFFFFFF);
1351                 if (mask)
1352                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
1353                 mask = (queue >> 32);
1354                 if (mask)
1355                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
1356         }
1357 }
1358
1359 static inline void
1360 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1361 {
1362         struct ixgbe_hw *hw = &adapter->hw;
1363         u64     queue = ((u64)1 << vector);
1364         u32     mask;
1365
1366         if (hw->mac.type == ixgbe_mac_82598EB) {
1367                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1368                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1369         } else {
1370                 mask = (queue & 0xFFFFFFFF);
1371                 if (mask)
1372                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1373                 mask = (queue >> 32);
1374                 if (mask)
1375                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1376         }
1377 }
1378
1379 static inline void
1380 ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
1381 {
1382         u32 mask;
1383
1384         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
1385                 mask = (IXGBE_EIMS_RTX_QUEUE & queues);
1386                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
1387         } else {
1388                 mask = (queues & 0xFFFFFFFF);
1389                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
1390                 mask = (queues >> 32);
1391                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
1392         }
1393 }
1394
1395
1396 static void
1397 ixgbe_handle_que(void *context, int pending)
1398 {
1399         struct ix_queue *que = context;
1400         struct adapter  *adapter = que->adapter;
1401         struct tx_ring  *txr = que->txr;
1402         struct ifnet    *ifp = adapter->ifp;
1403         bool            more;
1404
1405         if (ifp->if_flags & IFF_RUNNING) {
1406                 more = ixgbe_rxeof(que, adapter->rx_process_limit);
1407                 IXGBE_TX_LOCK(txr);
1408                 ixgbe_txeof(txr);
1409 #if 0 /*__FreeBSD_version >= 800000*/
1410                 if (!drbr_empty(ifp, txr->br))
1411                         ixgbe_mq_start_locked(ifp, txr, NULL);
1412 #else
1413                 if (!ifq_is_empty(&ifp->if_snd))
1414                         ixgbe_start_locked(txr, ifp);
1415 #endif
1416                 IXGBE_TX_UNLOCK(txr);
1417                 if (more) {
1418                         taskqueue_enqueue(que->tq, &que->que_task);
1419                         return;
1420                 }
1421         }
1422
1423         /* Reenable this interrupt */
1424         ixgbe_enable_queue(adapter, que->msix);
1425         return;
1426 }
1427
1428
1429 /*********************************************************************
1430  *
1431  *  Legacy Interrupt Service routine
1432  *
1433  **********************************************************************/
1434
1435 static void
1436 ixgbe_legacy_irq(void *arg)
1437 {
1438         struct ix_queue *que = arg;
1439         struct adapter  *adapter = que->adapter;
1440         struct ixgbe_hw *hw = &adapter->hw;
1441         struct          tx_ring *txr = adapter->tx_rings;
1442         bool            more_tx, more_rx;
1443         u32             reg_eicr, loop = MAX_LOOP;
1444
1445
1446         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1447
1448         ++que->irqs;
1449         if (reg_eicr == 0) {
1450                 ixgbe_enable_intr(adapter);
1451                 return;
1452         }
1453
1454         more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
1455
1456         IXGBE_TX_LOCK(txr);
1457         do {
1458                 more_tx = ixgbe_txeof(txr);
1459         } while (loop-- && more_tx);
1460         IXGBE_TX_UNLOCK(txr);
1461
1462         if (more_rx || more_tx)
1463                 taskqueue_enqueue(que->tq, &que->que_task);
1464
1465         /* Check for fan failure */
1466         if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1467             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1468                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1469                     "REPLACE IMMEDIATELY!!\n");
1470                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1471         }
1472
1473         /* Link status change */
1474         if (reg_eicr & IXGBE_EICR_LSC)
1475                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1476
1477         ixgbe_enable_intr(adapter);
1478         return;
1479 }
1480
1481
1482 /*********************************************************************
1483  *
1484  *  MSIX Queue Interrupt Service routine
1485  *
1486  **********************************************************************/
1487 void
1488 ixgbe_msix_que(void *arg)
1489 {
1490         struct ix_queue *que = arg;
1491         struct adapter  *adapter = que->adapter;
1492         struct tx_ring  *txr = que->txr;
1493         struct rx_ring  *rxr = que->rxr;
1494         bool            more_tx, more_rx;
1495         u32             newitr = 0;
1496
1497         ixgbe_disable_queue(adapter, que->msix);
1498         ++que->irqs;
1499
1500         more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
1501
1502         IXGBE_TX_LOCK(txr);
1503         more_tx = ixgbe_txeof(txr);
1504         /*
1505         ** Make certain that if the stack 
1506         ** has anything queued the task gets
1507         ** scheduled to handle it.
1508         */
1509 #if 0
1510 #if __FreeBSD_version < 800000
1511         if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd))
1512 #else
1513         if (!drbr_empty(adapter->ifp, txr->br))
1514 #endif
1515 #endif
1516         if (!ifq_is_empty(&adapter->ifp->if_snd))
1517                 more_tx = 1;
1518         IXGBE_TX_UNLOCK(txr);
1519
1520         /* Do AIM now? */
1521
1522         if (ixgbe_enable_aim == FALSE)
1523                 goto no_calc;
1524         /*
1525         ** Do Adaptive Interrupt Moderation:
1526         **  - Write out last calculated setting
1527         **  - Calculate based on average size over
1528         **    the last interval.
1529         */
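        /*
        ** Rough worked example (illustrative numbers only): an interval
        ** averaging ~1500 bytes per packet gives newitr ~1524 after the
        ** +24 frame/CRC adjustment; that is under the 3000 cap and above
        ** the 300-1200 mid range, so it is halved to ~762 and written to
        ** EITR on the next pass through this handler.
        */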
1530         if (que->eitr_setting)
1531                 IXGBE_WRITE_REG(&adapter->hw,
1532                     IXGBE_EITR(que->msix), que->eitr_setting);
1533  
1534         que->eitr_setting = 0;
1535
1536         /* Idle, do nothing */
1537         if ((txr->bytes == 0) && (rxr->bytes == 0))
1538                 goto no_calc;
1539                                 
1540         if ((txr->bytes) && (txr->packets))
1541                 newitr = txr->bytes/txr->packets;
1542         if ((rxr->bytes) && (rxr->packets))
1543                 newitr = max(newitr,
1544                     (rxr->bytes / rxr->packets));
1545         newitr += 24; /* account for hardware frame, crc */
1546
1547         /* set an upper boundary */
1548         newitr = min(newitr, 3000);
1549
1550         /* Be nice to the mid range */
1551         if ((newitr > 300) && (newitr < 1200))
1552                 newitr = (newitr / 3);
1553         else
1554                 newitr = (newitr / 2);
1555
1556         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1557                 newitr |= newitr << 16;
1558         else
1559                 newitr |= IXGBE_EITR_CNT_WDIS;
1560                  
1561         /* save for next interrupt */
1562         que->eitr_setting = newitr;
1563
1564         /* Reset state */
1565         txr->bytes = 0;
1566         txr->packets = 0;
1567         rxr->bytes = 0;
1568         rxr->packets = 0;
1569
1570 no_calc:
1571         if (more_tx || more_rx)
1572                 taskqueue_enqueue(que->tq, &que->que_task);
1573         else /* Reenable this interrupt */
1574                 ixgbe_enable_queue(adapter, que->msix);
1575         return;
1576 }
1577
1578
1579 static void
1580 ixgbe_msix_link(void *arg)
1581 {
1582         struct adapter  *adapter = arg;
1583         struct ixgbe_hw *hw = &adapter->hw;
1584         u32             reg_eicr;
1585
1586         ++adapter->link_irq;
1587
1588         /* First get the cause */
1589         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1590         /* Clear interrupt with write */
1591         IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1592
1593         /* Link status change */
1594         if (reg_eicr & IXGBE_EICR_LSC)
1595                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1596
1597         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1598 #ifdef IXGBE_FDIR
1599                 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1600                         /* This is probably overkill :) */
1601                         if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1602                                 return;
1603                         /* Disable the interrupt */
1604                         IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1605                         taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
1606                 } else
1607 #endif
1608                 if (reg_eicr & IXGBE_EICR_ECC) {
1609                         device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1610                             "Please Reboot!!\n");
1611                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1612                 } else
1613
1614                 if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1615                         /* Clear the interrupt */
1616                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1617                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
1618                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1619                         /* Clear the interrupt */
1620                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1621                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
1622                 }
1623         } 
1624
1625         /* Check for fan failure */
1626         if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1627             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1628                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1629                     "REPLACE IMMEDIATELY!!\n");
1630                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1631         }
1632
1633         /* Check for over temp condition */
1634         if ((hw->mac.type == ixgbe_mac_X540) &&
1635             (reg_eicr & IXGBE_EICR_GPI_SDP0)) {
1636                 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1637                     "PHY IS SHUT DOWN!!\n");
1638                 device_printf(adapter->dev, "System shutdown required\n");
1639                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0);
1640         }
1641
1642         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1643         return;
1644 }
1645
1646 /*********************************************************************
1647  *
1648  *  Media Ioctl callback
1649  *
1650  *  This routine is called whenever the user queries the status of
1651  *  the interface using ifconfig.
1652  *
1653  **********************************************************************/
1654 static void
1655 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1656 {
1657         struct adapter *adapter = ifp->if_softc;
1658
1659         INIT_DEBUGOUT("ixgbe_media_status: begin");
1660         IXGBE_CORE_LOCK(adapter);
1661         ixgbe_update_link_status(adapter);
1662
1663         ifmr->ifm_status = IFM_AVALID;
1664         ifmr->ifm_active = IFM_ETHER;
1665
1666         if (!adapter->link_active) {
1667                 IXGBE_CORE_UNLOCK(adapter);
1668                 return;
1669         }
1670
1671         ifmr->ifm_status |= IFM_ACTIVE;
1672
1673         switch (adapter->link_speed) {
1674                 case IXGBE_LINK_SPEED_100_FULL:
1675                         ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1676                         break;
1677                 case IXGBE_LINK_SPEED_1GB_FULL:
1678                         ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1679                         break;
1680                 case IXGBE_LINK_SPEED_10GB_FULL:
1681                         ifmr->ifm_active |= adapter->optics | IFM_FDX;
1682                         break;
1683         }
1684
1685         IXGBE_CORE_UNLOCK(adapter);
1686
1687         return;
1688 }
1689
1690 /*********************************************************************
1691  *
1692  *  Media Ioctl callback
1693  *
1694  *  This routine is called when the user changes speed/duplex using
1695  *  media/mediaopt options with ifconfig.
1696  *
1697  **********************************************************************/
1698 static int
1699 ixgbe_media_change(struct ifnet * ifp)
1700 {
1701         struct adapter *adapter = ifp->if_softc;
1702         struct ifmedia *ifm = &adapter->media;
1703
1704         INIT_DEBUGOUT("ixgbe_media_change: begin");
1705
1706         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1707                 return (EINVAL);
1708
1709         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1710         case IFM_AUTO:
1711                 adapter->hw.phy.autoneg_advertised =
1712                     IXGBE_LINK_SPEED_100_FULL |
1713                     IXGBE_LINK_SPEED_1GB_FULL |
1714                     IXGBE_LINK_SPEED_10GB_FULL;
1715                 break;
1716         default:
1717                 device_printf(adapter->dev, "Only auto media type\n");
1718                 return (EINVAL);
1719         }
1720
1721         return (0);
1722 }
1723
1724 /*********************************************************************
1725  *
1726  *  This routine maps the mbufs to tx descriptors, allowing the
1727  *  TX engine to transmit the packets. 
1728  *      - return 0 on success, positive on failure
1729  *
1730  **********************************************************************/
1731
1732 static int
1733 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1734 {
1735         struct adapter  *adapter = txr->adapter;
1736         u32             olinfo_status = 0, cmd_type_len;
1737         u32             paylen = 0;
1738         int             i, j, error, nsegs, maxsegs;
1739         int             first, last = 0;
1740         struct mbuf     *m_head;
1741         bus_dma_segment_t segs[adapter->num_segs];
1742         bus_dmamap_t    map;
1743         struct ixgbe_tx_buf *txbuf;
1744         union ixgbe_adv_tx_desc *txd = NULL;
1745
1746         m_head = *m_headp;
1747
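        /*
         * For TSO frames, ixgbe_tso_pullup() (added with this change)
         * pulls the protocol headers into the leading mbuf, presumably so
         * the TSO context setup below can parse them from contiguous
         * storage; the chain head may be replaced, hence m_head is
         * reloaded afterwards.
         */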
1748         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1749                 error = ixgbe_tso_pullup(txr, m_headp);
1750                 if (error)
1751                         return error;
1752                 m_head = *m_headp;
1753         }
1754
1755         /* Basic descriptor defines */
1756         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1757             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1758
1759         if (m_head->m_flags & M_VLANTAG)
1760                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1761
1762         /*
1763          * Important to capture the first descriptor
1764          * used because it will contain the index of
1765          * the one we tell the hardware to report back
1766          */
1767         first = txr->next_avail_desc;
1768         txbuf = &txr->tx_buffers[first];
1769         map = txbuf->map;
1770
1771         /*
1772          * Map the packet for DMA.
1773          */
1774         maxsegs = txr->tx_avail - IXGBE_TX_RESERVED;
1775         if (maxsegs > adapter->num_segs)
1776                 maxsegs = adapter->num_segs;
1777
1778         error = bus_dmamap_load_mbuf_defrag(txr->txtag, map, m_headp,
1779             segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1780         if (error) {
1781                 if (error == ENOBUFS)
1782                         adapter->mbuf_defrag_failed++;
1783                 else
1784                         adapter->no_tx_dma_setup++;
1785
1786                 m_freem(*m_headp);
1787                 *m_headp = NULL;
1788                 return (error);
1789         }
1790
1791         /* Make certain there are enough descriptors */
1792         if (nsegs > txr->tx_avail - 2) {
1793                 txr->no_desc_avail++;
1794                 error = ENOBUFS;
1795                 goto xmit_fail;
1796         }
1797         m_head = *m_headp;
1798
1799         /*
1800         ** Set up the appropriate offload context
1801         ** this becomes the first descriptor of 
1802         ** a packet.
1803         */
1804         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1805                 if (ixgbe_tso_setup(txr, m_head, &paylen, &olinfo_status)) {
1806                         cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1807                         olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1808                         olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1809                         ++adapter->tso_tx;
1810                 } else
1811                         return (ENXIO);
1812         } else if (ixgbe_tx_ctx_setup(txr, m_head))
1813                 olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1814
1815 #ifdef IXGBE_IEEE1588
1816         /* This is changing soon to an mtag detection */
1817         if (we detect this mbuf has a TSTAMP mtag)
1818                 cmd_type_len |= IXGBE_ADVTXD_MAC_TSTAMP;
1819 #endif
1820
1821 #ifdef IXGBE_FDIR
1822         /* Do the flow director magic */
1823         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
1824                 ++txr->atr_count;
1825                 if (txr->atr_count >= atr_sample_rate) {
1826                         ixgbe_atr(txr, m_head);
1827                         txr->atr_count = 0;
1828                 }
1829         }
1830 #endif
1831         /* Record payload length */
1832         if (paylen == 0)
1833                 olinfo_status |= m_head->m_pkthdr.len <<
1834                     IXGBE_ADVTXD_PAYLEN_SHIFT;
1835
1836         i = txr->next_avail_desc;
1837         for (j = 0; j < nsegs; j++) {
1838                 bus_size_t seglen;
1839                 bus_addr_t segaddr;
1840
1841                 txbuf = &txr->tx_buffers[i];
1842                 txd = &txr->tx_base[i];
1843                 seglen = segs[j].ds_len;
1844                 segaddr = htole64(segs[j].ds_addr);
1845
1846                 txd->read.buffer_addr = segaddr;
1847                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
1848             cmd_type_len | seglen);
1849                 txd->read.olinfo_status = htole32(olinfo_status);
1850                 last = i; /* descriptor that will get completion IRQ */
1851
1852                 if (++i == adapter->num_tx_desc)
1853                         i = 0;
1854
1855                 txbuf->m_head = NULL;
1856                 txbuf->eop_index = -1;
1857         }
1858
1859         txd->read.cmd_type_len |=
1860             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
1861         txr->tx_avail -= nsegs;
1862         txr->next_avail_desc = i;
1863
1864         txbuf->m_head = m_head;
1865         /* Swap the dma map between the first and last descriptor */
1866         txr->tx_buffers[first].map = txbuf->map;
1867         txbuf->map = map;
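        /*
         * The swap keeps the map that actually holds this mbuf's DMA
         * mapping on the same tx_buffer slot as the mbuf itself (the EOP
         * slot), so the completion path unloads and frees against the
         * right map.
         */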
1868         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1869
1870         /* Set the index of the descriptor that will be marked done */
1871         txbuf = &txr->tx_buffers[first];
1872         txbuf->eop_index = last;
1873
1874         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1875             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1876         /*
1877          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1878          * hardware that this frame is available to transmit.
1879          */
1880         ++txr->total_packets;
1881         IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
1882
1883         return (0);
1884
1885 xmit_fail:
1886         bus_dmamap_unload(txr->txtag, txbuf->map);
1887         return (error);
1888
1889 }
1890
1891 static void
1892 ixgbe_set_promisc(struct adapter *adapter)
1893 {
1894         u_int32_t       reg_rctl;
1895         struct ifnet   *ifp = adapter->ifp;
1896
1897         reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1898         reg_rctl &= (~IXGBE_FCTRL_UPE);
1899         reg_rctl &= (~IXGBE_FCTRL_MPE);
1900         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1901
1902         if (ifp->if_flags & IFF_PROMISC) {
1903                 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1904                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1905         } else if (ifp->if_flags & IFF_ALLMULTI) {
1906                 reg_rctl |= IXGBE_FCTRL_MPE;
1907                 reg_rctl &= ~IXGBE_FCTRL_UPE;
1908                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1909         }
1910         return;
1911 }
1912
1913
1914 /*********************************************************************
1915  *  Multicast Update
1916  *
1917  *  This routine is called whenever multicast address list is updated.
1918  *
1919  **********************************************************************/
1920 #define IXGBE_RAR_ENTRIES 16
1921
1922 static void
1923 ixgbe_set_multi(struct adapter *adapter)
1924 {
1925         u32     fctrl;
1926         u8      *mta;
1927         u8      *update_ptr;
1928         struct  ifmultiaddr *ifma;
1929         int     mcnt = 0;
1930         struct ifnet   *ifp = adapter->ifp;
1931
1932         IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
1933
1934         mta = adapter->mta;
1935         bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1936             MAX_NUM_MULTICAST_ADDRESSES);
1937
1938         fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1939         fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1940         if (ifp->if_flags & IFF_PROMISC)
1941                 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1942         else if (ifp->if_flags & IFF_ALLMULTI) {
1943                 fctrl |= IXGBE_FCTRL_MPE;
1944                 fctrl &= ~IXGBE_FCTRL_UPE;
1945         } else
1946                 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1947         
1948         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
1949
1950         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1951                 if (ifma->ifma_addr->sa_family != AF_LINK)
1952                         continue;
1953                 bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
1954                     &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1955                     IXGBE_ETH_LENGTH_OF_ADDRESS);
1956                 mcnt++;
1957         }
1958
1959         update_ptr = mta;
1960         ixgbe_update_mc_addr_list(&adapter->hw,
1961             update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
1962
1963         return;
1964 }
1965
1966 /*
1967  * This is an iterator function now needed by the multicast
1968  * shared code. It simply feeds the shared code routine the
1969  * addresses in the array of ixgbe_set_multi() one by one.
1970  */
1971 static u8 *
1972 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
1973 {
1974         u8 *addr = *update_ptr;
1975         u8 *newptr;
1976         *vmdq = 0;
1977
1978         newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1979         *update_ptr = newptr;
1980         return addr;
1981 }
1982
1983
1984 /*********************************************************************
1985  *  Timer routine
1986  *
1987  *  This routine checks for link status, updates statistics,
1988  *  and runs the watchdog check.
1989  *
1990  **********************************************************************/
1991
1992 static void
1993 ixgbe_local_timer(void *arg)
1994 {
1995         struct adapter  *adapter = arg;
1996         device_t        dev = adapter->dev;
1997         struct ifnet    *ifp = adapter->ifp;
1998         struct ix_queue *que = adapter->queues;
1999         struct tx_ring  *txr = adapter->tx_rings;
2000         int             hung, busy, paused;
2001
2002         IXGBE_CORE_LOCK(adapter);
2003         hung = busy = paused = 0;
2004
2005         /* Check for pluggable optics */
2006         if (adapter->sfp_probe)
2007                 if (!ixgbe_sfp_probe(adapter))
2008                         goto out; /* Nothing to do */
2009
2010         ixgbe_update_link_status(adapter);
2011         ixgbe_update_stats_counters(adapter);
2012
2013         /*
2014          * If the interface has been paused
2015          * then don't do the watchdog check
2016          */
2017         if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
2018                 paused = 1;
2019
2020         /*
2021         ** Check the TX queues status
2022         **      - central locked handling of OACTIVE
2023         **      - watchdog only if all queues show hung
2024         */          
2025         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2026                 if ((txr->queue_status & IXGBE_QUEUE_HUNG) &&
2027                     (paused == 0))
2028                         ++hung;
2029                 if (txr->queue_status & IXGBE_QUEUE_DEPLETED)
2030                         ++busy;
2031                 if ((txr->queue_status & IXGBE_QUEUE_IDLE) == 0)
2032                         taskqueue_enqueue(que->tq, &que->que_task);
2033         }
2034         /* Only truly watchdog if all queues show hung */
2035         if (hung == adapter->num_queues)
2036                 goto watchdog;
2037         /* Only turn off the stack flow when ALL are depleted */
2038         if (busy == adapter->num_queues)
2039                 ifp->if_flags |= IFF_OACTIVE;
2040         else if ((ifp->if_flags & IFF_OACTIVE) &&
2041             (busy < adapter->num_queues))
2042                 ifp->if_flags &= ~IFF_OACTIVE;
2043
2044 out:
2045         ixgbe_rearm_queues(adapter, adapter->que_mask);
2046         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2047         IXGBE_CORE_UNLOCK(adapter);
2048         return;
2049
2050 watchdog:
2051         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2052         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2053             IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2054             IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2055         device_printf(dev,"TX(%d) desc avail = %d, "
2056             "Next TX to Clean = %d\n",
2057             txr->me, txr->tx_avail, txr->next_to_clean);
2058         adapter->ifp->if_flags &= ~IFF_RUNNING;
2059         adapter->watchdog_events++;
2060         ixgbe_init_locked(adapter);
2061
2062         IXGBE_CORE_UNLOCK(adapter);
2063 }
2064
2065 /*
2066 ** Note: this routine updates the OS on the link state
2067 **      the real check of the hardware only happens with
2068 **      a link interrupt.
2069 */
2070 static void
2071 ixgbe_update_link_status(struct adapter *adapter)
2072 {
2073         struct ifnet    *ifp = adapter->ifp;
2074         struct tx_ring *txr = adapter->tx_rings;
2075         device_t dev = adapter->dev;
2076
2077
2078         if (adapter->link_up){ 
2079                 if (adapter->link_active == FALSE) {
2080                         if (bootverbose)
2081                                 device_printf(dev,"Link is up %d Gbps %s \n",
2082                                     ((adapter->link_speed == 128)? 10:1),
2083                                     "Full Duplex");
2084                         adapter->link_active = TRUE;
2085                         /* Update any Flow Control changes */
2086                         ixgbe_fc_enable(&adapter->hw);
2087                         ifp->if_link_state = LINK_STATE_UP;
2088                         if_link_state_change(ifp);
2089                 }
2090         } else { /* Link down */
2091                 if (adapter->link_active == TRUE) {
2092                         if (bootverbose)
2093                                 device_printf(dev,"Link is Down\n");
2094                         ifp->if_link_state = LINK_STATE_DOWN;
2095                         if_link_state_change(ifp);
2096                         adapter->link_active = FALSE;
2097                         for (int i = 0; i < adapter->num_queues;
2098                             i++, txr++)
2099                                 txr->queue_status = IXGBE_QUEUE_IDLE;
2100                 }
2101         }
2102
2103         return;
2104 }
2105
2106
2107 /*********************************************************************
2108  *
2109  *  This routine disables all traffic on the adapter by issuing a
2110  *  global reset on the MAC and deallocates TX/RX buffers.
2111  *
2112  **********************************************************************/
2113
2114 static void
2115 ixgbe_stop(void *arg)
2116 {
2117         struct ifnet   *ifp;
2118         struct adapter *adapter = arg;
2119         struct ixgbe_hw *hw = &adapter->hw;
2120         ifp = adapter->ifp;
2121
2122         KKASSERT(lockstatus(&adapter->core_lock, curthread) != 0);
2123
2124         INIT_DEBUGOUT("ixgbe_stop: begin\n");
2125         ixgbe_disable_intr(adapter);
2126         callout_stop(&adapter->timer);
2127
2128         /* Let the stack know...*/
2129         ifp->if_flags &= ~IFF_RUNNING;
2130         ifp->if_flags |= IFF_OACTIVE;
2131
2132         ixgbe_reset_hw(hw);
2133         hw->adapter_stopped = FALSE;
2134         ixgbe_stop_adapter(hw);
2135         /* Turn off the laser */
2136         if (hw->phy.multispeed_fiber)
2137                 ixgbe_disable_tx_laser(hw);
2138
2139         /* reprogram the RAR[0] in case user changed it. */
2140         ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2141
2142         return;
2143 }
2144
2145
2146 /*********************************************************************
2147  *
2148  *  Determine hardware revision.
2149  *
2150  **********************************************************************/
2151 static void
2152 ixgbe_identify_hardware(struct adapter *adapter)
2153 {
2154         device_t        dev = adapter->dev;
2155         struct ixgbe_hw *hw = &adapter->hw;
2156
2157         /* Save off the information about this board */
2158         hw->vendor_id = pci_get_vendor(dev);
2159         hw->device_id = pci_get_device(dev);
2160         hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
2161         hw->subsystem_vendor_id =
2162             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2163         hw->subsystem_device_id =
2164             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2165
2166         /* We need this here to set the num_segs below */
2167         ixgbe_set_mac_type(hw);
2168
2169         /* Pick up the 82599 and VF settings */
2170         if (hw->mac.type != ixgbe_mac_82598EB) {
2171                 hw->phy.smart_speed = ixgbe_smart_speed;
2172                 adapter->num_segs = IXGBE_82599_SCATTER;
2173         } else
2174                 adapter->num_segs = IXGBE_82598_SCATTER;
2175
2176         return;
2177 }
2178
2179 /*********************************************************************
2180  *
2181  *  Determine optic type
2182  *
2183  **********************************************************************/
2184 static void
2185 ixgbe_setup_optics(struct adapter *adapter)
2186 {
2187         struct ixgbe_hw *hw = &adapter->hw;
2188         int             layer;
2189         
2190         layer = ixgbe_get_supported_physical_layer(hw);
2191
2192         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2193                 adapter->optics = IFM_10G_T;
2194                 return;
2195         }
2196
2197         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2198                 adapter->optics = IFM_1000_T;
2199                 return;
2200         }
2201
2202         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2203             IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2204                 adapter->optics = IFM_10G_LR;
2205                 return;
2206         }
2207
2208         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2209                 adapter->optics = IFM_10G_SR;
2210                 return;
2211         }
2212
2213         if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2214                 adapter->optics = IFM_10G_TWINAX;
2215                 return;
2216         }
2217
2218         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2219             IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2220                 adapter->optics = IFM_10G_CX4;
2221                 return;
2222         }
2223
2224         /* If we get here just set the default */
2225         adapter->optics = IFM_ETHER | IFM_AUTO;
2226         return;
2227 }
2228
2229 /*********************************************************************
2230  *
2231  *  Setup the Legacy or MSI Interrupt handler
2232  *
2233  **********************************************************************/
2234 static int
2235 ixgbe_allocate_legacy(struct adapter *adapter)
2236 {
2237         device_t dev = adapter->dev;
2238         struct          ix_queue *que = adapter->queues;
2239         int error, rid = 0;
2240         unsigned int intr_flags;
2241
2242         /* MSI RID at 1 */
2243         if (adapter->msix == 1)
2244                 rid = 1;
2245
2246         /* Try allocating a MSI interrupt first */
2247         adapter->intr_type = pci_alloc_1intr(dev, ixgbe_msi_enable,
2248                 &rid, &intr_flags);
2249
2250         /* We allocate a single interrupt resource */
2251         adapter->res = bus_alloc_resource_any(dev,
2252             SYS_RES_IRQ, &rid, intr_flags);
2253         if (adapter->res == NULL) {
2254                 device_printf(dev, "Unable to allocate bus resource: "
2255                     "interrupt\n");
2256                 return (ENXIO);
2257         }
2258
2259         /*
2260          * Try allocating a fast interrupt and the associated deferred
2261          * processing contexts.
2262          */
2263         TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2264         que->tq = taskqueue_create("ixgbe_que", M_NOWAIT,
2265             taskqueue_thread_enqueue, &que->tq);
2266         taskqueue_start_threads(&que->tq, 1, PI_NET, -1, "%s ixq",
2267             device_get_nameunit(adapter->dev));
2268
2269         /* Tasklets for Link, SFP and Multispeed Fiber */
2270         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2271         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2272         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2273 #ifdef IXGBE_FDIR
2274         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2275 #endif
2276         adapter->tq = taskqueue_create("ixgbe_link", M_NOWAIT,
2277             taskqueue_thread_enqueue, &adapter->tq);
2278         taskqueue_start_threads(&adapter->tq, 1, PI_NET, -1, "%s linkq",
2279             device_get_nameunit(adapter->dev));
2280
2281         if ((error = bus_setup_intr(dev, adapter->res, INTR_MPSAFE,
2282             ixgbe_legacy_irq, que, &adapter->tag, &adapter->serializer)) != 0) {
2283                 device_printf(dev, "Failed to register fast interrupt "
2284                     "handler: %d\n", error);
2285                 taskqueue_free(que->tq);
2286                 taskqueue_free(adapter->tq);
2287                 que->tq = NULL;
2288                 adapter->tq = NULL;
2289                 return (error);
2290         }
2291         /* For simplicity in the handlers */
2292         adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2293
2294         return (0);
2295 }
2296
2297
2298 /*********************************************************************
2299  *
2300  *  Setup MSIX Interrupt resources and handlers 
2301  *
2302  **********************************************************************/
2303 static int
2304 ixgbe_allocate_msix(struct adapter *adapter)
2305 {
2306         device_t        dev = adapter->dev;
2307         struct          ix_queue *que = adapter->queues;
2308         int             error, rid, vector = 0;
2309
2310         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2311                 rid = vector + 1;
2312                 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2313                     RF_SHAREABLE | RF_ACTIVE);
2314                 if (que->res == NULL) {
2315                         device_printf(dev,"Unable to allocate"
2316                             " bus resource: que interrupt [%d]\n", vector);
2317                         return (ENXIO);
2318                 }
2319                 /* Set the handler function */
2320                 error = bus_setup_intr(dev, que->res, INTR_MPSAFE,
2321                     ixgbe_msix_que, que, &que->tag, &que->serializer);
2322                 if (error) {
2323                         que->res = NULL;
2324                         device_printf(dev, "Failed to register QUE handler");
2325                         return (error);
2326                 }
2327 #if 0 /* __FreeBSD_version >= 800504 */
2328                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2329 #endif
2330                 que->msix = vector;
2331                 adapter->que_mask |= (u64)(1 << que->msix);
2332                 /*
2333                 ** Bind the msix vector, and thus the
2334                 ** ring to the corresponding cpu.
2335                 */
2336 #if 0 /* XXX */
2337                 if (adapter->num_queues > 1)
2338                         bus_bind_intr(dev, que->res, i);
2339 #endif
2340
2341                 TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2342                 que->tq = taskqueue_create("ixgbe_que", M_NOWAIT,
2343                     taskqueue_thread_enqueue, &que->tq);
2344                 taskqueue_start_threads(&que->tq, 1, PI_NET, -1, "%s que",
2345                     device_get_nameunit(adapter->dev));
2346         }
2347
2348         /* and Link */
2349         rid = vector + 1;
2350         adapter->res = bus_alloc_resource_any(dev,
2351             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2352         if (!adapter->res) {
2353                 device_printf(dev,"Unable to allocate"
2354                     " bus resource: Link interrupt [%d]\n", rid);
2355                 return (ENXIO);
2356         }
2357         /* Set the link handler function */
2358         error = bus_setup_intr(dev, adapter->res, INTR_MPSAFE,
2359             ixgbe_msix_link, adapter, &adapter->tag, &adapter->serializer);
2360         if (error) {
2361                 adapter->res = NULL;
2362                 device_printf(dev, "Failed to register LINK handler");
2363                 return (error);
2364         }
2365 #if 0 /* __FreeBSD_version >= 800504 */
2366         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2367 #endif
2368         adapter->linkvec = vector;
2369         /* Tasklets for Link, SFP and Multispeed Fiber */
2370         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2371         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2372         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2373 #ifdef IXGBE_FDIR
2374         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2375 #endif
2376         adapter->tq = taskqueue_create("ixgbe_link", M_NOWAIT,
2377             taskqueue_thread_enqueue, &adapter->tq);
2378         taskqueue_start_threads(&adapter->tq, 1, PI_NET, -1, "%s linkq",
2379             device_get_nameunit(adapter->dev));
2380
2381         return (0);
2382 }
2383
2384 #if 0   /* HAVE_MSIX */
2385 /*
2386  * Setup Either MSI/X or MSI
2387  */
2388 static int
2389 ixgbe_setup_msix(struct adapter *adapter)
2390 {
2391         device_t dev = adapter->dev;
2392         int rid, want, queues, msgs;
2393
2394         /* Override by tuneable */
2395         if (ixgbe_enable_msix == 0)
2396                 goto msi;
2397
2398         /* First try MSI/X */
2399         rid = PCIR_BAR(MSIX_82598_BAR);
2400         adapter->msix_mem = bus_alloc_resource_any(dev,
2401             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2402         if (!adapter->msix_mem) {
2403                 rid += 4;       /* 82599 maps in higher BAR */
2404                 adapter->msix_mem = bus_alloc_resource_any(dev,
2405                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2406         }
2407         if (!adapter->msix_mem) {
2408                 /* May not be enabled */
2409                 device_printf(adapter->dev,
2410                     "Unable to map MSIX table \n");
2411                 goto msi;
2412         }
2413
2414         msgs = pci_msix_count(dev); 
2415         if (msgs == 0) { /* system has msix disabled */
2416                 bus_release_resource(dev, SYS_RES_MEMORY,
2417                     rid, adapter->msix_mem);
2418                 adapter->msix_mem = NULL;
2419                 goto msi;
2420         }
2421
2422         /* Figure out a reasonable auto config value */
2423         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2424
2425         if (ixgbe_num_queues != 0)
2426                 queues = ixgbe_num_queues;
2427         /* Set max queues to 8 when autoconfiguring */
2428         else if ((ixgbe_num_queues == 0) && (queues > 8))
2429                 queues = 8;
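        /*
         * Illustrative example: a 16 CPU machine reporting 10 MSI-X
         * messages starts with queues = min(16, 10 - 1) = 9; with no
         * ixgbe_num_queues override that is clamped to 8, and "want"
         * below becomes 9 (8 queue vectors plus one for link).
         */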
2430
2431         /*
2432         ** Want one vector (RX/TX pair) per queue
2433         ** plus an additional for Link.
2434         */
2435         want = queues + 1;
2436         if (msgs >= want)
2437                 msgs = want;
2438         else {
2439                 device_printf(adapter->dev,
2440                     "MSIX Configuration Problem, "
2441                     "%d vectors but %d queues wanted!\n",
2442                     msgs, want);
2443                 return (0); /* Will go to Legacy setup */
2444         }
2445         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2446                 device_printf(adapter->dev,
2447                     "Using MSIX interrupts with %d vectors\n", msgs);
2448                 adapter->num_queues = queues;
2449                 return (msgs);
2450         }
2451 msi:
2452         msgs = pci_msi_count(dev);
2453         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2454                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2455         else
2456                 device_printf(adapter->dev,"Using a Legacy interrupt\n");
2457         return (msgs);
2458 }
2459 #endif
2460
2461
2462 static int
2463 ixgbe_allocate_pci_resources(struct adapter *adapter)
2464 {
2465         int             rid;
2466         device_t        dev = adapter->dev;
2467
2468         rid = PCIR_BAR(0);
2469         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2470             &rid, RF_ACTIVE);
2471
2472         if (!(adapter->pci_mem)) {
2473                 device_printf(dev,"Unable to allocate bus resource: memory\n");
2474                 return (ENXIO);
2475         }
2476
2477         adapter->osdep.mem_bus_space_tag =
2478                 rman_get_bustag(adapter->pci_mem);
2479         adapter->osdep.mem_bus_space_handle =
2480                 rman_get_bushandle(adapter->pci_mem);
2481         adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
2482
2483         /* Legacy defaults */
2484         adapter->num_queues = 1;
2485         adapter->hw.back = &adapter->osdep;
2486
2487         /*
2488         ** Now setup MSI or MSI/X, should
2489         ** return us the number of supported
2490         ** vectors. (Will be 1 for MSI)
2491         */
2492 #if 0   /* HAVE_MSIX */
2493         adapter->msix = ixgbe_setup_msix(adapter);
2494 #endif
2495         return (0);
2496 }
2497
2498 static void
2499 ixgbe_free_pci_resources(struct adapter * adapter)
2500 {
2501         struct          ix_queue *que = adapter->queues;
2502         device_t        dev = adapter->dev;
2503         int             rid, memrid;
2504
2505         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2506                 memrid = PCIR_BAR(MSIX_82598_BAR);
2507         else
2508                 memrid = PCIR_BAR(MSIX_82599_BAR);
2509
2510         /*
2511         ** There is a slight possibility of a failure mode
2512         ** in attach that will result in entering this function
2513         ** before interrupt resources have been initialized, and
2514         ** in that case we do not want to execute the loops below.
2515         ** We can detect this reliably by the state of the adapter
2516         ** res pointer.
2517         */
2518         if (adapter->res == NULL)
2519                 goto mem;
2520
2521         /*
2522         **  Release all msix queue resources:
2523         */
2524         for (int i = 0; i < adapter->num_queues; i++, que++) {
2525                 rid = que->msix + 1;
2526                 if (que->tag != NULL) {
2527                         bus_teardown_intr(dev, que->res, que->tag);
2528                         que->tag = NULL;
2529                 }
2530                 if (que->res != NULL)
2531                         bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2532         }
2533
2534
2535         /* Clean the Legacy or Link interrupt last */
2536         if (adapter->linkvec) /* we are doing MSIX */
2537                 rid = adapter->linkvec + 1;
2538         else
2539                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2540
2541         if (adapter->tag != NULL) {
2542                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2543                 adapter->tag = NULL;
2544         }
2545         if (adapter->res != NULL)
2546                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2547         if (adapter->intr_type == PCI_INTR_TYPE_MSI)
2548                 pci_release_msi(adapter->dev);
2549
2550 mem:
2551         if (adapter->msix)
2552                 pci_release_msi(dev);
2553
2554         if (adapter->msix_mem != NULL)
2555                 bus_release_resource(dev, SYS_RES_MEMORY,
2556                     memrid, adapter->msix_mem);
2557
2558         if (adapter->pci_mem != NULL)
2559                 bus_release_resource(dev, SYS_RES_MEMORY,
2560                     PCIR_BAR(0), adapter->pci_mem);
2561
2562         return;
2563 }
2564
2565 /*********************************************************************
2566  *
2567  *  Setup networking device structure and register an interface.
2568  *
2569  **********************************************************************/
2570 static int
2571 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2572 {
2573         struct ixgbe_hw *hw = &adapter->hw;
2574         struct ifnet   *ifp;
2575
2576         INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2577
2578         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2579         if (ifp == NULL) {
2580                 device_printf(dev, "can not allocate ifnet structure\n");
2581                 return (-1);
2582         }
2583         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2584         ifp->if_baudrate = 1000000000;
2585         ifp->if_init = ixgbe_init;
2586         ifp->if_softc = adapter;
2587         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2588         ifp->if_ioctl = ixgbe_ioctl;
2589         ifp->if_start = ixgbe_start;
2590 #if 0 /* __FreeBSD_version >= 800000 */
2591         ifp->if_transmit = ixgbe_mq_start;
2592         ifp->if_qflush = ixgbe_qflush;
2593 #endif
2594         ifp->if_snd.ifq_maxlen = adapter->num_tx_desc - 2;
2595
2596         ether_ifattach(ifp, adapter->hw.mac.addr, NULL);
2597
2598         adapter->max_frame_size =
2599             ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
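        /* With the default 1500 byte MTU this is 1500 + 14 + 4 = 1518 bytes. */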
2600
2601         /*
2602          * Tell the upper layer(s) we support long frames.
2603          */
2604         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2605
2606         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM;
2607         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2608         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2609 #if 0 /* NET_TSO */
2610                              |  IFCAP_VLAN_HWTSO
2611 #endif
2612                              |  IFCAP_VLAN_MTU;
2613         ifp->if_capenable = ifp->if_capabilities;
2614
2615         /* Don't enable LRO by default */
2616 #if 0 /* NET_LRO */
2617         ifp->if_capabilities |= IFCAP_LRO;
2618 #endif
2619
2620         /*
2621         ** Don't turn this on by default, if vlans are
2622         ** created on another pseudo device (eg. lagg)
2623         ** then vlan events are not passed thru, breaking
2624         ** operation, but with HW FILTER off it works. If
2625         ** using vlans directly on the ixgbe driver you can
2626         ** enable this and get full hardware tag filtering.
2627         */
2628         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2629
2630         /*
2631          * Specify the media types supported by this adapter and register
2632          * callbacks to update media and link information
2633          */
2634         ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2635                      ixgbe_media_status);
2636         ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2637         ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2638         if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2639                 ifmedia_add(&adapter->media,
2640                     IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2641                 ifmedia_add(&adapter->media,
2642                     IFM_ETHER | IFM_1000_T, 0, NULL);
2643         }
2644         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2645         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2646
2647         return (0);
2648 }
2649
2650 static void
2651 ixgbe_config_link(struct adapter *adapter)
2652 {
2653         struct ixgbe_hw *hw = &adapter->hw;
2654         u32     autoneg, err = 0;
2655         bool    sfp, negotiate = FALSE; /* avoid passing negotiate uninitialized below */
2656
2657         sfp = ixgbe_is_sfp(hw);
2658
2659         if (sfp) { 
2660                 if (hw->phy.multispeed_fiber) {
2661                         hw->mac.ops.setup_sfp(hw);
2662                         ixgbe_enable_tx_laser(hw);
2663                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
2664                 } else
2665                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
2666         } else {
2667                 if (hw->mac.ops.check_link)
2668                         err = ixgbe_check_link(hw, &autoneg,
2669                             &adapter->link_up, FALSE);
2670                 if (err)
2671                         goto out;
2672                 autoneg = hw->phy.autoneg_advertised;
2673                 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2674                         err  = hw->mac.ops.get_link_capabilities(hw,
2675                             &autoneg, &negotiate);
2676                 if (err)
2677                         goto out;
2678                 if (hw->mac.ops.setup_link)
2679                         err = hw->mac.ops.setup_link(hw, autoneg,
2680                             negotiate, adapter->link_up);
2681         }
2682 out:
2683         return;
2684 }
2685
2686 /********************************************************************
2687  * Manage DMA'able memory.
2688  *******************************************************************/
2689 static void
2690 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2691 {
2692         if (error)
2693                 return;
2694         *(bus_addr_t *) arg = segs->ds_addr;
2695         return;
2696 }
2697
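/*
 * ixgbe_dma_malloc() follows the usual busdma three-step sequence: create
 * a tag describing the allocation, allocate memory against that tag, then
 * load the map to obtain the bus address (returned via ixgbe_dmamap_cb).
 * Each fail_* label unwinds exactly the steps completed so far.
 */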
2698 static int
2699 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2700                 struct ixgbe_dma_alloc *dma, int mapflags)
2701 {
2702         device_t dev = adapter->dev;
2703         int             r;
2704
2705         r = bus_dma_tag_create(NULL,    /* parent */
2706                                DBA_ALIGN, 0,    /* alignment, bounds */
2707                                BUS_SPACE_MAXADDR,       /* lowaddr */
2708                                BUS_SPACE_MAXADDR,       /* highaddr */
2709                                NULL, NULL,      /* filter, filterarg */
2710                                size,    /* maxsize */
2711                                1,       /* nsegments */
2712                                size,    /* maxsegsize */
2713                                BUS_DMA_ALLOCNOW,        /* flags */
2714                                &dma->dma_tag);
2715         if (r != 0) {
2716                 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2717                        "error %u\n", r);
2718                 goto fail_0;
2719         }
2720         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2721                              BUS_DMA_NOWAIT, &dma->dma_map);
2722         if (r != 0) {
2723                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2724                        "error %u\n", r);
2725                 goto fail_1;
2726         }
2727         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2728                             size,
2729                             ixgbe_dmamap_cb,
2730                             &dma->dma_paddr,
2731                             mapflags | BUS_DMA_NOWAIT);
2732         if (r != 0) {
2733                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2734                        "error %u\n", r);
2735                 goto fail_2;
2736         }
2737         dma->dma_size = size;
2738         return (0);
2739 fail_2:
2740         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2741 fail_1:
2742         bus_dma_tag_destroy(dma->dma_tag);
2743 fail_0:
2744         dma->dma_map = NULL;
2745         dma->dma_tag = NULL;
2746         return (r);
2747 }
2748
2749 static void
2750 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2751 {
2752         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2753             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2754         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2755         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2756         bus_dma_tag_destroy(dma->dma_tag);
2757 }
2758
2759
2760 /*********************************************************************
2761  *
2762  *  Allocate memory for the transmit and receive rings, and then
2763  *  the descriptors associated with each, called only once at attach.
2764  *
2765  **********************************************************************/
2766 static int
2767 ixgbe_allocate_queues(struct adapter *adapter)
2768 {
2769         device_t        dev = adapter->dev;
2770         struct ix_queue *que;
2771         struct tx_ring  *txr;
2772         struct rx_ring  *rxr;
2773         int rsize, tsize, error = IXGBE_SUCCESS;
2774         int txconf = 0, rxconf = 0;
2775
2776         /* First allocate the top level queue structs */
2777         if (!(adapter->queues =
2778             (struct ix_queue *) kmalloc(sizeof(struct ix_queue) *
2779             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2780                 device_printf(dev, "Unable to allocate queue memory\n");
2781                 error = ENOMEM;
2782                 goto fail;
2783         }
2784
2785         /* First allocate the TX ring struct memory */
2786         if (!(adapter->tx_rings =
2787             (struct tx_ring *) kmalloc(sizeof(struct tx_ring) *
2788             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2789                 device_printf(dev, "Unable to allocate TX ring memory\n");
2790                 error = ENOMEM;
2791                 goto tx_fail;
2792         }
2793
2794         /* Next allocate the RX */
2795         if (!(adapter->rx_rings =
2796             (struct rx_ring *) kmalloc(sizeof(struct rx_ring) *
2797             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2798                 device_printf(dev, "Unable to allocate RX ring memory\n");
2799                 error = ENOMEM;
2800                 goto rx_fail;
2801         }
2802
2803         /* For the ring itself */
2804         tsize = roundup2(adapter->num_tx_desc *
2805             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2806
2807         /*
2808          * Now set up the TX queues, txconf is needed to handle the
2809          * possibility that things fail midcourse and we need to
2810          * undo memory gracefully
2811          */ 
2812         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2813                 /* Set up some basics */
2814                 txr = &adapter->tx_rings[i];
2815                 txr->adapter = adapter;
2816                 txr->me = i;
2817
2818                 /* Initialize the TX side lock */
2819                 ksnprintf(txr->lock_name, sizeof(txr->lock_name), "%s:tx(%d)",
2820                     device_get_nameunit(dev), txr->me);
2821                 lockinit(&txr->tx_lock, txr->lock_name, 0, LK_CANRECURSE);
2822
2823                 if (ixgbe_dma_malloc(adapter, tsize,
2824                         &txr->txdma, BUS_DMA_NOWAIT)) {
2825                         device_printf(dev,
2826                             "Unable to allocate TX Descriptor memory\n");
2827                         error = ENOMEM;
2828                         goto err_tx_desc;
2829                 }
2830                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2831                 bzero((void *)txr->tx_base, tsize);
2832
2833                 /* Now allocate transmit buffers for the ring */
2834                 if (ixgbe_allocate_transmit_buffers(txr)) {
2835                         device_printf(dev,
2836                             "Critical Failure setting up transmit buffers\n");
2837                         error = ENOMEM;
2838                         goto err_tx_desc;
2839                 }
2840 #if 0 /* __FreeBSD_version >= 800000 */
2841                 /* Allocate a buf ring */
2842                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2843                     M_WAITOK, &txr->tx_mtx);
2844                 if (txr->br == NULL) {
2845                         device_printf(dev,
2846                             "Critical Failure setting up buf ring\n");
2847                         error = ENOMEM;
2848                         goto err_tx_desc;
2849                 }
2850 #endif
2851         }
2852
2853         /*
2854          * Next the RX queues...
2855          */ 
2856         rsize = roundup2(adapter->num_rx_desc *
2857             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2858         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2859                 rxr = &adapter->rx_rings[i];
2860                 /* Set up some basics */
2861                 rxr->adapter = adapter;
2862                 rxr->me = i;
2863
2864                 /* Initialize the RX side lock */
2865                 ksnprintf(rxr->lock_name, sizeof(rxr->lock_name), "%s:rx(%d)",
2866                     device_get_nameunit(dev), rxr->me);
2867                 lockinit(&rxr->rx_lock, rxr->lock_name, 0, LK_CANRECURSE);
2868
2869                 if (ixgbe_dma_malloc(adapter, rsize,
2870                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2871                         device_printf(dev,
2872                             "Unable to allocate Rx Descriptor memory\n");
2873                         error = ENOMEM;
2874                         goto err_rx_desc;
2875                 }
2876                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2877                 bzero((void *)rxr->rx_base, rsize);
2878
2879                 /* Allocate receive buffers for the ring */
2880                 if (ixgbe_allocate_receive_buffers(rxr)) {
2881                         device_printf(dev,
2882                             "Critical Failure setting up receive buffers\n");
2883                         error = ENOMEM;
2884                         goto err_rx_desc;
2885                 }
2886         }
2887
2888         /*
2889         ** Finally set up the queue holding structs
2890         */
2891         for (int i = 0; i < adapter->num_queues; i++) {
2892                 que = &adapter->queues[i];
2893                 que->adapter = adapter;
2894                 que->txr = &adapter->tx_rings[i];
2895                 que->rxr = &adapter->rx_rings[i];
2896         }
2897
2898         return (0);
2899
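/*
** Error unwind: free in the reverse order of allocation.  rxconf and
** txconf count the rings whose descriptor DMA areas were successfully
** allocated, so only those are released here.
*/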
2900 err_rx_desc:
2901         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2902                 ixgbe_dma_free(adapter, &rxr->rxdma);
2903 err_tx_desc:
2904         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2905                 ixgbe_dma_free(adapter, &txr->txdma);
2906         kfree(adapter->rx_rings, M_DEVBUF);
2907 rx_fail:
2908         kfree(adapter->tx_rings, M_DEVBUF);
2909 tx_fail:
2910         kfree(adapter->queues, M_DEVBUF);
2911 fail:
2912         return (error);
2913 }
2914
2915 /*********************************************************************
2916  *
2917  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2918  *  the information needed to transmit a packet on the wire. This is
2919  *  called only once at attach, setup is done every reset.
2920  *  called only once at attach; setup is done on every reset.
2921  **********************************************************************/
2922 static int
2923 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
2924 {
2925         struct adapter *adapter = txr->adapter;
2926         device_t dev = adapter->dev;
2927         struct ixgbe_tx_buf *txbuf;
2928         int error, i;
2929
2930         /*
2931          * Setup DMA descriptor areas.
2932          */
2933         if ((error = bus_dma_tag_create(
2934                                NULL,    /* parent */
2935                                1, 0,            /* alignment, bounds */
2936                                BUS_SPACE_MAXADDR,       /* lowaddr */
2937                                BUS_SPACE_MAXADDR,       /* highaddr */
2938                                NULL, NULL,              /* filter, filterarg */
2939                                IXGBE_TSO_SIZE,          /* maxsize */
2940                                adapter->num_segs,       /* nsegments */
2941                                PAGE_SIZE,               /* maxsegsize */
2942                                0,                       /* flags */
2943                                &txr->txtag))) {
2944                 device_printf(dev,"Unable to allocate TX DMA tag\n");
2945                 goto fail;
2946         }
2947
2948         if (!(txr->tx_buffers =
2949             (struct ixgbe_tx_buf *) kmalloc(sizeof(struct ixgbe_tx_buf) *
2950             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2951                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2952                 error = ENOMEM;
2953                 goto fail;
2954         }
2955
2956         /* Create the descriptor buffer dma maps */
2957         txbuf = txr->tx_buffers;
2958         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2959                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2960                 if (error != 0) {
2961                         device_printf(dev, "Unable to create TX DMA map\n");
2962                         goto fail;
2963                 }
2964         }
2965
2966         return 0;
2967 fail:
2968         /* Free everything; this handles the case where we failed partway through */
2969         ixgbe_free_transmit_structures(adapter);
2970         return (error);
2971 }
2972
2973 /*********************************************************************
2974  *
2975  *  Initialize a transmit ring.
2976  *
2977  **********************************************************************/
2978 static void
2979 ixgbe_setup_transmit_ring(struct tx_ring *txr)
2980 {
2981         struct adapter *adapter = txr->adapter;
2982         struct ixgbe_tx_buf *txbuf;
2983         int i;
2984 #ifdef DEV_NETMAP
2985         struct netmap_adapter *na = NA(adapter->ifp);
2986         struct netmap_slot *slot;
2987 #endif /* DEV_NETMAP */
2988
2989         /* Clear the old ring contents */
2990         IXGBE_TX_LOCK(txr);
2991 #ifdef DEV_NETMAP
2992         /*
2993          * (under lock): if in netmap mode, do some consistency
2994          * checks and set slot to entry 0 of the netmap ring.
2995          */
2996         slot = netmap_reset(na, NR_TX, txr->me, 0);
2997 #endif /* DEV_NETMAP */
2998         bzero((void *)txr->tx_base,
2999               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
3000         /* Reset indices */
3001         txr->next_avail_desc = 0;
3002         txr->next_to_clean = 0;
3003
3004         /* Free any existing tx buffers. */
3005         txbuf = txr->tx_buffers;
3006         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3007                 if (txbuf->m_head != NULL) {
3008                         bus_dmamap_sync(txr->txtag, txbuf->map,
3009                             BUS_DMASYNC_POSTWRITE);
3010                         bus_dmamap_unload(txr->txtag, txbuf->map);
3011                         m_freem(txbuf->m_head);
3012                         txbuf->m_head = NULL;
3013                 }
3014 #ifdef DEV_NETMAP
3015                 /*
3016                  * In netmap mode, set the map for the packet buffer.
3017                  * NOTE: Some drivers (not this one) also need to set
3018                  * the physical buffer address in the NIC ring.
3019                  * Slots in the netmap ring (indexed by "si") are
3020                  * kring->nkr_hwofs positions "ahead" wrt the
3021                  * corresponding slot in the NIC ring. In some drivers
3022                  * (not here) nkr_hwofs can be negative. Function
3023                  * netmap_idx_n2k() handles wraparounds properly.
3024                  */
3025                 if (slot) {
3026                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3027                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3028                 }
3029 #endif /* DEV_NETMAP */
3030                 /* Clear the EOP index */
3031                 txbuf->eop_index = -1;
3032         }
3033
3034 #ifdef IXGBE_FDIR
3035         /* Set the rate at which we sample packets */
3036         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3037                 txr->atr_sample = atr_sample_rate;
3038 #endif
3039
3040         /* Set number of descriptors available */
3041         txr->tx_avail = adapter->num_tx_desc;
3042
3043         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3044             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3045         IXGBE_TX_UNLOCK(txr);
3046 }
3047
3048 /*********************************************************************
3049  *
3050  *  Initialize all transmit rings.
3051  *
3052  **********************************************************************/
3053 static int
3054 ixgbe_setup_transmit_structures(struct adapter *adapter)
3055 {
3056         struct tx_ring *txr = adapter->tx_rings;
3057
3058         for (int i = 0; i < adapter->num_queues; i++, txr++)
3059                 ixgbe_setup_transmit_ring(txr);
3060
3061         return (0);
3062 }
3063
3064 /*********************************************************************
3065  *
3066  *  Enable transmit unit.
3067  *
3068  **********************************************************************/
3069 static void
3070 ixgbe_initialize_transmit_units(struct adapter *adapter)
3071 {
3072         struct tx_ring  *txr = adapter->tx_rings;
3073         struct ixgbe_hw *hw = &adapter->hw;
3074
3075         /* Setup the Base and Length of the Tx Descriptor Ring */
3076
3077         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3078                 u64     tdba = txr->txdma.dma_paddr;
3079                 u32     txctrl;
3080
3081                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3082                        (tdba & 0x00000000ffffffffULL));
3083                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3084                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3085                     adapter->num_tx_desc * sizeof(struct ixgbe_legacy_tx_desc));
3086
3087                 /* Setup the HW Tx Head and Tail descriptor pointers */
3088                 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3089                 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3090
3091                 /* Setup Transmit Descriptor Cmd Settings */
3092                 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3093                 txr->queue_status = IXGBE_QUEUE_IDLE;
3094
3095                 /* Disable Head Writeback */
3096                 switch (hw->mac.type) {
3097                 case ixgbe_mac_82598EB:
3098                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3099                         break;
3100                 case ixgbe_mac_82599EB:
3101                 case ixgbe_mac_X540:
3102                 default:
3103                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3104                         break;
3105                 }
3106                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3107                 switch (hw->mac.type) {
3108                 case ixgbe_mac_82598EB:
3109                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3110                         break;
3111                 case ixgbe_mac_82599EB:
3112                 case ixgbe_mac_X540:
3113                 default:
3114                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3115                         break;
3116                 }
3117
3118         }
3119
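        /*
         * Newer MACs (82599/X540) must have the transmit DMA engine enabled
         * explicitly, and MTQC may only be written while the descriptor
         * arbiter is disabled, hence the RTTDCS toggling around it.
         */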
3120         if (hw->mac.type != ixgbe_mac_82598EB) {
3121                 u32 dmatxctl, rttdcs;
3122                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3123                 dmatxctl |= IXGBE_DMATXCTL_TE;
3124                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3125                 /* Disable arbiter to set MTQC */
3126                 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3127                 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3128                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3129                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3130                 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3131                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3132         }
3133
3134         return;
3135 }
3136
3137 /*********************************************************************
3138  *
3139  *  Free all transmit rings.
3140  *
3141  **********************************************************************/
3142 static void
3143 ixgbe_free_transmit_structures(struct adapter *adapter)
3144 {
3145         struct tx_ring *txr = adapter->tx_rings;
3146
3147         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3148                 IXGBE_TX_LOCK(txr);
3149                 ixgbe_free_transmit_buffers(txr);
3150                 ixgbe_dma_free(adapter, &txr->txdma);
3151                 IXGBE_TX_UNLOCK(txr);
3152                 IXGBE_TX_LOCK_DESTROY(txr);
3153         }
3154         kfree(adapter->tx_rings, M_DEVBUF);
3155 }
3156
3157 /*********************************************************************
3158  *
3159  *  Free transmit ring related data structures.
3160  *
3161  **********************************************************************/
3162 static void
3163 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3164 {
3165         struct adapter *adapter = txr->adapter;
3166         struct ixgbe_tx_buf *tx_buffer;
3167         int             i;
3168
3169         INIT_DEBUGOUT("free_transmit_ring: begin");
3170
3171         if (txr->tx_buffers == NULL)
3172                 return;
3173
3174         tx_buffer = txr->tx_buffers;
3175         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3176                 if (tx_buffer->m_head != NULL) {
3177                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3178                             BUS_DMASYNC_POSTWRITE);
3179                         bus_dmamap_unload(txr->txtag,
3180                             tx_buffer->map);
3181                         m_freem(tx_buffer->m_head);
3182                         tx_buffer->m_head = NULL;
3183                         if (tx_buffer->map != NULL) {
3184                                 bus_dmamap_destroy(txr->txtag,
3185                                     tx_buffer->map);
3186                                 tx_buffer->map = NULL;
3187                         }
3188                 } else if (tx_buffer->map != NULL) {
3189                         bus_dmamap_unload(txr->txtag,
3190                             tx_buffer->map);
3191                         bus_dmamap_destroy(txr->txtag,
3192                             tx_buffer->map);
3193                         tx_buffer->map = NULL;
3194                 }
3195         }
3196 #if 0 /* __FreeBSD_version >= 800000 */
3197         if (txr->br != NULL)
3198                 buf_ring_free(txr->br, M_DEVBUF);
3199 #endif
3200         if (txr->tx_buffers != NULL) {
3201                 kfree(txr->tx_buffers, M_DEVBUF);
3202                 txr->tx_buffers = NULL;
3203         }
3204         if (txr->txtag != NULL) {
3205                 bus_dma_tag_destroy(txr->txtag);
3206                 txr->txtag = NULL;
3207         }
3208         return;
3209 }
3210
3211 /*********************************************************************
3212  *
3213  *  Advanced Context Descriptor setup for VLAN or CSUM
3214  *
3215  **********************************************************************/
3216
3217 static bool
3218 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3219 {
3220         struct adapter *adapter = txr->adapter;
3221         struct ixgbe_adv_tx_context_desc *TXD;
3222         struct ixgbe_tx_buf        *tx_buffer;
3223         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3224         struct ether_vlan_header *eh;
3225         struct ip *ip;
3226         struct ip6_hdr *ip6;
3227         int  ehdrlen, ip_hlen = 0;
3228         u16     etype;
3229         u8      ipproto = 0;
3230         bool    offload = TRUE;
3231         int ctxd = txr->next_avail_desc;
3232         u16 vtag = 0;
3233
3234
3235         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3236                 offload = FALSE;
3237
3238         tx_buffer = &txr->tx_buffers[ctxd];
3239         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3240
3241         /*
3242         ** In advanced descriptors the vlan tag must 
3243         ** be placed into the descriptor itself.
3244         */
3245         if (mp->m_flags & M_VLANTAG) {
3246                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3247                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3248         } else if (offload == FALSE)
3249                 return FALSE;
3250
3251         /*
3252          * Determine where frame payload starts.
3253          * Jump over vlan headers if already present,
3254          * helpful for QinQ too.
3255          */
3256         eh = mtod(mp, struct ether_vlan_header *);
3257         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3258                 etype = ntohs(eh->evl_proto);
3259                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3260         } else {
3261                 etype = ntohs(eh->evl_encap_proto);
3262                 ehdrlen = ETHER_HDR_LEN;
3263         }
3264
3265         /* Set the ether header length */
3266         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3267
3268         switch (etype) {
3269                 case ETHERTYPE_IP:
3270                         ip = (struct ip *)(mp->m_data + ehdrlen);
3271                         ip_hlen = ip->ip_hl << 2;
3272                         ipproto = ip->ip_p;
3273                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3274                         break;
3275                 case ETHERTYPE_IPV6:
3276                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3277                         ip_hlen = sizeof(struct ip6_hdr);
3278                         /* XXX-BZ this will go badly in case of ext hdrs. */
3279                         ipproto = ip6->ip6_nxt;
3280                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3281                         break;
3282                 default:
3283                         offload = FALSE;
3284                         break;
3285         }
3286
3287         vlan_macip_lens |= ip_hlen;
3288         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3289
3290         switch (ipproto) {
3291                 case IPPROTO_TCP:
3292                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3293                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3294                         break;
3295
3296                 case IPPROTO_UDP:
3297                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3298                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3299                         break;
3300
3301 #if 0
3302                 case IPPROTO_SCTP:
3303                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3304                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
3305                         break;
3306 #endif
3307                 default:
3308                         offload = FALSE;
3309                         break;
3310         }
3311
3312         /* Now copy bits into descriptor */
3313         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3314         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3315         TXD->seqnum_seed = htole32(0);
3316         TXD->mss_l4len_idx = htole32(0);
3317
3318         tx_buffer->m_head = NULL;
3319         tx_buffer->eop_index = -1;
3320
3321         /* We've consumed the first desc, adjust counters */
3322         if (++ctxd == adapter->num_tx_desc)
3323                 ctxd = 0;
3324         txr->next_avail_desc = ctxd;
3325         --txr->tx_avail;
3326
3327         return (offload);
3328 }
3329
3330 /**********************************************************************
3331  *
3332  *  Setup work for hardware segmentation offload (TSO) on
3333  *  adapters using advanced tx descriptors
3334  *
3335  **********************************************************************/
3336 static bool
3337 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *paylen,
3338     u32 *olinfo_status)
3339 {
3340         struct adapter *adapter = txr->adapter;
3341         struct ixgbe_adv_tx_context_desc *TXD;
3342         struct ixgbe_tx_buf        *tx_buffer;
3343         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3344         u16 vtag = 0, eh_type;
3345         u32 mss_l4len_idx = 0, len;
3346         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3347         struct ether_vlan_header *eh;
3348 #if 0 /* IPv6 TSO */
3349 #ifdef INET6
3350         struct ip6_hdr *ip6;
3351 #endif
3352 #endif
3353 #ifdef INET
3354         struct ip *ip;
3355 #endif
3356         struct tcphdr *th;
3357
3358
3359         /*
3360          * Determine where frame payload starts.
3361          * Jump over vlan headers if already present
3362          */
3363         eh = mtod(mp, struct ether_vlan_header *);
3364         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3365                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3366                 eh_type = eh->evl_proto;
3367         } else {
3368                 ehdrlen = ETHER_HDR_LEN;
3369                 eh_type = eh->evl_encap_proto;
3370         }
3371
3372         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3373         len = ehdrlen + sizeof(struct tcphdr);
3374         switch (ntohs(eh_type)) {
3375 #if 0 /* IPv6 TSO */
3376 #ifdef INET6
3377         case ETHERTYPE_IPV6:
3378                 if (mp->m_len < len + sizeof(struct ip6_hdr))
3379                         return FALSE;
3380                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3381                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3382                 if (ip6->ip6_nxt != IPPROTO_TCP)
3383                         return FALSE;
3384                 ip_hlen = sizeof(struct ip6_hdr);
3385                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3386                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3387                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3388                 break;
3389 #endif
3390 #endif
3391 #ifdef INET
3392         case ETHERTYPE_IP:
3393                 if (mp->m_len < len + sizeof(struct ip))
3394                         return FALSE;
3395                 ip = (struct ip *)(mp->m_data + ehdrlen);
3396                 if (ip->ip_p != IPPROTO_TCP)
3397                         return FALSE;
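                /*
                 * Seed the checksums for TSO: zero the IP checksum and put
                 * the pseudo-header checksum (addresses and protocol, no
                 * length) in th_sum so the hardware can finish the
                 * per-segment IP and TCP checksums.
                 */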
3398                 ip->ip_sum = 0;
3399                 ip_hlen = ip->ip_hl << 2;
3400                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3401                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3402                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3403                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3404                 /* Tell transmit desc to also do IPv4 checksum. */
3405                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3406                 break;
3407 #endif
3408         default:
3409                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3410                     __func__, ntohs(eh_type));
3411                 break;
3412         }
3413
3414         ctxd = txr->next_avail_desc;
3415         tx_buffer = &txr->tx_buffers[ctxd];
3416         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3417
3418         tcp_hlen = th->th_off << 2;
3419
3420         /* TCP payload length, used for the PAYLEN field in the transmit desc in encap */
3421         *paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3422
3423         /* VLAN MACLEN IPLEN */
3424         if (mp->m_flags & M_VLANTAG) {
3425                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3426                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3427         }
3428
3429         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3430         vlan_macip_lens |= ip_hlen;
3431         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3432
3433         /* ADV DTYPE TUCMD */
3434         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3435         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3436         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3437
3438         /* MSS L4LEN IDX */
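        /*
         * MSS tells the hardware how large each segment's payload may be;
         * L4LEN is the TCP header length it replicates in front of every
         * segment.
         */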
3439         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
3440         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3441         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3442
3443         TXD->seqnum_seed = htole32(0);
3444         tx_buffer->m_head = NULL;
3445         tx_buffer->eop_index = -1;
3446
3447         if (++ctxd == adapter->num_tx_desc)
3448                 ctxd = 0;
3449
3450         txr->tx_avail--;
3451         txr->next_avail_desc = ctxd;
3452         return TRUE;
3453 }
3454
3455 #ifdef IXGBE_FDIR
3456 /*
3457 ** This routine parses packet headers so that Flow
3458 ** Director can make a hashed filter table entry 
3459 ** allowing traffic flows to be identified and kept
3460 ** on the same cpu.  Doing this for every packet would
3461 ** be a performance hit, so we only do it for one in
3462 ** IXGBE_FDIR_RATE packets.
3463 */
3464 static void
3465 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3466 {
3467         struct adapter                  *adapter = txr->adapter;
3468         struct ix_queue                 *que;
3469         struct ip                       *ip;
3470         struct tcphdr                   *th;
3471         struct udphdr                   *uh;
3472         struct ether_vlan_header        *eh;
3473         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
3474         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
3475         int                             ehdrlen, ip_hlen;
3476         u16                             etype;
3477
3478         eh = mtod(mp, struct ether_vlan_header *);
3479         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3480                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3481                 etype = eh->evl_proto;
3482         } else {
3483                 ehdrlen = ETHER_HDR_LEN;
3484                 etype = eh->evl_encap_proto;
3485         }
3486
3487         /* Only handling IPv4 */
3488         if (etype != htons(ETHERTYPE_IP))
3489                 return;
3490
3491         ip = (struct ip *)(mp->m_data + ehdrlen);
3492         ip_hlen = ip->ip_hl << 2;
3493
3494         /* check if we're UDP or TCP */
3495         switch (ip->ip_p) {
3496         case IPPROTO_TCP:
3497                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3498                 /* src and dst are inverted */
3499                 common.port.dst ^= th->th_sport;
3500                 common.port.src ^= th->th_dport;
3501                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3502                 break;
3503         case IPPROTO_UDP:
3504                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
3505                 /* src and dst are inverted */
3506                 common.port.dst ^= uh->uh_sport;
3507                 common.port.src ^= uh->uh_dport;
3508                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3509                 break;
3510         default:
3511                 return;
3512         }
3513
3514         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vlantag);
3515         if (mp->m_pkthdr.ether_vlantag)
3516                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3517         else
3518                 common.flex_bytes ^= etype;
3519         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3520
3521         que = &adapter->queues[txr->me];
3522         /*
3523         ** This assumes the Rx queue and Tx
3524         ** queue are bound to the same CPU
3525         */
3526         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3527             input, common, que->msix);
3528 }
3529 #endif /* IXGBE_FDIR */
3530
3531 /**********************************************************************
3532  *
3533  *  Examine each tx_buffer in the used queue. If the hardware is done
3534  *  processing the packet then free associated resources. The
3535  *  tx_buffer is put back on the free queue.
3536  *
3537  **********************************************************************/
3538 static bool
3539 ixgbe_txeof(struct tx_ring *txr)
3540 {
3541         struct adapter  *adapter = txr->adapter;
3542         struct ifnet    *ifp = adapter->ifp;
3543         u32     first, last, done, processed;
3544         struct ixgbe_tx_buf *tx_buffer;
3545         struct ixgbe_legacy_tx_desc *tx_desc, *eop_desc;
3546
3547         KKASSERT(lockstatus(&txr->tx_lock, curthread) != 0);
3548
3549 #ifdef DEV_NETMAP
3550         if (ifp->if_capenable & IFCAP_NETMAP) {
3551                 struct netmap_adapter *na = NA(ifp);
3552                 struct netmap_kring *kring = &na->tx_rings[txr->me];
3553
3554                 tx_desc = (struct ixgbe_legacy_tx_desc *)txr->tx_base;
3555
3556                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3557                     BUS_DMASYNC_POSTREAD);
3558                 /*
3559                  * In netmap mode, all the work is done in the context
3560                  * of the client thread. Interrupt handlers only wake up
3561                  * clients, which may be sleeping on individual rings
3562                  * or on a global resource for all rings.
3563                  * To implement tx interrupt mitigation, we wake up the client
3564                  * thread roughly every half ring, even if the NIC interrupts
3565                  * more frequently. This is implemented as follows:
3566                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
3567                  *   the slot that should wake up the thread (nkr_num_slots
3568                  *   means the user thread should not be woken up);
3569                  * - the driver ignores tx interrupts unless netmap_mitigate=0
3570                  *   or the slot has the DD bit set.
3571                  *
3572                  * When the driver has separate locks, we need to
3573                  * release and re-acquire txlock to avoid deadlocks.
3574                  * XXX see if we can find a better way.
3575                  */
3576                 if (!netmap_mitigate ||
3577                     (kring->nr_kflags < kring->nkr_num_slots &&
3578                      tx_desc[kring->nr_kflags].upper.fields.status & IXGBE_TXD_STAT_DD)) {
3579                         kring->nr_kflags = kring->nkr_num_slots;
3580                         selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3581                         IXGBE_TX_UNLOCK(txr);
3582                         IXGBE_CORE_LOCK(adapter);
3583                         selwakeuppri(&na->tx_si, PI_NET);
3584                         IXGBE_CORE_UNLOCK(adapter);
3585                         IXGBE_TX_LOCK(txr);
3586                 }
3587                 return FALSE;
3588         }
3589 #endif /* DEV_NETMAP */
3590
3591         if (txr->tx_avail == adapter->num_tx_desc) {
3592                 txr->queue_status = IXGBE_QUEUE_IDLE;
3593                 return FALSE;
3594         }
3595
3596         processed = 0;
3597         first = txr->next_to_clean;
3598         tx_buffer = &txr->tx_buffers[first];
3599         /* For cleanup we just use legacy struct */
3600         tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
3601         last = tx_buffer->eop_index;
3602         if (last == -1)
3603                 return FALSE;
3604         eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
3605
3606         /*
3607         ** Get the index of the first descriptor
3608         ** BEYOND the EOP and call that 'done'.
3609         ** I do this so the comparison in the
3610         ** inner while loop below can be simple
3611         */
3612         if (++last == adapter->num_tx_desc) last = 0;
3613         done = last;
3614
3615         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3616             BUS_DMASYNC_POSTREAD);
3617         /*
3618         ** Only the EOP descriptor of a packet now has the DD
3619         ** bit set, this is what we look for...
3620         */
3621         while (eop_desc->upper.fields.status & IXGBE_TXD_STAT_DD) {
3622                 /* We clean the range of the packet */
3623                 while (first != done) {
3624                         tx_desc->upper.data = 0;
3625                         tx_desc->lower.data = 0;
3626                         tx_desc->buffer_addr = 0;
3627                         ++txr->tx_avail;
3628                         ++processed;
3629
3630                         if (tx_buffer->m_head) {
3631                                 txr->bytes +=
3632                                     tx_buffer->m_head->m_pkthdr.len;
3633                                 bus_dmamap_sync(txr->txtag,
3634                                     tx_buffer->map,
3635                                     BUS_DMASYNC_POSTWRITE);
3636                                 bus_dmamap_unload(txr->txtag,
3637                                     tx_buffer->map);
3638                                 m_freem(tx_buffer->m_head);
3639                                 tx_buffer->m_head = NULL;
3640                                 tx_buffer->map = NULL;
3641                         }
3642                         tx_buffer->eop_index = -1;
3643                         txr->watchdog_time = ticks;
3644
3645                         if (++first == adapter->num_tx_desc)
3646                                 first = 0;
3647
3648                         tx_buffer = &txr->tx_buffers[first];
3649                         tx_desc =
3650                             (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
3651                 }
3652                 ++txr->packets;
3653                 ++ifp->if_opackets;
3654                 /* See if there is more work now */
3655                 last = tx_buffer->eop_index;
3656                 if (last != -1) {
3657                         eop_desc =
3658                             (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
3659                         /* Get next done point */
3660                         if (++last == adapter->num_tx_desc) last = 0;
3661                         done = last;
3662                 } else
3663                         break;
3664         }
3665         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3666             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3667
3668         txr->next_to_clean = first;
3669
3670         /*
3671         ** Watchdog calculation: we know there is work
3672         ** outstanding or the first return would have
3673         ** been taken, so no progress for too long
3674         ** indicates a hang.
3675         */
3676         if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
3677                 txr->queue_status = IXGBE_QUEUE_HUNG;
3678
3679         /* With a minimum free clear the depleted state bit.  */
3680         if (txr->tx_avail > IXGBE_TX_CLEANUP_THRESHOLD)
3681                 txr->queue_status &= ~IXGBE_QUEUE_DEPLETED;
3682
3683         if (txr->tx_avail == adapter->num_tx_desc) {
3684                 txr->queue_status = IXGBE_QUEUE_IDLE;
3685                 return (FALSE);
3686         }
3687
3688         return TRUE;
3689 }
3690
3691 /*********************************************************************
3692  *
3693  *  Refresh mbuf buffers for RX descriptor rings
3694  *   - now keeps its own state so discards due to resource
3695  *     exhaustion are unnecessary, if an mbuf cannot be obtained
3696  *     exhaustion are unnecessary; if an mbuf cannot be obtained
3697  *     it just returns, keeping its placeholder, so it can simply
3698  *     be called again later to retry.
3699  **********************************************************************/
3700 static void
3701 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3702 {
3703         struct adapter          *adapter = rxr->adapter;
3704         bus_dma_segment_t       hseg[1];
3705         bus_dma_segment_t       pseg[1];
3706         struct ixgbe_rx_buf     *rxbuf;
3707         struct mbuf             *mh, *mp;
3708         int                     i, j, nsegs, error;
3709         bool                    refreshed = FALSE;
3710
3711         i = j = rxr->next_to_refresh;
3712         /* Control the loop with one beyond */
3713         if (++j == adapter->num_rx_desc)
3714                 j = 0;
3715
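        /*
         * 'i' is the slot being refreshed and 'j' runs one ahead so the
         * loop can stop at 'limit' without touching a descriptor the
         * hardware may still own; next_to_refresh only advances once a
         * slot has been successfully refilled.
         */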
3716         while (j != limit) {
3717                 rxbuf = &rxr->rx_buffers[i];
3718                 if (rxr->hdr_split == FALSE)
3719                         goto no_split;
3720
3721                 if (rxbuf->m_head == NULL) {
3722                         mh = m_gethdr(MB_DONTWAIT, MT_DATA);
3723                         if (mh == NULL)
3724                                 goto update;
3725                 } else
3726                         mh = rxbuf->m_head;
3727
3728                 mh->m_pkthdr.len = mh->m_len = MHLEN;
3729                 mh->m_len = MHLEN;
3730                 mh->m_flags |= M_PKTHDR;
3731                 /* Get the memory mapping */
3732                 error = bus_dmamap_load_mbuf_segment(rxr->htag,
3733                     rxbuf->hmap, mh, hseg, 1, &nsegs, BUS_DMA_NOWAIT);
3734                 if (error != 0) {
3735                         kprintf("Refresh mbufs: hdr dmamap load"
3736                             " failure - %d\n", error);
3737                         m_free(mh);
3738                         rxbuf->m_head = NULL;
3739                         goto update;
3740                 }
3741                 rxbuf->m_head = mh;
3742                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3743                     BUS_DMASYNC_PREREAD);
3744                 rxr->rx_base[i].read.hdr_addr =
3745                     htole64(hseg[0].ds_addr);
3746
3747 no_split:
3748                 if (rxbuf->m_pack == NULL) {
3749                         mp = m_getjcl(MB_DONTWAIT, MT_DATA,
3750                             M_PKTHDR, adapter->rx_mbuf_sz);
3751                         if (mp == NULL)
3752                                 goto update;
3753                 } else
3754                         mp = rxbuf->m_pack;
3755
3756                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3757                 /* Get the memory mapping */
3758                 error = bus_dmamap_load_mbuf_segment(rxr->ptag,
3759                     rxbuf->pmap, mp, pseg, 1, &nsegs, BUS_DMA_NOWAIT);
3760                 if (error != 0) {
3761                         kprintf("Refresh mbufs: payload dmamap load"
3762                             " failure - %d\n", error);
3763                         m_free(mp);
3764                         rxbuf->m_pack = NULL;
3765                         goto update;
3766                 }
3767                 rxbuf->m_pack = mp;
3768                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3769                     BUS_DMASYNC_PREREAD);
3770                 rxr->rx_base[i].read.pkt_addr =
3771                     htole64(pseg[0].ds_addr);
3772
3773                 refreshed = TRUE;
3774                 /* Next is precalculated */
3775                 i = j;
3776                 rxr->next_to_refresh = i;
3777                 if (++j == adapter->num_rx_desc)
3778                         j = 0;
3779         }
3780 update:
3781         if (refreshed) /* Update hardware tail index */
3782                 IXGBE_WRITE_REG(&adapter->hw,
3783                     IXGBE_RDT(rxr->me), rxr->next_to_refresh);
3784         return;
3785 }
3786
3787 /*********************************************************************
3788  *
3789  *  Allocate memory for rx_buffer structures. Since we use one
3790  *  rx_buffer per received packet, the maximum number of rx_buffer's
3791  *  that we'll need is equal to the number of receive descriptors
3792  *  that we've allocated.
3793  *
3794  **********************************************************************/
3795 static int
3796 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
3797 {
3798         struct  adapter         *adapter = rxr->adapter;
3799         device_t                dev = adapter->dev;
3800         struct ixgbe_rx_buf     *rxbuf;
3801         int                     i, bsize, error;
3802
3803         bsize = sizeof(struct ixgbe_rx_buf) * adapter->num_rx_desc;
3804         if (!(rxr->rx_buffers =
3805             (struct ixgbe_rx_buf *) kmalloc(bsize,
3806             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3807                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3808                 error = ENOMEM;
3809                 goto fail;
3810         }
3811
3812         if ((error = bus_dma_tag_create(NULL,   /* parent */
3813                                    1, 0,        /* alignment, bounds */
3814                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3815                                    BUS_SPACE_MAXADDR,   /* highaddr */
3816                                    NULL, NULL,          /* filter, filterarg */
3817                                    MSIZE,               /* maxsize */
3818                                    1,                   /* nsegments */
3819                                    MSIZE,               /* maxsegsize */
3820                                    0,                   /* flags */
3821                                    &rxr->htag))) {
3822                 device_printf(dev, "Unable to create RX DMA tag\n");
3823                 goto fail;
3824         }
3825
3826         if ((error = bus_dma_tag_create(NULL,   /* parent */
3827                                    1, 0,        /* alignment, bounds */
3828                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3829                                    BUS_SPACE_MAXADDR,   /* highaddr */
3830                                    NULL, NULL,          /* filter, filterarg */
3831                                    MJUM16BYTES,         /* maxsize */
3832                                    1,                   /* nsegments */
3833                                    MJUM16BYTES,         /* maxsegsize */
3834                                    0,                   /* flags */
3835                                    &rxr->ptag))) {
3836                 device_printf(dev, "Unable to create RX DMA tag\n");
3837                 goto fail;
3838         }
3839
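        /*
         * Each receive slot gets two maps: hmap (from htag) for the small
         * header mbuf used with header split, and pmap (from ptag) for the
         * payload cluster.
         */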
3840         for (i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3841                 rxbuf = &rxr->rx_buffers[i];
3842                 error = bus_dmamap_create(rxr->htag,
3843                     BUS_DMA_NOWAIT, &rxbuf->hmap);
3844                 if (error) {
3845                         device_printf(dev, "Unable to create RX head map\n");
3846                         goto fail;
3847                 }
3848                 error = bus_dmamap_create(rxr->ptag,
3849                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3850                 if (error) {
3851                         device_printf(dev, "Unable to create RX pkt map\n");
3852                         goto fail;
3853                 }
3854         }
3855
3856         return (0);
3857
3858 fail:
3859         /* Frees all, but can handle partial completion */
3860         ixgbe_free_receive_structures(adapter);
3861         return (error);
3862 }
3863
3864 /*
3865 ** Used to detect a descriptor that has
3866 ** been merged by Hardware RSC.
3867 */
3868 static inline u32
3869 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
3870 {
3871         return (le32toh(rx->wb.lower.lo_dword.data) &
3872             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
3873 }
3874
3875 /*********************************************************************
3876  *
3877  *  Initialize Hardware RSC (LRO) feature on 82599
3878  *  for an RX ring; this is toggled by the LRO capability
3879  *  even though it is transparent to the stack.
3880  *
3881  **********************************************************************/
3882 #if 0   /* NET_LRO */
3883 static void
3884 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
3885 {
3886         struct  adapter         *adapter = rxr->adapter;
3887         struct  ixgbe_hw        *hw = &adapter->hw;
3888         u32                     rscctrl, rdrxctl;
3889
3890         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3891         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3892 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
3893         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
3894 #endif /* DEV_NETMAP */
3895         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3896         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
3897         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3898
3899         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3900         rscctrl |= IXGBE_RSCCTL_RSCEN;
3901         /*
3902         ** Limit the total number of descriptors that
3903         ** can be combined, so it does not exceed 64K
3904         */
3905         if (adapter->rx_mbuf_sz == MCLBYTES)
3906                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
3907         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
3908                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
3909         else if (adapter->rx_mbuf_sz == MJUM9BYTES)
3910                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
3911         else  /* Using 16K cluster */
3912                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
3913
3914         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3915
3916         /* Enable TCP header recognition */
3917         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
3918             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
3919             IXGBE_PSRTYPE_TCPHDR));
3920
3921         /* Disable RSC for ACK packets */
3922         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
3923             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
3924
3925         rxr->hw_rsc = TRUE;
3926 }
3927 #endif
3928
3929 static void     
3930 ixgbe_free_receive_ring(struct rx_ring *rxr)
3931 {
3932         struct  adapter         *adapter;
3933         struct ixgbe_rx_buf       *rxbuf;
3934         int i;
3935
3936         adapter = rxr->adapter;
3937         for (i = 0; i < adapter->num_rx_desc; i++) {
3938                 rxbuf = &rxr->rx_buffers[i];
3939                 if (rxbuf->m_head != NULL) {
3940                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3941                             BUS_DMASYNC_POSTREAD);
3942                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3943                         rxbuf->m_head->m_flags |= M_PKTHDR;
3944                         m_freem(rxbuf->m_head);
3945                 }
3946                 if (rxbuf->m_pack != NULL) {
3947                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3948                             BUS_DMASYNC_POSTREAD);
3949                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3950                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3951                         m_freem(rxbuf->m_pack);
3952                 }
3953                 rxbuf->m_head = NULL;
3954                 rxbuf->m_pack = NULL;
3955         }
3956 }
3957
3958
3959 /*********************************************************************
3960  *
3961  *  Initialize a receive ring and its buffers.
3962  *
3963  **********************************************************************/
3964 static int
3965 ixgbe_setup_receive_ring(struct rx_ring *rxr)
3966 {
3967         struct  adapter         *adapter;
3968         struct ifnet            *ifp;
3969         device_t                dev;
3970         struct ixgbe_rx_buf     *rxbuf;
3971         bus_dma_segment_t       pseg[1], hseg[1];
3972 #if 0   /* NET_LRO */
3973         struct lro_ctrl         *lro = &rxr->lro;
3974 #endif
3975         int                     rsize, nsegs, error = 0;
3976 #ifdef DEV_NETMAP
3977         struct netmap_adapter *na = NA(rxr->adapter->ifp);
3978         struct netmap_slot *slot;
3979 #endif /* DEV_NETMAP */
3980
3981         adapter = rxr->adapter;
3982         ifp = adapter->ifp;
3983         dev = adapter->dev;
3984
3985         /* Clear the ring contents */
3986         IXGBE_RX_LOCK(rxr);
3987 #ifdef DEV_NETMAP
3988         /* same as in ixgbe_setup_transmit_ring() */
3989         slot = netmap_reset(na, NR_RX, rxr->me, 0);
3990 #endif /* DEV_NETMAP */
3991         rsize = roundup2(adapter->num_rx_desc *
3992             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
3993         bzero((void *)rxr->rx_base, rsize);
3994
3995         /* Free current RX buffer structs and their mbufs */
3996         ixgbe_free_receive_ring(rxr);
3997
3998         /* Configure header split? */
3999         if (ixgbe_header_split)
4000                 rxr->hdr_split = TRUE;
4001
4002         /* Now replenish the mbufs */
4003         for (int j = 0; j != adapter->num_rx_desc; ++j) {
4004                 struct mbuf     *mh, *mp;
4005
4006                 rxbuf = &rxr->rx_buffers[j];
4007 #ifdef DEV_NETMAP
4008                 /*
4009                  * In netmap mode, fill the map and set the buffer
4010                  * address in the NIC ring, considering the offset
4011                  * between the netmap and NIC rings (see comment in
4012                  * ixgbe_setup_transmit_ring() ). No need to allocate
4013                  * an mbuf, so end the block with a continue;
4014                  */
4015                 if (slot) {
4016                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4017                         uint64_t paddr;
4018                         void *addr;
4019
4020                         addr = PNMB(slot + sj, &paddr);
4021                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4022                         /* Update descriptor */
4023                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4024                         continue;
4025                 }
4026 #endif /* DEV_NETMAP */
4027                 /*
4028                 ** Don't allocate a header mbuf if we're not
4029                 ** doing header split; it's wasteful
4030                 */ 
4031                 if (rxr->hdr_split == FALSE)
4032                         goto skip_head;
4033
4034                 /* First the header */
4035                 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4036                 if (rxbuf->m_head == NULL) {
4037                         error = ENOBUFS;
4038                         goto fail;
4039                 }
4040                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4041                 mh = rxbuf->m_head;
4042                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4043                 mh->m_flags |= M_PKTHDR;
4044                 /* Get the memory mapping */
4045                 error = bus_dmamap_load_mbuf_segment(rxr->htag,
4046                     rxbuf->hmap, rxbuf->m_head, hseg, 1,
4047                     &nsegs, BUS_DMA_NOWAIT);
4048
4049                 if (error != 0) /* Nothing elegant to do here */
4050                         goto fail;
4051                 bus_dmamap_sync(rxr->htag,
4052                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4053                 /* Update descriptor */
4054                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4055
4056 skip_head:
4057                 /* Now the payload cluster */
4058                 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4059                     M_PKTHDR, adapter->rx_mbuf_sz);
4060                 if (rxbuf->m_pack == NULL) {
4061                         error = ENOBUFS;
4062                         goto fail;
4063                 }
4064                 mp = rxbuf->m_pack;
4065                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4066                 /* Get the memory mapping */
4067                 error = bus_dmamap_load_mbuf_segment(rxr->ptag,
4068                     rxbuf->pmap, mp, pseg, 1,
4069                     &nsegs, BUS_DMA_NOWAIT);
4070                 if (error != 0)
4071                         goto fail;
4072                 bus_dmamap_sync(rxr->ptag,
4073                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4074                 /* Update descriptor */
4075                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4076         }
4077
4078
4079         /* Setup our descriptor indices */
4080         rxr->next_to_check = 0;
4081         rxr->next_to_refresh = 0;
4082         rxr->lro_enabled = FALSE;
4083         rxr->rx_split_packets = 0;
4084         rxr->rx_bytes = 0;
4085         rxr->discard = FALSE;
4086         rxr->vtag_strip = FALSE;
4087
4088         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4089             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4090
4091         /*
4092         ** Now set up the LRO interface:
4093         ** 82598 uses software LRO, the
4094         ** 82599 and X540 use a hardware assist.
4095         */
4096 #if 0 /* NET_LRO */
4097         if ((adapter->hw.mac.type != ixgbe_mac_82598EB) &&
4098             (ifp->if_capenable & IFCAP_RXCSUM) &&
4099             (ifp->if_capenable & IFCAP_LRO))
4100                 ixgbe_setup_hw_rsc(rxr);
4101         else if (ifp->if_capenable & IFCAP_LRO) {
4102                 int err = tcp_lro_init(lro);
4103                 if (err) {
4104                         device_printf(dev, "LRO Initialization failed!\n");
4105                         goto fail;
4106                 }
4107                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
4108                 rxr->lro_enabled = TRUE;
4109                 lro->ifp = adapter->ifp;
4110         }
4111 #endif
4112
4113         IXGBE_RX_UNLOCK(rxr);
4114         return (0);
4115
4116 fail:
4117         ixgbe_free_receive_ring(rxr);
4118         IXGBE_RX_UNLOCK(rxr);
4119         return (error);
4120 }
4121
4122 /*********************************************************************
4123  *
4124  *  Initialize all receive rings.
4125  *
4126  **********************************************************************/
4127 static int
4128 ixgbe_setup_receive_structures(struct adapter *adapter)
4129 {
4130         struct rx_ring *rxr = adapter->rx_rings;
4131         int j;
4132
4133         for (j = 0; j < adapter->num_queues; j++, rxr++)
4134                 if (ixgbe_setup_receive_ring(rxr))
4135                         goto fail;
4136
4137         return (0);
4138 fail:
4139         /*
4140          * Free the RX buffers allocated so far; we only handle the
4141          * rings that completed, as the failing ring has already cleaned
4142          * up after itself. 'j' failed, so it's the terminus.
4143          */
4144         for (int i = 0; i < j; ++i) {
4145                 rxr = &adapter->rx_rings[i];
4146                 ixgbe_free_receive_ring(rxr);
4147         }
4148
4149         return (ENOBUFS);
4150 }
4151
4152 /*********************************************************************
4153  *
4154  *  Setup receive registers and features.
4155  *
4156  **********************************************************************/
4157 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
4158
4159 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
4160         
4161 static void
4162 ixgbe_initialize_receive_units(struct adapter *adapter)
4163 {
4164         struct  rx_ring *rxr = adapter->rx_rings;
4165         struct ixgbe_hw *hw = &adapter->hw;
4166         struct ifnet   *ifp = adapter->ifp;
4167         u32             bufsz, rxctrl, fctrl, srrctl, rxcsum;
4168         u32             reta, mrqc = 0, hlreg, random[10];
4169
4170
4171         /*
4172          * Make sure receives are disabled while
4173          * setting up the descriptor ring
4174          */
4175         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4176         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
4177             rxctrl & ~IXGBE_RXCTRL_RXEN);
4178
4179         /* Enable broadcasts */
4180         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4181         fctrl |= IXGBE_FCTRL_BAM;
4182         fctrl |= IXGBE_FCTRL_DPF;
4183         fctrl |= IXGBE_FCTRL_PMCF;
4184         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4185
4186         /* Set for Jumbo Frames? */
4187         hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4188         if (ifp->if_mtu > ETHERMTU)
4189                 hlreg |= IXGBE_HLREG0_JUMBOEN;
4190         else
4191                 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4192 #ifdef DEV_NETMAP
4193         /* crcstrip is conditional in netmap (in RDRXCTL too ?) */
4194         if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4195                 hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4196         else
4197                 hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4198 #endif /* DEV_NETMAP */
4199         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4200
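             /*
             ** Round the mbuf buffer size up to the SRRCTL BSIZEPKT
             ** granularity (1 << IXGBE_SRRCTL_BSIZEPKT_SHIFT bytes) before
             ** converting it into register units below.
             */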
4201         bufsz = (adapter->rx_mbuf_sz +
4202             BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
4203
4204         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4205                 u64 rdba = rxr->rxdma.dma_paddr;
4206
4207                 /* Setup the Base and Length of the Rx Descriptor Ring */
4208                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
4209                                (rdba & 0x00000000ffffffffULL));
4210                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
4211                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
4212                     adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4213
4214                 /* Set up the SRRCTL register */
4215                 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
4216                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4217                 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
4218                 srrctl |= bufsz;
4219                 if (rxr->hdr_split) {
4220                         /* Use a standard mbuf for the header */
4221                         srrctl |= ((IXGBE_RX_HDR <<
4222                             IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT)
4223                             & IXGBE_SRRCTL_BSIZEHDR_MASK);
4224                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4225                 } else
4226                         srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4227                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
4228
4229                 /* Setup the HW Rx Head and Tail Descriptor Pointers */
4230                 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
4231                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
4232         }
4233
4234         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
4235                 u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
4236                               IXGBE_PSRTYPE_UDPHDR |
4237                               IXGBE_PSRTYPE_IPV4HDR |
4238                               IXGBE_PSRTYPE_IPV6HDR;
4239                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
4240         }
4241
4242         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4243
4244         /* Setup RSS */
4245         if (adapter->num_queues > 1) {
4246                 int i, j;
4247                 reta = 0;
4248
4249                 /* set up random bits */
4250                 karc4rand(&random, sizeof(random));
4251
4252                 /* Set up the redirection table */
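                     /*
                     ** Each 32-bit RETA register packs four 8-bit queue
                     ** indices, so entries are shifted into 'reta' and one
                     ** register is written for every fourth table entry
                     ** (128 entries -> 32 registers).
                     */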
4253                 for (i = 0, j = 0; i < 128; i++, j++) {
4254                         if (j == adapter->num_queues) j = 0;
4255                         reta = (reta << 8) | (j * 0x11);
4256                         if ((i & 3) == 3)
4257                                 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
4258                 }
4259
4260                 /* Now fill our hash function seeds */
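                     /* (the ten 32-bit RSSRK registers hold the 40-byte RSS key) */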
4261                 for (int i = 0; i < 10; i++)
4262                         IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random[i]);
4263
4264                 /* Perform hash on these packet types */
4265                 mrqc = IXGBE_MRQC_RSSEN
4266                      | IXGBE_MRQC_RSS_FIELD_IPV4
4267                      | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
4268                      | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
4269                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
4270                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX
4271                      | IXGBE_MRQC_RSS_FIELD_IPV6
4272                      | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
4273                      | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
4274                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
4275                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4276
4277                 /* RSS and RX IPP Checksum are mutually exclusive */
4278                 rxcsum |= IXGBE_RXCSUM_PCSD;
4279         }
4280
4281         if (ifp->if_capenable & IFCAP_RXCSUM)
4282                 rxcsum |= IXGBE_RXCSUM_PCSD;
4283
4284         if (!(rxcsum & IXGBE_RXCSUM_PCSD))
4285                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4286
4287         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4288
4289         return;
4290 }
4291
4292 /*********************************************************************
4293  *
4294  *  Free all receive rings.
4295  *
4296  **********************************************************************/
4297 static void
4298 ixgbe_free_receive_structures(struct adapter *adapter)
4299 {
4300         struct rx_ring *rxr = adapter->rx_rings;
4301
4302         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4303 #if 0   /* NET_LRO */
4304                 struct lro_ctrl         *lro = &rxr->lro;
4305 #endif
4306                 ixgbe_free_receive_buffers(rxr);
4307                 /* Free LRO memory */
4308 #if 0   /* NET_LRO */
4309                 tcp_lro_free(lro);
4310 #endif
4311                 /* Free the ring memory as well */
4312                 ixgbe_dma_free(adapter, &rxr->rxdma);
4313         }
4314
4315         kfree(adapter->rx_rings, M_DEVBUF);
4316 }
4317
4318
4319 /*********************************************************************
4320  *
4321  *  Free receive ring data structures
4322  *
4323  **********************************************************************/
4324 static void
4325 ixgbe_free_receive_buffers(struct rx_ring *rxr)
4326 {
4327         struct adapter          *adapter = rxr->adapter;
4328         struct ixgbe_rx_buf     *rxbuf;
4329
4330         INIT_DEBUGOUT("free_receive_structures: begin");
4331
4332         /* Cleanup any existing buffers */
4333         if (rxr->rx_buffers != NULL) {
4334                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4335                         rxbuf = &rxr->rx_buffers[i];
4336                         if (rxbuf->m_head != NULL) {
4337                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4338                                     BUS_DMASYNC_POSTREAD);
4339                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4340                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4341                                 m_freem(rxbuf->m_head);
4342                         }
4343                         if (rxbuf->m_pack != NULL) {
4344                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4345                                     BUS_DMASYNC_POSTREAD);
4346                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4347                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4348                                 m_freem(rxbuf->m_pack);
4349                         }
4350                         rxbuf->m_head = NULL;
4351                         rxbuf->m_pack = NULL;
4352                         if (rxbuf->hmap != NULL) {
4353                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4354                                 rxbuf->hmap = NULL;
4355                         }
4356                         if (rxbuf->pmap != NULL) {
4357                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4358                                 rxbuf->pmap = NULL;
4359                         }
4360                 }
4361                 if (rxr->rx_buffers != NULL) {
4362                         kfree(rxr->rx_buffers, M_DEVBUF);
4363                         rxr->rx_buffers = NULL;
4364                 }
4365         }
4366
4367         if (rxr->htag != NULL) {
4368                 bus_dma_tag_destroy(rxr->htag);
4369                 rxr->htag = NULL;
4370         }
4371         if (rxr->ptag != NULL) {
4372                 bus_dma_tag_destroy(rxr->ptag);
4373                 rxr->ptag = NULL;
4374         }
4375
4376         return;
4377 }
4378
4379 static __inline void
4380 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4381 {
4382                  
4383         /*
4384          * At the moment LRO is only used for IP/TCP packets whose TCP
4385          * checksum has been verified by hardware and which carry no VLAN
4386          * tag in the Ethernet header.  For IPv6 we do not yet support
4387          * extension headers.
4387          */
4388 #if 0   /* NET_LRO */
4389         if (rxr->lro_enabled &&
4390             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4391             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4392             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4393             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
4394             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4395             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
4396             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4397             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4398                 /*
4399                  * Send to the stack if:
4400                  **  - LRO not enabled, or
4401                  **  - no LRO resources, or
4402                  **  - lro enqueue fails
4403                  */
4404                 if (rxr->lro.lro_cnt != 0)
4405                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4406                                 return;
4407         }
4408 #endif
4409         IXGBE_RX_UNLOCK(rxr);
4410         (*ifp->if_input)(ifp, m);
4411         IXGBE_RX_LOCK(rxr);
4412 }
4413
4414 static __inline void
4415 ixgbe_rx_discard(struct rx_ring *rxr, int i)
4416 {
4417         struct ixgbe_rx_buf     *rbuf;
4418
4419         rbuf = &rxr->rx_buffers[i];
4420
4421         if (rbuf->fmp != NULL) {/* Partial chain ? */
4422                 rbuf->fmp->m_flags |= M_PKTHDR;
4423                 m_freem(rbuf->fmp);
4424                 rbuf->fmp = NULL;
4425         }
4426
4427         /*
4428         ** With advanced descriptors the writeback
4429         ** clobbers the buffer addrs, so it's easier
4430         ** to just free the existing mbufs and take
4431         ** the normal refresh path to get new buffers
4432         ** and mapping.
4433         */
4434         if (rbuf->m_head) {
4435                 m_free(rbuf->m_head);
4436                 rbuf->m_head = NULL;
4437         }
4438  
4439         if (rbuf->m_pack) {
4440                 m_free(rbuf->m_pack);
4441                 rbuf->m_pack = NULL;
4442         }
4443
4444         return;
4445 }
4446
4447
4448 /*********************************************************************
4449  *
4450  *  This routine executes in interrupt context. It replenishes
4451  *  the mbufs in the descriptor ring and sends data which has been
4452  *  DMA'd into host memory to the upper layer.
4453  *
4454  *  We loop at most count times if count is > 0, or until done if
4455  *  count < 0.
4456  *
4457  *  Return TRUE for more work, FALSE for all clean.
4458  *********************************************************************/
4459 static bool
4460 ixgbe_rxeof(struct ix_queue *que, int count)
4461 {
4462         struct adapter          *adapter = que->adapter;
4463         struct rx_ring          *rxr = que->rxr;
4464         struct ifnet            *ifp = adapter->ifp;
4465 #if 0   /* NET_LRO */
4466         struct lro_ctrl         *lro = &rxr->lro;
4467         struct lro_entry        *queued;
4468 #endif
4469         int                     i, nextp, processed = 0;
4470         u32                     staterr = 0;
4471         union ixgbe_adv_rx_desc *cur;
4472         struct ixgbe_rx_buf     *rbuf, *nbuf;
4473
4474         IXGBE_RX_LOCK(rxr);
4475
4476 #ifdef DEV_NETMAP
4477         if (ifp->if_capenable & IFCAP_NETMAP) {
4478                 /*
4479                  * Same as the txeof routine: only wakeup clients on intr.
4480                  * NKR_PENDINTR in nr_kflags is used to implement interrupt
4481                  * mitigation (ixgbe_rxsync() will not look for new packets
4482                  * unless NKR_PENDINTR is set).
4483                  */
4484                 struct netmap_adapter *na = NA(ifp);
4485
4486                 na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4487                 selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4488                 IXGBE_RX_UNLOCK(rxr);
4489                 IXGBE_CORE_LOCK(adapter);
4490                 selwakeuppri(&na->rx_si, PI_NET);
4491                 IXGBE_CORE_UNLOCK(adapter);
4492                 return (FALSE);
4493         }
4494 #endif /* DEV_NETMAP */
4495         for (i = rxr->next_to_check; count != 0;) {
4496                 struct mbuf     *sendmp, *mh, *mp;
4497                 u32             rsc, ptype;
4498                 u16             hlen, plen, hdr;
4499                 u16             vtag = 0;
4500                 bool            eop;
4501  
4502                 /* Sync the ring. */
4503                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4504                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4505
4506                 cur = &rxr->rx_base[i];
4507                 staterr = le32toh(cur->wb.upper.status_error);
4508
4509                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
4510                         break;
4511                 if ((ifp->if_flags & IFF_RUNNING) == 0)
4512                         break;
4513
4514                 count--;
4515                 sendmp = NULL;
4516                 nbuf = NULL;
4517                 rsc = 0;
4518                 cur->wb.upper.status_error = 0;
4519                 rbuf = &rxr->rx_buffers[i];
4520                 mh = rbuf->m_head;
4521                 mp = rbuf->m_pack;
4522
4523                 plen = le16toh(cur->wb.upper.length);
4524                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
4525                     IXGBE_RXDADV_PKTTYPE_MASK;
4526                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4527                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
4528
4529                 /* Process vlan info */
4530                 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
4531                         vtag = le16toh(cur->wb.upper.vlan);
4532
4533                 /* Make sure bad packets are discarded */
4534                 if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
4535                     (rxr->discard)) {
4536                         ifp->if_ierrors++;
4537                         rxr->rx_discarded++;
4538                         if (eop)
4539                                 rxr->discard = FALSE;
4540                         else
4541                                 rxr->discard = TRUE;
4542                         ixgbe_rx_discard(rxr, i);
4543                         goto next_desc;
4544                 }
4545
4546                 /*
4547                 ** On the 82599, which supports a hardware
4548                 ** LRO (called HW RSC), packets need not be
4549                 ** fragmented across sequential descriptors;
4550                 ** rather, the next descriptor is indicated
4551                 ** in bits of the current descriptor.
4552                 ** This also means that we might process
4553                 ** more than one packet at a time, something
4554                 ** that was never true before; it required
4555                 ** eliminating global chain pointers in
4556                 ** favor of what we are doing here.  -jfv
4557                 */
4558                 if (!eop) {
4559                         /*
4560                         ** Figure out the next descriptor
4561                         ** of this frame.
4562                         */
4563                         if (rxr->hw_rsc == TRUE) {
4564                                 rsc = ixgbe_rsc_count(cur);
4565                                 rxr->rsc_num += (rsc - 1);
4566                         }
4567                         if (rsc) { /* Get hardware index */
4568                                 nextp = ((staterr &
4569                                     IXGBE_RXDADV_NEXTP_MASK) >>
4570                                     IXGBE_RXDADV_NEXTP_SHIFT);
4571                         } else { /* Just sequential */
4572                                 nextp = i + 1;
4573                                 if (nextp == adapter->num_rx_desc)
4574                                         nextp = 0;
4575                         }
4576                         nbuf = &rxr->rx_buffers[nextp];
4577                         prefetch(nbuf);
4578                 }
4579                 /*
4580                 ** The header mbuf is ONLY used when header
4581                 ** split is enabled; otherwise we get normal
4582                 ** behavior, i.e., both header and payload
4583                 ** are DMA'd into the payload buffer.
4584                 **
4585                 ** Rather than using the fmp/lmp global pointers
4586                 ** we now keep the head of a packet chain in the
4587                 ** buffer struct and pass this along from one
4588                 ** descriptor to the next, until we get EOP.
4589                 */
4590                 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
4591                         /* This must be an initial descriptor */
4592                         hlen = (hdr & IXGBE_RXDADV_HDRBUFLEN_MASK) >>
4593                             IXGBE_RXDADV_HDRBUFLEN_SHIFT;
4594                         if (hlen > IXGBE_RX_HDR)
4595                                 hlen = IXGBE_RX_HDR;
4596                         mh->m_len = hlen;
4597                         mh->m_flags |= M_PKTHDR;
4598                         mh->m_next = NULL;
4599                         mh->m_pkthdr.len = mh->m_len;
4600                         /* Null buf pointer so it is refreshed */
4601                         rbuf->m_head = NULL;
4602                         /*
4603                         ** Check the payload length; this
4604                         ** could be zero if it's a small
4605                         ** packet.
4606                         */
4607                         if (plen > 0) {
4608                                 mp->m_len = plen;
4609                                 mp->m_next = NULL;
4610                                 mp->m_flags &= ~M_PKTHDR;
4611                                 mh->m_next = mp;
4612                                 mh->m_pkthdr.len += mp->m_len;
4613                                 /* Null buf pointer so it is refreshed */
4614                                 rbuf->m_pack = NULL;
4615                                 rxr->rx_split_packets++;
4616                         }
4617                         /*
4618                         ** Now create the forward
4619                         ** chain so that when the frame
4620                         ** completes we won't have to.
4621                         */
4622                         if (eop == 0) {
4623                                 /* stash the chain head */
4624                                 nbuf->fmp = mh;
4625                                 /* Make forward chain */
4626                                 if (plen)
4627                                         mp->m_next = nbuf->m_pack;
4628                                 else
4629                                         mh->m_next = nbuf->m_pack;
4630                         } else {
4631                                 /* Singlet, prepare to send */
4632                                 sendmp = mh;
4633                                 /* If hardware handled vtag */
4634                                 if (vtag) {
4635                                         sendmp->m_pkthdr.ether_vlantag = vtag;
4636                                         sendmp->m_flags |= M_VLANTAG;
4637                                 }
4638                         }
4639                 } else {
4640                         /*
4641                         ** Either no header split, or a
4642                         ** secondary piece of a fragmented
4643                         ** split packet.
4644                         */
4645                         mp->m_len = plen;
4646                         /*
4647                         ** See if there is a stored chain head;
4648                         ** that determines how we handle this buffer.
4649                         */
4650                         sendmp = rbuf->fmp;
4651                         rbuf->m_pack = rbuf->fmp = NULL;
4652
4653                         if (sendmp != NULL) {  /* secondary frag */
4654                                 mp->m_flags &= ~M_PKTHDR;
4655                                 sendmp->m_pkthdr.len += mp->m_len;
4656                         } else {
4657                                 /* first desc of a non-ps chain */
4658                                 sendmp = mp;
4659                                 sendmp->m_flags |= M_PKTHDR;
4660                                 sendmp->m_pkthdr.len = mp->m_len;
4661                                 if (staterr & IXGBE_RXD_STAT_VP) {
4662                                         sendmp->m_pkthdr.ether_vlantag = vtag;
4663                                         sendmp->m_flags |= M_VLANTAG;
4664                                 }
4665                         }
4666                         /* Pass the head pointer on */
4667                         if (eop == 0) {
4668                                 nbuf->fmp = sendmp;
4669                                 sendmp = NULL;
4670                                 mp->m_next = nbuf->m_pack;
4671                         }
4672                 }
4673                 ++processed;
4674                 /* Sending this frame? */
4675                 if (eop) {
4676                         sendmp->m_pkthdr.rcvif = ifp;
4677                         ifp->if_ipackets++;
4678                         rxr->rx_packets++;
4679                         /* capture data for AIM */
4680                         rxr->bytes += sendmp->m_pkthdr.len;
4681                         rxr->rx_bytes += sendmp->m_pkthdr.len;
4682                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4683                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
4684 #if 0 /* __FreeBSD_version >= 800000 */
4685                         sendmp->m_pkthdr.flowid = que->msix;
4686                         sendmp->m_flags |= M_FLOWID;
4687 #endif
4688                 }
4689 next_desc:
4690                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4691                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4692
4693                 /* Advance our pointers to the next descriptor. */
4694                 if (++i == adapter->num_rx_desc)
4695                         i = 0;
4696
4697                 /* Now send to the stack or do LRO */
4698                 if (sendmp != NULL) {
4699                         rxr->next_to_check = i;
4700                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
4701                         i = rxr->next_to_check;
4702                 }
4703
4704                 /* Every 8 descriptors we go to refresh mbufs */
4705                 if (processed == 8) {
4706                         ixgbe_refresh_mbufs(rxr, i);
4707                         processed = 0;
4708                 }
4709         }
4710
4711         /* Refresh any remaining buf structs */
4712         if (ixgbe_rx_unrefreshed(rxr))
4713                 ixgbe_refresh_mbufs(rxr, i);
4714
4715         rxr->next_to_check = i;
4716
4717         /*
4718          * Flush any outstanding LRO work
4719          */
4720 #if 0   /* NET_LRO */
4721         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4722                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4723                 tcp_lro_flush(lro, queued);
4724         }
4725 #endif
4726
4727         IXGBE_RX_UNLOCK(rxr);
4728
4729         /*
4730         ** Do we still have cleaning to do?
4731         ** If so, schedule another interrupt.
4732         */
4733         if ((staterr & IXGBE_RXD_STAT_DD) != 0) {
4734                 ixgbe_rearm_queues(adapter, (u64)(1 << que->msix));
4735                 return (TRUE);
4736         }
4737
4738         return (FALSE);
4739 }
4740
4741
4742 /*********************************************************************
4743  *
4744  *  Verify that the hardware indicated that the checksum is valid.
4745  *  Inform the stack about the status of checksum so that stack
4746  *  doesn't spend time verifying the checksum.
4747  *
4748  *********************************************************************/
4749 static void
4750 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
4751 {
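             /*
             ** The low 16 bits of staterr carry the descriptor status bits
             ** and the top byte carries the error bits; split them out here.
             */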
4752         u16     status = (u16) staterr;
4753         u8      errors = (u8) (staterr >> 24);
4754         bool    sctp = FALSE;
4755
4756         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4757             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
4758                 sctp = TRUE;
4759
4760         if (status & IXGBE_RXD_STAT_IPCS) {
4761                 if (!(errors & IXGBE_RXD_ERR_IPE)) {
4762                         /* IP Checksum Good */
4763                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4764                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4765
4766                 } else
4767                         mp->m_pkthdr.csum_flags = 0;
4768         }
4769         if (status & IXGBE_RXD_STAT_L4CS) {
4770                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4771 #if 0
4772                 if (sctp)
4773                         type = CSUM_SCTP_VALID;
4774 #endif
4775                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
4776                         mp->m_pkthdr.csum_flags |= type;
4777                         if (!sctp)
4778                                 mp->m_pkthdr.csum_data = htons(0xffff);
4779                 } 
4780         }
4781         return;
4782 }
4783
4784
4785 /*
4786 ** This routine is run via a vlan config EVENT;
4787 ** it enables us to use the HW Filter table since
4788 ** we can get the vlan id. It just creates the
4789 ** entry in the soft version of the VFTA; init will
4790 ** repopulate the real table.
4791 */
4792 static void
4793 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4794 {
4795         struct adapter  *adapter = ifp->if_softc;
4796         u16             index, bit;
4797
4798         if (ifp->if_softc !=  arg)   /* Not our event */
4799                 return;
4800
4801         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4802                 return;
4803
4804         IXGBE_CORE_LOCK(adapter);
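             /*
             ** The shadow VFTA mirrors the hardware's 4096-bit VLAN filter
             ** table as 128 32-bit words: bits 11:5 of the tag select the
             ** word and bits 4:0 select the bit within it (e.g. vlan 100
             ** lands in word 3, bit 4).
             */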
4805         index = (vtag >> 5) & 0x7F;
4806         bit = vtag & 0x1F;
4807         adapter->shadow_vfta[index] |= (1 << bit);
4808         ++adapter->num_vlans;
4809         ixgbe_init_locked(adapter);
4810         IXGBE_CORE_UNLOCK(adapter);
4811 }
4812
4813 /*
4814 ** This routine is run via a vlan
4815 ** unconfig EVENT; it removes our entry
4816 ** from the soft vfta.
4817 */
4818 static void
4819 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4820 {
4821         struct adapter  *adapter = ifp->if_softc;
4822         u16             index, bit;
4823
4824         if (ifp->if_softc !=  arg)
4825                 return;
4826
4827         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4828                 return;
4829
4830         IXGBE_CORE_LOCK(adapter);
4831         index = (vtag >> 5) & 0x7F;
4832         bit = vtag & 0x1F;
4833         adapter->shadow_vfta[index] &= ~(1 << bit);
4834         --adapter->num_vlans;
4835         /* Re-init to load the changes */
4836         ixgbe_init_locked(adapter);
4837         IXGBE_CORE_UNLOCK(adapter);
4838 }
4839
4840 static void
4841 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
4842 {
4843         struct ifnet    *ifp = adapter->ifp;
4844         struct ixgbe_hw *hw = &adapter->hw;
4845         struct rx_ring  *rxr;
4846         u32             ctrl;
4847
4848         /*
4849         ** We get here through init_locked, meaning
4850         ** a soft reset; this has already cleared
4851         ** the VFTA and other state, so if no
4852         ** vlans have been registered do nothing.
4853         */
4854         if (adapter->num_vlans == 0)
4855                 return;
4856
4857         /*
4858         ** A soft reset zeroes out the VFTA, so
4859         ** we need to repopulate it now.
4860         */
4861         for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
4862                 if (adapter->shadow_vfta[i] != 0)
4863                         IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
4864                             adapter->shadow_vfta[i]);
4865
4866         ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4867         /* Enable the Filter Table if enabled */
4868         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4869                 ctrl &= ~IXGBE_VLNCTRL_CFIEN;
4870                 ctrl |= IXGBE_VLNCTRL_VFE;
4871         }
4872         if (hw->mac.type == ixgbe_mac_82598EB)
4873                 ctrl |= IXGBE_VLNCTRL_VME;
4874         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
4875
4876         /* Setup the queues for vlans */
4877         for (int i = 0; i < adapter->num_queues; i++) {
4878                 rxr = &adapter->rx_rings[i];
4879                 /* On 82599 the VLAN enable is per/queue in RXDCTL */
4880                 if (hw->mac.type != ixgbe_mac_82598EB) {
4881                         ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
4882                         ctrl |= IXGBE_RXDCTL_VME;
4883                         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
4884                 }
4885                 rxr->vtag_strip = TRUE;
4886         }
4887 }
4888
4889 static void
4890 ixgbe_enable_intr(struct adapter *adapter)
4891 {
4892         struct ixgbe_hw *hw = &adapter->hw;
4893         struct ix_queue *que = adapter->queues;
4894         u32 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
4895
4896
4897         /* Enable Fan Failure detection */
4898         if (hw->device_id == IXGBE_DEV_ID_82598AT)
4899                     mask |= IXGBE_EIMS_GPI_SDP1;
4900         else {
4901                     mask |= IXGBE_EIMS_ECC;
4902                     mask |= IXGBE_EIMS_GPI_SDP0;
4903                     mask |= IXGBE_EIMS_GPI_SDP1;
4904                     mask |= IXGBE_EIMS_GPI_SDP2;
4905 #ifdef IXGBE_FDIR
4906                     mask |= IXGBE_EIMS_FLOW_DIR;
4907 #endif
4908         }
4909
4910         IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
4911
4912         /* With RSS we use auto clear */
4913         if (adapter->msix_mem) {
4914                 mask = IXGBE_EIMS_ENABLE_MASK;
4915                 /* Don't autoclear Link */
4916                 mask &= ~IXGBE_EIMS_OTHER;
4917                 mask &= ~IXGBE_EIMS_LSC;
4918                 IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
4919         }
4920
4921         /*
4922         ** Now enable all queues; this is done separately to
4923         ** allow for handling the extended (beyond 32) MSIX
4924         ** vectors that can be used by 82599
4925         */
4926         for (int i = 0; i < adapter->num_queues; i++, que++)
4927                 ixgbe_enable_queue(adapter, que->msix);
4928
4929         IXGBE_WRITE_FLUSH(hw);
4930
4931         return;
4932 }
4933
4934 static void
4935 ixgbe_disable_intr(struct adapter *adapter)
4936 {
4937         if (adapter->msix_mem)
4938                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
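             /*
             ** The 82598 has a single interrupt mask clear register; the
             ** 82599/X540 spread the queue vectors across EIMC and the two
             ** EIMC_EX registers.
             */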
4939         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
4940                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
4941         } else {
4942                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
4943                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
4944                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
4945         }
4946         IXGBE_WRITE_FLUSH(&adapter->hw);
4947         return;
4948 }
4949
4950 u16
4951 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
4952 {
4953         u16 value;
4954
4955         value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
4956             reg, 2);
4957
4958         return (value);
4959 }
4960
4961 void
4962 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
4963 {
4964         pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
4965             reg, value, 2);
4966
4967         return;
4968 }
4969
4970 /*
4971 ** Setup the correct IVAR register for a particular MSIX interrupt
4972 **   (yes this is all very magic and confusing :)
4973 **  - entry is the register array entry
4974 **  - vector is the MSIX vector for this queue
4975 **  - type is RX/TX/MISC
4976 */
4977 static void
4978 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
4979 {
4980         struct ixgbe_hw *hw = &adapter->hw;
4981         u32 ivar, index;
4982
4983         vector |= IXGBE_IVAR_ALLOC_VAL;
4984
4985         switch (hw->mac.type) {
4986
4987         case ixgbe_mac_82598EB:
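                     /*
                     ** 82598: each 32-bit IVAR register holds four 8-bit
                     ** entries; RX entries start at index 0, TX entries at
                     ** index 64, and the "other causes" vector has its own
                     ** dedicated index.
                     */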
4988                 if (type == -1)
4989                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4990                 else
4991                         entry += (type * 64);
4992                 index = (entry >> 2) & 0x1F;
4993                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4994                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4995                 ivar |= (vector << (8 * (entry & 0x3)));
4996                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
4997                 break;
4998
4999         case ixgbe_mac_82599EB:
5000         case ixgbe_mac_X540:
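                     /*
                     ** 82599/X540: each IVAR register covers two queues
                     ** (entry >> 1 selects the register, entry & 1 the
                     ** half); within each half the low byte holds the RX
                     ** vector and the high byte the TX vector.  Misc causes
                     ** use the separate IVAR_MISC register.
                     */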
5001                 if (type == -1) { /* MISC IVAR */
5002                         index = (entry & 1) * 8;
5003                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
5004                         ivar &= ~(0xFF << index);
5005                         ivar |= (vector << index);
5006                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
5007                 } else {        /* RX/TX IVARS */
5008                         index = (16 * (entry & 1)) + (8 * type);
5009                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
5010                         ivar &= ~(0xFF << index);
5011                         ivar |= (vector << index);
5012                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
5013                 }
5014                 break;
5015         default:
5016                 break;
5017         }
5018 }
5019
5020 static void
5021 ixgbe_configure_ivars(struct adapter *adapter)
5022 {
5023         struct  ix_queue *que = adapter->queues;
5024         u32 newitr;
5025
5026         if (ixgbe_max_interrupt_rate > 0)
5027                 newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
5028         else
5029                 newitr = 0;
5030
5031         for (int i = 0; i < adapter->num_queues; i++, que++) {
5032                 /* First the RX queue entry */
5033                 ixgbe_set_ivar(adapter, i, que->msix, 0);
5034                 /* ... and the TX */
5035                 ixgbe_set_ivar(adapter, i, que->msix, 1);
5036                 /* Set an Initial EITR value */
5037                 IXGBE_WRITE_REG(&adapter->hw,
5038                     IXGBE_EITR(que->msix), newitr);
5039         }
5040
5041         /* For the Link interrupt */
5042         ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
5043 }
5044
5045 /*
5046 ** ixgbe_sfp_probe - called in the local timer to
5047 ** determine if a port had optics inserted.
5048 ** determine if a port has had optics inserted.
5049 static bool ixgbe_sfp_probe(struct adapter *adapter)
5050 {
5051         struct ixgbe_hw *hw = &adapter->hw;
5052         device_t        dev = adapter->dev;
5053         bool            result = FALSE;
5054
5055         if ((hw->phy.type == ixgbe_phy_nl) &&
5056             (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
5057                 s32 ret = hw->phy.ops.identify_sfp(hw);
5058                 if (ret)
5059                         goto out;
5060                 ret = hw->phy.ops.reset(hw);
5061                 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5062                         device_printf(dev,"Unsupported SFP+ module detected!");
5063                         kprintf(" Reload driver with supported module.\n");
5064                         adapter->sfp_probe = FALSE;
5065                         goto out;
5066                 } else
5067                         device_printf(dev,"SFP+ module detected!\n");
5068                 /* We now have supported optics */
5069                 adapter->sfp_probe = FALSE;
5070                 /* Set the optics type so system reports correctly */
5071                 ixgbe_setup_optics(adapter);
5072                 result = TRUE;
5073         }
5074 out:
5075         return (result);
5076 }
5077
5078 /*
5079 ** Tasklet handler for MSIX Link interrupts
5080 **  - do outside interrupt since it might sleep
5081 **  - done outside the interrupt context since it might sleep
5082 static void
5083 ixgbe_handle_link(void *context, int pending)
5084 {
5085         struct adapter  *adapter = context;
5086
5087         ixgbe_check_link(&adapter->hw,
5088             &adapter->link_speed, &adapter->link_up, 0);
5089         ixgbe_update_link_status(adapter);
5090 }
5091
5092 /*
5093 ** Tasklet for handling SFP module interrupts
5094 */
5095 static void
5096 ixgbe_handle_mod(void *context, int pending)
5097 {
5098         struct adapter  *adapter = context;
5099         struct ixgbe_hw *hw = &adapter->hw;
5100         device_t        dev = adapter->dev;
5101         u32 err;
5102
5103         err = hw->phy.ops.identify_sfp(hw);
5104         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5105                 device_printf(dev,
5106                     "Unsupported SFP+ module type was detected.\n");
5107                 return;
5108         }
5109         err = hw->mac.ops.setup_sfp(hw);
5110         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5111                 device_printf(dev,
5112                     "Setup failure - unsupported SFP+ module type.\n");
5113                 return;
5114         }
5115         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
5116         return;
5117 }
5118
5119
5120 /*
5121 ** Tasklet for handling MSF (multispeed fiber) interrupts
5122 */
5123 static void
5124 ixgbe_handle_msf(void *context, int pending)
5125 {
5126         struct adapter  *adapter = context;
5127         struct ixgbe_hw *hw = &adapter->hw;
5128         u32 autoneg;
5129         bool negotiate;
5130
5131         autoneg = hw->phy.autoneg_advertised;
5132         if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
5133                 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
5134         if (hw->mac.ops.setup_link)
5135                 hw->mac.ops.setup_link(hw, autoneg, negotiate, TRUE);
5136         return;
5137 }
5138
5139 #ifdef IXGBE_FDIR
5140 /*
5141 ** Tasklet for reinitializing the Flow Director filter table
5142 */
5143 static void
5144 ixgbe_reinit_fdir(void *context, int pending)
5145 {
5146         struct adapter  *adapter = context;
5147         struct ifnet   *ifp = adapter->ifp;
5148
5149         if (adapter->fdir_reinit != 1) /* Shouldn't happen */
5150                 return;
5151         ixgbe_reinit_fdir_tables_82599(&adapter->hw);
5152         adapter->fdir_reinit = 0;
5153         /* re-enable flow director interrupts */
5154         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
5155         /* Restart the interface */
5156         ifp->if_drv_flags |= IFF_DRV_RUNNING;
5157         return;
5158 }
5159 #endif
5160
5161 /**********************************************************************
5162  *
5163  *  Update the board statistics counters.
5164  *
5165  **********************************************************************/
5166 static void
5167 ixgbe_update_stats_counters(struct adapter *adapter)
5168 {
5169         struct ifnet   *ifp = adapter->ifp;
5170         struct ixgbe_hw *hw = &adapter->hw;
5171         u32  missed_rx = 0, bprc, lxon, lxoff, total;
5172         u64  total_missed_rx = 0;
5173
5174         adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
5175         adapter->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
5176         adapter->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
5177         adapter->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
5178
5179         for (int i = 0; i < 8; i++) {
5180                 u32 mp;
5181                 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
5182                 /* missed_rx tallies misses for the gprc workaround */
5183                 missed_rx += mp;
5184                 /* global total per queue */
5185                 adapter->stats.mpc[i] += mp;
5186                 /* Running comprehensive total for stats display */
5187                 total_missed_rx += adapter->stats.mpc[i];
5188                 if (hw->mac.type == ixgbe_mac_82598EB)
5189                         adapter->stats.rnbc[i] +=
5190                             IXGBE_READ_REG(hw, IXGBE_RNBC(i));
5191                 adapter->stats.pxontxc[i] +=
5192                     IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
5193                 adapter->stats.pxonrxc[i] +=
5194                     IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5195                 adapter->stats.pxofftxc[i] +=
5196                     IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
5197                 adapter->stats.pxoffrxc[i] +=
5198                     IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5199                 adapter->stats.pxon2offc[i] +=
5200                     IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
5201         }
5202         for (int i = 0; i < 16; i++) {
5203                 adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
5204                 adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
5205                 adapter->stats.qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i));
5206                 adapter->stats.qbrc[i] += 
5207                     ((u64)IXGBE_READ_REG(hw, IXGBE_QBRC(i)) << 32);
5208                 adapter->stats.qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i));
5209                 adapter->stats.qbtc[i] +=
5210                     ((u64)IXGBE_READ_REG(hw, IXGBE_QBTC(i)) << 32);
5211                 adapter->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
5212         }
5213         adapter->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
5214         adapter->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
5215         adapter->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
5216
5217         /* Hardware workaround, gprc counts missed packets */
5218         adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
5219         adapter->stats.gprc -= missed_rx;
5220
5221         if (hw->mac.type != ixgbe_mac_82598EB) {
5222                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
5223                     ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
5224                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
5225                     ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
5226                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
5227                     ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
5228                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5229                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5230         } else {
5231                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5232                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5233                 /* 82598 only has a counter in the high register */
5234                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
5235                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
5236                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
5237         }
5238
5239         /*
5240          * Workaround: mprc hardware is incorrectly counting
5241          * broadcasts, so for now we subtract those.
5242          */
5243         bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
5244         adapter->stats.bprc += bprc;
5245         adapter->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
5246         if (hw->mac.type == ixgbe_mac_82598EB)
5247                 adapter->stats.mprc -= bprc;
5248
5249         adapter->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
5250         adapter->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
5251         adapter->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
5252         adapter->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
5253         adapter->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
5254         adapter->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
5255
5256         lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
5257         adapter->stats.lxontxc += lxon;
5258         lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
5259         adapter->stats.lxofftxc += lxoff;
5260         total = lxon + lxoff;
5261
5262         adapter->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
5263         adapter->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
5264         adapter->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
5265         adapter->stats.gptc -= total;
5266         adapter->stats.mptc -= total;
5267         adapter->stats.ptc64 -= total;
5268         adapter->stats.gotc -= total * ETHER_MIN_LEN;
5269
5270         adapter->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
5271         adapter->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
5272         adapter->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
5273         adapter->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
5274         adapter->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
5275         adapter->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
5276         adapter->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
5277         adapter->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
5278         adapter->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
5279         adapter->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
5280         adapter->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
5281         adapter->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
5282         adapter->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
5283         adapter->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
5284         adapter->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
5285         adapter->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
5286         adapter->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
5287         adapter->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
5288         /* Only read FCOE stats on 82599 and newer */
5289         if (hw->mac.type != ixgbe_mac_82598EB) {
5290                 adapter->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
5291                 adapter->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
5292                 adapter->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
5293                 adapter->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
5294                 adapter->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
5295         }
5296
5297         /* Fill out the OS statistics structure */
5298         ifp->if_ipackets = adapter->stats.gprc;
5299         ifp->if_opackets = adapter->stats.gptc;
5300         ifp->if_ibytes = adapter->stats.gorc;
5301         ifp->if_obytes = adapter->stats.gotc;
5302         ifp->if_imcasts = adapter->stats.mprc;
5303         ifp->if_collisions = 0;
5304
5305         /* Rx Errors */
5306         ifp->if_ierrors = total_missed_rx + adapter->stats.crcerrs +
5307                 adapter->stats.rlec;
5308 }
5309
5310 /** ixgbe_sysctl_tdh_handler - Handler function
5311  *  Retrieves the TDH value from the hardware
5312  */
5313 static int 
5314 ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS)
5315 {
5316         int error;
5317
5318         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5319         if (!txr) return 0;
5320
5321         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
5322         error = sysctl_handle_int(oidp, &val, 0, req);
5323         if (error || !req->newptr)
5324                 return error;
5325         return 0;
5326 }
5327
5328 /** ixgbe_sysctl_tdt_handler - Handler function
5329  *  Retrieves the TDT value from the hardware
5330  */
5331 static int 
5332 ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS)
5333 {
5334         int error;
5335
5336         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5337         if (!txr) return 0;
5338
5339         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
5340         error = sysctl_handle_int(oidp, &val, 0, req);
5341         if (error || !req->newptr)
5342                 return error;
5343         return 0;
5344 }
5345
5346 /** ixgbe_sysctl_rdh_handler - Handler function
5347  *  Retrieves the RDH value from the hardware
5348  */
5349 static int 
5350 ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS)
5351 {
5352         int error;
5353
5354         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5355         if (!rxr) return 0;
5356
5357         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
5358         error = sysctl_handle_int(oidp, &val, 0, req);
5359         if (error || !req->newptr)
5360                 return error;
5361         return 0;
5362 }
5363
5364 /** ixgbe_sysctl_rdt_handler - Handler function
5365  *  Retrieves the RDT value from the hardware
5366  */
5367 static int 
5368 ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS)
5369 {
5370         int error;
5371
5372         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5373         if (!rxr) return 0;
5374
5375         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
5376         error = sysctl_handle_int(oidp, &val, 0, req);
5377         if (error || !req->newptr)
5378                 return error;
5379         return 0;
5380 }
5381
5382 static int
5383 ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5384 {
5385         int error;
5386         struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1);
5387         unsigned int reg, usec, rate;
5388
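             /*
             ** The EITR interval field occupies bits 3:11 (mask 0x0FF8); the
             ** conversions below mirror those in ixgbe_configure_ivars().
             */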
5389         reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
5390         usec = ((reg & 0x0FF8) >> 3);
5391         if (usec > 0)
5392                 rate = 500000 / usec;
5393         else
5394                 rate = 0;
5395         error = sysctl_handle_int(oidp, &rate, 0, req);
5396         if (error || !req->newptr)
5397                 return error;
5398         reg &= ~0xfff; /* default, no limitation */
5399         ixgbe_max_interrupt_rate = 0;
5400         if (rate > 0 && rate < 500000) {
5401                 if (rate < 1000)
5402                         rate = 1000;
5403                 ixgbe_max_interrupt_rate = rate;
5404                 reg |= ((4000000/rate) & 0xff8 );
5405         }
5406         IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
5407         return 0;
5408 }
5409
5410 /*
5411  * Add sysctl variables, one per statistic, to the system.
5412  */
5413 static void
5414 ixgbe_add_hw_stats(struct adapter *adapter)
5415 {
5416         struct tx_ring *txr = adapter->tx_rings;
5417         struct rx_ring *rxr = adapter->rx_rings;
5418
5419         struct sysctl_ctx_list *ctx = &adapter->sysctl_ctx;
5420         struct sysctl_oid *tree = adapter->sysctl_tree;
5421         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5422         struct ixgbe_hw_stats *stats = &adapter->stats;
5423
5424         struct sysctl_oid *stat_node, *queue_node;
5425         struct sysctl_oid_list *stat_list, *queue_list;
5426
5427 #define QUEUE_NAME_LEN 32
5428         char namebuf[QUEUE_NAME_LEN];
5429
5430         /* Driver Statistics */
5431         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5432                         CTLFLAG_RD, &adapter->dropped_pkts,
5433                         "Driver dropped packets");
5434         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed",
5435                         CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5436                         "m_defrag() failed");
5437         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_dma_setup",
5438                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5439                         "Driver tx dma failure in xmit");
5440         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
5441                         CTLFLAG_RD, &adapter->watchdog_events,
5442                         "Watchdog timeouts");
5443         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tso_tx",
5444                         CTLFLAG_RD, &adapter->tso_tx,
5445                         "TSO");
5446         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5447                         CTLFLAG_RD, &adapter->link_irq,
5448                         "Link MSIX IRQ Handled");
5449
5450         for (int i = 0; i < adapter->num_queues; i++, txr++) {
5451                 ksnprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5452                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5453                                             CTLFLAG_RD, NULL, "Queue Name");
5454                 queue_list = SYSCTL_CHILDREN(queue_node);
5455
5456                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5457                                 CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
5458                                 sizeof(&adapter->queues[i]),
5459                                 ixgbe_sysctl_interrupt_rate_handler, "IU",
5460                                 "Interrupt Rate");
5461                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
5462                                 CTLFLAG_RD, &(adapter->queues[i].irqs), 0,
5463                                 "irqs on this queue");
5464                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5465                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5466                                 ixgbe_sysctl_tdh_handler, "IU",
5467                                 "Transmit Descriptor Head");
5468                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5469                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5470                                 ixgbe_sysctl_tdt_handler, "IU",
5471                                 "Transmit Descriptor Tail");
5472                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5473                                 CTLFLAG_RD, &txr->no_desc_avail, 0,
5474                                 "Queue No Descriptor Available");
5475                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5476                                 CTLFLAG_RD, &txr->total_packets, 0,
5477                                 "Queue Packets Transmitted");
5478         }
5479
5480         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
5481                 ksnprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5482                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 
5483                                             CTLFLAG_RD, NULL, "Queue Name");
5484                 queue_list = SYSCTL_CHILDREN(queue_node);
5485
5486 #if 0   /* NET_LRO */
5487                 struct lro_ctrl *lro = &rxr->lro;
5488 #endif
5489
5495                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5496                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5497                                 ixgbe_sysctl_rdh_handler, "IU",
5498                                 "Receive Descriptor Head");
5499                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5500                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5501                                 ixgbe_sysctl_rdt_handler, "IU",
5502                                 "Receive Descriptor Tail");
5503                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5504                                 CTLFLAG_RD, &rxr->rx_packets, 0,
5505                                 "Queue Packets Received");
5506                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5507                                 CTLFLAG_RD, &rxr->rx_bytes, 0,
5508                                 "Queue Bytes Received");
5509 #if 0   /* NET_LRO */
5510                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5511                                 CTLFLAG_RD, &lro->lro_queued, 0,
5512                                 "LRO Queued");
5513                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5514                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5515                                 "LRO Flushed");
5516 #endif
5517         }
5518
5519         /* MAC stats get their own sub node */
5520
5521         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5522                                     CTLFLAG_RD, NULL, "MAC Statistics");
5523         stat_list = SYSCTL_CHILDREN(stat_node);
5524
5525         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5526                         CTLFLAG_RD, &stats->crcerrs, 0,
5527                         "CRC Errors");
5528         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
5529                         CTLFLAG_RD, &stats->illerrc, 0,
5530                         "Illegal Byte Errors");
5531         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
5532                         CTLFLAG_RD, &stats->errbc, 0,
5533                         "Byte Errors");
5534         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
5535                         CTLFLAG_RD, &stats->mspdc, 0,
5536                         "MAC Short Packets Discarded");
5537         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
5538                         CTLFLAG_RD, &stats->mlfc, 0,
5539                         "MAC Local Faults");
5540         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
5541                         CTLFLAG_RD, &stats->mrfc, 0,
5542                         "MAC Remote Faults");
5543         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
5544                         CTLFLAG_RD, &stats->rlec, 0,
5545                         "Receive Length Errors");
5546         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "link_xon_txd",
5547                         CTLFLAG_RD, &stats->lxontxc, 0,
5548                         "Link XON Transmitted");
5549         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "link_xon_rcvd",
5550                         CTLFLAG_RD, &stats->lxonrxc, 0,
5551                         "Link XON Received");
5552         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "link_xoff_txd",
5553                         CTLFLAG_RD, &stats->lxofftxc, 0,
5554                         "Link XOFF Transmitted");
5555         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "link_xoff_rcvd",
5556                         CTLFLAG_RD, &stats->lxoffrxc, 0,
5557                         "Link XOFF Received");
5558
5559         /* Packet Reception Stats */
5560         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
5561                         CTLFLAG_RD, &stats->tor, 0,
5562                         "Total Octets Received"); 
5563         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
5564                         CTLFLAG_RD, &stats->gorc, 0,
5565                         "Good Octets Received"); 
5566         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
5567                         CTLFLAG_RD, &stats->tpr, 0,
5568                         "Total Packets Received");
5569         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
5570                         CTLFLAG_RD, &stats->gprc, 0,
5571                         "Good Packets Received");
5572         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
5573                         CTLFLAG_RD, &stats->mprc, 0,
5574                         "Multicast Packets Received");
5575         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
5576                         CTLFLAG_RD, &stats->bprc, 0,
5577                         "Broadcast Packets Received");
5578         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5579                         CTLFLAG_RD, &stats->prc64, 0,
5580                         "64 byte frames received");
5581         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5582                         CTLFLAG_RD, &stats->prc127, 0,
5583                         "65-127 byte frames received");
5584         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5585                         CTLFLAG_RD, &stats->prc255, 0,
5586                         "128-255 byte frames received");
5587         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5588                         CTLFLAG_RD, &stats->prc511, 0,
5589                         "256-511 byte frames received");
5590         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5591                         CTLFLAG_RD, &stats->prc1023, 0,
5592                         "512-1023 byte frames received");
5593         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5594                         CTLFLAG_RD, &stats->prc1522, 0,
5595                         "1024-1522 byte frames received");
5596         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
5597                         CTLFLAG_RD, &stats->ruc, 0,
5598                         "Receive Undersized");
5599         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5600                         CTLFLAG_RD, &stats->rfc, 0,
5601                         "Fragmented Packets Received");
5602         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
5603                         CTLFLAG_RD, &stats->roc, 0,
5604                         "Oversized Packets Received");
5605         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
5606                         CTLFLAG_RD, &stats->rjc, 0,
5607                         "Received Jabber");
5608         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
5609                         CTLFLAG_RD, &stats->mngprc, 0,
5610                         "Management Packets Received");
5611         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
5612                         CTLFLAG_RD, &stats->mngpdc, 0,
5613                         "Management Packets Dropped");
5614         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
5615                         CTLFLAG_RD, &stats->xec, 0,
5616                         "Checksum Errors");
5617
5618         /* Packet Transmission Stats */
5619         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5620                         CTLFLAG_RD, &stats->gotc, 0,
5621                         "Good Octets Transmitted"); 
5622         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5623                         CTLFLAG_RD, &stats->tpt, 0,
5624                         "Total Packets Transmitted");
5625         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5626                         CTLFLAG_RD, &stats->gptc, 0,
5627                         "Good Packets Transmitted");
5628         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5629                         CTLFLAG_RD, &stats->bptc, 0,
5630                         "Broadcast Packets Transmitted");
5631         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5632                         CTLFLAG_RD, &stats->mptc, 0,
5633                         "Multicast Packets Transmitted");
5634         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
5635                         CTLFLAG_RD, &stats->mngptc, 0,
5636                         "Management Packets Transmitted");
5637         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5638                         CTLFLAG_RD, &stats->ptc64, 0,
5639                         "64 byte frames transmitted");
5640         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5641                         CTLFLAG_RD, &stats->ptc127, 0,
5642                         "65-127 byte frames transmitted");
5643         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5644                         CTLFLAG_RD, &stats->ptc255, 0,
5645                         "128-255 byte frames transmitted");
5646         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5647                         CTLFLAG_RD, &stats->ptc511, 0,
5648                         "256-511 byte frames transmitted");
5649         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5650                         CTLFLAG_RD, &stats->ptc1023, 0,
5651                         "512-1023 byte frames transmitted");
5652         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5653                         CTLFLAG_RD, &stats->ptc1522, 0,
5654                         "1024-1522 byte frames transmitted");
5655
5656         /* FC Stats */
5657         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_crc",
5658                 CTLFLAG_RD, &stats->fccrc, 0,
5659                 "FC CRC Errors");
5660         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_last",
5661                 CTLFLAG_RD, &stats->fclast, 0,
5662                 "FC Last Error");
5663         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_drpd",
5664                 CTLFLAG_RD, &stats->fcoerpdc, 0,
5665                 "FCoE Packets Dropped");
5666         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_pkts_rcvd",
5667                 CTLFLAG_RD, &stats->fcoeprc, 0,
5668                 "FCoE Packets Received");
5669         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_pkts_txd",
5670                 CTLFLAG_RD, &stats->fcoeptc, 0,
5671                 "FCoE Packets Transmitted");
5672         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_dword_rcvd",
5673                 CTLFLAG_RD, &stats->fcoedwrc, 0,
5674                 "FCoE DWords Received");
5675         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_dword_txd",
5676                 CTLFLAG_RD, &stats->fcoedwtc, 0,
5677                 "FCoE DWords Transmitted");
5678 }
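
/*
 * Usage sketch (not driver code): the statistics tree built above can be
 * read from userland with sysctlbyname(3).  The "dev.ix.0" prefix and the
 * exact node path below are assumptions about how this adapter's sysctl
 * tree is rooted; adjust them to the actual attachment.
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <err.h>
 *	#include <stdio.h>
 *	#include <stdint.h>
 *
 *	int
 *	main(void)
 *	{
 *		uint64_t crcerrs;
 *		size_t len = sizeof(crcerrs);
 *
 *		if (sysctlbyname("dev.ix.0.mac_stats.crc_errs",
 *		    &crcerrs, &len, NULL, 0) == -1)
 *			err(1, "sysctlbyname");
 *		printf("CRC errors: %ju\n", (uintmax_t)crcerrs);
 *		return (0);
 *	}
 */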
5679
5680 /*
5681 ** Set flow control using sysctl:
5682 ** Flow control values:
5683 **      0 - off
5684 **      1 - rx pause
5685 **      2 - tx pause
5686 **      3 - full
5687 */
5688 static int
5689 ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS)
5690 {
5691         int error, last;
5692         struct adapter *adapter = (struct adapter *) arg1;
5693
5694         last = adapter->fc;
5695         error = sysctl_handle_int(oidp, &adapter->fc, 0, req);
5696         if ((error) || (req->newptr == NULL))
5697                 return (error);
5698
5699         /* Don't bother if it's not changed */
5700         if (adapter->fc == last)
5701                 return (0);
5702
5703         switch (adapter->fc) {
5704                 case ixgbe_fc_rx_pause:
5705                 case ixgbe_fc_tx_pause:
5706                 case ixgbe_fc_full:
5707                         adapter->hw.fc.requested_mode = adapter->fc;
5708                         break;
5709                 case ixgbe_fc_none:
5710                 default:
5711                         adapter->hw.fc.requested_mode = ixgbe_fc_none;
5712         }
5713         /* Don't autoneg if forcing a value */
5714         adapter->hw.fc.disable_fc_autoneg = TRUE;
5715         ixgbe_fc_enable(&adapter->hw);
5716         return (error);
5717 }
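
/*
 * Usage sketch: this handler is meant to be wired to a read/write sysctl
 * node; the "dev.ix.0.fc" path below is an assumption about where that node
 * lives.  Writing 3 requests full (rx + tx) pause and takes effect
 * immediately via ixgbe_fc_enable().
 *
 *	int fc = 3;	(0 = off, 1 = rx pause, 2 = tx pause, 3 = full)
 *	sysctlbyname("dev.ix.0.fc", NULL, NULL, &fc, sizeof(fc));
 */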
5718
5719 static void
5720 ixgbe_add_rx_process_limit(struct adapter *adapter, const char *name,
5721         const char *description, int *limit, int value)
5722 {
5723         *limit = value;
5724         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
5725             SYSCTL_CHILDREN(adapter->sysctl_tree),
5726             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5727 }
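
/*
 * Typical attach-time use (sketch): the tunable name, description and
 * default shown here are assumptions, not fixed by this helper.
 *
 *	ixgbe_add_rx_process_limit(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, ixgbe_rx_process_limit);
 */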
5728
5729 /*
5730 ** Control link advertise speed:
5731 **      1 - advertise only 1G
5732 **      2 - advertise 100Mb
5733 **      3 - advertise normal (1G/10G)
5734 */
5735 static int
5736 ixgbe_set_advertise(SYSCTL_HANDLER_ARGS)
5737 {
5738         int                     error = 0;
5739         struct adapter          *adapter;
5740         device_t                dev;
5741         struct ixgbe_hw         *hw;
5742         ixgbe_link_speed        speed, last;
5743
5744         adapter = (struct adapter *) arg1;
5745         dev = adapter->dev;
5746         hw = &adapter->hw;
5747         last = adapter->advertise;
5748
5749         error = sysctl_handle_int(oidp, &adapter->advertise, 0, req);
5750         if ((error) || (adapter->advertise == -1))
5751                 return (error);
5752
5753         if (adapter->advertise == last) /* no change */
5754                 return (0);
5755
5756         if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
5757             (hw->phy.multispeed_fiber)))
5758                 return (error);
5759
5760         if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
5761                 device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
5762                 return (error);
5763         }
5764
5765         if (adapter->advertise == 1)
5766                 speed = IXGBE_LINK_SPEED_1GB_FULL;
5767         else if (adapter->advertise == 2)
5768                 speed = IXGBE_LINK_SPEED_100_FULL;
5769         else if (adapter->advertise == 3)
5770                 speed = IXGBE_LINK_SPEED_1GB_FULL |
5771                         IXGBE_LINK_SPEED_10GB_FULL;
5772         else /* bogus value */
5773                 return (error);
5774
5775         hw->mac.autotry_restart = TRUE;
5776         hw->mac.ops.setup_link(hw, speed, TRUE, TRUE);
5777
5778         return (error);
5779 }
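
/*
 * Usage sketch: the "dev.ix.0.advertise_speed" path is an assumption about
 * how this handler is attached.  A value of 3 re-advertises the normal
 * 1G/10G set; 1 restricts advertisement to 1G and 2 to 100Mb (X540 only).
 *
 *	int adv = 3;
 *	sysctlbyname("dev.ix.0.advertise_speed", NULL, NULL, &adv, sizeof(adv));
 */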
5780
5781 /*
5782 ** Thermal Shutdown Trigger
5783 **   - cause a Thermal Overtemp IRQ
5784 */
5785 static int
5786 ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS)
5787 {
5788         int             error, fire = 0;
5789         struct adapter  *adapter = (struct adapter *) arg1;
5790         struct ixgbe_hw *hw = &adapter->hw;
5791
5793         if (hw->mac.type != ixgbe_mac_X540)
5794                 return (0);
5795
5796         error = sysctl_handle_int(oidp, &fire, 0, req);
5797         if ((error) || (req->newptr == NULL))
5798                 return (error);
5799
5800         if (fire) {
5801                 u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
5802                 reg |= IXGBE_EICR_TS;
5803                 IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
5804         }
5805
5806         return (0);
5807 }
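
/*
 * Usage sketch (X540 only; the "dev.ix.0.ts" node path is an assumption):
 * writing a non-zero value sets IXGBE_EICR_TS in EICS, which injects the
 * overtemperature interrupt so the shutdown path can be exercised.
 *
 *	int fire = 1;
 *	sysctlbyname("dev.ix.0.ts", NULL, NULL, &fire, sizeof(fire));
 */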
5808
5809 /* rearrange mbuf chain to get contiguous bytes */
5810 static int
5811 ixgbe_tso_pullup(struct tx_ring *txr, struct mbuf **mp)
5812 {
5813         int hoff, iphlen, thoff;
5814         struct mbuf *m;
5815
5816         m = *mp;
5817         KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
5818
5819         iphlen = m->m_pkthdr.csum_iphlen;
5820         thoff = m->m_pkthdr.csum_thlen;
5821         hoff = m->m_pkthdr.csum_lhlen;
5822
5823         KASSERT(iphlen > 0, ("invalid ip hlen"));
5824         KASSERT(thoff > 0, ("invalid tcp hlen"));
5825         KASSERT(hoff > 0, ("invalid ether hlen"));
5826
5827         if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
5828                 m = m_pullup(m, hoff + iphlen + thoff);
5829                 if (m == NULL) {
5830                         *mp = NULL;
5831                         return ENOBUFS;
5832                 }
5833                 *mp = m;
5834         }
5835
5836         return 0;
5837 }
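
/*
 * Caller sketch (hypothetical): the transmit path is expected to call this
 * before building the TSO context descriptor, so that the Ethernet, IP and
 * TCP headers are contiguous in the first mbuf.  On failure the chain has
 * already been freed by m_pullup() and *mp is NULL.
 *
 *	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
 *		error = ixgbe_tso_pullup(txr, &m_head);
 *		if (error)
 *			return (error);
 *	}
 */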