ixgbe: Move sysctl creation to a separate function
[dragonfly.git] / sys / dev / netif / ixgbe / ixgbe.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2012, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD: src/sys/dev/ixgbe/ixgbe.c,v 1.70 2012/07/05 20:51:44 jfv Exp $*/
34
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37
38 #include "ixgbe.h"
39
40 /*********************************************************************
41  *  Set this to one to display debug statistics
42  *********************************************************************/
43 int             ixgbe_display_debug_stats = 0;
44
45 /*********************************************************************
46  *  Driver version
47  *********************************************************************/
48 char ixgbe_driver_version[] = "2.4.8";
49
50 /*********************************************************************
51  *  PCI Device ID Table
52  *
53  *  Used by probe to select devices to load on
54  *  Last field stores an index into ixgbe_strings
55  *  Last entry must be all 0s
56  *
57  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
58  *********************************************************************/
59
60 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
61 {
62         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
63         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
64         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
65         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
66         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
67         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
68         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
69         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
70         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
71         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
72         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
73         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
74         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
75         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
76         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
77         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
78         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
79         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
80         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
81         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
82         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
83         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
84         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, 0, 0, 0},
85         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
86         /* required last entry */
87         {0, 0, 0, 0, 0}
88 };
89
90 /*********************************************************************
91  *  Table of branding strings
92  *********************************************************************/
93
94 static char    *ixgbe_strings[] = {
95         "Intel(R) PRO/10GbE PCI-Express Network Driver"
96 };
97
98 /*********************************************************************
99  *  Function prototypes
100  *********************************************************************/
101 static int      ixgbe_probe(device_t);
102 static int      ixgbe_attach(device_t);
103 static int      ixgbe_detach(device_t);
104 static int      ixgbe_shutdown(device_t);
105 static void     ixgbe_start(struct ifnet *);
106 static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
107 #if 0 /* __FreeBSD_version >= 800000 */
108 static int      ixgbe_mq_start(struct ifnet *, struct mbuf *);
109 static int      ixgbe_mq_start_locked(struct ifnet *,
110                     struct tx_ring *, struct mbuf *);
111 static void     ixgbe_qflush(struct ifnet *);
112 #endif
113 static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
114 static void     ixgbe_init(void *);
115 static void     ixgbe_init_locked(struct adapter *);
116 static void     ixgbe_stop(void *);
117 static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
118 static int      ixgbe_media_change(struct ifnet *);
119 static void     ixgbe_identify_hardware(struct adapter *);
120 static int      ixgbe_allocate_pci_resources(struct adapter *);
121 static int      ixgbe_allocate_msix(struct adapter *);
122 static int      ixgbe_allocate_legacy(struct adapter *);
123 static int      ixgbe_allocate_queues(struct adapter *);
124 static int      ixgbe_setup_msix(struct adapter *);
125 static void     ixgbe_free_pci_resources(struct adapter *);
126 static void     ixgbe_local_timer(void *);
127 static int      ixgbe_setup_interface(device_t, struct adapter *);
128 static void     ixgbe_config_link(struct adapter *);
129
130 static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
131 static int      ixgbe_setup_transmit_structures(struct adapter *);
132 static void     ixgbe_setup_transmit_ring(struct tx_ring *);
133 static void     ixgbe_initialize_transmit_units(struct adapter *);
134 static void     ixgbe_free_transmit_structures(struct adapter *);
135 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
136
137 static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
138 static int      ixgbe_setup_receive_structures(struct adapter *);
139 static int      ixgbe_setup_receive_ring(struct rx_ring *);
140 static void     ixgbe_initialize_receive_units(struct adapter *);
141 static void     ixgbe_free_receive_structures(struct adapter *);
142 static void     ixgbe_free_receive_buffers(struct rx_ring *);
143 #if 0   /* NET_LRO */
144 static void     ixgbe_setup_hw_rsc(struct rx_ring *);
145 #endif
146
147 static void     ixgbe_enable_intr(struct adapter *);
148 static void     ixgbe_disable_intr(struct adapter *);
149 static void     ixgbe_update_stats_counters(struct adapter *);
150 static bool     ixgbe_txeof(struct tx_ring *);
151 static bool     ixgbe_rxeof(struct ix_queue *, int);
152 static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
153 static void     ixgbe_set_promisc(struct adapter *);
154 static void     ixgbe_set_multi(struct adapter *);
155 static void     ixgbe_update_link_status(struct adapter *);
156 static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
157 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
158 static int      ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
159 static int      ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
160 static int      ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
161 static int      ixgbe_dma_malloc(struct adapter *, bus_size_t,
162                     struct ixgbe_dma_alloc *, int);
163 static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
164 static void     ixgbe_add_rx_process_limit(struct adapter *, const char *,
165                     const char *, int *, int);
166 static bool     ixgbe_tx_ctx_setup(struct tx_ring *, struct mbuf *);
167 static bool     ixgbe_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
168 static int      ixgbe_tso_pullup(struct tx_ring *, struct mbuf **);
169 static void     ixgbe_add_sysctl(struct adapter *);
170 static void     ixgbe_set_ivar(struct adapter *, u8, u8, s8);
171 static void     ixgbe_configure_ivars(struct adapter *);
172 static u8 *     ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
173
174 static void     ixgbe_setup_vlan_hw_support(struct adapter *);
175 static void     ixgbe_register_vlan(void *, struct ifnet *, u16);
176 static void     ixgbe_unregister_vlan(void *, struct ifnet *, u16);
177
178 static void     ixgbe_add_hw_stats(struct adapter *adapter);
179
180 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
181 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
182                     struct mbuf *, u32);
183
184 /* Support for pluggable optic modules */
185 static bool     ixgbe_sfp_probe(struct adapter *);
186 static void     ixgbe_setup_optics(struct adapter *);
187
188 /* Legacy (single vector) interrupt handler */
189 static void     ixgbe_legacy_irq(void *);
190
191 /* The MSI/X Interrupt handlers */
192 static void     ixgbe_msix_que(void *);
193 static void     ixgbe_msix_link(void *);
194
195 /* Deferred interrupt tasklets */
196 static void     ixgbe_handle_que(void *, int);
197 static void     ixgbe_handle_link(void *, int);
198 static void     ixgbe_handle_msf(void *, int);
199 static void     ixgbe_handle_mod(void *, int);
200
201 #ifdef IXGBE_FDIR
202 static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
203 static void     ixgbe_reinit_fdir(void *, int);
204 #endif
205
206 /*********************************************************************
207  *  FreeBSD Device Interface Entry Points
208  *********************************************************************/
209
210 static device_method_t ixgbe_methods[] = {
211         /* Device interface */
212         DEVMETHOD(device_probe, ixgbe_probe),
213         DEVMETHOD(device_attach, ixgbe_attach),
214         DEVMETHOD(device_detach, ixgbe_detach),
215         DEVMETHOD(device_shutdown, ixgbe_shutdown),
216         {0, 0}
217 };
218
219 static driver_t ixgbe_driver = {
220         "ix", ixgbe_methods, sizeof(struct adapter),
221 };
222
223 devclass_t ixgbe_devclass;
224 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
225
226 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
227 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
228
229 /*
230 ** TUNEABLE PARAMETERS:
231 */
232
233 /*
234 ** AIM: Adaptive Interrupt Moderation
235 ** which means that the interrupt rate
236 ** is varied over time based on the
237 ** traffic for that interrupt vector
238 */
239 static int ixgbe_enable_aim = TRUE;
240 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
241
242 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
243 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
244
245 /* How many packets rxeof tries to clean at a time */
246 static int ixgbe_rx_process_limit = 128;
247 TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
248
249 /*
250 ** Smart speed setting, default is on.
251 ** This only works as a compile-time option
252 ** right now since it is applied during attach;
253 ** set this to 'ixgbe_smart_speed_off' to
254 ** disable.
255 */
256 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
257
258 static int ixgbe_msi_enable = 1;
259 TUNABLE_INT("hw.ixgbe.msi.enable", &ixgbe_msi_enable);
260
261 /*
262  * MSIX should be the default for best performance,
263  * but this allows it to be forced off for testing.
264  */
265 static int ixgbe_enable_msix = 1;
266 TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
267
268 /*
269  * Header split: this causes the hardware to DMA
270  * the header into a separate mbuf from the payload.
271  * It can be a performance win in some workloads, but
272  * in others it actually hurts; it is off by default.
273  */
274 static int ixgbe_header_split = FALSE;
275 TUNABLE_INT("hw.ixgbe.hdr_split", &ixgbe_header_split);
276
277 /*
278  * Number of queues: if set to 0, the driver
279  * autoconfigures based on the number of CPUs,
280  * with a maximum of 8. It can be overridden
281  * manually here.
282  */
283 static int ixgbe_num_queues = 0;
284 TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
285
286 /*
287 ** Number of TX descriptors per ring;
288 ** set higher than RX as this seems to
289 ** be the better-performing choice.
290 */
291 static int ixgbe_txd = PERFORM_TXD;
292 TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
293
294 /* Number of RX descriptors per ring */
295 static int ixgbe_rxd = PERFORM_RXD;
296 TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
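/*
 * Note: the TUNABLE_INT() knobs above are fetched from the kernel
 * environment at boot, so they would normally be set from
 * /boot/loader.conf.  Illustrative example lines (values are arbitrary):
 *
 *   hw.ixgbe.num_queues="4"
 *   hw.ixgbe.txd="2048"
 *   hw.ixgbe.rxd="2048"
 */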
297
298 /* Keep a running count of ports for sanity checks */
299 static int ixgbe_total_ports;
300
301 #ifdef IXGBE_FDIR
302 /*
303 ** For Flow Director: this is the
304 ** number of TX packets we sample
305 ** for the filter pool; with the default
306 ** of 20, every 20th packet will be probed.
307 **
308 ** This feature can be disabled by 
309 ** setting this to 0.
310 */
311 static int atr_sample_rate = 20;
312 /* 
313 ** Flow Director actually 'steals'
314 ** part of the packet buffer as its
315 ** filter pool; this variable controls
316 ** how much it uses:
317 **  0 = 64K, 1 = 128K, 2 = 256K
318 */
319 static int fdir_pballoc = 1;
320 #endif
321
322 #ifdef DEV_NETMAP
323 /*
324  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
325  * be a reference on how to implement netmap support in a driver.
326  * Additional comments are in ixgbe_netmap.h .
327  *
328  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
329  * that extend the standard driver.
330  */
331 #include <dev/netmap/ixgbe_netmap.h>
332 #endif /* DEV_NETMAP */
333
334 /*********************************************************************
335  *  Device identification routine
336  *
337  *  ixgbe_probe determines if the driver should be loaded on
338  *  the adapter, based on the adapter's PCI vendor/device ID.
339  *
340  *  return BUS_PROBE_DEFAULT on success, positive on failure
341  *********************************************************************/
342
343 static int
344 ixgbe_probe(device_t dev)
345 {
346         ixgbe_vendor_info_t *ent;
347
348         u16     pci_vendor_id = 0;
349         u16     pci_device_id = 0;
350         u16     pci_subvendor_id = 0;
351         u16     pci_subdevice_id = 0;
352         char    adapter_name[256];
353
354         INIT_DEBUGOUT("ixgbe_probe: begin");
355
356         pci_vendor_id = pci_get_vendor(dev);
357         if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
358                 return (ENXIO);
359
360         pci_device_id = pci_get_device(dev);
361         pci_subvendor_id = pci_get_subvendor(dev);
362         pci_subdevice_id = pci_get_subdevice(dev);
363
364         ent = ixgbe_vendor_info_array;
365         while (ent->vendor_id != 0) {
366                 if ((pci_vendor_id == ent->vendor_id) &&
367                     (pci_device_id == ent->device_id) &&
368
369                     ((pci_subvendor_id == ent->subvendor_id) ||
370                      (ent->subvendor_id == 0)) &&
371
372                     ((pci_subdevice_id == ent->subdevice_id) ||
373                      (ent->subdevice_id == 0))) {
374                         ksprintf(adapter_name, "%s, Version - %s",
375                                 ixgbe_strings[ent->index],
376                                 ixgbe_driver_version);
377                         device_set_desc_copy(dev, adapter_name);
378                         ++ixgbe_total_ports;
379                         return (BUS_PROBE_DEFAULT);
380                 }
381                 ent++;
382         }
383         return (ENXIO);
384 }
385
386 /*********************************************************************
387  *  Device initialization routine
388  *
389  *  The attach entry point is called when the driver is being loaded.
390  *  This routine identifies the type of hardware, allocates all resources
391  *  and initializes the hardware.
392  *
393  *  return 0 on success, positive on failure
394  *********************************************************************/
395
396 static int
397 ixgbe_attach(device_t dev)
398 {
399         struct adapter *adapter;
400         struct ixgbe_hw *hw;
401         int             error = 0;
402         u16             csum;
403         u32             ctrl_ext;
404
405         INIT_DEBUGOUT("ixgbe_attach: begin");
406
407         if (resource_disabled("ixgbe", device_get_unit(dev))) {
408                 device_printf(dev, "Disabled by device hint\n");
409                 return (ENXIO);
410         }
411
412         /* Allocate, clear, and link in our adapter structure */
413         adapter = device_get_softc(dev);
414         adapter->dev = adapter->osdep.dev = dev;
415         hw = &adapter->hw;
416
417         /* Core Lock Init*/
418         IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
419
420         /* Set up the timer callout */
421         callout_init_mp(&adapter->timer);
422
423         /* Determine hardware revision */
424         ixgbe_identify_hardware(adapter);
425
426         /* Enable bus mastering */
427         pci_enable_busmaster(dev);
428
429         /* Do base PCI setup - map BAR0 */
430         if (ixgbe_allocate_pci_resources(adapter)) {
431                 device_printf(dev, "Allocation of PCI resources failed\n");
432                 error = ENXIO;
433                 goto err_out;
434         }
435
436         /* Do descriptor calc and sanity checks */
437         if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
438             ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
439                 device_printf(dev, "TXD config issue, using default!\n");
440                 adapter->num_tx_desc = DEFAULT_TXD;
441         } else
442                 adapter->num_tx_desc = ixgbe_txd;
443
444         /*
445         ** With many RX rings it is easy to exceed the
446         ** system mbuf allocation. Tuning nmbclusters
447         ** can alleviate this.
448         */
449         if (nmbclusters > 0 ) {
450                 int s;
451                 s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
452                 if (s > nmbclusters) {
453                         device_printf(dev, "RX Descriptors exceed "
454                             "system mbuf max, using default instead!\n");
455                         ixgbe_rxd = DEFAULT_RXD;
456                 }
457         }
458
459         if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
460             ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
461                 device_printf(dev, "RXD config issue, using default!\n");
462                 adapter->num_rx_desc = DEFAULT_RXD;
463         } else
464                 adapter->num_rx_desc = ixgbe_rxd;
465
466         /* Allocate our TX/RX Queues */
467         if (ixgbe_allocate_queues(adapter)) {
468                 error = ENOMEM;
469                 goto err_out;
470         }
471
472         /* Allocate multicast array memory. */
473         adapter->mta = kmalloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
474             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
475         if (adapter->mta == NULL) {
476                 device_printf(dev, "Can not allocate multicast setup array\n");
477                 error = ENOMEM;
478                 goto err_late;
479         }
480
481         /* Initialize the shared code */
482         error = ixgbe_init_shared_code(hw);
483         if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
484                 /*
485                 ** No optics in this port, set up
486                 ** so the timer routine will probe 
487                 ** for later insertion.
488                 */
489                 adapter->sfp_probe = TRUE;
490                 error = 0;
491         } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
492                 device_printf(dev,"Unsupported SFP+ module detected!\n");
493                 error = EIO;
494                 goto err_late;
495         } else if (error) {
496                 device_printf(dev,"Unable to initialize the shared code\n");
497                 error = EIO;
498                 goto err_late;
499         }
500
501         /* Make sure we have a good EEPROM before we read from it */
502         if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
503                 device_printf(dev,"The EEPROM Checksum Is Not Valid\n");
504                 error = EIO;
505                 goto err_late;
506         }
507
508         error = ixgbe_init_hw(hw);
509         switch (error) {
510         case IXGBE_ERR_EEPROM_VERSION:
511                 device_printf(dev, "This device is a pre-production adapter/"
512                     "LOM.  Please be aware there may be issues associated "
513                     "with your hardware.\n If you are experiencing problems "
514                     "please contact your Intel or hardware representative "
515                     "who provided you with this hardware.\n");
516                 break;
517         case IXGBE_ERR_SFP_NOT_SUPPORTED:
518                 device_printf(dev,"Unsupported SFP+ Module\n");
519                 error = EIO;
520                 device_printf(dev,"Hardware Initialization Failure\n");
521                 goto err_late;
522         case IXGBE_ERR_SFP_NOT_PRESENT:
523                 device_printf(dev,"No SFP+ Module found\n");
524                 /* falls thru */
525         default:
526                 break;
527         }
528
529         /* Detect and set physical type */
530         ixgbe_setup_optics(adapter);
531
532         if ((adapter->msix > 1) && (ixgbe_enable_msix))
533                 error = ixgbe_allocate_msix(adapter); 
534         else
535                 error = ixgbe_allocate_legacy(adapter); 
536         if (error) 
537                 goto err_late;
538
539         /* Setup OS specific network interface */
540         if (ixgbe_setup_interface(dev, adapter) != 0)
541                 goto err_late;
542
543         /* Add sysctl tree */
544         ixgbe_add_sysctl(adapter);
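        /*
         * Per the prototype above, ixgbe_add_sysctl() creates the
         * adapter's sysctl tree in one place.  A minimal sketch of what
         * such a function might register, assuming the standard
         * sysctl(9) context API and an adapter->sysctl_tree node to
         * match the adapter->sysctl_ctx freed in ixgbe_detach():
         *
         *   sysctl_ctx_init(&adapter->sysctl_ctx);
         *   adapter->sysctl_tree = SYSCTL_ADD_NODE(&adapter->sysctl_ctx,
         *       SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
         *       device_get_nameunit(dev), CTLFLAG_RD, 0, "");
         *   SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
         *       SYSCTL_CHILDREN(adapter->sysctl_tree), OID_AUTO,
         *       "flow_control", CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
         *       ixgbe_set_flowcntl, "I", "Flow Control");
         */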
545
546         /* Initialize statistics */
547         ixgbe_update_stats_counters(adapter);
548
549         /* Register for VLAN events */
550         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
551             ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
552         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
553             ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
554
555         /* Print PCIE bus type/speed/width info */
556         ixgbe_get_bus_info(hw);
557         device_printf(dev,"PCI Express Bus: Speed %s %s\n",
558             ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
559             (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
560             (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
561             (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
562             (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
563             ("Unknown"));
564
565         if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
566             (hw->bus.speed == ixgbe_bus_speed_2500)) {
567                 device_printf(dev, "PCI-Express bandwidth available"
568                     " for this card\n     is not sufficient for"
569                     " optimal performance.\n");
570                 device_printf(dev, "For optimal performance an x8 "
571                     "PCIE, or x4 PCIE 2 slot is required.\n");
572         }
573
574         /* let hardware know driver is loaded */
575         ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
576         ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
577         IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
578
579         ixgbe_add_hw_stats(adapter);
580
581 #ifdef DEV_NETMAP
582         ixgbe_netmap_attach(adapter);
583 #endif /* DEV_NETMAP */
584         INIT_DEBUGOUT("ixgbe_attach: end");
585         return (0);
586 err_late:
587         ixgbe_free_transmit_structures(adapter);
588         ixgbe_free_receive_structures(adapter);
589 err_out:
590         if (adapter->ifp != NULL)
591                 if_free(adapter->ifp);
592         ixgbe_free_pci_resources(adapter);
593         kfree(adapter->mta, M_DEVBUF);
594         return (error);
595
596 }
597
598 /*********************************************************************
599  *  Device removal routine
600  *
601  *  The detach entry point is called when the driver is being removed.
602  *  This routine stops the adapter and deallocates all the resources
603  *  that were allocated for driver operation.
604  *
605  *  return 0 on success, positive on failure
606  *********************************************************************/
607
608 static int
609 ixgbe_detach(device_t dev)
610 {
611         struct adapter *adapter = device_get_softc(dev);
612         struct ix_queue *que = adapter->queues;
613         u32     ctrl_ext;
614
615         INIT_DEBUGOUT("ixgbe_detach: begin");
616
617         /* Make sure VLANS are not using driver */
618         if (adapter->ifp->if_vlantrunks != NULL) {
619                 device_printf(dev,"Vlan in use, detach first\n");
620                 return (EBUSY);
621         }
622
623         IXGBE_CORE_LOCK(adapter);
624         ixgbe_stop(adapter);
625         IXGBE_CORE_UNLOCK(adapter);
626
627         for (int i = 0; i < adapter->num_queues; i++, que++) {
628                 if (que->tq) {
629                         taskqueue_drain(que->tq, &que->que_task);
630                         taskqueue_free(que->tq);
631                 }
632         }
633
634         /* Drain the Link queue */
635         if (adapter->tq) {
636                 taskqueue_drain(adapter->tq, &adapter->link_task);
637                 taskqueue_drain(adapter->tq, &adapter->mod_task);
638                 taskqueue_drain(adapter->tq, &adapter->msf_task);
639 #ifdef IXGBE_FDIR
640                 taskqueue_drain(adapter->tq, &adapter->fdir_task);
641 #endif
642                 taskqueue_free(adapter->tq);
643         }
644
645         /* let hardware know driver is unloading */
646         ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
647         ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
648         IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
649
650         /* Unregister VLAN events */
651         if (adapter->vlan_attach != NULL)
652                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
653         if (adapter->vlan_detach != NULL)
654                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
655
656         ether_ifdetach(adapter->ifp);
657         callout_stop(&adapter->timer);
658 #ifdef DEV_NETMAP
659         netmap_detach(adapter->ifp);
660 #endif /* DEV_NETMAP */
661         ixgbe_free_pci_resources(adapter);
662         bus_generic_detach(dev);
663         if_free(adapter->ifp);
664
665         ixgbe_free_transmit_structures(adapter);
666         ixgbe_free_receive_structures(adapter);
667         kfree(adapter->mta, M_DEVBUF);
668         sysctl_ctx_free(&adapter->sysctl_ctx);
669         
670         IXGBE_CORE_LOCK_DESTROY(adapter);
671         return (0);
672 }
673
674 /*********************************************************************
675  *
676  *  Shutdown entry point
677  *
678  **********************************************************************/
679
680 static int
681 ixgbe_shutdown(device_t dev)
682 {
683         struct adapter *adapter = device_get_softc(dev);
684         IXGBE_CORE_LOCK(adapter);
685         ixgbe_stop(adapter);
686         IXGBE_CORE_UNLOCK(adapter);
687         return (0);
688 }
689
690
691 /*********************************************************************
692  *  Transmit entry point
693  *
694  *  ixgbe_start is called by the stack to initiate a transmit.
695  *  The driver will remain in this routine as long as there are
696  *  packets to transmit and transmit resources are available.
697  *  In case resources are not available stack is notified and
698  *  the packet is requeued.
699  **********************************************************************/
700
701 static void
702 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
703 {
704         struct mbuf    *m_head;
705         struct adapter *adapter = txr->adapter;
706
707         IXGBE_TX_LOCK_ASSERT(txr);
708
709         if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
710                 return;
711         if (!adapter->link_active)
712                 return;
713
714         while (!ifq_is_empty(&ifp->if_snd)) {
715                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) {
716                         txr->queue_status |= IXGBE_QUEUE_DEPLETED;
717                         break;
718                 }
719
720                 m_head = ifq_dequeue(&ifp->if_snd, NULL);
721                 if (m_head == NULL)
722                         break;
723
724                 if (ixgbe_xmit(txr, &m_head)) {
725 #if 0 /* XXX: prepend to an ALTQ queue ? */
726                         if (m_head != NULL)
727                                 IF_PREPEND(&ifp->if_snd, m_head);
728 #endif
729                         if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
730                                 txr->queue_status |= IXGBE_QUEUE_DEPLETED;
731                         break;
732                 }
733                 /* Send a copy of the frame to the BPF listener */
734                 ETHER_BPF_MTAP(ifp, m_head);
735
736                 /* Set watchdog on */
737                 txr->watchdog_time = ticks;
738                 txr->queue_status = IXGBE_QUEUE_WORKING;
739
740         }
741         return;
742 }
743
744 /*
745  * Legacy TX start - called by the stack, this
746  * always uses the first tx ring, and should
747  * not be used with multiqueue tx enabled.
748  */
749 static void
750 ixgbe_start(struct ifnet *ifp)
751 {
752         struct adapter *adapter = ifp->if_softc;
753         struct tx_ring  *txr = adapter->tx_rings;
754
755         if (ifp->if_flags & IFF_RUNNING) {
756                 IXGBE_TX_LOCK(txr);
757                 ixgbe_start_locked(txr, ifp);
758                 IXGBE_TX_UNLOCK(txr);
759         }
760         return;
761 }
762
763 #if 0 /* __FreeBSD_version >= 800000 */
764 /*
765 ** Multiqueue Transmit driver
766 **
767 */
768 static int
769 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
770 {
771         struct adapter  *adapter = ifp->if_softc;
772         struct ix_queue *que;
773         struct tx_ring  *txr;
774         int             i = 0, err = 0;
775
776         /* Which queue to use */
777         if ((m->m_flags & M_FLOWID) != 0)
778                 i = m->m_pkthdr.flowid % adapter->num_queues;
779         else
780                 i = curcpu % adapter->num_queues;
781
782         txr = &adapter->tx_rings[i];
783         que = &adapter->queues[i];
784
785         if (((txr->queue_status & IXGBE_QUEUE_DEPLETED) == 0) &&
786             IXGBE_TX_TRYLOCK(txr)) {
787                 err = ixgbe_mq_start_locked(ifp, txr, m);
788                 IXGBE_TX_UNLOCK(txr);
789         } else {
790                 err = drbr_enqueue(ifp, txr->br, m);
791                 taskqueue_enqueue(que->tq, &que->que_task);
792         }
793
794         return (err);
795 }
796
797 static int
798 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
799 {
800         struct adapter  *adapter = txr->adapter;
801         struct mbuf     *next;
802         int             enqueued, err = 0;
803
804         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
805             (txr->queue_status == IXGBE_QUEUE_DEPLETED) ||
806             adapter->link_active == 0) {
807                 if (m != NULL)
808                         err = drbr_enqueue(ifp, txr->br, m);
809                 return (err);
810         }
811
812         enqueued = 0;
813         if (m == NULL) {
814                 next = drbr_dequeue(ifp, txr->br);
815         } else if (drbr_needs_enqueue(ifp, txr->br)) {
816                 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
817                         return (err);
818                 next = drbr_dequeue(ifp, txr->br);
819         } else
820                 next = m;
821
822         /* Process the queue */
823         while (next != NULL) {
824                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
825                         if (next != NULL)
826                                 err = drbr_enqueue(ifp, txr->br, next);
827                         break;
828                 }
829                 enqueued++;
830                 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
831                 /* Send a copy of the frame to the BPF listener */
832                 ETHER_BPF_MTAP(ifp, next);
833                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
834                         break;
835                 if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
836                         ixgbe_txeof(txr);
837                 if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD) {
838                         txr->queue_status |= IXGBE_QUEUE_DEPLETED;
839                         break;
840                 }
841                 next = drbr_dequeue(ifp, txr->br);
842         }
843
844         if (enqueued > 0) {
845                 /* Set watchdog on */
846                 txr->queue_status |= IXGBE_QUEUE_WORKING;
847                 txr->watchdog_time = ticks;
848         }
849
850         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
851                 ixgbe_txeof(txr);
852
853         return (err);
854 }
855
856 /*
857 ** Flush all ring buffers
858 */
859 static void
860 ixgbe_qflush(struct ifnet *ifp)
861 {
862         struct adapter  *adapter = ifp->if_softc;
863         struct tx_ring  *txr = adapter->tx_rings;
864         struct mbuf     *m;
865
866         for (int i = 0; i < adapter->num_queues; i++, txr++) {
867                 IXGBE_TX_LOCK(txr);
868                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
869                         m_freem(m);
870                 IXGBE_TX_UNLOCK(txr);
871         }
872         if_qflush(ifp);
873 }
874 #endif /* __FreeBSD_version >= 800000 */
875
876 /*********************************************************************
877  *  Ioctl entry point
878  *
879  *  ixgbe_ioctl is called when the user wants to configure the
880  *  interface.
881  *
882  *  return 0 on success, positive on failure
883  **********************************************************************/
884
885 static int
886 ixgbe_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
887 {
888         struct adapter  *adapter = ifp->if_softc;
889         struct ifreq    *ifr = (struct ifreq *) data;
890 #if defined(INET) || defined(INET6)
891         struct ifaddr *ifa = (struct ifaddr *)data;
892         bool            avoid_reset = FALSE;
893 #endif
894         int             error = 0;
895
896         switch (command) {
897
898         case SIOCSIFADDR:
899 #ifdef INET
900                 if (ifa->ifa_addr->sa_family == AF_INET)
901                         avoid_reset = TRUE;
902 #endif
903 #ifdef INET6
904                 if (ifa->ifa_addr->sa_family == AF_INET6)
905                         avoid_reset = TRUE;
906 #endif
907 #if defined(INET) || defined(INET6)
908                 /*
909                 ** Calling init results in link renegotiation,
910                 ** so we avoid doing it when possible.
911                 */
912                 if (avoid_reset) {
913                         ifp->if_flags |= IFF_UP;
914                         if (!(ifp->if_flags & IFF_RUNNING))
915                                 ixgbe_init(adapter);
916                         if (!(ifp->if_flags & IFF_NOARP))
917                                 arp_ifinit(ifp, ifa);
918                 } else
919                         error = ether_ioctl(ifp, command, data);
920 #endif
921                 break;
922         case SIOCSIFMTU:
923                 IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
924                 if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
925                         error = EINVAL;
926                 } else {
927                         IXGBE_CORE_LOCK(adapter);
928                         ifp->if_mtu = ifr->ifr_mtu;
929                         adapter->max_frame_size =
930                                 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
931                         ixgbe_init_locked(adapter);
932                         IXGBE_CORE_UNLOCK(adapter);
933                 }
934                 break;
935         case SIOCSIFFLAGS:
936                 IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
937                 IXGBE_CORE_LOCK(adapter);
938                 if (ifp->if_flags & IFF_UP) {
939                         if ((ifp->if_flags & IFF_RUNNING)) {
940                                 if ((ifp->if_flags ^ adapter->if_flags) &
941                                     (IFF_PROMISC | IFF_ALLMULTI)) {
942                                         ixgbe_set_promisc(adapter);
943                                 }
944                         } else
945                                 ixgbe_init_locked(adapter);
946                 } else
947                         if (ifp->if_flags & IFF_RUNNING)
948                                 ixgbe_stop(adapter);
949                 adapter->if_flags = ifp->if_flags;
950                 IXGBE_CORE_UNLOCK(adapter);
951                 break;
952         case SIOCADDMULTI:
953         case SIOCDELMULTI:
954                 IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
955                 if (ifp->if_flags & IFF_RUNNING) {
956                         IXGBE_CORE_LOCK(adapter);
957                         ixgbe_disable_intr(adapter);
958                         ixgbe_set_multi(adapter);
959                         ixgbe_enable_intr(adapter);
960                         IXGBE_CORE_UNLOCK(adapter);
961                 }
962                 break;
963         case SIOCSIFMEDIA:
964         case SIOCGIFMEDIA:
965                 IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
966                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
967                 break;
968         case SIOCSIFCAP:
969         {
970                 int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
971                 IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
972                 if (mask & IFCAP_HWCSUM)
973                         ifp->if_capenable ^= IFCAP_HWCSUM;
974                 if (mask & IFCAP_TSO4)
975                         ifp->if_capenable ^= IFCAP_TSO4;
976                 if (mask & IFCAP_TSO6)
977                         ifp->if_capenable ^= IFCAP_TSO6;
978 #if 0 /* NET_LRO */
979                 if (mask & IFCAP_LRO)
980                         ifp->if_capenable ^= IFCAP_LRO;
981 #endif
982                 if (mask & IFCAP_VLAN_HWTAGGING)
983                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
984                 if (mask & IFCAP_VLAN_HWFILTER)
985                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
986 #if 0 /* NET_TSO */
987                 if (mask & IFCAP_VLAN_HWTSO)
988                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
989 #endif
990                 if (ifp->if_flags & IFF_RUNNING) {
991                         IXGBE_CORE_LOCK(adapter);
992                         ixgbe_init_locked(adapter);
993                         IXGBE_CORE_UNLOCK(adapter);
994                 }
995 #if 0
996                 VLAN_CAPABILITIES(ifp);
997 #endif
998                 break;
999         }
1000
1001         default:
1002                 IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
1003                 error = ether_ioctl(ifp, command, data);
1004                 break;
1005         }
1006
1007         return (error);
1008 }
1009
1010 /*********************************************************************
1011  *  Init entry point
1012  *
1013  *  This routine is used in two ways. It is used by the stack as
1014  *  init entry point in network interface structure. It is also used
1015  *  by the driver as a hw/sw initialization routine to get to a
1016  *  consistent state.
1017  *
1018  *  return 0 on success, positive on failure
1019  **********************************************************************/
1020 #define IXGBE_MHADD_MFS_SHIFT 16
1021
1022 static void
1023 ixgbe_init_locked(struct adapter *adapter)
1024 {
1025         struct ifnet   *ifp = adapter->ifp;
1026         device_t        dev = adapter->dev;
1027         struct ixgbe_hw *hw = &adapter->hw;
1028         u32             k, txdctl, mhadd, gpie;
1029         u32             rxdctl, rxctrl;
1030
1031         KKASSERT(lockstatus(&adapter->core_lock, curthread) != 0);
1032         INIT_DEBUGOUT("ixgbe_init: begin");
1033         hw->adapter_stopped = FALSE;
1034         ixgbe_stop_adapter(hw);
1035         callout_stop(&adapter->timer);
1036
1037         /* reprogram the RAR[0] in case user changed it. */
1038         ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
1039
1040         /* Get the latest mac address, User can use a LAA */
1041         bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
1042               IXGBE_ETH_LENGTH_OF_ADDRESS);
1043         ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
1044         hw->addr_ctrl.rar_used_count = 1;
1045
1046         /* Set the various hardware offload abilities */
1047         ifp->if_hwassist = 0;
1048         if (ifp->if_capenable & IFCAP_TSO)
1049                 ifp->if_hwassist |= CSUM_TSO;
1050         if (ifp->if_capenable & IFCAP_TXCSUM) {
1051                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1052 #if 0
1053                 if (hw->mac.type != ixgbe_mac_82598EB)
1054                         ifp->if_hwassist |= CSUM_SCTP;
1055 #endif
1056         }
1057
1058         /* Prepare transmit descriptors and buffers */
1059         if (ixgbe_setup_transmit_structures(adapter)) {
1060                 device_printf(dev,"Could not setup transmit structures\n");
1061                 ixgbe_stop(adapter);
1062                 return;
1063         }
1064
1065         ixgbe_init_hw(hw);
1066         ixgbe_initialize_transmit_units(adapter);
1067
1068         /* Setup Multicast table */
1069         ixgbe_set_multi(adapter);
1070
1071         /*
1072         ** Determine the correct mbuf pool
1073         ** for doing jumbo/headersplit
1074         */
1075         if (adapter->max_frame_size <= 2048)
1076                 adapter->rx_mbuf_sz = MCLBYTES;
1077         else if (adapter->max_frame_size <= 4096)
1078                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1079         else if (adapter->max_frame_size <= 9216)
1080                 adapter->rx_mbuf_sz = MJUM9BYTES;
1081         else
1082                 adapter->rx_mbuf_sz = MJUM16BYTES;
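        /*
         * For reference: MCLBYTES is a 2KB cluster, MJUMPAGESIZE is a
         * page-sized (typically 4KB) cluster, and MJUM9BYTES/MJUM16BYTES
         * are 9KB/16KB jumbo clusters.
         */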
1083
1084         /* Prepare receive descriptors and buffers */
1085         if (ixgbe_setup_receive_structures(adapter)) {
1086                 device_printf(dev,"Could not setup receive structures\n");
1087                 ixgbe_stop(adapter);
1088                 return;
1089         }
1090
1091         /* Configure RX settings */
1092         ixgbe_initialize_receive_units(adapter);
1093
1094         gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
1095
1096         /* Enable Fan Failure Interrupt */
1097         gpie |= IXGBE_SDP1_GPIEN;
1098
1099         /* Add for Module detection */
1100         if (hw->mac.type == ixgbe_mac_82599EB)
1101                 gpie |= IXGBE_SDP2_GPIEN;
1102
1103         /* Thermal Failure Detection */
1104         if (hw->mac.type == ixgbe_mac_X540)
1105                 gpie |= IXGBE_SDP0_GPIEN;
1106
1107         if (adapter->msix > 1) {
1108                 /* Enable Enhanced MSIX mode */
1109                 gpie |= IXGBE_GPIE_MSIX_MODE;
1110                 gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
1111                     IXGBE_GPIE_OCD;
1112         }
1113         IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
1114
1115         /* Set MTU size */
1116         if (ifp->if_mtu > ETHERMTU) {
1117                 mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
1118                 mhadd &= ~IXGBE_MHADD_MFS_MASK;
1119                 mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
1120                 IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
1121         }
1122         
1123         /* Now enable all the queues */
1124
1125         for (int i = 0; i < adapter->num_queues; i++) {
1126                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
1127                 txdctl |= IXGBE_TXDCTL_ENABLE;
1128                 /* Set WTHRESH to 8, burst writeback */
1129                 txdctl |= (8 << 16);
1130                 /*
1131                  * When the internal queue falls below PTHRESH (32),
1132                  * start prefetching as long as there are at least
1133                  * HTHRESH (1) buffers ready. The values are taken
1134                  * from the Intel linux driver 3.8.21.
1135                  * Prefetching enables tx line rate even with 1 queue.
1136                  */
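                /*
                 * Field layout implied by the shifts used here:
                 * PTHRESH occupies bits 6:0, HTHRESH bits 14:8 and
                 * WTHRESH bits 22:16 of TXDCTL.
                 */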
1137                 txdctl |= (32 << 0) | (1 << 8);
1138                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
1139         }
1140
1141         for (int i = 0; i < adapter->num_queues; i++) {
1142                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
1143                 if (hw->mac.type == ixgbe_mac_82598EB) {
1144                         /*
1145                         ** PTHRESH = 21
1146                         ** HTHRESH = 4
1147                         ** WTHRESH = 8
1148                         */
1149                         rxdctl &= ~0x3FFFFF;
1150                         rxdctl |= 0x080420;
1151                 }
1152                 rxdctl |= IXGBE_RXDCTL_ENABLE;
1153                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
1154                 for (k = 0; k < 10; k++) {
1155                         if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
1156                             IXGBE_RXDCTL_ENABLE)
1157                                 break;
1158                         else
1159                                 msec_delay(1);
1160                 }
1161                 wmb();
1162 #ifdef DEV_NETMAP
1163                 /*
1164                  * In netmap mode, we must preserve the buffers made
1165                  * available to userspace before the if_init()
1166                  * (this is true by default on the TX side, because
1167                  * init makes all buffers available to userspace).
1168                  *
1169                  * netmap_reset() and the device specific routines
1170                  * (e.g. ixgbe_setup_receive_rings()) map these
1171                  * buffers at the end of the NIC ring, so here we
1172                  * must set the RDT (tail) register to make sure
1173                  * they are not overwritten.
1174                  *
1175                  * In this driver the NIC ring starts at RDH = 0,
1176                  * RDT points to the last slot available for reception (?),
1177                  * so RDT = num_rx_desc - 1 means the whole ring is available.
1178                  */
1179                 if (ifp->if_capenable & IFCAP_NETMAP) {
1180                         struct netmap_adapter *na = NA(adapter->ifp);
1181                         struct netmap_kring *kring = &na->rx_rings[i];
1182                         int t = na->num_rx_desc - 1 - kring->nr_hwavail;
1183
1184                         IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
1185                 } else
1186 #endif /* DEV_NETMAP */
1187                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
1188         }
1189
1190         /* Set up VLAN support and filter */
1191         ixgbe_setup_vlan_hw_support(adapter);
1192
1193         /* Enable Receive engine */
1194         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1195         if (hw->mac.type == ixgbe_mac_82598EB)
1196                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
1197         rxctrl |= IXGBE_RXCTRL_RXEN;
1198         ixgbe_enable_rx_dma(hw, rxctrl);
1199
1200         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
1201
1202         /* Set up MSI/X routing */
1203         if (ixgbe_enable_msix)  {
1204                 ixgbe_configure_ivars(adapter);
1205                 /* Set up auto-mask */
1206                 if (hw->mac.type == ixgbe_mac_82598EB)
1207                         IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1208                 else {
1209                         IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
1210                         IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
1211                 }
1212         } else {  /* Simple settings for Legacy/MSI */
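                /*
                 * ixgbe_set_ivar(adapter, entry, vector, type): judging
                 * from its use here, type 0 selects the RX IVAR and 1
                 * the TX IVAR, so queue 0's RX and TX interrupts are
                 * both routed to vector 0.
                 */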
1213                 ixgbe_set_ivar(adapter, 0, 0, 0);
1214                 ixgbe_set_ivar(adapter, 0, 0, 1);
1215                 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1216         }
1217
1218 #ifdef IXGBE_FDIR
1219         /* Init Flow director */
1220         if (hw->mac.type != ixgbe_mac_82598EB) {
1221                 u32 hdrm = 32 << fdir_pballoc;
1222
1223                 hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
1224                 ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
1225         }
1226 #endif
1227
1228         /*
1229         ** Check on any SFP devices that
1230         ** need to be kick-started
1231         */
1232         if (hw->phy.type == ixgbe_phy_none) {
1233                 int err = hw->phy.ops.identify(hw);
1234                 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1235                         device_printf(dev,
1236                             "Unsupported SFP+ module type was detected.\n");
1237                         return;
1238                 }
1239         }
1240
1241         /* Set moderation on the Link interrupt */
1242         IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
1243
1244         /* Config/Enable Link */
1245         ixgbe_config_link(adapter);
1246
1247         /* Hardware Packet Buffer & Flow Control setup */
1248         {
1249                 u32 rxpb, frame, size, tmp;
1250
1251                 frame = adapter->max_frame_size;
1252
1253                 /* Calculate High Water */
1254                 if (hw->mac.type == ixgbe_mac_X540)
1255                         tmp = IXGBE_DV_X540(frame, frame);
1256                 else
1257                         tmp = IXGBE_DV(frame, frame);
1258                 size = IXGBE_BT2KB(tmp);
1259                 rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
1260                 hw->fc.high_water[0] = rxpb - size;
1261
1262                 /* Now calculate Low Water */
1263                 if (hw->mac.type == ixgbe_mac_X540)
1264                         tmp = IXGBE_LOW_DV_X540(frame);
1265                 else
1266                         tmp = IXGBE_LOW_DV(frame);
1267                 hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1268                 
1269                 adapter->fc = hw->fc.requested_mode = ixgbe_fc_full;
1270                 hw->fc.pause_time = IXGBE_FC_PAUSE;
1271                 hw->fc.send_xon = TRUE;
1272         }
1273         /* Initialize the FC settings */
1274         ixgbe_start_hw(hw);
1275
1276         /* And now turn on interrupts */
1277         ixgbe_enable_intr(adapter);
1278
1279         /* Now inform the stack we're ready */
1280         ifp->if_flags |= IFF_RUNNING;
1281         ifp->if_flags &= ~IFF_OACTIVE;
1282
1283         return;
1284 }
1285
1286 static void
1287 ixgbe_init(void *arg)
1288 {
1289         struct adapter *adapter = arg;
1290
1291         IXGBE_CORE_LOCK(adapter);
1292         ixgbe_init_locked(adapter);
1293         IXGBE_CORE_UNLOCK(adapter);
1294         return;
1295 }
1296
1297
1298 /*
1299 **
1300 ** MSIX Interrupt Handlers and Tasklets
1301 **
1302 */
1303
1304 static inline void
1305 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
1306 {
1307         struct ixgbe_hw *hw = &adapter->hw;
1308         u64     queue = (u64)1 << vector;
1309         u32     mask;
1310
1311         if (hw->mac.type == ixgbe_mac_82598EB) {
1312                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1313                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
1314         } else {
1315                 mask = (queue & 0xFFFFFFFF);
1316                 if (mask)
1317                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
1318                 mask = (queue >> 32);
1319                 if (mask)
1320                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
1321         }
1322 }
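/*
 * On 82599/X540 the extended interrupt mask is split across two 32-bit
 * registers (EIMS_EX(0)/EIMS_EX(1), with EIMC/EIMC_EX to clear bits),
 * hence the low/high word handling above; the 82598 uses a single
 * EIMS/EIMC pair.
 */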
1323
1324 static inline void
1325 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1326 {
1327         struct ixgbe_hw *hw = &adapter->hw;
1328         u64     queue = (u64)1 << vector;
1329         u32     mask;
1330
1331         if (hw->mac.type == ixgbe_mac_82598EB) {
1332                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1333                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1334         } else {
1335                 mask = (queue & 0xFFFFFFFF);
1336                 if (mask)
1337                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1338                 mask = (queue >> 32);
1339                 if (mask)
1340                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1341         }
1342 }
1343
1344 static inline void
1345 ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
1346 {
1347         u32 mask;
1348
1349         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
1350                 mask = (IXGBE_EIMS_RTX_QUEUE & queues);
1351                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
1352         } else {
1353                 mask = (queues & 0xFFFFFFFF);
1354                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
1355                 mask = (queues >> 32);
1356                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
1357         }
1358 }
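/*
 * Writing queue bits to EICS (Extended Interrupt Cause Set) raises the
 * corresponding interrupts, so ixgbe_rearm_queues() can be used to
 * re-trigger service for queues that still have work pending.
 */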
1359
1360
1361 static void
1362 ixgbe_handle_que(void *context, int pending)
1363 {
1364         struct ix_queue *que = context;
1365         struct adapter  *adapter = que->adapter;
1366         struct tx_ring  *txr = que->txr;
1367         struct ifnet    *ifp = adapter->ifp;
1368         bool            more;
1369
1370         if (ifp->if_flags & IFF_RUNNING) {
1371                 more = ixgbe_rxeof(que, adapter->rx_process_limit);
1372                 IXGBE_TX_LOCK(txr);
1373                 ixgbe_txeof(txr);
1374 #if 0 /*__FreeBSD_version >= 800000*/
1375                 if (!drbr_empty(ifp, txr->br))
1376                         ixgbe_mq_start_locked(ifp, txr, NULL);
1377 #else
1378                 if (!ifq_is_empty(&ifp->if_snd))
1379                         ixgbe_start_locked(txr, ifp);
1380 #endif
1381                 IXGBE_TX_UNLOCK(txr);
1382                 if (more) {
1383                         taskqueue_enqueue(que->tq, &que->que_task);
1384                         return;
1385                 }
1386         }
1387
1388         /* Reenable this interrupt */
1389         ixgbe_enable_queue(adapter, que->msix);
1390         return;
1391 }
1392
1393
1394 /*********************************************************************
1395  *
1396  *  Legacy Interrupt Service routine
1397  *
1398  **********************************************************************/
1399
1400 static void
1401 ixgbe_legacy_irq(void *arg)
1402 {
1403         struct ix_queue *que = arg;
1404         struct adapter  *adapter = que->adapter;
1405         struct ixgbe_hw *hw = &adapter->hw;
1406         struct          tx_ring *txr = adapter->tx_rings;
1407         bool            more_tx, more_rx;
1408         u32             reg_eicr, loop = MAX_LOOP;
1409
1410
1411         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1412
1413         ++que->irqs;
1414         if (reg_eicr == 0) {
1415                 ixgbe_enable_intr(adapter);
1416                 return;
1417         }
1418
1419         more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
1420
1421         IXGBE_TX_LOCK(txr);
1422         do {
1423                 more_tx = ixgbe_txeof(txr);
1424         } while (loop-- && more_tx);
1425         IXGBE_TX_UNLOCK(txr);
1426
1427         if (more_rx || more_tx)
1428                 taskqueue_enqueue(que->tq, &que->que_task);
1429
1430         /* Check for fan failure */
1431         if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1432             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1433                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1434                     "REPLACE IMMEDIATELY!!\n");
1435                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1436         }
1437
1438         /* Link status change */
1439         if (reg_eicr & IXGBE_EICR_LSC)
1440                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1441
1442         ixgbe_enable_intr(adapter);
1443         return;
1444 }
1445
1446
1447 /*********************************************************************
1448  *
1449  *  MSIX Queue Interrupt Service routine
1450  *
1451  **********************************************************************/
1452 void
1453 ixgbe_msix_que(void *arg)
1454 {
1455         struct ix_queue *que = arg;
1456         struct adapter  *adapter = que->adapter;
1457         struct tx_ring  *txr = que->txr;
1458         struct rx_ring  *rxr = que->rxr;
1459         bool            more_tx, more_rx;
1460         u32             newitr = 0;
1461
1462         ixgbe_disable_queue(adapter, que->msix);
1463         ++que->irqs;
1464
1465         more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
1466
1467         IXGBE_TX_LOCK(txr);
1468         more_tx = ixgbe_txeof(txr);
1469         /*
1470         ** Make certain that if the stack 
1471         ** has anything queued the task gets
1472         ** scheduled to handle it.
1473         */
1474 #if 0
1475 #if __FreeBSD_version < 800000
1476         if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd))
1477 #else
1478         if (!drbr_empty(adapter->ifp, txr->br))
1479 #endif
1480 #endif
1481         if (!ifq_is_empty(&adapter->ifp->if_snd))
1482                 more_tx = 1;
1483         IXGBE_TX_UNLOCK(txr);
1484
1485         /* Do AIM now? */
1486
1487         if (ixgbe_enable_aim == FALSE)
1488                 goto no_calc;
1489         /*
1490         ** Do Adaptive Interrupt Moderation:
1491         **  - Write out last calculated setting
1492         **  - Calculate based on average size over
1493         **    the last interval.
1494         */
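        /*
        ** Worked example (hypothetical numbers): 9000 bytes in 6 packets
        ** over the last interval gives 9000/6 + 24 = 1524; that is under
        ** the 3000 cap and not in the 300-1200 mid range, so 1524/2 = 762
        ** is written at the next interrupt (non-82598 MACs also OR in
        ** IXGBE_EITR_CNT_WDIS, the 82598 mirrors the value into the
        ** upper 16 bits).
        */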
1495         if (que->eitr_setting)
1496                 IXGBE_WRITE_REG(&adapter->hw,
1497                     IXGBE_EITR(que->msix), que->eitr_setting);
1498  
1499         que->eitr_setting = 0;
1500
1501         /* Idle, do nothing */
1502         if ((txr->bytes == 0) && (rxr->bytes == 0))
1503                 goto no_calc;
1504                                 
1505         if ((txr->bytes) && (txr->packets))
1506                 newitr = txr->bytes/txr->packets;
1507         if ((rxr->bytes) && (rxr->packets))
1508                 newitr = max(newitr,
1509                     (rxr->bytes / rxr->packets));
1510         newitr += 24; /* account for hardware frame, crc */
1511
1512         /* set an upper boundary */
1513         newitr = min(newitr, 3000);
1514
1515         /* Be nice to the mid range */
1516         if ((newitr > 300) && (newitr < 1200))
1517                 newitr = (newitr / 3);
1518         else
1519                 newitr = (newitr / 2);
1520
1521         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1522                 newitr |= newitr << 16;
1523         else
1524                 newitr |= IXGBE_EITR_CNT_WDIS;
1525                  
1526         /* save for next interrupt */
1527         que->eitr_setting = newitr;
1528
1529         /* Reset state */
1530         txr->bytes = 0;
1531         txr->packets = 0;
1532         rxr->bytes = 0;
1533         rxr->packets = 0;
1534
1535 no_calc:
1536         if (more_tx || more_rx)
1537                 taskqueue_enqueue(que->tq, &que->que_task);
1538         else /* Reenable this interrupt */
1539                 ixgbe_enable_queue(adapter, que->msix);
1540         return;
1541 }
1542
1543
1544 static void
1545 ixgbe_msix_link(void *arg)
1546 {
1547         struct adapter  *adapter = arg;
1548         struct ixgbe_hw *hw = &adapter->hw;
1549         u32             reg_eicr;
1550
1551         ++adapter->link_irq;
1552
1553         /* First get the cause */
1554         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1555         /* Clear interrupt with write */
1556         IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1557
1558         /* Link status change */
1559         if (reg_eicr & IXGBE_EICR_LSC)
1560                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1561
1562         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1563 #ifdef IXGBE_FDIR
1564                 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1565                         /* This is probably overkill :) */
1566                         if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1567                                 return;
1568                         /* Disable the interrupt */
1569                         IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1570                         taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
1571                 } else
1572 #endif
1573                 if (reg_eicr & IXGBE_EICR_ECC) {
1574                         device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1575                             "Please Reboot!!\n");
1576                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1577                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1580                         /* Clear the interrupt */
1581                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1582                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
1583                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1584                         /* Clear the interrupt */
1585                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1586                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
1587                 }
1588         } 
1589
1590         /* Check for fan failure */
1591         if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1592             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1593                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1594                     "REPLACE IMMEDIATELY!!\n");
1595                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1596         }
1597
1598         /* Check for over temp condition */
1599         if ((hw->mac.type == ixgbe_mac_X540) &&
1600             (reg_eicr & IXGBE_EICR_GPI_SDP0)) {
1601                 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1602                     "PHY IS SHUT DOWN!!\n");
1603                 device_printf(adapter->dev, "System shutdown required\n");
1604                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0);
1605         }
1606
1607         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1608         return;
1609 }
1610
1611 /*********************************************************************
1612  *
1613  *  Media Ioctl callback
1614  *
1615  *  This routine is called whenever the user queries the status of
1616  *  the interface using ifconfig.
1617  *
1618  **********************************************************************/
1619 static void
1620 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1621 {
1622         struct adapter *adapter = ifp->if_softc;
1623
1624         INIT_DEBUGOUT("ixgbe_media_status: begin");
1625         IXGBE_CORE_LOCK(adapter);
1626         ixgbe_update_link_status(adapter);
1627
1628         ifmr->ifm_status = IFM_AVALID;
1629         ifmr->ifm_active = IFM_ETHER;
1630
1631         if (!adapter->link_active) {
1632                 IXGBE_CORE_UNLOCK(adapter);
1633                 return;
1634         }
1635
1636         ifmr->ifm_status |= IFM_ACTIVE;
1637
1638         switch (adapter->link_speed) {
1639                 case IXGBE_LINK_SPEED_100_FULL:
1640                         ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1641                         break;
1642                 case IXGBE_LINK_SPEED_1GB_FULL:
1643                         ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1644                         break;
1645                 case IXGBE_LINK_SPEED_10GB_FULL:
1646                         ifmr->ifm_active |= adapter->optics | IFM_FDX;
1647                         break;
1648         }
1649
1650         IXGBE_CORE_UNLOCK(adapter);
1651
1652         return;
1653 }
1654
1655 /*********************************************************************
1656  *
1657  *  Media Ioctl callback
1658  *
1659  *  This routine is called when the user changes speed/duplex using
1660  *  media/mediaopt option with ifconfig.
1661  *
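 *  Only autoselect is currently accepted, e.g.
 *  "ifconfig ix0 media autoselect" (interface name illustrative);
 *  any fixed media selection returns EINVAL.
 *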
1662  **********************************************************************/
1663 static int
1664 ixgbe_media_change(struct ifnet * ifp)
1665 {
1666         struct adapter *adapter = ifp->if_softc;
1667         struct ifmedia *ifm = &adapter->media;
1668
1669         INIT_DEBUGOUT("ixgbe_media_change: begin");
1670
1671         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1672                 return (EINVAL);
1673
1674         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1675         case IFM_AUTO:
1676                 adapter->hw.phy.autoneg_advertised =
1677                     IXGBE_LINK_SPEED_100_FULL |
1678                     IXGBE_LINK_SPEED_1GB_FULL |
1679                     IXGBE_LINK_SPEED_10GB_FULL;
1680                 break;
1681         default:
1682                 device_printf(adapter->dev, "Only auto media type\n");
1683                 return (EINVAL);
1684         }
1685
1686         return (0);
1687 }
1688
1689 /*********************************************************************
1690  *
1691  *  This routine maps the mbufs to tx descriptors, allowing the
1692  *  TX engine to transmit the packets. 
1693  *      - return 0 on success, positive on failure
1694  *
1695  **********************************************************************/
1696
1697 static int
1698 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1699 {
1700         struct adapter  *adapter = txr->adapter;
1701         u32             olinfo_status = 0, cmd_type_len;
1702         u32             paylen = 0;
1703         int             i, j, error, nsegs, maxsegs;
1704         int             first, last = 0;
1705         struct mbuf     *m_head;
1706         bus_dma_segment_t segs[adapter->num_segs];
1707         bus_dmamap_t    map;
1708         struct ixgbe_tx_buf *txbuf;
1709         union ixgbe_adv_tx_desc *txd = NULL;
1710
1711         m_head = *m_headp;
1712
1713         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1714                 error = ixgbe_tso_pullup(txr, m_headp);
1715                 if (error)
1716                         return error;
1717                 m_head = *m_headp;
1718         }
1719
1720         /* Basic descriptor defines */
1721         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1722             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1723
1724         if (m_head->m_flags & M_VLANTAG)
1725                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1726
1727         /*
1728          * Important to capture the first descriptor
1729          * used because it will contain the index of
1730          * the one we tell the hardware to report back
1731          */
1732         first = txr->next_avail_desc;
1733         txbuf = &txr->tx_buffers[first];
1734         map = txbuf->map;
1735
1736         /*
1737          * Map the packet for DMA.
1738          */
1739         maxsegs = txr->tx_avail - IXGBE_TX_RESERVED;
1740         if (maxsegs > adapter->num_segs)
1741                 maxsegs = adapter->num_segs;
1742
1743         error = bus_dmamap_load_mbuf_defrag(txr->txtag, map, m_headp,
1744             segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1745         if (error) {
1746                 if (error == ENOBUFS)
1747                         adapter->mbuf_defrag_failed++;
1748                 else
1749                         adapter->no_tx_dma_setup++;
1750
1751                 m_freem(*m_headp);
1752                 *m_headp = NULL;
1753                 return (error);
1754         }
1755
1756         /* Make certain there are enough descriptors */
1757         if (nsegs > txr->tx_avail - 2) {
1758                 txr->no_desc_avail++;
1759                 error = ENOBUFS;
1760                 goto xmit_fail;
1761         }
1762         m_head = *m_headp;
1763
1764         /*
1765         ** Set up the appropriate offload context;
1766         ** this becomes the first descriptor of 
1767         ** a packet.
1768         */
1769         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1770                 if (ixgbe_tso_setup(txr, m_head, &paylen, &olinfo_status)) {
1771                         cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1772                         olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1773                         olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1774                         ++adapter->tso_tx;
1775                 } else
1776                         return (ENXIO);
1777         } else if (ixgbe_tx_ctx_setup(txr, m_head))
1778                 olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1779
1780 #ifdef IXGBE_IEEE1588
1781         /* This is changing soon to an mtag detection */
1782         if (0 /* we detect this mbuf has a TSTAMP mtag */)
1783                 cmd_type_len |= IXGBE_ADVTXD_MAC_TSTAMP;
1784 #endif
1785
1786 #ifdef IXGBE_FDIR
1787         /* Do the flow director magic */
1788         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
1789                 ++txr->atr_count;
1790                 if (txr->atr_count >= atr_sample_rate) {
1791                         ixgbe_atr(txr, m_head);
1792                         txr->atr_count = 0;
1793                 }
1794         }
1795 #endif
1796         /* Record payload length */
1797         if (paylen == 0)
1798                 olinfo_status |= m_head->m_pkthdr.len <<
1799                     IXGBE_ADVTXD_PAYLEN_SHIFT;
1800
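        /*
         * Fill one advanced TX descriptor per DMA segment; the last
         * segment is flagged below with EOP | RS so the hardware reports
         * completion for the whole frame on that descriptor.
         */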
1801         i = txr->next_avail_desc;
1802         for (j = 0; j < nsegs; j++) {
1803                 bus_size_t seglen;
1804                 bus_addr_t segaddr;
1805
1806                 txbuf = &txr->tx_buffers[i];
1807                 txd = &txr->tx_base[i];
1808                 seglen = segs[j].ds_len;
1809                 segaddr = htole64(segs[j].ds_addr);
1810
1811                 txd->read.buffer_addr = segaddr;
1812                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
1813                     cmd_type_len |seglen);
1814                 txd->read.olinfo_status = htole32(olinfo_status);
1815                 last = i; /* descriptor that will get completion IRQ */
1816
1817                 if (++i == adapter->num_tx_desc)
1818                         i = 0;
1819
1820                 txbuf->m_head = NULL;
1821                 txbuf->eop_index = -1;
1822         }
1823
1824         txd->read.cmd_type_len |=
1825             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
1826         txr->tx_avail -= nsegs;
1827         txr->next_avail_desc = i;
1828
1829         txbuf->m_head = m_head;
1830         /* Swap the dma map between the first and last descriptor */
1831         txr->tx_buffers[first].map = txbuf->map;
1832         txbuf->map = map;
1833         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1834
1835         /* Set the index of the descriptor that will be marked done */
1836         txbuf = &txr->tx_buffers[first];
1837         txbuf->eop_index = last;
1838
1839         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1840             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1841         /*
1842          * Advance the Transmit Descriptor Tail (TDT); this tells the
1843          * hardware that this frame is available to transmit.
1844          */
1845         ++txr->total_packets;
1846         IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
1847
1848         return (0);
1849
1850 xmit_fail:
1851         bus_dmamap_unload(txr->txtag, txbuf->map);
1852         return (error);
1853
1854 }
1855
1856 static void
1857 ixgbe_set_promisc(struct adapter *adapter)
1858 {
1859         u_int32_t       reg_rctl;
1860         struct ifnet   *ifp = adapter->ifp;
1861
1862         reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1863         reg_rctl &= (~IXGBE_FCTRL_UPE);
1864         reg_rctl &= (~IXGBE_FCTRL_MPE);
1865         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1866
1867         if (ifp->if_flags & IFF_PROMISC) {
1868                 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1869                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1870         } else if (ifp->if_flags & IFF_ALLMULTI) {
1871                 reg_rctl |= IXGBE_FCTRL_MPE;
1872                 reg_rctl &= ~IXGBE_FCTRL_UPE;
1873                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1874         }
1875         return;
1876 }
1877
1878
1879 /*********************************************************************
1880  *  Multicast Update
1881  *
1882  *  This routine is called whenever the multicast address list is updated.
1883  *
1884  **********************************************************************/
1885 #define IXGBE_RAR_ENTRIES 16
1886
1887 static void
1888 ixgbe_set_multi(struct adapter *adapter)
1889 {
1890         u32     fctrl;
1891         u8      *mta;
1892         u8      *update_ptr;
1893         struct  ifmultiaddr *ifma;
1894         int     mcnt = 0;
1895         struct ifnet   *ifp = adapter->ifp;
1896
1897         IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
1898
1899         mta = adapter->mta;
1900         bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1901             MAX_NUM_MULTICAST_ADDRESSES);
1902
1903         fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1904         fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1905         if (ifp->if_flags & IFF_PROMISC)
1906                 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1907         else if (ifp->if_flags & IFF_ALLMULTI) {
1908                 fctrl |= IXGBE_FCTRL_MPE;
1909                 fctrl &= ~IXGBE_FCTRL_UPE;
1910         } else
1911                 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1912         
1913         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
1914
1915         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1916                 if (ifma->ifma_addr->sa_family != AF_LINK)
1917                         continue;
1918                 bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
1919                     &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1920                     IXGBE_ETH_LENGTH_OF_ADDRESS);
1921                 mcnt++;
1922         }
1923
1924         update_ptr = mta;
1925         ixgbe_update_mc_addr_list(&adapter->hw,
1926             update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
1927
1928         return;
1929 }
1930
1931 /*
1932  * This is an iterator function needed by the multicast
1933  * shared code. It feeds the shared code routine the addresses
1934  * collected in the mta array by ixgbe_set_multi(), one per call.
1935  */
1936 static u8 *
1937 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
1938 {
1939         u8 *addr = *update_ptr;
1940         u8 *newptr;
1941         *vmdq = 0;
1942
1943         newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1944         *update_ptr = newptr;
1945         return addr;
1946 }
1947
1948
1949 /*********************************************************************
1950  *  Timer routine
1951  *
1952  *  This routine checks for link status, updates statistics,
1953  *  and runs the watchdog check.
1954  *
1955  **********************************************************************/
1956
1957 static void
1958 ixgbe_local_timer(void *arg)
1959 {
1960         struct adapter  *adapter = arg;
1961         device_t        dev = adapter->dev;
1962         struct ifnet    *ifp = adapter->ifp;
1963         struct ix_queue *que = adapter->queues;
1964         struct tx_ring  *txr = adapter->tx_rings;
1965         int             hung, busy, paused;
1966
1967         IXGBE_CORE_LOCK(adapter);
1968         hung = busy = paused = 0;
1969
1970         /* Check for pluggable optics */
1971         if (adapter->sfp_probe)
1972                 if (!ixgbe_sfp_probe(adapter))
1973                         goto out; /* Nothing to do */
1974
1975         ixgbe_update_link_status(adapter);
1976         ixgbe_update_stats_counters(adapter);
1977
1978         /*
1979          * If the interface has been paused
1980          * then don't do the watchdog check
1981          */
1982         if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
1983                 paused = 1;
1984
1985         /*
1986         ** Check the TX queues status
1987         **      - central locked handling of OACTIVE
1988         **      - watchdog only if all queues show hung
1989         */          
1990         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
1991                 if ((txr->queue_status & IXGBE_QUEUE_HUNG) &&
1992                     (paused == 0))
1993                         ++hung;
1994                 if (txr->queue_status & IXGBE_QUEUE_DEPLETED)
1995                         ++busy;
1996                 if ((txr->queue_status & IXGBE_QUEUE_IDLE) == 0)
1997                         taskqueue_enqueue(que->tq, &que->que_task);
1998         }
1999         /* Only trigger the watchdog if all queues show hung */
2000         if (hung == adapter->num_queues)
2001                 goto watchdog;
2002         /* Only turn off the stack flow when ALL are depleted */
2003         if (busy == adapter->num_queues)
2004                 ifp->if_flags |= IFF_OACTIVE;
2005         else if ((ifp->if_flags & IFF_OACTIVE) &&
2006             (busy < adapter->num_queues))
2007                 ifp->if_flags &= ~IFF_OACTIVE;
2008
2009 out:
2010         ixgbe_rearm_queues(adapter, adapter->que_mask);
2011         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2012         IXGBE_CORE_UNLOCK(adapter);
2013         return;
2014
2015 watchdog:
2016         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2017         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2018             IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2019             IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2020         device_printf(dev, "TX(%d) desc avail = %d, "
2021             "Next TX to Clean = %d\n",
2022             txr->me, txr->tx_avail, txr->next_to_clean);
2023         adapter->ifp->if_flags &= ~IFF_RUNNING;
2024         adapter->watchdog_events++;
2025         ixgbe_init_locked(adapter);
2026
2027         IXGBE_CORE_UNLOCK(adapter);
2028 }
2029
2030 /*
2031 ** Note: this routine updates the OS on the link state;
2032 **      the real check of the hardware only happens with
2033 **      a link interrupt.
2034 */
2035 static void
2036 ixgbe_update_link_status(struct adapter *adapter)
2037 {
2038         struct ifnet    *ifp = adapter->ifp;
2039         struct tx_ring *txr = adapter->tx_rings;
2040         device_t dev = adapter->dev;
2041
2042
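        /*
         * A link_speed of 128 (IXGBE_LINK_SPEED_10GB_FULL) is reported as
         * 10 Gbps below; any other active speed is shown as 1 Gbps.
         */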
2043         if (adapter->link_up){ 
2044                 if (adapter->link_active == FALSE) {
2045                         if (bootverbose)
2046                                 device_printf(dev,"Link is up %d Gbps %s \n",
2047                                     ((adapter->link_speed == 128)? 10:1),
2048                                     "Full Duplex");
2049                         adapter->link_active = TRUE;
2050                         /* Update any Flow Control changes */
2051                         ixgbe_fc_enable(&adapter->hw);
2052                         ifp->if_link_state = LINK_STATE_UP;
2053                         if_link_state_change(ifp);
2054                 }
2055         } else { /* Link down */
2056                 if (adapter->link_active == TRUE) {
2057                         if (bootverbose)
2058                                 device_printf(dev,"Link is Down\n");
2059                         ifp->if_link_state = LINK_STATE_DOWN;
2060                         if_link_state_change(ifp);
2061                         adapter->link_active = FALSE;
2062                         for (int i = 0; i < adapter->num_queues;
2063                             i++, txr++)
2064                                 txr->queue_status = IXGBE_QUEUE_IDLE;
2065                 }
2066         }
2067
2068         return;
2069 }
2070
2071
2072 /*********************************************************************
2073  *
2074  *  This routine disables all traffic on the adapter by issuing a
2075  *  global reset on the MAC and deallocates TX/RX buffers.
2076  *
2077  **********************************************************************/
2078
2079 static void
2080 ixgbe_stop(void *arg)
2081 {
2082         struct ifnet   *ifp;
2083         struct adapter *adapter = arg;
2084         struct ixgbe_hw *hw = &adapter->hw;
2085         ifp = adapter->ifp;
2086
2087         KKASSERT(lockstatus(&adapter->core_lock, curthread) != 0);
2088
2089         INIT_DEBUGOUT("ixgbe_stop: begin\n");
2090         ixgbe_disable_intr(adapter);
2091         callout_stop(&adapter->timer);
2092
2093         /* Let the stack know...*/
2094         ifp->if_flags &= ~IFF_RUNNING;
2095         ifp->if_flags |= IFF_OACTIVE;
2096
2097         ixgbe_reset_hw(hw);
2098         hw->adapter_stopped = FALSE;
2099         ixgbe_stop_adapter(hw);
2100         /* Turn off the laser */
2101         if (hw->phy.multispeed_fiber)
2102                 ixgbe_disable_tx_laser(hw);
2103
2104         /* reprogram the RAR[0] in case user changed it. */
2105         ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2106
2107         return;
2108 }
2109
2110
2111 /*********************************************************************
2112  *
2113  *  Determine hardware revision.
2114  *
2115  **********************************************************************/
2116 static void
2117 ixgbe_identify_hardware(struct adapter *adapter)
2118 {
2119         device_t        dev = adapter->dev;
2120         struct ixgbe_hw *hw = &adapter->hw;
2121
2122         /* Save off the information about this board */
2123         hw->vendor_id = pci_get_vendor(dev);
2124         hw->device_id = pci_get_device(dev);
2125         hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
2126         hw->subsystem_vendor_id =
2127             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2128         hw->subsystem_device_id =
2129             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2130
2131         /* We need this here to set the num_segs below */
2132         ixgbe_set_mac_type(hw);
2133
2134         /* Pick up the 82599 and VF settings */
2135         if (hw->mac.type != ixgbe_mac_82598EB) {
2136                 hw->phy.smart_speed = ixgbe_smart_speed;
2137                 adapter->num_segs = IXGBE_82599_SCATTER;
2138         } else
2139                 adapter->num_segs = IXGBE_82598_SCATTER;
2140
2141         return;
2142 }
2143
2144 /*********************************************************************
2145  *
2146  *  Determine optic type
2147  *
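 *  The supported physical layer reported by the shared code is mapped
 *  to the most specific matching ifmedia word; anything unrecognized
 *  falls back to IFM_ETHER | IFM_AUTO.
 *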
2148  **********************************************************************/
2149 static void
2150 ixgbe_setup_optics(struct adapter *adapter)
2151 {
2152         struct ixgbe_hw *hw = &adapter->hw;
2153         int             layer;
2154         
2155         layer = ixgbe_get_supported_physical_layer(hw);
2156
2157         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2158                 adapter->optics = IFM_10G_T;
2159                 return;
2160         }
2161
2162         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2163                 adapter->optics = IFM_1000_T;
2164                 return;
2165         }
2166
2167         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2168             IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2169                 adapter->optics = IFM_10G_LR;
2170                 return;
2171         }
2172
2173         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2174                 adapter->optics = IFM_10G_SR;
2175                 return;
2176         }
2177
2178         if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2179                 adapter->optics = IFM_10G_TWINAX;
2180                 return;
2181         }
2182
2183         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2184             IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2185                 adapter->optics = IFM_10G_CX4;
2186                 return;
2187         }
2188
2189         /* If we get here just set the default */
2190         adapter->optics = IFM_ETHER | IFM_AUTO;
2191         return;
2192 }
2193
2194 /*********************************************************************
2195  *
2196  *  Setup the Legacy or MSI Interrupt handler
2197  *
2198  **********************************************************************/
2199 static int
2200 ixgbe_allocate_legacy(struct adapter *adapter)
2201 {
2202         device_t dev = adapter->dev;
2203         struct          ix_queue *que = adapter->queues;
2204         int error, rid = 0;
2205         unsigned int intr_flags;
2206
2207         /* MSI RID at 1 */
2208         if (adapter->msix == 1)
2209                 rid = 1;
2210
2211         /* Try allocating a MSI interrupt first */
2212         adapter->intr_type = pci_alloc_1intr(dev, ixgbe_msi_enable,
2213                 &rid, &intr_flags);
2214
2215         /* We allocate a single interrupt resource */
2216         adapter->res = bus_alloc_resource_any(dev,
2217             SYS_RES_IRQ, &rid, intr_flags);
2218         if (adapter->res == NULL) {
2219                 device_printf(dev, "Unable to allocate bus resource: "
2220                     "interrupt\n");
2221                 return (ENXIO);
2222         }
2223
2224         /*
2225          * Try allocating a fast interrupt and the associated deferred
2226          * processing contexts.
2227          */
2228         TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2229         que->tq = taskqueue_create("ixgbe_que", M_NOWAIT,
2230             taskqueue_thread_enqueue, &que->tq);
2231         taskqueue_start_threads(&que->tq, 1, PI_NET, -1, "%s ixq",
2232             device_get_nameunit(adapter->dev));
2233
2234         /* Tasklets for Link, SFP and Multispeed Fiber */
2235         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2236         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2237         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2238 #ifdef IXGBE_FDIR
2239         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2240 #endif
2241         adapter->tq = taskqueue_create("ixgbe_link", M_NOWAIT,
2242             taskqueue_thread_enqueue, &adapter->tq);
2243         taskqueue_start_threads(&adapter->tq, 1, PI_NET, -1, "%s linkq",
2244             device_get_nameunit(adapter->dev));
2245
2246         if ((error = bus_setup_intr(dev, adapter->res, INTR_MPSAFE,
2247             ixgbe_legacy_irq, que, &adapter->tag, &adapter->serializer)) != 0) {
2248                 device_printf(dev, "Failed to register fast interrupt "
2249                     "handler: %d\n", error);
2250                 taskqueue_free(que->tq);
2251                 taskqueue_free(adapter->tq);
2252                 que->tq = NULL;
2253                 adapter->tq = NULL;
2254                 return (error);
2255         }
2256         /* For simplicity in the handlers */
2257         adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2258
2259         return (0);
2260 }
2261
2262
2263 /*********************************************************************
2264  *
2265  *  Setup MSIX Interrupt resources and handlers 
2266  *
2267  **********************************************************************/
2268 static int
2269 ixgbe_allocate_msix(struct adapter *adapter)
2270 {
2271         device_t        dev = adapter->dev;
2272         struct          ix_queue *que = adapter->queues;
2273         int             error, rid, vector = 0;
2274         char            desc[16];
2275
2276         error = pci_setup_msix(dev);
2277         if (error) {
2278                 device_printf(dev, "MSI-X setup failed\n");
2279                 return (error);
2280         }
2281
2282         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2283                 rid = vector + 1;
2284
2285                 /*
2286                 ** Bind the msix vector, and thus the
2287                 ** ring to the corresponding cpu.
2288                 */
2289                 error = pci_alloc_msix_vector(dev, vector, &rid, i);
2290                 if (error) {
2291                         device_printf(dev, "pci_alloc_msix_vector failed\n");
2292                         return (error);
2293                 }
2294
2295                 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2296                     RF_SHAREABLE | RF_ACTIVE);
2297                 if (que->res == NULL) {
2298                         device_printf(dev,"Unable to allocate"
2299                             " bus resource: que interrupt [%d]\n", vector);
2300                         return (ENXIO);
2301                 }
2302                 /* Set the handler function */
2303                 ksnprintf(desc, sizeof(desc), "%s que %d",
2304                     device_get_nameunit(dev), i);
2305                 error = bus_setup_intr_descr(dev, que->res, INTR_MPSAFE,
2306                     ixgbe_msix_que, que, &que->tag, &que->serializer, desc);
2307                 if (error) {
2308                         que->res = NULL;
2309                         device_printf(dev, "Failed to register QUE handler");
2310                         return (error);
2311                 }
2312                 que->msix = vector;
2313                 adapter->que_mask |= (u64)(1 << que->msix);
2314
2315                 TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2316                 que->tq = taskqueue_create("ixgbe_que", M_NOWAIT,
2317                     taskqueue_thread_enqueue, &que->tq);
2318                 taskqueue_start_threads(&que->tq, 1, PI_NET, -1, "%s que",
2319                     device_get_nameunit(adapter->dev));
2320         }
2321
2322         /* and Link, bind vector to cpu #0 */
2323         rid = vector + 1;
2324         error = pci_alloc_msix_vector(dev, vector, &rid, 0);
2325         if (error) {
2326                 device_printf(dev, "pci_alloc_msix_vector failed\n");
2327                 return (error);
2328         }
2329         adapter->res = bus_alloc_resource_any(dev,
2330             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2331         if (!adapter->res) {
2332                 device_printf(dev,"Unable to allocate"
2333             " bus resource: Link interrupt [%d]\n", rid);
2334                 return (ENXIO);
2335         }
2336         /* Set the link handler function */
2337         error = bus_setup_intr_descr(dev, adapter->res, INTR_MPSAFE,
2338             ixgbe_msix_link, adapter, &adapter->tag, &adapter->serializer,
2339             "link");
2340         if (error) {
2341                 adapter->res = NULL;
2342                 device_printf(dev, "Failed to register LINK handler");
2343                 return (error);
2344         }
2345         pci_enable_msix(dev);
2346
2347         adapter->linkvec = vector;
2348         /* Tasklets for Link, SFP and Multispeed Fiber */
2349         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2350         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2351         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2352 #ifdef IXGBE_FDIR
2353         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2354 #endif
2355         adapter->tq = taskqueue_create("ixgbe_link", M_NOWAIT,
2356             taskqueue_thread_enqueue, &adapter->tq);
2357         taskqueue_start_threads(&adapter->tq, 1, PI_NET, -1, "%s linkq",
2358             device_get_nameunit(adapter->dev));
2359
2360         return (0);
2361 }
2362
2363 /*
2364  * Setup Either MSI/X or MSI
2365  */
2366 static int
2367 ixgbe_setup_msix(struct adapter *adapter)
2368 {
2369         device_t dev = adapter->dev;
2370         int rid, want, queues, msgs;
2371
2372         /* Override by tuneable */
2373         if (ixgbe_enable_msix == 0)
2374                 goto msi;
2375
2376         /* First try MSI/X */
2377         rid = PCIR_BAR(MSIX_82598_BAR);
2378         adapter->msix_mem = bus_alloc_resource_any(dev,
2379             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2380         if (!adapter->msix_mem) {
2381                 rid += 4;       /* 82599 maps in higher BAR */
2382                 adapter->msix_mem = bus_alloc_resource_any(dev,
2383                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2384         }
2385         if (!adapter->msix_mem) {
2386                 /* May not be enabled */
2387                 device_printf(adapter->dev,
2388                     "Unable to map MSIX table \n");
2389                 goto msi;
2390         }
2391
2392         msgs = pci_msix_count(dev); 
2393         if (msgs == 0) { /* system has msix disabled */
2394                 bus_release_resource(dev, SYS_RES_MEMORY,
2395                     rid, adapter->msix_mem);
2396                 adapter->msix_mem = NULL;
2397                 goto msi;
2398         }
2399
2400         /* Figure out a reasonable auto config value */
2401         queues = (ncpus > (msgs-1)) ? (msgs-1) : ncpus;
2402
2403         if (ixgbe_num_queues != 0)
2404                 queues = ixgbe_num_queues;
2405         /* Set max queues to 8 when autoconfiguring */
2406         else if ((ixgbe_num_queues == 0) && (queues > 8))
2407                 queues = 8;
2408
2409         /*
2410         ** Want one vector (RX/TX pair) per queue
2411         ** plus an additional for Link.
2412         */
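        /*
        ** For example, 8 queues ask for 9 vectors; with fewer available
        ** the driver reports the problem and falls back to the
        ** legacy/MSI setup path.
        */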
2413         want = queues + 1;
2414         if (msgs >= want)
2415                 msgs = want;
2416         else {
2417                 device_printf(adapter->dev,
2418                     "MSIX Configuration Problem, "
2419                     "%d vectors but %d queues wanted!\n",
2420                     msgs, want);
2421                 return (0); /* Will go to Legacy setup */
2422         }
2423         if (msgs) {
2424                 device_printf(adapter->dev,
2425                     "Using MSIX interrupts with %d vectors\n", msgs);
2426                 adapter->num_queues = queues;
2427                 return (msgs);
2428         }
2429 msi:
2430         msgs = pci_msi_count(dev);
2431         return (msgs);
2432 }
2433
2434
2435 static int
2436 ixgbe_allocate_pci_resources(struct adapter *adapter)
2437 {
2438         int             rid;
2439         device_t        dev = adapter->dev;
2440
2441         rid = PCIR_BAR(0);
2442         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2443             &rid, RF_ACTIVE);
2444
2445         if (!(adapter->pci_mem)) {
2446                 device_printf(dev,"Unable to allocate bus resource: memory\n");
2447                 return (ENXIO);
2448         }
2449
2450         adapter->osdep.mem_bus_space_tag =
2451                 rman_get_bustag(adapter->pci_mem);
2452         adapter->osdep.mem_bus_space_handle =
2453                 rman_get_bushandle(adapter->pci_mem);
2454         adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
2455
2456         /* Legacy defaults */
2457         adapter->num_queues = 1;
2458         adapter->hw.back = &adapter->osdep;
2459
2460         /*
2461         ** Now setup MSI or MSI/X, should
2462         ** return us the number of supported
2463         ** vectors. (Will be 1 for MSI)
2464         */
2465         adapter->msix = ixgbe_setup_msix(adapter);
2466         return (0);
2467 }
2468
2469 static void
2470 ixgbe_free_pci_resources(struct adapter * adapter)
2471 {
2472         struct          ix_queue *que = adapter->queues;
2473         device_t        dev = adapter->dev;
2474         int             rid, memrid;
2475
2476         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2477                 memrid = PCIR_BAR(MSIX_82598_BAR);
2478         else
2479                 memrid = PCIR_BAR(MSIX_82599_BAR);
2480
2481         /*
2482         ** There is a slight possibility of a failure mode
2483         ** in attach that will result in entering this function
2484         ** before interrupt resources have been initialized, and
2485         ** in that case we do not want to execute the loops below.
2486         ** We can detect this reliably by the state of the adapter
2487         ** res pointer.
2488         */
2489         if (adapter->res == NULL)
2490                 goto mem;
2491
2492         /*
2493         **  Release all msix queue resources:
2494         */
2495         for (int i = 0; i < adapter->num_queues; i++, que++) {
2496                 rid = que->msix + 1;
2497                 if (que->tag != NULL) {
2498                         bus_teardown_intr(dev, que->res, que->tag);
2499                         que->tag = NULL;
2500                 }
2501                 if (que->res != NULL)
2502                         bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2503         }
2504
2505
2506         /* Clean the Legacy or Link interrupt last */
2507         if (adapter->linkvec) /* we are doing MSIX */
2508                 rid = adapter->linkvec + 1;
2509         else
2510                 rid = (adapter->msix != 0) ? 1 : 0;
2511
2512         if (adapter->tag != NULL) {
2513                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2514                 adapter->tag = NULL;
2515         }
2516         if (adapter->res != NULL)
2517                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2518         if (adapter->intr_type == PCI_INTR_TYPE_MSI)
2519                 pci_release_msi(adapter->dev);
2520
2521 mem:
2522         if (adapter->msix)
2523                 pci_release_msi(dev);
2524
2525         if (adapter->msix_mem != NULL)
2526                 bus_release_resource(dev, SYS_RES_MEMORY,
2527                     memrid, adapter->msix_mem);
2528
2529         if (adapter->pci_mem != NULL)
2530                 bus_release_resource(dev, SYS_RES_MEMORY,
2531                     PCIR_BAR(0), adapter->pci_mem);
2532
2533         return;
2534 }
2535
2536 /*********************************************************************
2537  *
2538  *  Setup networking device structure and register an interface.
2539  *
2540  **********************************************************************/
2541 static int
2542 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2543 {
2544         struct ixgbe_hw *hw = &adapter->hw;
2545         struct ifnet   *ifp;
2546
2547         INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2548
2549         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2550         if (ifp == NULL) {
2551                 device_printf(dev, "can not allocate ifnet structure\n");
2552                 return (-1);
2553         }
2554         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2555         ifp->if_baudrate = 1000000000;
2556         ifp->if_init = ixgbe_init;
2557         ifp->if_softc = adapter;
2558         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2559         ifp->if_ioctl = ixgbe_ioctl;
2560         ifp->if_start = ixgbe_start;
2561 #if 0 /* __FreeBSD_version >= 800000 */
2562         ifp->if_transmit = ixgbe_mq_start;
2563         ifp->if_qflush = ixgbe_qflush;
2564 #endif
2565         ifp->if_snd.ifq_maxlen = adapter->num_tx_desc - 2;
2566
2567         ether_ifattach(ifp, adapter->hw.mac.addr, NULL);
2568
2569         adapter->max_frame_size =
2570             ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
2571
2572         /*
2573          * Tell the upper layer(s) we support long frames.
2574          */
2575         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2576
2577         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM;
2578         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2579         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2580 #if 0 /* NET_TSO */
2581                              |  IFCAP_VLAN_HWTSO
2582 #endif
2583                              |  IFCAP_VLAN_MTU;
2584         ifp->if_capenable = ifp->if_capabilities;
2585
2586         /* Don't enable LRO by default */
2587 #if 0 /* NET_LRO */
2588         ifp->if_capabilities |= IFCAP_LRO;
2589 #endif
2590
2591         /*
2592         ** Don't turn this on by default: if vlans are
2593         ** created on another pseudo device (e.g. lagg)
2594         ** then vlan events are not passed thru, breaking
2595         ** operation, but with HW FILTER off it works. If
2596         ** using vlans directly on the ixgbe driver you can
2597         ** enable this and get full hardware tag filtering.
2598         */
2599         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2600
2601         /*
2602          * Specify the media types supported by this adapter and register
2603          * callbacks to update media and link information
2604          */
2605         ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2606                      ixgbe_media_status);
2607         ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2608         ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2609         if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2610                 ifmedia_add(&adapter->media,
2611                     IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2612                 ifmedia_add(&adapter->media,
2613                     IFM_ETHER | IFM_1000_T, 0, NULL);
2614         }
2615         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2616         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2617
2618         return (0);
2619 }
2620
2621 static void
2622 ixgbe_config_link(struct adapter *adapter)
2623 {
2624         struct ixgbe_hw *hw = &adapter->hw;
2625         u32     autoneg, err = 0;
2626         bool    sfp, negotiate;
2627
2628         sfp = ixgbe_is_sfp(hw);
2629
2630         if (sfp) { 
2631                 if (hw->phy.multispeed_fiber) {
2632                         hw->mac.ops.setup_sfp(hw);
2633                         ixgbe_enable_tx_laser(hw);
2634                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
2635                 } else
2636                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
2637         } else {
2638                 if (hw->mac.ops.check_link)
2639                         err = ixgbe_check_link(hw, &autoneg,
2640                             &adapter->link_up, FALSE);
2641                 if (err)
2642                         goto out;
2643                 autoneg = hw->phy.autoneg_advertised;
2644                 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2645                         err  = hw->mac.ops.get_link_capabilities(hw,
2646                             &autoneg, &negotiate);
2647                 if (err)
2648                         goto out;
2649                 if (hw->mac.ops.setup_link)
2650                         err = hw->mac.ops.setup_link(hw, autoneg,
2651                             negotiate, adapter->link_up);
2652         }
2653 out:
2654         return;
2655 }
2656
2657 /********************************************************************
2658  * Manage DMA'able memory.
2659  *******************************************************************/
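/*
 * ixgbe_dma_malloc() creates a DMA tag, allocates and maps one
 * contiguous area, and records its bus address through the
 * ixgbe_dmamap_cb() callback; the fail_0/1/2 labels unwind those
 * steps in reverse order on error.
 */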
2660 static void
2661 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2662 {
2663         if (error)
2664                 return;
2665         *(bus_addr_t *) arg = segs->ds_addr;
2666         return;
2667 }
2668
2669 static int
2670 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2671                 struct ixgbe_dma_alloc *dma, int mapflags)
2672 {
2673         device_t dev = adapter->dev;
2674         int             r;
2675
2676         r = bus_dma_tag_create(NULL,    /* parent */
2677                                DBA_ALIGN, 0,    /* alignment, bounds */
2678                                BUS_SPACE_MAXADDR,       /* lowaddr */
2679                                BUS_SPACE_MAXADDR,       /* highaddr */
2680                                NULL, NULL,      /* filter, filterarg */
2681                                size,    /* maxsize */
2682                                1,       /* nsegments */
2683                                size,    /* maxsegsize */
2684                                BUS_DMA_ALLOCNOW,        /* flags */
2685                                &dma->dma_tag);
2686         if (r != 0) {
2687                 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2688                        "error %u\n", r);
2689                 goto fail_0;
2690         }
2691         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2692                              BUS_DMA_NOWAIT, &dma->dma_map);
2693         if (r != 0) {
2694                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2695                        "error %u\n", r);
2696                 goto fail_1;
2697         }
2698         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2699                             size,
2700                             ixgbe_dmamap_cb,
2701                             &dma->dma_paddr,
2702                             mapflags | BUS_DMA_NOWAIT);
2703         if (r != 0) {
2704                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2705                        "error %u\n", r);
2706                 goto fail_2;
2707         }
2708         dma->dma_size = size;
2709         return (0);
2710 fail_2:
2711         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2712 fail_1:
2713         bus_dma_tag_destroy(dma->dma_tag);
2714 fail_0:
2715         dma->dma_map = NULL;
2716         dma->dma_tag = NULL;
2717         return (r);
2718 }
2719
2720 static void
2721 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2722 {
2723         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2724             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2725         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2726         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2727         bus_dma_tag_destroy(dma->dma_tag);
2728 }
2729
2730
2731 /*********************************************************************
2732  *
2733  *  Allocate memory for the transmit and receive rings, and then
2734  *  the descriptors associated with each, called only once at attach.
2735  *
2736  **********************************************************************/
2737 static int
2738 ixgbe_allocate_queues(struct adapter *adapter)
2739 {
2740         device_t        dev = adapter->dev;
2741         struct ix_queue *que;
2742         struct tx_ring  *txr;
2743         struct rx_ring  *rxr;
2744         int rsize, tsize, error = IXGBE_SUCCESS;
2745         int txconf = 0, rxconf = 0;
2746
2747         /* First allocate the top level queue structs */
2748         if (!(adapter->queues =
2749             (struct ix_queue *) kmalloc(sizeof(struct ix_queue) *
2750             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2751                 device_printf(dev, "Unable to allocate queue memory\n");
2752                 error = ENOMEM;
2753                 goto fail;
2754         }
2755
2756         /* First allocate the TX ring struct memory */
2757         if (!(adapter->tx_rings =
2758             (struct tx_ring *) kmalloc(sizeof(struct tx_ring) *
2759             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2760                 device_printf(dev, "Unable to allocate TX ring memory\n");
2761                 error = ENOMEM;
2762                 goto tx_fail;
2763         }
2764
2765         /* Next allocate the RX */
2766         if (!(adapter->rx_rings =
2767             (struct rx_ring *) kmalloc(sizeof(struct rx_ring) *
2768             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2769                 device_printf(dev, "Unable to allocate RX ring memory\n");
2770                 error = ENOMEM;
2771                 goto rx_fail;
2772         }
2773
2774         /* For the ring itself */
2775         tsize = roundup2(adapter->num_tx_desc *
2776             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2777
2778         /*
2779          * Now set up the TX queues, txconf is needed to handle the
2780          * possibility that things fail midcourse and we need to
2781          * undo memory gracefully
2782          */ 
2783         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2784                 /* Set up some basics */
2785                 txr = &adapter->tx_rings[i];
2786                 txr->adapter = adapter;
2787                 txr->me = i;
2788
2789                 /* Initialize the TX side lock */
2790                 ksnprintf(txr->lock_name, sizeof(txr->lock_name), "%s:tx(%d)",
2791                     device_get_nameunit(dev), txr->me);
2792                 lockinit(&txr->tx_lock, txr->lock_name, 0, LK_CANRECURSE);
2793
2794                 if (ixgbe_dma_malloc(adapter, tsize,
2795                         &txr->txdma, BUS_DMA_NOWAIT)) {
2796                         device_printf(dev,
2797                             "Unable to allocate TX Descriptor memory\n");
2798                         error = ENOMEM;
2799                         goto err_tx_desc;
2800                 }
2801                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2802                 bzero((void *)txr->tx_base, tsize);
2803
2804                 /* Now allocate transmit buffers for the ring */
2805                 if (ixgbe_allocate_transmit_buffers(txr)) {
2806                         device_printf(dev,
2807                             "Critical Failure setting up transmit buffers\n");
2808                         error = ENOMEM;
2809                         goto err_tx_desc;
2810                 }
2811 #if 0 /* __FreeBSD_version >= 800000 */
2812                 /* Allocate a buf ring */
2813                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2814                     M_WAITOK, &txr->tx_mtx);
2815                 if (txr->br == NULL) {
2816                         device_printf(dev,
2817                             "Critical Failure setting up buf ring\n");
2818                         error = ENOMEM;
2819                         goto err_tx_desc;
2820                 }
2821 #endif
2822         }
2823
2824         /*
2825          * Next the RX queues...
2826          */ 
2827         rsize = roundup2(adapter->num_rx_desc *
2828             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2829         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2830                 rxr = &adapter->rx_rings[i];
2831                 /* Set up some basics */
2832                 rxr->adapter = adapter;
2833                 rxr->me = i;
2834
2835                 /* Initialize the RX side lock */
2836                 ksnprintf(rxr->lock_name, sizeof(rxr->lock_name), "%s:rx(%d)",
2837                     device_get_nameunit(dev), rxr->me);
2838                 lockinit(&rxr->rx_lock, rxr->lock_name, 0, LK_CANRECURSE);
2839
2840                 if (ixgbe_dma_malloc(adapter, rsize,
2841                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2842                         device_printf(dev,
2843                             "Unable to allocate RxDescriptor memory\n");
2844                         error = ENOMEM;
2845                         goto err_rx_desc;
2846                 }
2847                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2848                 bzero((void *)rxr->rx_base, rsize);
2849
2850                 /* Allocate receive buffers for the ring */
2851                 if (ixgbe_allocate_receive_buffers(rxr)) {
2852                         device_printf(dev,
2853                             "Critical Failure setting up receive buffers\n");
2854                         error = ENOMEM;
2855                         goto err_rx_desc;
2856                 }
2857         }
2858
2859         /*
2860         ** Finally set up the queue holding structs
2861         */
2862         for (int i = 0; i < adapter->num_queues; i++) {
2863                 que = &adapter->queues[i];
2864                 que->adapter = adapter;
2865                 que->txr = &adapter->tx_rings[i];
2866                 que->rxr = &adapter->rx_rings[i];
2867         }
2868
2869         return (0);
2870
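             /*
              * Error unwind: txconf and rxconf count how many rings were
              * fully initialized above, so only those rings' descriptor
              * DMA areas are freed here.
              */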
2871 err_rx_desc:
2872         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2873                 ixgbe_dma_free(adapter, &rxr->rxdma);
2874 err_tx_desc:
2875         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2876                 ixgbe_dma_free(adapter, &txr->txdma);
2877         kfree(adapter->rx_rings, M_DEVBUF);
2878 rx_fail:
2879         kfree(adapter->tx_rings, M_DEVBUF);
2880 tx_fail:
2881         kfree(adapter->queues, M_DEVBUF);
2882 fail:
2883         return (error);
2884 }
2885
2886 /*********************************************************************
2887  *
2888  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2889  *  the information needed to transmit a packet on the wire. This is
2890  *  called only once at attach, setup is done every reset.
2891  *
2892  **********************************************************************/
2893 static int
2894 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
2895 {
2896         struct adapter *adapter = txr->adapter;
2897         device_t dev = adapter->dev;
2898         struct ixgbe_tx_buf *txbuf;
2899         int error, i;
2900
2901         /*
2902          * Setup DMA descriptor areas.
2903          */
2904         if ((error = bus_dma_tag_create(
2905                                NULL,    /* parent */
2906                                1, 0,            /* alignment, bounds */
2907                                BUS_SPACE_MAXADDR,       /* lowaddr */
2908                                BUS_SPACE_MAXADDR,       /* highaddr */
2909                                NULL, NULL,              /* filter, filterarg */
2910                                IXGBE_TSO_SIZE,          /* maxsize */
2911                                adapter->num_segs,       /* nsegments */
2912                                PAGE_SIZE,               /* maxsegsize */
2913                                0,                       /* flags */
2914                                &txr->txtag))) {
2915                 device_printf(dev,"Unable to allocate TX DMA tag\n");
2916                 goto fail;
2917         }
2918
2919         if (!(txr->tx_buffers =
2920             (struct ixgbe_tx_buf *) kmalloc(sizeof(struct ixgbe_tx_buf) *
2921             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2922                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2923                 error = ENOMEM;
2924                 goto fail;
2925         }
2926
2927         /* Create the descriptor buffer dma maps */
2928         txbuf = txr->tx_buffers;
2929         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2930                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2931                 if (error != 0) {
2932                         device_printf(dev, "Unable to create TX DMA map\n");
2933                         goto fail;
2934                 }
2935         }
2936
2937         return 0;
2938 fail:
2939         /* Free everything; this handles the case where setup failed partway through */
2940         ixgbe_free_transmit_structures(adapter);
2941         return (error);
2942 }
2943
2944 /*********************************************************************
2945  *
2946  *  Initialize a transmit ring.
2947  *
2948  **********************************************************************/
2949 static void
2950 ixgbe_setup_transmit_ring(struct tx_ring *txr)
2951 {
2952         struct adapter *adapter = txr->adapter;
2953         struct ixgbe_tx_buf *txbuf;
2954         int i;
2955 #ifdef DEV_NETMAP
2956         struct netmap_adapter *na = NA(adapter->ifp);
2957         struct netmap_slot *slot;
2958 #endif /* DEV_NETMAP */
2959
2960         /* Clear the old ring contents */
2961         IXGBE_TX_LOCK(txr);
2962 #ifdef DEV_NETMAP
2963         /*
2964          * (under lock): if in netmap mode, do some consistency
2965          * checks and set slot to entry 0 of the netmap ring.
2966          */
2967         slot = netmap_reset(na, NR_TX, txr->me, 0);
2968 #endif /* DEV_NETMAP */
2969         bzero((void *)txr->tx_base,
2970               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
2971         /* Reset indices */
2972         txr->next_avail_desc = 0;
2973         txr->next_to_clean = 0;
2974
2975         /* Free any existing tx buffers. */
2976         txbuf = txr->tx_buffers;
2977         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2978                 if (txbuf->m_head != NULL) {
2979                         bus_dmamap_sync(txr->txtag, txbuf->map,
2980                             BUS_DMASYNC_POSTWRITE);
2981                         bus_dmamap_unload(txr->txtag, txbuf->map);
2982                         m_freem(txbuf->m_head);
2983                         txbuf->m_head = NULL;
2984                 }
2985 #ifdef DEV_NETMAP
2986                 /*
2987                  * In netmap mode, set the map for the packet buffer.
2988                  * NOTE: Some drivers (not this one) also need to set
2989                  * the physical buffer address in the NIC ring.
2990                  * Slots in the netmap ring (indexed by "si") are
2991                  * kring->nkr_hwofs positions "ahead" wrt the
2992                  * corresponding slot in the NIC ring. In some drivers
2993                  * (not here) nkr_hwofs can be negative. Function
2994                  * netmap_idx_n2k() handles wraparounds properly.
2995                  */
2996                 if (slot) {
2997                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
2998                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
2999                 }
3000 #endif /* DEV_NETMAP */
3001                 /* Clear the EOP index */
3002                 txbuf->eop_index = -1;
3003         }
3004
3005 #ifdef IXGBE_FDIR
3006         /* Set the rate at which we sample packets */
3007         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3008                 txr->atr_sample = atr_sample_rate;
3009 #endif
3010
3011         /* Set number of descriptors available */
3012         txr->tx_avail = adapter->num_tx_desc;
3013
3014         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3015             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3016         IXGBE_TX_UNLOCK(txr);
3017 }
3018
3019 /*********************************************************************
3020  *
3021  *  Initialize all transmit rings.
3022  *
3023  **********************************************************************/
3024 static int
3025 ixgbe_setup_transmit_structures(struct adapter *adapter)
3026 {
3027         struct tx_ring *txr = adapter->tx_rings;
3028
3029         for (int i = 0; i < adapter->num_queues; i++, txr++)
3030                 ixgbe_setup_transmit_ring(txr);
3031
3032         return (0);
3033 }
3034
3035 /*********************************************************************
3036  *
3037  *  Enable transmit unit.
3038  *
3039  **********************************************************************/
3040 static void
3041 ixgbe_initialize_transmit_units(struct adapter *adapter)
3042 {
3043         struct tx_ring  *txr = adapter->tx_rings;
3044         struct ixgbe_hw *hw = &adapter->hw;
3045
3046         /* Setup the Base and Length of the Tx Descriptor Ring */
3047
3048         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3049                 u64     tdba = txr->txdma.dma_paddr;
3050                 u32     txctrl;
3051
3052                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3053                        (tdba & 0x00000000ffffffffULL));
3054                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
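                     /*
                      * Legacy and advanced TX descriptors are both 16 bytes,
                      * so the legacy size yields the correct ring length.
                      */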
3055                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3056                     adapter->num_tx_desc * sizeof(struct ixgbe_legacy_tx_desc));
3057
3058                 /* Setup the HW Tx Head and Tail descriptor pointers */
3059                 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3060                 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3061
3062                 /* Setup Transmit Descriptor Cmd Settings */
3063                 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3064                 txr->queue_status = IXGBE_QUEUE_IDLE;
3065
3066                 /* Disable Head Writeback */
3067                 switch (hw->mac.type) {
3068                 case ixgbe_mac_82598EB:
3069                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3070                         break;
3071                 case ixgbe_mac_82599EB:
3072                 case ixgbe_mac_X540:
3073                 default:
3074                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3075                         break;
3076                 }
3077                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3078                 switch (hw->mac.type) {
3079                 case ixgbe_mac_82598EB:
3080                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3081                         break;
3082                 case ixgbe_mac_82599EB:
3083                 case ixgbe_mac_X540:
3084                 default:
3085                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3086                         break;
3087                 }
3088
3089         }
3090
3091         if (hw->mac.type != ixgbe_mac_82598EB) {
3092                 u32 dmatxctl, rttdcs;
3093                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3094                 dmatxctl |= IXGBE_DMATXCTL_TE;
3095                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3096                 /* Disable arbiter to set MTQC */
3097                 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3098                 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3099                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3100                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
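                     /* Re-enable the arbiter now that MTQC is programmed */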
3101                 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3102                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3103         }
3104
3105         return;
3106 }
3107
3108 /*********************************************************************
3109  *
3110  *  Free all transmit rings.
3111  *
3112  **********************************************************************/
3113 static void
3114 ixgbe_free_transmit_structures(struct adapter *adapter)
3115 {
3116         struct tx_ring *txr = adapter->tx_rings;
3117
3118         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3119                 IXGBE_TX_LOCK(txr);
3120                 ixgbe_free_transmit_buffers(txr);
3121                 ixgbe_dma_free(adapter, &txr->txdma);
3122                 IXGBE_TX_UNLOCK(txr);
3123                 IXGBE_TX_LOCK_DESTROY(txr);
3124         }
3125         kfree(adapter->tx_rings, M_DEVBUF);
3126 }
3127
3128 /*********************************************************************
3129  *
3130  *  Free transmit ring related data structures.
3131  *
3132  **********************************************************************/
3133 static void
3134 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3135 {
3136         struct adapter *adapter = txr->adapter;
3137         struct ixgbe_tx_buf *tx_buffer;
3138         int             i;
3139
3140         INIT_DEBUGOUT("free_transmit_ring: begin");
3141
3142         if (txr->tx_buffers == NULL)
3143                 return;
3144
3145         tx_buffer = txr->tx_buffers;
3146         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3147                 if (tx_buffer->m_head != NULL) {
3148                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3149                             BUS_DMASYNC_POSTWRITE);
3150                         bus_dmamap_unload(txr->txtag,
3151                             tx_buffer->map);
3152                         m_freem(tx_buffer->m_head);
3153                         tx_buffer->m_head = NULL;
3154                         if (tx_buffer->map != NULL) {
3155                                 bus_dmamap_destroy(txr->txtag,
3156                                     tx_buffer->map);
3157                                 tx_buffer->map = NULL;
3158                         }
3159                 } else if (tx_buffer->map != NULL) {
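                             /*
                              * No mbuf was attached, but the DMA map created
                              * at allocation time still needs to be destroyed.
                              */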
3160                         bus_dmamap_unload(txr->txtag,
3161                             tx_buffer->map);
3162                         bus_dmamap_destroy(txr->txtag,
3163                             tx_buffer->map);
3164                         tx_buffer->map = NULL;
3165                 }
3166         }
3167 #if 0 /* __FreeBSD_version >= 800000 */
3168         if (txr->br != NULL)
3169                 buf_ring_free(txr->br, M_DEVBUF);
3170 #endif
3171         if (txr->tx_buffers != NULL) {
3172                 kfree(txr->tx_buffers, M_DEVBUF);
3173                 txr->tx_buffers = NULL;
3174         }
3175         if (txr->txtag != NULL) {
3176                 bus_dma_tag_destroy(txr->txtag);
3177                 txr->txtag = NULL;
3178         }
3179         return;
3180 }
3181
3182 /*********************************************************************
3183  *
3184  *  Advanced Context Descriptor setup for VLAN or CSUM
3185  *
3186  **********************************************************************/
3187
3188 static bool
3189 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3190 {
3191         struct adapter *adapter = txr->adapter;
3192         struct ixgbe_adv_tx_context_desc *TXD;
3193         struct ixgbe_tx_buf        *tx_buffer;
3194         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3195         struct ether_vlan_header *eh;
3196         struct ip *ip;
3197         struct ip6_hdr *ip6;
3198         int  ehdrlen, ip_hlen = 0;
3199         u16     etype;
3200         u8      ipproto = 0;
3201         bool    offload = TRUE;
3202         int ctxd = txr->next_avail_desc;
3203         u16 vtag = 0;
3204
3205
3206         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3207                 offload = FALSE;
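             /*
              * Even without checksum offload, a context descriptor may
              * still be needed to carry the VLAN tag; that is checked below.
              */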
3208
3209         tx_buffer = &txr->tx_buffers[ctxd];
3210         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3211
3212         /*
3213         ** In advanced descriptors the vlan tag must 
3214         ** be placed into the descriptor itself.
3215         */
3216         if (mp->m_flags & M_VLANTAG) {
3217                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3218                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3219         } else if (offload == FALSE)
3220                 return FALSE;
3221
3222         /*
3223          * Determine where frame payload starts.
3224          * Jump over vlan headers if already present,
3225          * helpful for QinQ too.
3226          */
3227         eh = mtod(mp, struct ether_vlan_header *);
3228         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3229                 etype = ntohs(eh->evl_proto);
3230                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3231         } else {
3232                 etype = ntohs(eh->evl_encap_proto);
3233                 ehdrlen = ETHER_HDR_LEN;
3234         }
3235
3236         /* Set the ether header length */
3237         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3238
3239         switch (etype) {
3240                 case ETHERTYPE_IP:
3241                         ip = (struct ip *)(mp->m_data + ehdrlen);
3242                         ip_hlen = ip->ip_hl << 2;
3243                         ipproto = ip->ip_p;
3244                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3245                         break;
3246                 case ETHERTYPE_IPV6:
3247                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3248                         ip_hlen = sizeof(struct ip6_hdr);
3249                         /* XXX-BZ this will go badly in case of ext hdrs. */
3250                         ipproto = ip6->ip6_nxt;
3251                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3252                         break;
3253                 default:
3254                         offload = FALSE;
3255                         break;
3256         }
3257
3258         vlan_macip_lens |= ip_hlen;
3259         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3260
3261         switch (ipproto) {
3262                 case IPPROTO_TCP:
3263                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3264                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3265                         break;
3266
3267                 case IPPROTO_UDP:
3268                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3269                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3270                         break;
3271
3272 #if 0
3273                 case IPPROTO_SCTP:
3274                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3275                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
3276                         break;
3277 #endif
3278                 default:
3279                         offload = FALSE;
3280                         break;
3281         }
3282
3283         /* Now copy bits into descriptor */
3284         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3285         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3286         TXD->seqnum_seed = htole32(0);
3287         TXD->mss_l4len_idx = htole32(0);
3288
3289         tx_buffer->m_head = NULL;
3290         tx_buffer->eop_index = -1;
3291
3292         /* We've consumed the first desc, adjust counters */
3293         if (++ctxd == adapter->num_tx_desc)
3294                 ctxd = 0;
3295         txr->next_avail_desc = ctxd;
3296         --txr->tx_avail;
3297
3298         return (offload);
3299 }
3300
3301 /**********************************************************************
3302  *
3303  *  Setup work for hardware segmentation offload (TSO) on
3304  *  adapters using advanced tx descriptors
3305  *
3306  **********************************************************************/
3307 static bool
3308 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *paylen,
3309     u32 *olinfo_status)
3310 {
3311         struct adapter *adapter = txr->adapter;
3312         struct ixgbe_adv_tx_context_desc *TXD;
3313         struct ixgbe_tx_buf        *tx_buffer;
3314         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3315         u16 vtag = 0, eh_type;
3316         u32 mss_l4len_idx = 0, len;
3317         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3318         struct ether_vlan_header *eh;
3319 #if 0 /* IPv6 TSO */
3320 #ifdef INET6
3321         struct ip6_hdr *ip6;
3322 #endif
3323 #endif
3324 #ifdef INET
3325         struct ip *ip;
3326 #endif
3327         struct tcphdr *th;
3328
3329
3330         /*
3331          * Determine where frame payload starts.
3332          * Jump over vlan headers if already present
3333          */
3334         eh = mtod(mp, struct ether_vlan_header *);
3335         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3336                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3337                 eh_type = eh->evl_proto;
3338         } else {
3339                 ehdrlen = ETHER_HDR_LEN;
3340                 eh_type = eh->evl_encap_proto;
3341         }
3342
3343         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3344         len = ehdrlen + sizeof(struct tcphdr);
3345         switch (ntohs(eh_type)) {
3346 #if 0 /* IPv6 TSO */
3347 #ifdef INET6
3348         case ETHERTYPE_IPV6:
3349                 if (mp->m_len < len + sizeof(struct ip6_hdr))
3350                         return FALSE;
3351                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3352                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3353                 if (ip6->ip6_nxt != IPPROTO_TCP)
3354                         return FALSE;
3355                 ip_hlen = sizeof(struct ip6_hdr);
3356                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3357                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3358                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3359                 break;
3360 #endif
3361 #endif
3362 #ifdef INET
3363         case ETHERTYPE_IP:
3364                 if (mp->m_len < len + sizeof(struct ip))
3365                         return FALSE;
3366                 ip = (struct ip *)(mp->m_data + ehdrlen);
3367                 if (ip->ip_p != IPPROTO_TCP)
3368                         return FALSE;
3369                 ip->ip_sum = 0;
3370                 ip_hlen = ip->ip_hl << 2;
3371                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3372                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3373                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3374                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3375                 /* Tell transmit desc to also do IPv4 checksum. */
3376                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3377                 break;
3378 #endif
3379         default:
3380                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3381                     __func__, ntohs(eh_type));
3382                 break;
3383         }
3384
3385         ctxd = txr->next_avail_desc;
3386         tx_buffer = &txr->tx_buffers[ctxd];
3387         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3388
3389         tcp_hlen = th->th_off << 2;
3390
3391         /* This is used in the transmit desc in encap */
3392         *paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3393
3394         /* VLAN MACLEN IPLEN */
3395         if (mp->m_flags & M_VLANTAG) {
3396                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3397                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3398         }
3399
3400         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3401         vlan_macip_lens |= ip_hlen;
3402         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3403
3404         /* ADV DTYPE TUCMD */
3405         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3406         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3407         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3408
3409         /* MSS L4LEN IDX */
3410         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
3411         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3412         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3413
3414         TXD->seqnum_seed = htole32(0);
3415         tx_buffer->m_head = NULL;
3416         tx_buffer->eop_index = -1;
3417
3418         if (++ctxd == adapter->num_tx_desc)
3419                 ctxd = 0;
3420
3421         txr->tx_avail--;
3422         txr->next_avail_desc = ctxd;
3423         return TRUE;
3424 }
3425
3426 #ifdef IXGBE_FDIR
3427 /*
3428 ** This routine parses packet headers so that Flow
3429 ** Director can make a hashed filter table entry 
3430 ** allowing traffic flows to be identified and kept
3431 ** on the same cpu.  This would be a performance
3432 ** hit, but we only do it for one out of every
3433 ** IXGBE_FDIR_RATE packets.
3434 */
3435 static void
3436 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3437 {
3438         struct adapter                  *adapter = txr->adapter;
3439         struct ix_queue                 *que;
3440         struct ip                       *ip;
3441         struct tcphdr                   *th;
3442         struct udphdr                   *uh;
3443         struct ether_vlan_header        *eh;
3444         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
3445         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
3446         int                             ehdrlen, ip_hlen;
3447         u16                             etype;
3448
3449         eh = mtod(mp, struct ether_vlan_header *);
3450         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3451                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3452                 etype = eh->evl_proto;
3453         } else {
3454                 ehdrlen = ETHER_HDR_LEN;
3455                 etype = eh->evl_encap_proto;
3456         }
3457
3458         /* Only handling IPv4 */
3459         if (etype != htons(ETHERTYPE_IP))
3460                 return;
3461
3462         ip = (struct ip *)(mp->m_data + ehdrlen);
3463         ip_hlen = ip->ip_hl << 2;
3464
3465         /* check if we're UDP or TCP */
3466         switch (ip->ip_p) {
3467         case IPPROTO_TCP:
3468                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3469                 /* src and dst are inverted */
3470                 common.port.dst ^= th->th_sport;
3471                 common.port.src ^= th->th_dport;
3472                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3473                 break;
3474         case IPPROTO_UDP:
3475                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
3476                 /* src and dst are inverted */
3477                 common.port.dst ^= uh->uh_sport;
3478                 common.port.src ^= uh->uh_dport;
3479                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3480                 break;
3481         default:
3482                 return;
3483         }
3484
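             /*
              * Fold the VLAN id (or ethertype) and the IP addresses into
              * the signature-filter hash inputs.
              */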
3485         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vlantag);
3486         if (mp->m_pkthdr.ether_vlantag)
3487                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3488         else
3489                 common.flex_bytes ^= etype;
3490         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3491
3492         que = &adapter->queues[txr->me];
3493         /*
3494         ** This assumes the Rx queue and Tx
3495         ** queue are bound to the same CPU
3496         */
3497         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3498             input, common, que->msix);
3499 }
3500 #endif /* IXGBE_FDIR */
3501
3502 /**********************************************************************
3503  *
3504  *  Examine each tx_buffer in the used queue. If the hardware is done
3505  *  processing the packet then free associated resources. The
3506  *  tx_buffer is put back on the free queue.
3507  *
3508  **********************************************************************/
3509 static bool
3510 ixgbe_txeof(struct tx_ring *txr)
3511 {
3512         struct adapter  *adapter = txr->adapter;
3513         struct ifnet    *ifp = adapter->ifp;
3514         u32     first, last, done, processed;
3515         struct ixgbe_tx_buf *tx_buffer;
3516         struct ixgbe_legacy_tx_desc *tx_desc, *eop_desc;
3517
3518         KKASSERT(lockstatus(&txr->tx_lock, curthread) != 0);
3519
3520 #ifdef DEV_NETMAP
3521         if (ifp->if_capenable & IFCAP_NETMAP) {
3522                 struct netmap_adapter *na = NA(ifp);
3523                 struct netmap_kring *kring = &na->tx_rings[txr->me];
3524
3525                 tx_desc = (struct ixgbe_legacy_tx_desc *)txr->tx_base;
3526
3527                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3528                     BUS_DMASYNC_POSTREAD);
3529                 /*
3530                  * In netmap mode, all the work is done in the context
3531                  * of the client thread. Interrupt handlers only wake up
3532                  * clients, which may be sleeping on individual rings
3533                  * or on a global resource for all rings.
3534                  * To implement tx interrupt mitigation, we wake up the client
3535                  * thread roughly every half ring, even if the NIC interrupts
3536                  * more frequently. This is implemented as follows:
3537                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
3538                  *   the slot that should wake up the thread (nkr_num_slots
3539                  *   means the user thread should not be woken up);
3540                  * - the driver ignores tx interrupts unless netmap_mitigate=0
3541                  *   or the slot has the DD bit set.
3542                  *
3543                  * When the driver has separate locks, we need to
3544                  * release and re-acquire txlock to avoid deadlocks.
3545                  * XXX see if we can find a better way.
3546                  */
3547                 if (!netmap_mitigate ||
3548                     (kring->nr_kflags < kring->nkr_num_slots &&
3549                      tx_desc[kring->nr_kflags].upper.fields.status & IXGBE_TXD_STAT_DD)) {
3550                         kring->nr_kflags = kring->nkr_num_slots;
3551                         selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3552                         IXGBE_TX_UNLOCK(txr);
3553                         IXGBE_CORE_LOCK(adapter);
3554                         selwakeuppri(&na->tx_si, PI_NET);
3555                         IXGBE_CORE_UNLOCK(adapter);
3556                         IXGBE_TX_LOCK(txr);
3557                 }
3558                 return FALSE;
3559         }
3560 #endif /* DEV_NETMAP */
3561
3562         if (txr->tx_avail == adapter->num_tx_desc) {
3563                 txr->queue_status = IXGBE_QUEUE_IDLE;
3564                 return FALSE;
3565         }
3566
3567         processed = 0;
3568         first = txr->next_to_clean;
3569         tx_buffer = &txr->tx_buffers[first];
3570         /* For cleanup we just use legacy struct */
3571         tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
3572         last = tx_buffer->eop_index;
3573         if (last == -1)
3574                 return FALSE;
3575         eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
3576
3577         /*
3578         ** Get the index of the first descriptor
3579         ** BEYOND the EOP and call that 'done'.
3580         ** I do this so the comparison in the
3581         ** inner while loop below can be simple
3582         */
3583         if (++last == adapter->num_tx_desc) last = 0;
3584         done = last;
3585
3586         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3587             BUS_DMASYNC_POSTREAD);
3588         /*
3589         ** Only the EOP descriptor of a packet now has the DD
3590         ** bit set; this is what we look for...
3591         */
3592         while (eop_desc->upper.fields.status & IXGBE_TXD_STAT_DD) {
3593                 /* We clean the range of the packet */
3594                 while (first != done) {
3595                         tx_desc->upper.data = 0;
3596                         tx_desc->lower.data = 0;
3597                         tx_desc->buffer_addr = 0;
3598                         ++txr->tx_avail;
3599                         ++processed;
3600
3601                         if (tx_buffer->m_head) {
3602                                 txr->bytes +=
3603                                     tx_buffer->m_head->m_pkthdr.len;
3604                                 bus_dmamap_sync(txr->txtag,
3605                                     tx_buffer->map,
3606                                     BUS_DMASYNC_POSTWRITE);
3607                                 bus_dmamap_unload(txr->txtag,
3608                                     tx_buffer->map);
3609                                 m_freem(tx_buffer->m_head);
3610                                 tx_buffer->m_head = NULL;
3611                                 tx_buffer->map = NULL;
3612                         }
3613                         tx_buffer->eop_index = -1;
3614                         txr->watchdog_time = ticks;
3615
3616                         if (++first == adapter->num_tx_desc)
3617                                 first = 0;
3618
3619                         tx_buffer = &txr->tx_buffers[first];
3620                         tx_desc =
3621                             (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
3622                 }
3623                 ++txr->packets;
3624                 ++ifp->if_opackets;
3625                 /* See if there is more work now */
3626                 last = tx_buffer->eop_index;
3627                 if (last != -1) {
3628                         eop_desc =
3629                             (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
3630                         /* Get next done point */
3631                         if (++last == adapter->num_tx_desc) last = 0;
3632                         done = last;
3633                 } else
3634                         break;
3635         }
3636         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3637             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3638
3639         txr->next_to_clean = first;
3640
3641         /*
3642         ** Watchdog calculation: we know there's work
3643         ** outstanding, or the first return above would
3644         ** have been taken, so nothing processed for too
3645         ** long indicates a hang.
3646         */
3647         if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
3648                 txr->queue_status = IXGBE_QUEUE_HUNG;
3649
3650         /* With a minimum free clear the depleted state bit.  */
3651         if (txr->tx_avail > IXGBE_TX_CLEANUP_THRESHOLD)
3652                 txr->queue_status &= ~IXGBE_QUEUE_DEPLETED;
3653
3654         if (txr->tx_avail == adapter->num_tx_desc) {
3655                 txr->queue_status = IXGBE_QUEUE_IDLE;
3656                 return (FALSE);
3657         }
3658
3659         return TRUE;
3660 }
3661
3662 /*********************************************************************
3663  *
3664  *  Refresh mbuf buffers for RX descriptor rings
3665  *   - now keeps its own state so discards due to resource
3666  *     exhaustion are unnecessary; if an mbuf cannot be obtained
3667  *     the routine just returns, keeping its placeholder, and can
3668  *     simply be called again later to retry.
3669  *
3670  **********************************************************************/
3671 static void
3672 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3673 {
3674         struct adapter          *adapter = rxr->adapter;
3675         bus_dma_segment_t       hseg[1];
3676         bus_dma_segment_t       pseg[1];
3677         struct ixgbe_rx_buf     *rxbuf;
3678         struct mbuf             *mh, *mp;
3679         int                     i, j, nsegs, error;
3680         bool                    refreshed = FALSE;
3681
3682         i = j = rxr->next_to_refresh;
3683         /* Control the loop with one beyond */
3684         if (++j == adapter->num_rx_desc)
3685                 j = 0;
3686
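             /*
              * 'j' runs one slot ahead of 'i'; keep refreshing until it
              * reaches the caller-supplied limit.
              */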
3687         while (j != limit) {
3688                 rxbuf = &rxr->rx_buffers[i];
3689                 if (rxr->hdr_split == FALSE)
3690                         goto no_split;
3691
3692                 if (rxbuf->m_head == NULL) {
3693                         mh = m_gethdr(MB_DONTWAIT, MT_DATA);
3694                         if (mh == NULL)
3695                                 goto update;
3696                 } else
3697                         mh = rxbuf->m_head;
3698
3699                 mh->m_pkthdr.len = mh->m_len = MHLEN;
3701                 mh->m_flags |= M_PKTHDR;
3702                 /* Get the memory mapping */
3703                 error = bus_dmamap_load_mbuf_segment(rxr->htag,
3704                     rxbuf->hmap, mh, hseg, 1, &nsegs, BUS_DMA_NOWAIT);
3705                 if (error != 0) {
3706                         kprintf("Refresh mbufs: hdr dmamap load"
3707                             " failure - %d\n", error);
3708                         m_free(mh);
3709                         rxbuf->m_head = NULL;
3710                         goto update;
3711                 }
3712                 rxbuf->m_head = mh;
3713                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3714                     BUS_DMASYNC_PREREAD);
3715                 rxr->rx_base[i].read.hdr_addr =
3716                     htole64(hseg[0].ds_addr);
3717
3718 no_split:
3719                 if (rxbuf->m_pack == NULL) {
3720                         mp = m_getjcl(MB_DONTWAIT, MT_DATA,
3721                             M_PKTHDR, adapter->rx_mbuf_sz);
3722                         if (mp == NULL)
3723                                 goto update;
3724                 } else
3725                         mp = rxbuf->m_pack;
3726
3727                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3728                 /* Get the memory mapping */
3729                 error = bus_dmamap_load_mbuf_segment(rxr->ptag,
3730                     rxbuf->pmap, mp, pseg, 1, &nsegs, BUS_DMA_NOWAIT);
3731                 if (error != 0) {
3732                         kprintf("Refresh mbufs: payload dmamap load"
3733                             " failure - %d\n", error);
3734                         m_free(mp);
3735                         rxbuf->m_pack = NULL;
3736                         goto update;
3737                 }
3738                 rxbuf->m_pack = mp;
3739                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3740                     BUS_DMASYNC_PREREAD);
3741                 rxr->rx_base[i].read.pkt_addr =
3742                     htole64(pseg[0].ds_addr);
3743
3744                 refreshed = TRUE;
3745                 /* Next is precalculated */
3746                 i = j;
3747                 rxr->next_to_refresh = i;
3748                 if (++j == adapter->num_rx_desc)
3749                         j = 0;
3750         }
3751 update:
3752         if (refreshed) /* Update hardware tail index */
3753                 IXGBE_WRITE_REG(&adapter->hw,
3754                     IXGBE_RDT(rxr->me), rxr->next_to_refresh);
3755         return;
3756 }
3757
3758 /*********************************************************************
3759  *
3760  *  Allocate memory for rx_buffer structures. Since we use one
3761  *  rx_buffer per received packet, the maximum number of rx_buffer's
3762  *  that we'll need is equal to the number of receive descriptors
3763  *  that we've allocated.
3764  *
3765  **********************************************************************/
3766 static int
3767 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
3768 {
3769         struct  adapter         *adapter = rxr->adapter;
3770         device_t                dev = adapter->dev;
3771         struct ixgbe_rx_buf     *rxbuf;
3772         int                     i, bsize, error;
3773
3774         bsize = sizeof(struct ixgbe_rx_buf) * adapter->num_rx_desc;
3775         if (!(rxr->rx_buffers =
3776             (struct ixgbe_rx_buf *) kmalloc(bsize,
3777             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3778                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3779                 error = ENOMEM;
3780                 goto fail;
3781         }
3782
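             /*
              * Two DMA tags are used: one for header mbufs (at most MSIZE
              * bytes) and one for payload clusters (up to a 16KB jumbo
              * cluster).
              */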
3783         if ((error = bus_dma_tag_create(NULL,   /* parent */
3784                                    1, 0,        /* alignment, bounds */
3785                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3786                                    BUS_SPACE_MAXADDR,   /* highaddr */
3787                                    NULL, NULL,          /* filter, filterarg */
3788                                    MSIZE,               /* maxsize */
3789                                    1,                   /* nsegments */
3790                                    MSIZE,               /* maxsegsize */
3791                                    0,                   /* flags */
3792                                    &rxr->htag))) {
3793                 device_printf(dev, "Unable to create RX DMA tag\n");
3794                 goto fail;
3795         }
3796
3797         if ((error = bus_dma_tag_create(NULL,   /* parent */
3798                                    1, 0,        /* alignment, bounds */
3799                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3800                                    BUS_SPACE_MAXADDR,   /* highaddr */
3801                                    NULL, NULL,          /* filter, filterarg */
3802                                    MJUM16BYTES,         /* maxsize */
3803                                    1,                   /* nsegments */
3804                                    MJUM16BYTES,         /* maxsegsize */
3805                                    0,                   /* flags */
3806                                    &rxr->ptag))) {
3807                 device_printf(dev, "Unable to create RX DMA tag\n");
3808                 goto fail;
3809         }
3810
3811         for (i = 0; i < adapter->num_rx_desc; i++) {
3812                 rxbuf = &rxr->rx_buffers[i];
3813                 error = bus_dmamap_create(rxr->htag,
3814                     BUS_DMA_NOWAIT, &rxbuf->hmap);
3815                 if (error) {
3816                         device_printf(dev, "Unable to create RX head map\n");
3817                         goto fail;
3818                 }
3819                 error = bus_dmamap_create(rxr->ptag,
3820                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3821                 if (error) {
3822                         device_printf(dev, "Unable to create RX pkt map\n");
3823                         goto fail;
3824                 }
3825         }
3826
3827         return (0);
3828
3829 fail:
3830         /* Frees all, but can handle partial completion */
3831         ixgbe_free_receive_structures(adapter);
3832         return (error);
3833 }
3834
3835 /*
3836 ** Used to detect a descriptor that has
3837 ** been merged by Hardware RSC.
3838 */
3839 static inline u32
3840 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
3841 {
3842         return (le32toh(rx->wb.lower.lo_dword.data) &
3843             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
3844 }
3845
3846 /*********************************************************************
3847  *
3848  *  Initialize Hardware RSC (LRO) feature on 82599
3849  *  for an RX ring, this is toggled by the LRO capability
3850  *  even though it is transparent to the stack.
3851  *
3852  **********************************************************************/
3853 #if 0   /* NET_LRO */
3854 static void
3855 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
3856 {
3857         struct  adapter         *adapter = rxr->adapter;
3858         struct  ixgbe_hw        *hw = &adapter->hw;
3859         u32                     rscctrl, rdrxctl;
3860
3861         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3862         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3863 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
3864         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
3865 #endif /* DEV_NETMAP */
3866         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3867         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
3868         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3869
3870         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3871         rscctrl |= IXGBE_RSCCTL_RSCEN;
3872         /*
3873         ** Limit the total number of descriptors that
3874         ** can be combined, so it does not exceed 64K
3875         */
3876         if (adapter->rx_mbuf_sz == MCLBYTES)
3877                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
3878         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
3879                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
3880         else if (adapter->rx_mbuf_sz == MJUM9BYTES)
3881                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
3882         else  /* Using 16K cluster */
3883                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
3884
3885         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3886
3887         /* Enable TCP header recognition */
3888         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
3889             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
3890             IXGBE_PSRTYPE_TCPHDR));
3891
3892         /* Disable RSC for ACK packets */
3893         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
3894             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
3895
3896         rxr->hw_rsc = TRUE;
3897 }
3898 #endif
3899
3900 static void     
3901 ixgbe_free_receive_ring(struct rx_ring *rxr)
3902 {
3903         struct  adapter         *adapter;
3904         struct ixgbe_rx_buf       *rxbuf;
3905         int i;
3906
3907         adapter = rxr->adapter;
3908         for (i = 0; i < adapter->num_rx_desc; i++) {
3909                 rxbuf = &rxr->rx_buffers[i];
3910                 if (rxbuf->m_head != NULL) {
3911                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3912                             BUS_DMASYNC_POSTREAD);
3913                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3914                         rxbuf->m_head->m_flags |= M_PKTHDR;
3915                         m_freem(rxbuf->m_head);
3916                 }
3917                 if (rxbuf->m_pack != NULL) {
3918                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3919                             BUS_DMASYNC_POSTREAD);
3920                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3921                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3922                         m_freem(rxbuf->m_pack);
3923                 }
3924                 rxbuf->m_head = NULL;
3925                 rxbuf->m_pack = NULL;
3926         }
3927 }
3928
3929
3930 /*********************************************************************
3931  *
3932  *  Initialize a receive ring and its buffers.
3933  *
3934  **********************************************************************/
3935 static int
3936 ixgbe_setup_receive_ring(struct rx_ring *rxr)
3937 {
3938         struct  adapter         *adapter;
3939         struct ifnet            *ifp;
3940         device_t                dev;
3941         struct ixgbe_rx_buf     *rxbuf;
3942         bus_dma_segment_t       pseg[1], hseg[1];
3943 #if 0   /* NET_LRO */
3944         struct lro_ctrl         *lro = &rxr->lro;
3945 #endif
3946         int                     rsize, nsegs, error = 0;
3947 #ifdef DEV_NETMAP
3948         struct netmap_adapter *na = NA(rxr->adapter->ifp);
3949         struct netmap_slot *slot;
3950 #endif /* DEV_NETMAP */
3951
3952         adapter = rxr->adapter;
3953         ifp = adapter->ifp;
3954         dev = adapter->dev;
3955
3956         /* Clear the ring contents */
3957         IXGBE_RX_LOCK(rxr);
3958 #ifdef DEV_NETMAP
3959         /* same as in ixgbe_setup_transmit_ring() */
3960         slot = netmap_reset(na, NR_RX, rxr->me, 0);
3961 #endif /* DEV_NETMAP */
3962         rsize = roundup2(adapter->num_rx_desc *
3963             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
3964         bzero((void *)rxr->rx_base, rsize);
3965
3966         /* Free current RX buffer structs and their mbufs */
3967         ixgbe_free_receive_ring(rxr);
3968
3969         /* Configure header split? */
3970         if (ixgbe_header_split)
3971                 rxr->hdr_split = TRUE;
3972
3973         /* Now replenish the mbufs */
3974         for (int j = 0; j != adapter->num_rx_desc; ++j) {
3975                 struct mbuf     *mh, *mp;
3976
3977                 rxbuf = &rxr->rx_buffers[j];
3978 #ifdef DEV_NETMAP
3979                 /*
3980                  * In netmap mode, fill the map and set the buffer
3981                  * address in the NIC ring, considering the offset
3982                  * between the netmap and NIC rings (see comment in
3983                  * ixgbe_setup_transmit_ring() ). No need to allocate
3984                  * an mbuf, so end the block with a continue;
3985                  */
3986                 if (slot) {
3987                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
3988                         uint64_t paddr;
3989                         void *addr;
3990
3991                         addr = PNMB(slot + sj, &paddr);
3992                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
3993                         /* Update descriptor */
3994                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
3995                         continue;
3996                 }
3997 #endif /* DEV_NETMAP */
3998                 /*
3999                 ** Don't allocate mbufs if not
4000                 ** doing header split, it's wasteful
4001                 */ 
4002                 if (rxr->hdr_split == FALSE)
4003                         goto skip_head;
4004
4005                 /* First the header */
4006                 rxbuf->m_head = m_gethdr(MB_DONTWAIT, MT_DATA);
4007                 if (rxbuf->m_head == NULL) {
4008                         error = ENOBUFS;
4009                         goto fail;
4010                 }
4011                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4012                 mh = rxbuf->m_head;
4013                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4014                 mh->m_flags |= M_PKTHDR;
4015                 /* Get the memory mapping */
4016                 error = bus_dmamap_load_mbuf_segment(rxr->htag,
4017                     rxbuf->hmap, rxbuf->m_head, hseg, 1,
4018                     &nsegs, BUS_DMA_NOWAIT);
4019
4020                 if (error != 0) /* Nothing elegant to do here */
4021                         goto fail;
4022                 bus_dmamap_sync(rxr->htag,
4023                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
4024                 /* Update descriptor */
4025                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4026
4027 skip_head:
4028                 /* Now the payload cluster */
4029                 rxbuf->m_pack = m_getjcl(MB_DONTWAIT, MT_DATA,
4030                     M_PKTHDR, adapter->rx_mbuf_sz);
4031                 if (rxbuf->m_pack == NULL) {
4032                         error = ENOBUFS;
4033                         goto fail;
4034                 }
4035                 mp = rxbuf->m_pack;
4036                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4037                 /* Get the memory mapping */
4038                 error = bus_dmamap_load_mbuf_segment(rxr->ptag,
4039                     rxbuf->pmap, mp, pseg, 1,
4040                     &nsegs, BUS_DMA_NOWAIT);
4041                 if (error != 0)
4042                         goto fail;
4043                 bus_dmamap_sync(rxr->ptag,
4044                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
4045                 /* Update descriptor */
4046                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4047         }
4048
4049
4050         /* Setup our descriptor indices */
4051         rxr->next_to_check = 0;
4052         rxr->next_to_refresh = 0;
4053         rxr->lro_enabled = FALSE;
4054         rxr->rx_split_packets = 0;
4055         rxr->rx_bytes = 0;
4056         rxr->discard = FALSE;
4057         rxr->vtag_strip = FALSE;
4058
4059         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4060             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4061
4062         /*
4063         ** Now set up the LRO interface:
4064         ** 82598 uses software LRO, the
4065         ** 82599 and X540 use a hardware assist.
4066         */
4067 #if 0 /* NET_LRO */
4068         if ((adapter->hw.mac.type != ixgbe_mac_82598EB) &&
4069             (ifp->if_capenable & IFCAP_RXCSUM) &&
4070             (ifp->if_capenable & IFCAP_LRO))
4071                 ixgbe_setup_hw_rsc(rxr);
4072         else if (ifp->if_capenable & IFCAP_LRO) {
4073                 int err = tcp_lro_init(lro);
4074                 if (err) {
4075                         device_printf(dev, "LRO Initialization failed!\n");
4076                         goto fail;
4077                 }
4078                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
4079                 rxr->lro_enabled = TRUE;
4080                 lro->ifp = adapter->ifp;
4081         }
4082 #endif
4083
4084         IXGBE_RX_UNLOCK(rxr);
4085         return (0);
4086
4087 fail:
4088         ixgbe_free_receive_ring(rxr);
4089         IXGBE_RX_UNLOCK(rxr);
4090         return (error);
4091 }
4092
4093 /*********************************************************************
4094  *
4095  *  Initialize all receive rings.
4096  *
4097  **********************************************************************/
4098 static int
4099 ixgbe_setup_receive_structures(struct adapter *adapter)
4100 {
4101         struct rx_ring *rxr = adapter->rx_rings;
4102         int j;
4103
4104         for (j = 0; j < adapter->num_queues; j++, rxr++)
4105                 if (ixgbe_setup_receive_ring(rxr))
4106                         goto fail;
4107
4108         return (0);
4109 fail:
4110         /*
4111          * Free the RX buffers allocated so far; we only handle
4112          * the rings that completed, since the failing case will have
4113          * cleaned up after itself. 'j' failed, so it's the terminus.
4114          */
4115         for (int i = 0; i < j; ++i) {
4116                 rxr = &adapter->rx_rings[i];
4117                 ixgbe_free_receive_ring(rxr);
4118         }
4119
4120         return (ENOBUFS);
4121 }
4122
4123 /*********************************************************************
4124  *
4125  *  Setup receive registers and features.
4126  *
4127  **********************************************************************/
4128 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
4129
4130 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
4131         
4132 static void
4133 ixgbe_initialize_receive_units(struct adapter *adapter)
4134 {
4135         struct  rx_ring *rxr = adapter->rx_rings;
4136         struct ixgbe_hw *hw = &adapter->hw;
4137         struct ifnet   *ifp = adapter->ifp;
4138         u32             bufsz, rxctrl, fctrl, srrctl, rxcsum;
4139         u32             reta, mrqc = 0, hlreg, random[10];
4140
4141
4142         /*
4143          * Make sure receives are disabled while
4144          * setting up the descriptor ring
4145          */
4146         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4147         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
4148             rxctrl & ~IXGBE_RXCTRL_RXEN);
4149
4150         /* Enable broadcasts */
4151         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4152         fctrl |= IXGBE_FCTRL_BAM;
4153         fctrl |= IXGBE_FCTRL_DPF;
4154         fctrl |= IXGBE_FCTRL_PMCF;
4155         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4156
4157         /* Set for Jumbo Frames? */
4158         hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4159         if (ifp->if_mtu > ETHERMTU)
4160                 hlreg |= IXGBE_HLREG0_JUMBOEN;
4161         else
4162                 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4163 #ifdef DEV_NETMAP
4164         /* crcstrip is conditional in netmap (in RDRXCTL too ?) */
4165         if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4166                 hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4167         else
4168                 hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4169 #endif /* DEV_NETMAP */
4170         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4171
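             /*
              * Round up and convert to the 1KB units used by the SRRCTL
              * BSIZEPKT field.
              */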
4172         bufsz = (adapter->rx_mbuf_sz +
4173             BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
4174
4175         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4176                 u64 rdba = rxr->rxdma.dma_paddr;
4177
4178                 /* Setup the Base and Length of the Rx Descriptor Ring */
4179                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
4180                                (rdba & 0x00000000ffffffffULL));
4181                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
4182                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
4183                     adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4184
4185                 /* Set up the SRRCTL register */
4186                 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
4187                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4188                 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
4189                 srrctl |= bufsz;
4190                 if (rxr->hdr_split) {
4191                         /* Use a standard mbuf for the header */
4192                         srrctl |= ((IXGBE_RX_HDR <<
4193                             IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT)
4194                             & IXGBE_SRRCTL_BSIZEHDR_MASK);
4195                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4196                 } else
4197                         srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4198                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
4199
4200                 /* Setup the HW Rx Head and Tail Descriptor Pointers */
4201                 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
4202                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
4203         }
4204
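             /*
             ** On the newer MACs, program PSRTYPE (packet split receive
             ** type) with the header types the hardware should recognize
             ** when splitting headers.
             */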
4205         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
4206                 u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
4207                               IXGBE_PSRTYPE_UDPHDR |
4208                               IXGBE_PSRTYPE_IPV4HDR |
4209                               IXGBE_PSRTYPE_IPV6HDR;
4210                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
4211         }
4212
4213         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4214
4215         /* Setup RSS */
4216         if (adapter->num_queues > 1) {
4217                 int i, j;
4218                 reta = 0;
4219
4220                 /* set up random bits */
4221                 karc4rand(&random, sizeof(random));
4222
4223                 /* Set up the redirection table */
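                     /*
                     ** Each 32-bit RETA register holds four 8-bit entries;
                     ** the loop accumulates four entries in 'reta' and
                     ** writes the register on every fourth pass.  Multiplying
                     ** the queue index by 0x11 simply mirrors it into both
                     ** nibbles of the entry.
                     */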
4224                 for (i = 0, j = 0; i < 128; i++, j++) {
4225                         if (j == adapter->num_queues) j = 0;
4226                         reta = (reta << 8) | (j * 0x11);
4227                         if ((i & 3) == 3)
4228                                 IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
4229                 }
4230
4231                 /* Now fill our hash function seeds */
4232                 for (int i = 0; i < 10; i++)
4233                         IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random[i]);
4234
4235                 /* Perform hash on these packet types */
4236                 mrqc = IXGBE_MRQC_RSSEN
4237                      | IXGBE_MRQC_RSS_FIELD_IPV4
4238                      | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
4239                      | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
4240                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
4241                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX
4242                      | IXGBE_MRQC_RSS_FIELD_IPV6
4243                      | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
4244                      | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
4245                      | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
4246                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4247
4248                 /* RSS and RX IPP Checksum are mutually exclusive */
4249                 rxcsum |= IXGBE_RXCSUM_PCSD;
4250         }
4251
4252         if (ifp->if_capenable & IFCAP_RXCSUM)
4253                 rxcsum |= IXGBE_RXCSUM_PCSD;
4254
4255         if (!(rxcsum & IXGBE_RXCSUM_PCSD))
4256                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4257
4258         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4259
4260         return;
4261 }
4262
4263 /*********************************************************************
4264  *
4265  *  Free all receive rings.
4266  *
4267  **********************************************************************/
4268 static void
4269 ixgbe_free_receive_structures(struct adapter *adapter)
4270 {
4271         struct rx_ring *rxr = adapter->rx_rings;
4272
4273         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4274 #if 0   /* NET_LRO */
4275                 struct lro_ctrl         *lro = &rxr->lro;
4276 #endif
4277                 ixgbe_free_receive_buffers(rxr);
4278                 /* Free LRO memory */
4279 #if 0   /* NET_LRO */
4280                 tcp_lro_free(lro);
4281 #endif
4282                 /* Free the ring memory as well */
4283                 ixgbe_dma_free(adapter, &rxr->rxdma);
4284         }
4285
4286         kfree(adapter->rx_rings, M_DEVBUF);
4287 }
4288
4289
4290 /*********************************************************************
4291  *
4292  *  Free receive ring data structures
4293  *
4294  **********************************************************************/
4295 static void
4296 ixgbe_free_receive_buffers(struct rx_ring *rxr)
4297 {
4298         struct adapter          *adapter = rxr->adapter;
4299         struct ixgbe_rx_buf     *rxbuf;
4300
4301         INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
4302
4303         /* Cleanup any existing buffers */
4304         if (rxr->rx_buffers != NULL) {
4305                 for (int i = 0; i < adapter->num_rx_desc; i++) {
4306                         rxbuf = &rxr->rx_buffers[i];
4307                         if (rxbuf->m_head != NULL) {
4308                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4309                                     BUS_DMASYNC_POSTREAD);
4310                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4311                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4312                                 m_freem(rxbuf->m_head);
4313                         }
4314                         if (rxbuf->m_pack != NULL) {
4315                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4316                                     BUS_DMASYNC_POSTREAD);
4317                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4318                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4319                                 m_freem(rxbuf->m_pack);
4320                         }
4321                         rxbuf->m_head = NULL;
4322                         rxbuf->m_pack = NULL;
4323                         if (rxbuf->hmap != NULL) {
4324                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4325                                 rxbuf->hmap = NULL;
4326                         }
4327                         if (rxbuf->pmap != NULL) {
4328                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4329                                 rxbuf->pmap = NULL;
4330                         }
4331                 }
4332                 if (rxr->rx_buffers != NULL) {
4333                         kfree(rxr->rx_buffers, M_DEVBUF);
4334                         rxr->rx_buffers = NULL;
4335                 }
4336         }
4337
4338         if (rxr->htag != NULL) {
4339                 bus_dma_tag_destroy(rxr->htag);
4340                 rxr->htag = NULL;
4341         }
4342         if (rxr->ptag != NULL) {
4343                 bus_dma_tag_destroy(rxr->ptag);
4344                 rxr->ptag = NULL;
4345         }
4346
4347         return;
4348 }
4349
4350 static __inline void
4351 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4352 {
4353                  
4354         /*
4355          * At the moment LRO is only for IP/TCP packets whose TCP checksum has
4356          * been verified by hardware, and which carry no VLAN tag in the
4357          * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
4358          */
4359 #if 0   /* NET_LRO */
4360         if (rxr->lro_enabled &&
4361             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4362             (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4363             ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4364             (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
4365             (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4366             (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
4367             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4368             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4369                 /*
4370                  * Send to the stack if:
4371                  **  - LRO not enabled, or
4372                  **  - no LRO resources, or
4373                  **  - lro enqueue fails
4374                  */
4375                 if (rxr->lro.lro_cnt != 0)
4376                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4377                                 return;
4378         }
4379 #endif
4380         IXGBE_RX_UNLOCK(rxr);
4381         (*ifp->if_input)(ifp, m);
4382         IXGBE_RX_LOCK(rxr);
4383 }
4384
4385 static __inline void
4386 ixgbe_rx_discard(struct rx_ring *rxr, int i)
4387 {
4388         struct ixgbe_rx_buf     *rbuf;
4389
4390         rbuf = &rxr->rx_buffers[i];
4391
4392         if (rbuf->fmp != NULL) {/* Partial chain ? */
4393                 rbuf->fmp->m_flags |= M_PKTHDR;
4394                 m_freem(rbuf->fmp);
4395                 rbuf->fmp = NULL;
4396         }
4397
4398         /*
4399         ** With advanced descriptors the writeback
4400         ** clobbers the buffer addrs, so it's easier
4401         ** to just free the existing mbufs and take
4402         ** the normal refresh path to get new buffers
4403         ** and mapping.
4404         */
4405         if (rbuf->m_head) {
4406                 m_free(rbuf->m_head);
4407                 rbuf->m_head = NULL;
4408         }
4409  
4410         if (rbuf->m_pack) {
4411                 m_free(rbuf->m_pack);
4412                 rbuf->m_pack = NULL;
4413         }
4414
4415         return;
4416 }
4417
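     /*
     ** Set up the driver's private sysctl tree and attach the tunables
     ** that hang off of it: flow control, interrupt moderation (AIM),
     ** advertised link speed, the thermal test and the RX processing
     ** limit.
     */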
4418 static void
4419 ixgbe_add_sysctl(struct adapter *adapter)
4420 {
4421         sysctl_ctx_init(&adapter->sysctl_ctx);
4422         adapter->sysctl_tree = SYSCTL_ADD_NODE(&adapter->sysctl_ctx,
4423             SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
4424             device_get_nameunit(adapter->dev), CTLFLAG_RD, 0, "");
4425         if (adapter->sysctl_tree == NULL) {
4426                 device_printf(adapter->dev, "can't add sysctl node\n");
4427                 return;
4428         }
4429         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
4430                         SYSCTL_CHILDREN(adapter->sysctl_tree),
4431                         OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW,
4432                         adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control");
4433
4434         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
4435                         SYSCTL_CHILDREN(adapter->sysctl_tree),
4436                         OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
4437                         &ixgbe_enable_aim, 1, "Interrupt Moderation");
4438
4439         /*
4440         ** Allow a kind of speed control by restricting the autoneg
4441         ** advertised speed list to a single value; this supports
4442         ** 1G on 82599 devices, and 100Mb on X540.
4443         */
4444         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
4445                         SYSCTL_CHILDREN(adapter->sysctl_tree),
4446                         OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW,
4447                         adapter, 0, ixgbe_set_advertise, "I", "Link Speed");
4448
4449         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
4450                         SYSCTL_CHILDREN(adapter->sysctl_tree),
4451                         OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter,
4452                         0, ixgbe_set_thermal_test, "I", "Thermal Test");
4453
4454         /* Sysctl for limiting the amount of work done in the taskqueue */
4455         ixgbe_add_rx_process_limit(adapter, "rx_processing_limit",
4456             "max number of rx packets to process", &adapter->rx_process_limit,
4457             ixgbe_rx_process_limit);
4458 }
4459
4460 /*********************************************************************
4461  *
4462  *  This routine executes in interrupt context. It replenishes
4463  *  the mbufs in the descriptor and sends data which has been
4464  *  dma'ed into host memory to upper layer.
4465  *
4466  *  We loop at most count times if count is > 0, or until done if
4467  *  count < 0.
4468  *
4469  *  Return TRUE for more work, FALSE for all clean.
4470  *********************************************************************/
4471 static bool
4472 ixgbe_rxeof(struct ix_queue *que, int count)
4473 {
4474         struct adapter          *adapter = que->adapter;
4475         struct rx_ring          *rxr = que->rxr;
4476         struct ifnet            *ifp = adapter->ifp;
4477 #if 0   /* NET_LRO */
4478         struct lro_ctrl         *lro = &rxr->lro;
4479         struct lro_entry        *queued;
4480 #endif
4481         int                     i, nextp, processed = 0;
4482         u32                     staterr = 0;
4483         union ixgbe_adv_rx_desc *cur;
4484         struct ixgbe_rx_buf     *rbuf, *nbuf;
4485
4486         IXGBE_RX_LOCK(rxr);
4487
4488 #ifdef DEV_NETMAP
4489         if (ifp->if_capenable & IFCAP_NETMAP) {
4490                 /*
4491                  * Same as the txeof routine: only wakeup clients on intr.
4492                  * NKR_PENDINTR in nr_kflags is used to implement interrupt
4493                  * mitigation (ixgbe_rxsync() will not look for new packets
4494                  * unless NKR_PENDINTR is set).
4495                  */
4496                 struct netmap_adapter *na = NA(ifp);
4497
4498                 na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4499                 selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4500                 IXGBE_RX_UNLOCK(rxr);
4501                 IXGBE_CORE_LOCK(adapter);
4502                 selwakeuppri(&na->rx_si, PI_NET);
4503                 IXGBE_CORE_UNLOCK(adapter);
4504                 return (FALSE);
4505         }
4506 #endif /* DEV_NETMAP */
4507         for (i = rxr->next_to_check; count != 0;) {
4508                 struct mbuf     *sendmp, *mh, *mp;
4509                 u32             rsc, ptype;
4510                 u16             hlen, plen, hdr;
4511                 u16             vtag = 0;
4512                 bool            eop;
4513  
4514                 /* Sync the ring. */
4515                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4516                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4517
4518                 cur = &rxr->rx_base[i];
4519                 staterr = le32toh(cur->wb.upper.status_error);
4520
4521                 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
4522                         break;
4523                 if ((ifp->if_flags & IFF_RUNNING) == 0)
4524                         break;
4525
4526                 count--;
4527                 sendmp = NULL;
4528                 nbuf = NULL;
4529                 rsc = 0;
4530                 cur->wb.upper.status_error = 0;
4531                 rbuf = &rxr->rx_buffers[i];
4532                 mh = rbuf->m_head;
4533                 mp = rbuf->m_pack;
4534
4535                 plen = le16toh(cur->wb.upper.length);
4536                 ptype = le32toh(cur->wb.lower.lo_dword.data) &
4537                     IXGBE_RXDADV_PKTTYPE_MASK;
4538                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4539                 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
4540
4541                 /* Process vlan info */
4542                 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
4543                         vtag = le16toh(cur->wb.upper.vlan);
4544
4545                 /* Make sure bad packets are discarded */
4546                 if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
4547                     (rxr->discard)) {
4548                         ifp->if_ierrors++;
4549                         rxr->rx_discarded++;
4550                         if (eop)
4551                                 rxr->discard = FALSE;
4552                         else
4553                                 rxr->discard = TRUE;
4554                         ixgbe_rx_discard(rxr, i);
4555                         goto next_desc;
4556                 }
4557
4558                 /*
4559                 ** On 82599 which supports a hardware
4560                 ** LRO (called HW RSC), packets need
4561                 ** not be fragmented across sequential
4562                 ** descriptors, rather the next descriptor
4563                 ** is indicated in bits of the descriptor.
4564                 ** This also means that we might process
4565                 ** more than one packet at a time, something
4566                 ** that has never been true before; it
4567                 ** required eliminating global chain pointers
4568                 ** in favor of what we are doing here.  -jfv
4569                 */
4570                 if (!eop) {
4571                         /*
4572                         ** Figure out the next descriptor
4573                         ** of this frame.
4574                         */
4575                         if (rxr->hw_rsc == TRUE) {
4576                                 rsc = ixgbe_rsc_count(cur);
4577                                 rxr->rsc_num += (rsc - 1);
4578                         }
4579                         if (rsc) { /* Get hardware index */
4580                                 nextp = ((staterr &
4581                                     IXGBE_RXDADV_NEXTP_MASK) >>
4582                                     IXGBE_RXDADV_NEXTP_SHIFT);
4583                         } else { /* Just sequential */
4584                                 nextp = i + 1;
4585                                 if (nextp == adapter->num_rx_desc)
4586                                         nextp = 0;
4587                         }
4588                         nbuf = &rxr->rx_buffers[nextp];
4589                         prefetch(nbuf);
4590                 }
4591                 /*
4592                 ** The header mbuf is ONLY used when header 
4593                 ** split is enabled, otherwise we get normal 
4594                 ** behavior, ie, both header and payload
4595                 ** are DMA'd into the payload buffer.
4596                 **
4597                 ** Rather than using the fmp/lmp global pointers
4598                 ** we now keep the head of a packet chain in the
4599                 ** buffer struct and pass this along from one
4600                 ** descriptor to the next, until we get EOP.
4601                 */
4602                 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
4603                         /* This must be an initial descriptor */
4604                         hlen = (hdr & IXGBE_RXDADV_HDRBUFLEN_MASK) >>
4605                             IXGBE_RXDADV_HDRBUFLEN_SHIFT;
4606                         if (hlen > IXGBE_RX_HDR)
4607                                 hlen = IXGBE_RX_HDR;
4608                         mh->m_len = hlen;
4609                         mh->m_flags |= M_PKTHDR;
4610                         mh->m_next = NULL;
4611                         mh->m_pkthdr.len = mh->m_len;
4612                         /* Null buf pointer so it is refreshed */
4613                         rbuf->m_head = NULL;
4614                         /*
4615                         ** Check the payload length, this
4616                         ** could be zero if it's a small
4617                         ** packet.
4618                         */
4619                         if (plen > 0) {
4620                                 mp->m_len = plen;
4621                                 mp->m_next = NULL;
4622                                 mp->m_flags &= ~M_PKTHDR;
4623                                 mh->m_next = mp;
4624                                 mh->m_pkthdr.len += mp->m_len;
4625                                 /* Null buf pointer so it is refreshed */
4626                                 rbuf->m_pack = NULL;
4627                                 rxr->rx_split_packets++;
4628                         }
4629                         /*
4630                         ** Now create the forward
4631                         ** chain so when complete 
4632                         ** we won't have to.
4633                         */
4634                         if (eop == 0) {
4635                                 /* stash the chain head */
4636                                 nbuf->fmp = mh;
4637                                 /* Make forward chain */
4638                                 if (plen)
4639                                         mp->m_next = nbuf->m_pack;
4640                                 else
4641                                         mh->m_next = nbuf->m_pack;
4642                         } else {
4643                                 /* Singlet, prepare to send */
4644                                 sendmp = mh;
4645                                 /* If hardware handled vtag */
4646                                 if (vtag) {
4647                                         sendmp->m_pkthdr.ether_vlantag = vtag;
4648                                         sendmp->m_flags |= M_VLANTAG;
4649                                 }
4650                         }
4651                 } else {
4652                         /*
4653                         ** Either no header split, or a
4654                         ** secondary piece of a fragmented
4655                         ** split packet.
4656                         */
4657                         mp->m_len = plen;
4658                         /*
4659                         ** See if there is a stored head
4660                         ** that determines what we are
4661                         */
4662                         sendmp = rbuf->fmp;
4663                         rbuf->m_pack = rbuf->fmp = NULL;
4664
4665                         if (sendmp != NULL) {  /* secondary frag */
4666                                 mp->m_flags &= ~M_PKTHDR;
4667                                 sendmp->m_pkthdr.len += mp->m_len;
4668                         } else {
4669                                 /* first desc of a non-ps chain */
4670                                 sendmp = mp;
4671                                 sendmp->m_flags |= M_PKTHDR;
4672                                 sendmp->m_pkthdr.len = mp->m_len;
4673                                 if (staterr & IXGBE_RXD_STAT_VP) {
4674                                         sendmp->m_pkthdr.ether_vlantag = vtag;
4675                                         sendmp->m_flags |= M_VLANTAG;
4676                                 }
4677                         }
4678                         /* Pass the head pointer on */
4679                         if (eop == 0) {
4680                                 nbuf->fmp = sendmp;
4681                                 sendmp = NULL;
4682                                 mp->m_next = nbuf->m_pack;
4683                         }
4684                 }
4685                 ++processed;
4686                 /* Sending this frame? */
4687                 if (eop) {
4688                         sendmp->m_pkthdr.rcvif = ifp;
4689                         ifp->if_ipackets++;
4690                         rxr->rx_packets++;
4691                         /* capture data for AIM */
4692                         rxr->bytes += sendmp->m_pkthdr.len;
4693                         rxr->rx_bytes += sendmp->m_pkthdr.len;
4694                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4695                                 ixgbe_rx_checksum(staterr, sendmp, ptype);
4696 #if 0 /* __FreeBSD_version >= 800000 */
4697                         sendmp->m_pkthdr.flowid = que->msix;
4698                         sendmp->m_flags |= M_FLOWID;
4699 #endif
4700                 }
4701 next_desc:
4702                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4703                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4704
4705                 /* Advance our pointers to the next descriptor. */
4706                 if (++i == adapter->num_rx_desc)
4707                         i = 0;
4708
4709                 /* Now send to the stack or do LRO */
4710                 if (sendmp != NULL) {
4711                         rxr->next_to_check = i;
4712                         ixgbe_rx_input(rxr, ifp, sendmp, ptype);
4713                         i = rxr->next_to_check;
4714                 }
4715
4716                 /* Every 8 descriptors we go to refresh mbufs */
4717                 if (processed == 8) {
4718                         ixgbe_refresh_mbufs(rxr, i);
4719                         processed = 0;
4720                 }
4721         }
4722
4723         /* Refresh any remaining buf structs */
4724         if (ixgbe_rx_unrefreshed(rxr))
4725                 ixgbe_refresh_mbufs(rxr, i);
4726
4727         rxr->next_to_check = i;
4728
4729         /*
4730          * Flush any outstanding LRO work
4731          */
4732 #if 0   /* NET_LRO */
4733         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4734                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4735                 tcp_lro_flush(lro, queued);
4736         }
4737 #endif
4738
4739         IXGBE_RX_UNLOCK(rxr);
4740
4741         /*
4742         ** We still have cleaning to do?
4743         ** Schedule another interrupt if so.
4744         */
4745         if ((staterr & IXGBE_RXD_STAT_DD) != 0) {
4746                 ixgbe_rearm_queues(adapter, (u64)1 << que->msix);
4747                 return (TRUE);
4748         }
4749
4750         return (FALSE);
4751 }
4752
4753
4754 /*********************************************************************
4755  *
4756  *  Verify that the hardware indicated that the checksum is valid.
4757  *  Inform the stack about the status of checksum so that stack
4758  *  doesn't spend time verifying the checksum.
4759  *
4760  *********************************************************************/
4761 static void
4762 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
4763 {
4764         u16     status = (u16) staterr;
4765         u8      errors = (u8) (staterr >> 24);
4766         bool    sctp = FALSE;
4767
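             /*
             ** The low 16 bits of staterr carry the status flags and the
             ** top byte the error flags.  A csum_data of 0xffff together
             ** with CSUM_DATA_VALID | CSUM_PSEUDO_HDR tells the stack the
             ** L4 checksum has already been fully verified.
             */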
4768         if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4769             (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
4770                 sctp = TRUE;
4771
4772         if (status & IXGBE_RXD_STAT_IPCS) {
4773                 if (!(errors & IXGBE_RXD_ERR_IPE)) {
4774                         /* IP Checksum Good */
4775                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4776                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4777
4778                 } else
4779                         mp->m_pkthdr.csum_flags = 0;
4780         }
4781         if (status & IXGBE_RXD_STAT_L4CS) {
4782                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4783 #if 0
4784                 if (sctp)
4785                         type = CSUM_SCTP_VALID;
4786 #endif
4787                 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
4788                         mp->m_pkthdr.csum_flags |= type;
4789                         if (!sctp)
4790                                 mp->m_pkthdr.csum_data = htons(0xffff);
4791                 } 
4792         }
4793         return;
4794 }
4795
4796
4797 /*
4798 ** This routine is run via a vlan config EVENT,
4799 ** it enables us to use the HW Filter table since
4800 ** we can get the vlan id. This just creates the
4801 ** entry in the soft version of the VFTA, init will
4802 ** repopulate the real table.
4803 */
4804 static void
4805 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4806 {
4807         struct adapter  *adapter = ifp->if_softc;
4808         u16             index, bit;
4809
4810         if (ifp->if_softc !=  arg)   /* Not our event */
4811                 return;
4812
4813         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4814                 return;
4815
4816         IXGBE_CORE_LOCK(adapter);
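             /*
             ** Bits [11:5] of the VLAN id select a 32-bit word in the
             ** shadow VFTA and bits [4:0] select the bit within it.
             */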
4817         index = (vtag >> 5) & 0x7F;
4818         bit = vtag & 0x1F;
4819         adapter->shadow_vfta[index] |= (1 << bit);
4820         ++adapter->num_vlans;
4821         ixgbe_init_locked(adapter);
4822         IXGBE_CORE_UNLOCK(adapter);
4823 }
4824
4825 /*
4826 ** This routine is run via a vlan
4827 ** unconfig EVENT; it removes our entry
4828 ** from the soft vfta.
4829 */
4830 static void
4831 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4832 {
4833         struct adapter  *adapter = ifp->if_softc;
4834         u16             index, bit;
4835
4836         if (ifp->if_softc !=  arg)
4837                 return;
4838
4839         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4840                 return;
4841
4842         IXGBE_CORE_LOCK(adapter);
4843         index = (vtag >> 5) & 0x7F;
4844         bit = vtag & 0x1F;
4845         adapter->shadow_vfta[index] &= ~(1 << bit);
4846         --adapter->num_vlans;
4847         /* Re-init to load the changes */
4848         ixgbe_init_locked(adapter);
4849         IXGBE_CORE_UNLOCK(adapter);
4850 }
4851
4852 static void
4853 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
4854 {
4855         struct ifnet    *ifp = adapter->ifp;
4856         struct ixgbe_hw *hw = &adapter->hw;
4857         struct rx_ring  *rxr;
4858         u32             ctrl;
4859
4860         /*
4861         ** We get here through init_locked, meaning
4862         ** a soft reset; this has already cleared
4863         ** the VFTA and other state, so if no vlans
4864         ** have been registered do nothing.
4865         */
4866         if (adapter->num_vlans == 0)
4867                 return;
4868
4869         /*
4870         ** A soft reset zeroes out the VFTA, so
4871         ** we need to repopulate it now.
4872         */
4873         for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
4874                 if (adapter->shadow_vfta[i] != 0)
4875                         IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
4876                             adapter->shadow_vfta[i]);
4877
4878         ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4879         /* Enable the Filter Table if enabled */
4880         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4881                 ctrl &= ~IXGBE_VLNCTRL_CFIEN;
4882                 ctrl |= IXGBE_VLNCTRL_VFE;
4883         }
4884         if (hw->mac.type == ixgbe_mac_82598EB)
4885                 ctrl |= IXGBE_VLNCTRL_VME;
4886         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
4887
4888         /* Setup the queues for vlans */
4889         for (int i = 0; i < adapter->num_queues; i++) {
4890                 rxr = &adapter->rx_rings[i];
4891                 /* On 82599 the VLAN enable is per/queue in RXDCTL */
4892                 if (hw->mac.type != ixgbe_mac_82598EB) {
4893                         ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
4894                         ctrl |= IXGBE_RXDCTL_VME;
4895                         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
4896                 }
4897                 rxr->vtag_strip = TRUE;
4898         }
4899 }
4900
4901 static void
4902 ixgbe_enable_intr(struct adapter *adapter)
4903 {
4904         struct ixgbe_hw *hw = &adapter->hw;
4905         struct ix_queue *que = adapter->queues;
4906         u32 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
4907
4908
4909         /* Enable Fan Failure detection */
4910         if (hw->device_id == IXGBE_DEV_ID_82598AT)
4911                     mask |= IXGBE_EIMS_GPI_SDP1;
4912         else {
4913                     mask |= IXGBE_EIMS_ECC;
4914                     mask |= IXGBE_EIMS_GPI_SDP0;
4915                     mask |= IXGBE_EIMS_GPI_SDP1;
4916                     mask |= IXGBE_EIMS_GPI_SDP2;
4917 #ifdef IXGBE_FDIR
4918                     mask |= IXGBE_EIMS_FLOW_DIR;
4919 #endif
4920         }
4921
4922         IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
4923
4924         /* With RSS we use auto clear */
4925         if (adapter->msix_mem) {
4926                 mask = IXGBE_EIMS_ENABLE_MASK;
4927                 /* Don't autoclear Link */
4928                 mask &= ~IXGBE_EIMS_OTHER;
4929                 mask &= ~IXGBE_EIMS_LSC;
4930                 IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
4931         }
4932
4933         /*
4934         ** Now enable all queues, this is done separately to
4935         ** allow for handling the extended (beyond 32) MSIX
4936         ** vectors that can be used by 82599
4937         */
4938         for (int i = 0; i < adapter->num_queues; i++, que++)
4939                 ixgbe_enable_queue(adapter, que->msix);
4940
4941         IXGBE_WRITE_FLUSH(hw);
4942
4943         return;
4944 }
4945
4946 static void
4947 ixgbe_disable_intr(struct adapter *adapter)
4948 {
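             /*
             ** 82598 has a single interrupt mask clear register; the newer
             ** MACs also have two extended EIMC_EX registers covering the
             ** additional queue vectors, so clear those as well.
             */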
4949         if (adapter->msix_mem)
4950                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
4951         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
4952                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
4953         } else {
4954                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
4955                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
4956                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
4957         }
4958         IXGBE_WRITE_FLUSH(&adapter->hw);
4959         return;
4960 }
4961
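     /*
     ** PCI config space accessors for the shared hardware code; the
     ** osdep back pointer recovers the device_t needed for the bus
     ** config calls.
     */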
4962 u16
4963 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
4964 {
4965         u16 value;
4966
4967         value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
4968             reg, 2);
4969
4970         return (value);
4971 }
4972
4973 void
4974 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
4975 {
4976         pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
4977             reg, value, 2);
4978
4979         return;
4980 }
4981
4982 /*
4983 ** Setup the correct IVAR register for a particular MSIX interrupt
4984 **   (yes this is all very magic and confusing :)
4985 **  - entry is the register array entry
4986 **  - vector is the MSIX vector for this queue
4987 **  - type is RX/TX/MISC
4988 */
4989 static void
4990 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
4991 {
4992         struct ixgbe_hw *hw = &adapter->hw;
4993         u32 ivar, index;
4994
4995         vector |= IXGBE_IVAR_ALLOC_VAL;
4996
4997         switch (hw->mac.type) {
4998
4999         case ixgbe_mac_82598EB:
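                     /*
                     ** 82598: the TX entries follow the 64 RX entries
                     ** (hence type * 64); each 32-bit IVAR register then
                     ** holds four 8-bit allocation entries.
                     */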
5000                 if (type == -1)
5001                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
5002                 else
5003                         entry += (type * 64);
5004                 index = (entry >> 2) & 0x1F;
5005                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
5006                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
5007                 ivar |= (vector << (8 * (entry & 0x3)));
5008                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
5009                 break;
5010
5011         case ixgbe_mac_82599EB:
5012         case ixgbe_mac_X540:
5013                 if (type == -1) { /* MISC IVAR */
5014                         index = (entry & 1) * 8;
5015                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
5016                         ivar &= ~(0xFF << index);
5017                         ivar |= (vector << index);
5018                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
5019                 } else {        /* RX/TX IVARS */
5020                         index = (16 * (entry & 1)) + (8 * type);
5021                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
5022                         ivar &= ~(0xFF << index);
5023                         ivar |= (vector << index);
5024                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
5025                 }
5026                 break;
5027         default:
5028                 break;
5029         }
5030 }
5031
5032 static void
5033 ixgbe_configure_ivars(struct adapter *adapter)
5034 {
5035         struct  ix_queue *que = adapter->queues;
5036         u32 newitr;
5037
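             /*
             ** Convert the maximum interrupt rate into an initial EITR
             ** value; the interval field lives in bits [11:3], which is
             ** why the result is masked with 0x0FF8 rather than shifted.
             */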
5038         if (ixgbe_max_interrupt_rate > 0)
5039                 newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
5040         else
5041                 newitr = 0;
5042
5043         for (int i = 0; i < adapter->num_queues; i++, que++) {
5044                 /* First the RX queue entry */
5045                 ixgbe_set_ivar(adapter, i, que->msix, 0);
5046                 /* ... and the TX */
5047                 ixgbe_set_ivar(adapter, i, que->msix, 1);
5048                 /* Set an Initial EITR value */
5049                 IXGBE_WRITE_REG(&adapter->hw,
5050                     IXGBE_EITR(que->msix), newitr);
5051         }
5052
5053         /* For the Link interrupt */
5054         ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
5055 }
5056
5057 /*
5058 ** ixgbe_sfp_probe - called in the local timer to
5059 ** determine if a port had optics inserted.
5060 */  
5061 static bool ixgbe_sfp_probe(struct adapter *adapter)
5062 {
5063         struct ixgbe_hw *hw = &adapter->hw;
5064         device_t        dev = adapter->dev;
5065         bool            result = FALSE;
5066
5067         if ((hw->phy.type == ixgbe_phy_nl) &&
5068             (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
5069                 s32 ret = hw->phy.ops.identify_sfp(hw);
5070                 if (ret)
5071                         goto out;
5072                 ret = hw->phy.ops.reset(hw);
5073                 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5074                         device_printf(dev,"Unsupported SFP+ module detected!");
5075                         kprintf(" Reload driver with supported module.\n");
5076                         adapter->sfp_probe = FALSE;
5077                         goto out;
5078                 } else
5079                         device_printf(dev,"SFP+ module detected!\n");
5080                 /* We now have supported optics */
5081                 adapter->sfp_probe = FALSE;
5082                 /* Set the optics type so system reports correctly */
5083                 ixgbe_setup_optics(adapter);
5084                 result = TRUE;
5085         }
5086 out:
5087         return (result);
5088 }
5089
5090 /*
5091 ** Tasklet handler for MSIX Link interrupts
5092 **  - do outside interrupt since it might sleep
5093 */
5094 static void
5095 ixgbe_handle_link(void *context, int pending)
5096 {
5097         struct adapter  *adapter = context;
5098
5099         ixgbe_check_link(&adapter->hw,
5100             &adapter->link_speed, &adapter->link_up, 0);
5101         ixgbe_update_link_status(adapter);
5102 }
5103
5104 /*
5105 ** Tasklet for handling SFP module interrupts
5106 */
5107 static void
5108 ixgbe_handle_mod(void *context, int pending)
5109 {
5110         struct adapter  *adapter = context;
5111         struct ixgbe_hw *hw = &adapter->hw;
5112         device_t        dev = adapter->dev;
5113         u32 err;
5114
5115         err = hw->phy.ops.identify_sfp(hw);
5116         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5117                 device_printf(dev,
5118                     "Unsupported SFP+ module type was detected.\n");
5119                 return;
5120         }
5121         err = hw->mac.ops.setup_sfp(hw);
5122         if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5123                 device_printf(dev,
5124                     "Setup failure - unsupported SFP+ module type.\n");
5125                 return;
5126         }
5127         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
5128         return;
5129 }
5130
5131
5132 /*
5133 ** Tasklet for handling MSF (multispeed fiber) interrupts
5134 */
5135 static void
5136 ixgbe_handle_msf(void *context, int pending)
5137 {
5138         struct adapter  *adapter = context;
5139         struct ixgbe_hw *hw = &adapter->hw;
5140         u32 autoneg;
5141         bool negotiate;
5142
5143         autoneg = hw->phy.autoneg_advertised;
5144         if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
5145                 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
5146         if (hw->mac.ops.setup_link)
5147                 hw->mac.ops.setup_link(hw, autoneg, negotiate, TRUE);
5148         return;
5149 }
5150
5151 #ifdef IXGBE_FDIR
5152 /*
5153 ** Tasklet for reinitializing the Flow Director filter table
5154 */
5155 static void
5156 ixgbe_reinit_fdir(void *context, int pending)
5157 {
5158         struct adapter  *adapter = context;
5159         struct ifnet   *ifp = adapter->ifp;
5160
5161         if (adapter->fdir_reinit != 1) /* Shouldn't happen */
5162                 return;
5163         ixgbe_reinit_fdir_tables_82599(&adapter->hw);
5164         adapter->fdir_reinit = 0;
5165         /* re-enable flow director interrupts */
5166         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
5167         /* Restart the interface */
5168         ifp->if_drv_flags |= IFF_DRV_RUNNING;
5169         return;
5170 }
5171 #endif
5172
5173 /**********************************************************************
5174  *
5175  *  Update the board statistics counters.
5176  *
5177  **********************************************************************/
5178 static void
5179 ixgbe_update_stats_counters(struct adapter *adapter)
5180 {
5181         struct ifnet   *ifp = adapter->ifp;
5182         struct ixgbe_hw *hw = &adapter->hw;
5183         u32  missed_rx = 0, bprc, lxon, lxoff, total;
5184         u64  total_missed_rx = 0;
5185
5186         adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
5187         adapter->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
5188         adapter->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
5189         adapter->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
5190
5191         for (int i = 0; i < 8; i++) {
5192                 u32 mp;
5193                 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
5194                 /* missed_rx tallies misses for the gprc workaround */
5195                 missed_rx += mp;
5196                 /* global total per queue */
5197                 adapter->stats.mpc[i] += mp;
5198                 /* Running comprehensive total for stats display */
5199                 total_missed_rx += adapter->stats.mpc[i];
5200                 if (hw->mac.type == ixgbe_mac_82598EB)
5201                         adapter->stats.rnbc[i] +=
5202                             IXGBE_READ_REG(hw, IXGBE_RNBC(i));
5203                 adapter->stats.pxontxc[i] +=
5204                     IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
5205                 adapter->stats.pxonrxc[i] +=
5206                     IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5207                 adapter->stats.pxofftxc[i] +=
5208                     IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
5209                 adapter->stats.pxoffrxc[i] +=
5210                     IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5211                 adapter->stats.pxon2offc[i] +=
5212                     IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
5213         }
5214         for (int i = 0; i < 16; i++) {
5215                 adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
5216                 adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
5217                 adapter->stats.qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i));
5218                 adapter->stats.qbrc[i] += 
5219                     ((u64)IXGBE_READ_REG(hw, IXGBE_QBRC(i)) << 32);
5220                 adapter->stats.qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i));
5221                 adapter->stats.qbtc[i] +=
5222                     ((u64)IXGBE_READ_REG(hw, IXGBE_QBTC(i)) << 32);
5223                 adapter->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
5224         }
5225         adapter->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
5226         adapter->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
5227         adapter->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
5228
5229         /* Hardware workaround, gprc counts missed packets */
5230         adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
5231         adapter->stats.gprc -= missed_rx;
5232
5233         if (hw->mac.type != ixgbe_mac_82598EB) {
5234                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
5235                     ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
5236                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
5237                     ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
5238                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
5239                     ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
5240                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5241                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5242         } else {
5243                 adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5244                 adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5245                 /* 82598 only has a counter in the high register */
5246                 adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
5247                 adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
5248                 adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
5249         }
5250
5251         /*
5252          * Workaround: mprc hardware is incorrectly counting
5253          * broadcasts, so for now we subtract those.
5254          */
5255         bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
5256         adapter->stats.bprc += bprc;
5257         adapter->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
5258         if (hw->mac.type == ixgbe_mac_82598EB)
5259                 adapter->stats.mprc -= bprc;
5260
5261         adapter->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
5262         adapter->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
5263         adapter->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
5264         adapter->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
5265         adapter->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
5266         adapter->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
5267
5268         lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
5269         adapter->stats.lxontxc += lxon;
5270         lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
5271         adapter->stats.lxofftxc += lxoff;
5272         total = lxon + lxoff;
5273
5274         adapter->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
5275         adapter->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
5276         adapter->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
5277         adapter->stats.gptc -= total;
5278         adapter->stats.mptc -= total;
5279         adapter->stats.ptc64 -= total;
5280         adapter->stats.gotc -= total * ETHER_MIN_LEN;
5281
5282         adapter->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
5283         adapter->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
5284         adapter->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
5285         adapter->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
5286         adapter->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
5287         adapter->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
5288         adapter->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
5289         adapter->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
5290         adapter->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
5291         adapter->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
5292         adapter->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
5293         adapter->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
5294         adapter->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
5295         adapter->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
5296         adapter->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
5297         adapter->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
5298         adapter->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
5299         adapter->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
5300         /* Only read FCOE stats on non-82598 MACs (82599/X540) */
5301         if (hw->mac.type != ixgbe_mac_82598EB) {
5302                 adapter->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
5303                 adapter->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
5304                 adapter->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
5305                 adapter->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
5306                 adapter->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
5307         }
5308
5309         /* Fill out the OS statistics structure */
5310         ifp->if_ipackets = adapter->stats.gprc;
5311         ifp->if_opackets = adapter->stats.gptc;
5312         ifp->if_ibytes = adapter->stats.gorc;
5313         ifp->if_obytes = adapter->stats.gotc;
5314         ifp->if_imcasts = adapter->stats.mprc;
5315         ifp->if_collisions = 0;
5316
5317         /* Rx Errors */
5318         ifp->if_ierrors = total_missed_rx + adapter->stats.crcerrs +
5319                 adapter->stats.rlec;
5320 }
5321
5322 /** ixgbe_sysctl_tdh_handler - Handler function
5323  *  Retrieves the TDH value from the hardware
5324  */
5325 static int 
5326 ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS)
5327 {
5328         int error;
5329
5330         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5331         if (!txr) return 0;
5332
5333         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
5334         error = sysctl_handle_int(oidp, &val, 0, req);
5335         if (error || !req->newptr)
5336                 return error;
5337         return 0;
5338 }
5339
5340 /** ixgbe_sysctl_tdt_handler - Handler function
5341  *  Retrieves the TDT value from the hardware
5342  */
5343 static int 
5344 ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS)
5345 {
5346         int error;
5347
5348         struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5349         if (!txr) return 0;
5350
5351         unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
5352         error = sysctl_handle_int(oidp, &val, 0, req);
5353         if (error || !req->newptr)
5354                 return error;
5355         return 0;
5356 }
5357
5358 /** ixgbe_sysctl_rdh_handler - Handler function
5359  *  Retrieves the RDH value from the hardware
5360  */
5361 static int 
5362 ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS)
5363 {
5364         int error;
5365
5366         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5367         if (!rxr) return 0;
5368
5369         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
5370         error = sysctl_handle_int(oidp, &val, 0, req);
5371         if (error || !req->newptr)
5372                 return error;
5373         return 0;
5374 }
5375
5376 /** ixgbe_sysctl_rdt_handler - Handler function
5377  *  Retrieves the RDT value from the hardware
5378  */
5379 static int 
5380 ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS)
5381 {
5382         int error;
5383
5384         struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5385         if (!rxr) return 0;
5386
5387         unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
5388         error = sysctl_handle_int(oidp, &val, 0, req);
5389         if (error || !req->newptr)
5390                 return error;
5391         return 0;
5392 }
5393
5394 static int
5395 ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5396 {
5397         int error;
5398         struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1);
5399         unsigned int reg, usec, rate;
5400
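             /*
             ** Report the interrupt rate implied by the current EITR
             ** interval (bits [11:3]); if a new rate was written, clamp
             ** it and convert it back into an interval.
             */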
5401         reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
5402         usec = ((reg & 0x0FF8) >> 3);
5403         if (usec > 0)
5404                 rate = 500000 / usec;
5405         else
5406                 rate = 0;
5407         error = sysctl_handle_int(oidp, &rate, 0, req);
5408         if (error || !req->newptr)
5409                 return error;
5410         reg &= ~0xfff; /* default, no limitation */
5411         ixgbe_max_interrupt_rate = 0;
5412         if (rate > 0 && rate < 500000) {
5413                 if (rate < 1000)
5414                         rate = 1000;
5415                 ixgbe_max_interrupt_rate = rate;
5416                 reg |= ((4000000/rate) & 0xff8 );
5417         }
5418         IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
5419         return 0;
5420 }
5421
5422 /*
5423  * Add sysctl variables, one per statistic, to the system.
5424  */
5425 static void
5426 ixgbe_add_hw_stats(struct adapter *adapter)
5427 {
5428         struct tx_ring *txr = adapter->tx_rings;
5429         struct rx_ring *rxr = adapter->rx_rings;
5430
5431         struct sysctl_ctx_list *ctx = &adapter->sysctl_ctx;
5432         struct sysctl_oid *tree = adapter->sysctl_tree;
5433         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5434         struct ixgbe_hw_stats *stats = &adapter->stats;
5435
5436         struct sysctl_oid *stat_node, *queue_node;
5437         struct sysctl_oid_list *stat_list, *queue_list;
5438
5439 #define QUEUE_NAME_LEN 32
5440         char namebuf[QUEUE_NAME_LEN];
5441
5442         /* Driver Statistics */
5443         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5444                         CTLFLAG_RD, &adapter->dropped_pkts,
5445                         "Driver dropped packets");
5446         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed",
5447                         CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5448                         "m_defrag() failed");
5449         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_tx_dma_setup",
5450                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5451                         "Driver tx dma failure in xmit");
5452         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
5453                         CTLFLAG_RD, &adapter->watchdog_events,
5454                         "Watchdog timeouts");
5455         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tso_tx",
5456                         CTLFLAG_RD, &adapter->tso_tx,
5457                         "TSO");
5458         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5459                         CTLFLAG_RD, &adapter->link_irq,
5460                         "Link MSIX IRQ Handled");
5461
5462         for (int i = 0; i < adapter->num_queues; i++, txr++) {
5463                 ksnprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5464                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5465                                             CTLFLAG_RD, NULL, "Queue Name");
5466                 queue_list = SYSCTL_CHILDREN(queue_node);
5467
5468                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5469                                 CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
5470                                 sizeof(&adapter->queues[i]),
5471                                 ixgbe_sysctl_interrupt_rate_handler, "IU",
5472                                 "Interrupt Rate");
5473                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
5474                                 CTLFLAG_RD, &(adapter->queues[i].irqs), 0,
5475                                 "irqs on this queue");
5476                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5477                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5478                                 ixgbe_sysctl_tdh_handler, "IU",
5479                                 "Transmit Descriptor Head");
5480                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5481                                 CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5482                                 ixgbe_sysctl_tdt_handler, "IU",
5483                                 "Transmit Descriptor Tail");
5484                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5485                                 CTLFLAG_RD, &txr->no_desc_avail, 0,
5486                                 "Queue No Descriptor Available");
5487                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5488                                 CTLFLAG_RD, &txr->total_packets, 0,
5489                                 "Queue Packets Transmitted");
5490         }
5491
5492         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
5493                 ksnprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5494                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 
5495                                             CTLFLAG_RD, NULL, "Queue Name");
5496                 queue_list = SYSCTL_CHILDREN(queue_node);
5497
5498 #if 0   /* NET_LRO */
5499                 struct lro_ctrl *lro = &rxr->lro;
5500 #endif
5501
5507                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5508                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5509                                 ixgbe_sysctl_rdh_handler, "IU",
5510                                 "Receive Descriptor Head");
5511                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5512                                 CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5513                                 ixgbe_sysctl_rdt_handler, "IU",
5514                                 "Receive Descriptor Tail");
5515                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5516                                 CTLFLAG_RD, &rxr->rx_packets, 0,
5517                                 "Queue Packets Received");
5518                 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5519                                 CTLFLAG_RD, &rxr->rx_bytes, 0,
5520                                 "Queue Bytes Received");
5521 #if 0   /* NET_LRO */
5522                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5523                                 CTLFLAG_RD, &lro->lro_queued, 0,
5524                                 "LRO Queued");
5525                 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5526                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5527                                 "LRO Flushed");
5528 #endif
5529         }
5530
5531         /* MAC stats get their own sub node */
5532
5533         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5534                                     CTLFLAG_RD, NULL, "MAC Statistics");
5535         stat_list = SYSCTL_CHILDREN(stat_node);
5536
5537         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5538                         CTLFLAG_RD, &stats->crcerrs, 0,
5539                         "CRC Errors");
5540         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
5541                         CTLFLAG_RD, &stats->illerrc, 0,
5542                         "Illegal Byte Errors");
5543         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
5544                         CTLFLAG_RD, &stats->errbc, 0,
5545                         "Byte Errors");
5546         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
5547                         CTLFLAG_RD, &stats->mspdc, 0,
5548                         "MAC Short Packets Discarded");
5549         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
5550                         CTLFLAG_RD, &stats->mlfc, 0,
5551                         "MAC Local Faults");
5552         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
5553                         CTLFLAG_RD, &stats->mrfc, 0,
5554                         "MAC Remote Faults");
5555         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
5556                         CTLFLAG_RD, &stats->rlec, 0,
5557                         "Receive Length Errors");
5558         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "link_xon_txd",
5559                         CTLFLAG_RD, &stats->lxontxc, 0,
5560                         "Link XON Transmitted");
5561         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "link_xon_rcvd",
5562                         CTLFLAG_RD, &stats->lxonrxc, 0,
5563                         "Link XON Received");
5564         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "link_xoff_txd",
5565                         CTLFLAG_RD, &stats->lxofftxc, 0,
5566                         "Link XOFF Transmitted");
5567         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "link_xoff_rcvd",
5568                         CTLFLAG_RD, &stats->lxoffrxc, 0,
5569                         "Link XOFF Received");
5570
5571         /* Packet Reception Stats */
5572         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
5573                         CTLFLAG_RD, &stats->tor, 0,
5574                         "Total Octets Received"); 
5575         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
5576                         CTLFLAG_RD, &stats->gorc, 0,
5577                         "Good Octets Received"); 
5578         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
5579                         CTLFLAG_RD, &stats->tpr, 0,
5580                         "Total Packets Received");
5581         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
5582                         CTLFLAG_RD, &stats->gprc, 0,
5583                         "Good Packets Received");
5584         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
5585                         CTLFLAG_RD, &stats->mprc, 0,
5586                         "Multicast Packets Received");
5587         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
5588                         CTLFLAG_RD, &stats->bprc, 0,
5589                         "Broadcast Packets Received");
5590         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5591                         CTLFLAG_RD, &stats->prc64, 0,
5592                         "64 byte frames received ");
5593         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5594                         CTLFLAG_RD, &stats->prc127, 0,
5595                         "65-127 byte frames received");
5596         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5597                         CTLFLAG_RD, &stats->prc255, 0,
5598                         "128-255 byte frames received");
5599         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5600                         CTLFLAG_RD, &stats->prc511, 0,
5601                         "256-511 byte frames received");
5602         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5603                         CTLFLAG_RD, &stats->prc1023, 0,
5604                         "512-1023 byte frames received");
5605         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5606                         CTLFLAG_RD, &stats->prc1522, 0,
5607                         "1024-1522 byte frames received");
5608         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
5609                         CTLFLAG_RD, &stats->ruc, 0,
5610                         "Receive Undersized");
5611         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5612                         CTLFLAG_RD, &stats->rfc, 0,
5613                         "Fragmented Packets Received ");
5614         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
5615                         CTLFLAG_RD, &stats->roc, 0,
5616                         "Oversized Packets Received");
5617         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
5618                         CTLFLAG_RD, &stats->rjc, 0,
5619                         "Received Jabber");
5620         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
5621                         CTLFLAG_RD, &stats->mngprc, 0,
5622                         "Management Packets Received");
5623         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
5624                         CTLFLAG_RD, &stats->mngpdc, 0,
5625                         "Management Packets Dropped");
5626         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
5627                         CTLFLAG_RD, &stats->xec, 0,
5628                         "Checksum Errors");
5629
5630         /* Packet Transmission Stats */
5631         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5632                         CTLFLAG_RD, &stats->gotc, 0,
5633                         "Good Octets Transmitted"); 
5634         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5635                         CTLFLAG_RD, &stats->tpt, 0,
5636                         "Total Packets Transmitted");
5637         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5638                         CTLFLAG_RD, &stats->gptc, 0,
5639                         "Good Packets Transmitted");
5640         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5641                         CTLFLAG_RD, &stats->bptc, 0,
5642                         "Broadcast Packets Transmitted");
5643         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5644                         CTLFLAG_RD, &stats->mptc, 0,
5645                         "Multicast Packets Transmitted");
5646         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
5647                         CTLFLAG_RD, &stats->mngptc, 0,
5648                         "Management Packets Transmitted");
5649         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5650                         CTLFLAG_RD, &stats->ptc64, 0,
5651                         "64 byte frames transmitted ");
5652         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5653                         CTLFLAG_RD, &stats->ptc127, 0,
5654                         "65-127 byte frames transmitted");
5655         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5656                         CTLFLAG_RD, &stats->ptc255, 0,
5657                         "128-255 byte frames transmitted");
5658         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5659                         CTLFLAG_RD, &stats->ptc511, 0,
5660                         "256-511 byte frames transmitted");
5661         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5662                         CTLFLAG_RD, &stats->ptc1023, 0,
5663                         "512-1023 byte frames transmitted");
5664         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5665                         CTLFLAG_RD, &stats->ptc1522, 0,
5666                         "1024-1522 byte frames transmitted");
5667
5668         /* FC Stats */
5669         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_crc",
5670                 CTLFLAG_RD, &stats->fccrc, 0,
5671                 "FC CRC Errors");
5672         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_last",
5673                 CTLFLAG_RD, &stats->fclast, 0,
5674                 "FC Last Error");
5675         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_drpd",
5676                 CTLFLAG_RD, &stats->fcoerpdc, 0,
5677                 "FCoE Packets Dropped");
5678         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_pkts_rcvd",
5679                 CTLFLAG_RD, &stats->fcoeprc, 0,
5680                 "FCoE Packets Received");
5681         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_pkts_txd",
5682                 CTLFLAG_RD, &stats->fcoeptc, 0,
5683                 "FCoE Packets Transmitted");
5684         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_dword_rcvd",
5685                 CTLFLAG_RD, &stats->fcoedwrc, 0,
5686                 "FCoE DWords Received");
5687         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "fc_dword_txd",
5688                 CTLFLAG_RD, &stats->fcoedwtc, 0,
5689                 "FCoE DWords Transmitted");
5690 }
5691
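/*
 * Illustrative userland sketch (not part of the driver): one way the
 * counters registered above could be read, using sysctlbyname(3).  The
 * OID path below is an assumption made for the example only; the actual
 * prefix depends on where the adapter's sysctl tree is attached.
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <inttypes.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		uint64_t crcerrs;
 *		size_t len = sizeof(crcerrs);
 *
 *		(void)sysctlbyname("dev.ix.0.mac_stats.crc_errs",
 *		    &crcerrs, &len, NULL, 0);
 *		printf("CRC errors: %" PRIu64 "\n", crcerrs);
 *		return (0);
 *	}
 */
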
5692 /*
5693 ** Set flow control using sysctl:
5694 ** Flow control values:
5695 **      0 - off
5696 **      1 - rx pause
5697 **      2 - tx pause
5698 **      3 - full
5699 */
5700 static int
5701 ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS)
5702 {
5703         int error, last;
5704         struct adapter *adapter = (struct adapter *) arg1;
5705
5706         last = adapter->fc;
5707         error = sysctl_handle_int(oidp, &adapter->fc, 0, req);
5708         if ((error) || (req->newptr == NULL))
5709                 return (error);
5710
5711         /* Don't bother if it's not changed */
5712         if (adapter->fc == last)
5713                 return (0);
5714
5715         switch (adapter->fc) {
5716                 case ixgbe_fc_rx_pause:
5717                 case ixgbe_fc_tx_pause:
5718                 case ixgbe_fc_full:
5719                         adapter->hw.fc.requested_mode = adapter->fc;
5720                         break;
5721                 case ixgbe_fc_none:
5722                 default:
5723                         adapter->hw.fc.requested_mode = ixgbe_fc_none;
5724         }
5725         /* Don't autoneg if forcing a value */
5726         adapter->hw.fc.disable_fc_autoneg = TRUE;
5727         ixgbe_fc_enable(&adapter->hw);
5728         return error;
5729 }
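
/*
 * Usage note (illustrative): if this handler is attached as a read/write
 * sysctl, e.g. under a name such as "flow_control" (the name and path are
 * assumptions here, not taken from this file), writing 3 requests full
 * flow control:
 *
 *	sysctl dev.ix.0.flow_control=3
 */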
5730
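/*
** Create a read/write sysctl for the RX packet processing limit and
** initialize the backing variable to the given default value.
*/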
5731 static void
5732 ixgbe_add_rx_process_limit(struct adapter *adapter, const char *name,
5733         const char *description, int *limit, int value)
5734 {
5735         *limit = value;
5736         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
5737             SYSCTL_CHILDREN(adapter->sysctl_tree),
5738             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5739 }
5740
5741 /*
5742 ** Control link advertise speed:
5743 **      1 - advertise only 1G
5744 **      2 - advertise 100Mb (X540 only)
5745 **      3 - advertise normal (1G and 10G)
5746 */
5747 static int
5748 ixgbe_set_advertise(SYSCTL_HANDLER_ARGS)
5749 {
5750         int                     error = 0;
5751         struct adapter          *adapter;
5752         device_t                dev;
5753         struct ixgbe_hw         *hw;
5754         ixgbe_link_speed        speed, last;
5755
5756         adapter = (struct adapter *) arg1;
5757         dev = adapter->dev;
5758         hw = &adapter->hw;
5759         last = adapter->advertise;
5760
5761         error = sysctl_handle_int(oidp, &adapter->advertise, 0, req);
5762         if ((error) || (adapter->advertise == -1))
5763                 return (error);
5764
5765         if (adapter->advertise == last) /* no change */
5766                 return (0);
5767
5768         if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
5769             (hw->phy.multispeed_fiber)))
5770                 return (error);
5771
5772         if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
5773                 device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
5774                 return (error);
5775         }
5776
5777         if (adapter->advertise == 1)
5778                 speed = IXGBE_LINK_SPEED_1GB_FULL;
5779         else if (adapter->advertise == 2)
5780                 speed = IXGBE_LINK_SPEED_100_FULL;
5781         else if (adapter->advertise == 3)
5782                 speed = IXGBE_LINK_SPEED_1GB_FULL |
5783                         IXGBE_LINK_SPEED_10GB_FULL;
5784         else /* bogus value */
5785                 return (error);
5786
5787         hw->mac.autotry_restart = TRUE;
5788         hw->mac.ops.setup_link(hw, speed, TRUE, TRUE);
5789
5790         return (error);
5791 }
5792
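/*
 * Usage note (illustrative): assuming this handler is exposed as a
 * read/write sysctl, e.g. under a name such as "advertise_speed" (the
 * name and path are assumptions here), writing 1 restricts advertisement
 * to 1G on copper or multispeed-fiber PHYs:
 *
 *	sysctl dev.ix.0.advertise_speed=1
 */
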
5793 /*
5794 ** Thermal Shutdown Trigger
5795 **   - cause a Thermal Overtemp IRQ
5796 */
5797 static int
5798 ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS)
5799 {
5800         int             error, fire = 0;
5801         struct adapter  *adapter = (struct adapter *) arg1;
5802         struct ixgbe_hw *hw = &adapter->hw;
5803
5805         if (hw->mac.type != ixgbe_mac_X540)
5806                 return (0);
5807
5808         error = sysctl_handle_int(oidp, &fire, 0, req);
5809         if ((error) || (req->newptr == NULL))
5810                 return (error);
5811
5812         if (fire) {
5813                 u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
5814                 reg |= IXGBE_EICR_TS;
5815                 IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
5816         }
5817
5818         return (0);
5819 }
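
/*
 * Note: writing any non-zero value through the sysctl attached to the
 * handler above sets IXGBE_EICR_TS in EICS, simulating an over-temperature
 * interrupt; non-X540 MACs are silently ignored.
 */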
5820
5821 /* Rearrange the mbuf chain so the headers needed for TSO setup are contiguous */
5822 static int
5823 ixgbe_tso_pullup(struct tx_ring *txr, struct mbuf **mp)
5824 {
5825         int hoff, iphlen, thoff;
5826         struct mbuf *m;
5827
5828         m = *mp;
5829         KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
5830
5831         iphlen = m->m_pkthdr.csum_iphlen;
5832         thoff = m->m_pkthdr.csum_thlen;
5833         hoff = m->m_pkthdr.csum_lhlen;
5834
5835         KASSERT(iphlen > 0, ("invalid ip hlen"));
5836         KASSERT(thoff > 0, ("invalid tcp hlen"));
5837         KASSERT(hoff > 0, ("invalid ether hlen"));
5838
5839         if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
5840                 m = m_pullup(m, hoff + iphlen + thoff);
5841                 if (m == NULL) {
5842                         *mp = NULL;
5843                         return ENOBUFS;
5844                 }
5845                 *mp = m;
5846         }
5847
5848         return 0;
5849 }