00ed301cb5f4e648cb343294f2e820e776c3cf5d
[dragonfly.git] / sys / dev / netif / ixgbe / ixgbe.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2012, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD: src/sys/dev/ixgbe/ixgbe.c,v 1.70 2012/07/05 20:51:44 jfv Exp $*/
34
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37
38 #include "ixgbe.h"
39
/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
/* Global debug toggle; read elsewhere in the driver when printing stats. */
int             ixgbe_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/
/* Reported in the probe string ("<name>, Version - <ver>"). */
char ixgbe_driver_version[] = "2.4.8";
49
/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into ixgbe_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *
 *  A zero SubVendor/SubDevice ID acts as a wildcard in ixgbe_probe().
 *********************************************************************/

static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
{
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
	/* required last entry */
	{0, 0, 0, 0, 0}
};
89
/*********************************************************************
 *  Table of branding strings
 *
 *  Indexed by the last field of ixgbe_vendor_info_array entries
 *  (all entries currently map to index 0).
 *********************************************************************/

static char    *ixgbe_strings[] = {
	"Intel(R) PRO/10GbE PCI-Express Network Driver"
};
97
/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      ixgbe_probe(device_t);
static int      ixgbe_attach(device_t);
static int      ixgbe_detach(device_t);
static int      ixgbe_shutdown(device_t);
static void     ixgbe_start(struct ifnet *);
static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
#if 0 /* __FreeBSD_version >= 800000 */
static int      ixgbe_mq_start(struct ifnet *, struct mbuf *);
static int      ixgbe_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     ixgbe_qflush(struct ifnet *);
#endif
static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static void     ixgbe_init(void *);
static void     ixgbe_init_locked(struct adapter *);
static void     ixgbe_stop(void *);
static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
static int      ixgbe_media_change(struct ifnet *);
static void     ixgbe_identify_hardware(struct adapter *);
static int      ixgbe_allocate_pci_resources(struct adapter *);
static int      ixgbe_allocate_msix(struct adapter *);
static int      ixgbe_allocate_legacy(struct adapter *);
static int      ixgbe_allocate_queues(struct adapter *);
#if 0   /* HAVE_MSIX */
static int      ixgbe_setup_msix(struct adapter *);
#endif
static void     ixgbe_free_pci_resources(struct adapter *);
static void     ixgbe_local_timer(void *);
static int      ixgbe_setup_interface(device_t, struct adapter *);
static void     ixgbe_config_link(struct adapter *);

/* Transmit ring setup/teardown */
static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
static int      ixgbe_setup_transmit_structures(struct adapter *);
static void     ixgbe_setup_transmit_ring(struct tx_ring *);
static void     ixgbe_initialize_transmit_units(struct adapter *);
static void     ixgbe_free_transmit_structures(struct adapter *);
static void     ixgbe_free_transmit_buffers(struct tx_ring *);

/* Receive ring setup/teardown */
static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
static int      ixgbe_setup_receive_structures(struct adapter *);
static int      ixgbe_setup_receive_ring(struct rx_ring *);
static void     ixgbe_initialize_receive_units(struct adapter *);
static void     ixgbe_free_receive_structures(struct adapter *);
static void     ixgbe_free_receive_buffers(struct rx_ring *);
#if 0   /* NET_LRO */
static void     ixgbe_setup_hw_rsc(struct rx_ring *);
#endif

static void     ixgbe_enable_intr(struct adapter *);
static void     ixgbe_disable_intr(struct adapter *);
static void     ixgbe_update_stats_counters(struct adapter *);
static bool     ixgbe_txeof(struct tx_ring *);
static bool     ixgbe_rxeof(struct ix_queue *, int);
static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void     ixgbe_set_promisc(struct adapter *);
static void     ixgbe_set_multi(struct adapter *);
static void     ixgbe_update_link_status(struct adapter *);
static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int      ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
static int      ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
static int      ixgbe_dma_malloc(struct adapter *, bus_size_t,
                    struct ixgbe_dma_alloc *, int);
static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
static void     ixgbe_add_rx_process_limit(struct adapter *, const char *,
                    const char *, int *, int);
static bool     ixgbe_tx_ctx_setup(struct tx_ring *, struct mbuf *);
#if 0   /* NET_TSO */
static bool     ixgbe_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
#endif
static void     ixgbe_set_ivar(struct adapter *, u8, u8, s8);
static void     ixgbe_configure_ivars(struct adapter *);
static u8 *     ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);

/* VLAN hardware filtering support */
static void     ixgbe_setup_vlan_hw_support(struct adapter *);
static void     ixgbe_register_vlan(void *, struct ifnet *, u16);
static void     ixgbe_unregister_vlan(void *, struct ifnet *, u16);

static void     ixgbe_add_hw_stats(struct adapter *adapter);

static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
                    struct mbuf *, u32);

/* Support for pluggable optic modules */
static bool     ixgbe_sfp_probe(struct adapter *);
static void     ixgbe_setup_optics(struct adapter *);

/* Legacy (single vector) interrupt handler */
static void     ixgbe_legacy_irq(void *);

/* The MSI/X Interrupt handlers */
static void     ixgbe_msix_que(void *);
static void     ixgbe_msix_link(void *);

/* Deferred interrupt tasklets */
static void     ixgbe_handle_que(void *, int);
static void     ixgbe_handle_link(void *, int);
static void     ixgbe_handle_msf(void *, int);
static void     ixgbe_handle_mod(void *, int);

#ifdef IXGBE_FDIR
static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
static void     ixgbe_reinit_fdir(void *, int);
#endif
207
/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t ixgbe_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, ixgbe_probe),
	DEVMETHOD(device_attach, ixgbe_attach),
	DEVMETHOD(device_detach, ixgbe_detach),
	DEVMETHOD(device_shutdown, ixgbe_shutdown),
	{0, 0}	/* terminator */
};

static driver_t ixgbe_driver = {
	"ix", ixgbe_methods, sizeof(struct adapter),
};

devclass_t ixgbe_devclass;
/* Attach the "ix" driver under the pci bus */
DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);

MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
230
/*
** TUNEABLE PARAMETERS:
*/

/*
** AIM: Adaptive Interrupt Moderation
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector
*/
static int ixgbe_enable_aim = TRUE;
TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);

/* Upper bound on interrupts/sec per vector */
static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);

/* How many packets rxeof tries to clean at a time */
static int ixgbe_rx_process_limit = 128;
TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);

/*
** Smart speed setting, default to on
** this only works as a compile option
** right now as its during attach, set
** this to 'ixgbe_smart_speed_off' to
** disable.
*/
static int ixgbe_smart_speed = ixgbe_smart_speed_on;

/* Allow MSI to be disabled (forced to legacy INTx) */
static int ixgbe_msi_enable = 1;
TUNABLE_INT("hw.ixgbe.msi.enable", &ixgbe_msi_enable);

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int ixgbe_enable_msix = 1;
TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);

/*
 * Header split: this causes the hardware to DMA
 * the header into a separate mbuf from the payload,
 * it can be a performance win in some workloads, but
 * in others it actually hurts, its off by default.
 */
static int ixgbe_header_split = FALSE;
TUNABLE_INT("hw.ixgbe.hdr_split", &ixgbe_header_split);

/*
 * Number of Queues, can be set to 0,
 * it then autoconfigures based on the
 * number of cpus with a max of 8. This
 * can be overridden manually here.
 */
static int ixgbe_num_queues = 0;
TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);

/*
** Number of TX descriptors per ring,
** setting higher than RX as this seems
** the better performing choice.
*/
static int ixgbe_txd = PERFORM_TXD;
TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);

/* Number of RX descriptors per ring */
static int ixgbe_rxd = PERFORM_RXD;
TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);

/* Keep running tab on them for sanity check */
static int ixgbe_total_ports;

#ifdef IXGBE_FDIR
/*
** For Flow Director: this is the
** number of TX packets we sample
** for the filter pool, this means
** every 20th packet will be probed.
**
** This feature can be disabled by
** setting this to 0.
*/
static int atr_sample_rate = 20;
/*
** Flow Director actually 'steals'
** part of the packet buffer as its
** filter pool, this variable controls
** how much it uses:
**  0 = 64K, 1 = 128K, 2 = 256K
*/
static int fdir_pballoc = 1;
#endif
323
324 #ifdef DEV_NETMAP
325 /*
326  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
327  * be a reference on how to implement netmap support in a driver.
328  * Additional comments are in ixgbe_netmap.h .
329  *
330  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
331  * that extend the standard driver.
332  */
333 #include <dev/netmap/ixgbe_netmap.h>
334 #endif /* DEV_NETMAP */
335
336 /*********************************************************************
337  *  Device identification routine
338  *
339  *  ixgbe_probe determines if the driver should be loaded on
340  *  adapter based on PCI vendor/device id of the adapter.
341  *
342  *  return BUS_PROBE_DEFAULT on success, positive on failure
343  *********************************************************************/
344
345 static int
346 ixgbe_probe(device_t dev)
347 {
348         ixgbe_vendor_info_t *ent;
349
350         u16     pci_vendor_id = 0;
351         u16     pci_device_id = 0;
352         u16     pci_subvendor_id = 0;
353         u16     pci_subdevice_id = 0;
354         char    adapter_name[256];
355
356         INIT_DEBUGOUT("ixgbe_probe: begin");
357
358         pci_vendor_id = pci_get_vendor(dev);
359         if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
360                 return (ENXIO);
361
362         pci_device_id = pci_get_device(dev);
363         pci_subvendor_id = pci_get_subvendor(dev);
364         pci_subdevice_id = pci_get_subdevice(dev);
365
366         ent = ixgbe_vendor_info_array;
367         while (ent->vendor_id != 0) {
368                 if ((pci_vendor_id == ent->vendor_id) &&
369                     (pci_device_id == ent->device_id) &&
370
371                     ((pci_subvendor_id == ent->subvendor_id) ||
372                      (ent->subvendor_id == 0)) &&
373
374                     ((pci_subdevice_id == ent->subdevice_id) ||
375                      (ent->subdevice_id == 0))) {
376                         ksprintf(adapter_name, "%s, Version - %s",
377                                 ixgbe_strings[ent->index],
378                                 ixgbe_driver_version);
379                         device_set_desc_copy(dev, adapter_name);
380                         ++ixgbe_total_ports;
381                         return (BUS_PROBE_DEFAULT);
382                 }
383                 ent++;
384         }
385         return (ENXIO);
386 }
387
388 /*********************************************************************
389  *  Device initialization routine
390  *
391  *  The attach entry point is called when the driver is being loaded.
392  *  This routine identifies the type of hardware, allocates all resources
393  *  and initializes the hardware.
394  *
395  *  return 0 on success, positive on failure
396  *********************************************************************/
397
398 static int
399 ixgbe_attach(device_t dev)
400 {
401         struct adapter *adapter;
402         struct ixgbe_hw *hw;
403         int             error = 0;
404         u16             csum;
405         u32             ctrl_ext;
406
407         INIT_DEBUGOUT("ixgbe_attach: begin");
408
409         if (resource_disabled("ixgbe", device_get_unit(dev))) {
410                 device_printf(dev, "Disabled by device hint\n");
411                 return (ENXIO);
412         }
413
414         /* Allocate, clear, and link in our adapter structure */
415         adapter = device_get_softc(dev);
416         adapter->dev = adapter->osdep.dev = dev;
417         hw = &adapter->hw;
418
419         /* Core Lock Init*/
420         IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
421         spin_init(&adapter->mcast_spin);
422
423         /* SYSCTL APIs */
424
425         sysctl_ctx_init(&adapter->sysctl_ctx);
426         adapter->sysctl_tree = SYSCTL_ADD_NODE(&adapter->sysctl_ctx,
427             SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
428             device_get_nameunit(adapter->dev), CTLFLAG_RD, 0, "");
429         if (adapter->sysctl_tree == NULL) {
430                 device_printf(adapter->dev, "can't add sysctl node\n");
431                 return (EINVAL);
432         }
433         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
434                         SYSCTL_CHILDREN(adapter->sysctl_tree),
435                         OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW,
436                         adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control");
437
438         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
439                         SYSCTL_CHILDREN(adapter->sysctl_tree),
440                         OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
441                         &ixgbe_enable_aim, 1, "Interrupt Moderation");
442
443         /*
444         ** Allow a kind of speed control by forcing the autoneg
445         ** advertised speed list to only a certain value, this
446         ** supports 1G on 82599 devices, and 100Mb on x540.
447         */
448         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
449                         SYSCTL_CHILDREN(adapter->sysctl_tree),
450                         OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW,
451                         adapter, 0, ixgbe_set_advertise, "I", "Link Speed");
452
453         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
454                         SYSCTL_CHILDREN(adapter->sysctl_tree),
455                         OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter,
456                         0, ixgbe_set_thermal_test, "I", "Thermal Test");
457
458         /* Set up the timer callout */
459         /* XXX: shouldn't this be a spin lock ? */
460         lockinit(&adapter->core_lock, "ixgbe core lock", 0, LK_CANRECURSE);
461         callout_init(&adapter->timer);
462
463         /* Determine hardware revision */
464         ixgbe_identify_hardware(adapter);
465
466         /* Do base PCI setup - map BAR0 */
467         if (ixgbe_allocate_pci_resources(adapter)) {
468                 device_printf(dev, "Allocation of PCI resources failed\n");
469                 error = ENXIO;
470                 goto err_out;
471         }
472
473         /* Do descriptor calc and sanity checks */
474         if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
475             ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
476                 device_printf(dev, "TXD config issue, using default!\n");
477                 adapter->num_tx_desc = DEFAULT_TXD;
478         } else
479                 adapter->num_tx_desc = ixgbe_txd;
480
481         /*
482         ** With many RX rings it is easy to exceed the
483         ** system mbuf allocation. Tuning nmbclusters
484         ** can alleviate this.
485         */
486         if (nmbclusters > 0 ) {
487                 int s;
488                 s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
489                 if (s > nmbclusters) {
490                         device_printf(dev, "RX Descriptors exceed "
491                             "system mbuf max, using default instead!\n");
492                         ixgbe_rxd = DEFAULT_RXD;
493                 }
494         }
495
496         if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
497             ixgbe_rxd < MIN_TXD || ixgbe_rxd > MAX_TXD) {
498                 device_printf(dev, "RXD config issue, using default!\n");
499                 adapter->num_rx_desc = DEFAULT_RXD;
500         } else
501                 adapter->num_rx_desc = ixgbe_rxd;
502
503         /* Allocate our TX/RX Queues */
504         if (ixgbe_allocate_queues(adapter)) {
505                 error = ENOMEM;
506                 goto err_out;
507         }
508
509         /* Allocate multicast array memory. */
510         adapter->mta = kmalloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
511             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
512         if (adapter->mta == NULL) {
513                 device_printf(dev, "Can not allocate multicast setup array\n");
514                 error = ENOMEM;
515                 goto err_late;
516         }
517
518         /* Initialize the shared code */
519         error = ixgbe_init_shared_code(hw);
520         if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
521                 /*
522                 ** No optics in this port, set up
523                 ** so the timer routine will probe 
524                 ** for later insertion.
525                 */
526                 adapter->sfp_probe = TRUE;
527                 error = 0;
528         } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
529                 device_printf(dev,"Unsupported SFP+ module detected!\n");
530                 error = EIO;
531                 goto err_late;
532         } else if (error) {
533                 device_printf(dev,"Unable to initialize the shared code\n");
534                 error = EIO;
535                 goto err_late;
536         }
537
538         /* Make sure we have a good EEPROM before we read from it */
539         if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
540                 device_printf(dev,"The EEPROM Checksum Is Not Valid\n");
541                 error = EIO;
542                 goto err_late;
543         }
544
545         error = ixgbe_init_hw(hw);
546         switch (error) {
547         case IXGBE_ERR_EEPROM_VERSION:
548                 device_printf(dev, "This device is a pre-production adapter/"
549                     "LOM.  Please be aware there may be issues associated "
550                     "with your hardware.\n If you are experiencing problems "
551                     "please contact your Intel or hardware representative "
552                     "who provided you with this hardware.\n");
553                 break;
554         case IXGBE_ERR_SFP_NOT_SUPPORTED:
555                 device_printf(dev,"Unsupported SFP+ Module\n");
556                 error = EIO;
557                 device_printf(dev,"Hardware Initialization Failure\n");
558                 goto err_late;
559         case IXGBE_ERR_SFP_NOT_PRESENT:
560                 device_printf(dev,"No SFP+ Module found\n");
561                 /* falls thru */
562         default:
563                 break;
564         }
565
566         /* Detect and set physical type */
567         ixgbe_setup_optics(adapter);
568
569         if ((adapter->msix > 1) && (ixgbe_enable_msix))
570                 error = ixgbe_allocate_msix(adapter); 
571         else
572                 error = ixgbe_allocate_legacy(adapter); 
573         if (error) 
574                 goto err_late;
575
576         /* Setup OS specific network interface */
577         if (ixgbe_setup_interface(dev, adapter) != 0)
578                 goto err_late;
579
580         /* Sysctl for limiting the amount of work done in the taskqueue */
581         ixgbe_add_rx_process_limit(adapter, "rx_processing_limit",
582             "max number of rx packets to process", &adapter->rx_process_limit,
583             ixgbe_rx_process_limit);
584
585         /* Initialize statistics */
586         ixgbe_update_stats_counters(adapter);
587
588         /* Register for VLAN events */
589         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
590             ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
591         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
592             ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
593
594         /* Print PCIE bus type/speed/width info */
595         ixgbe_get_bus_info(hw);
596         device_printf(dev,"PCI Express Bus: Speed %s %s\n",
597             ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
598             (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
599             (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
600             (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
601             (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
602             ("Unknown"));
603
604         if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
605             (hw->bus.speed == ixgbe_bus_speed_2500)) {
606                 device_printf(dev, "PCI-Express bandwidth available"
607                     " for this card\n     is not sufficient for"
608                     " optimal performance.\n");
609                 device_printf(dev, "For optimal performance a x8 "
610                     "PCIE, or x4 PCIE 2 slot is required.\n");
611         }
612
613         /* let hardware know driver is loaded */
614         ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
615         ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
616         IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
617
618         ixgbe_add_hw_stats(adapter);
619
620 #ifdef DEV_NETMAP
621         ixgbe_netmap_attach(adapter);
622 #endif /* DEV_NETMAP */
623         INIT_DEBUGOUT("ixgbe_attach: end");
624         return (0);
625 err_late:
626         ixgbe_free_transmit_structures(adapter);
627         ixgbe_free_receive_structures(adapter);
628 err_out:
629         if (adapter->ifp != NULL)
630                 if_free(adapter->ifp);
631         ixgbe_free_pci_resources(adapter);
632         kfree(adapter->mta, M_DEVBUF);
633         return (error);
634
635 }
636
637 /*********************************************************************
638  *  Device removal routine
639  *
640  *  The detach entry point is called when the driver is being removed.
641  *  This routine stops the adapter and deallocates all the resources
642  *  that were allocated for driver operation.
643  *
644  *  return 0 on success, positive on failure
645  *********************************************************************/
646
/*
 * Device removal routine.
 *
 * Stops the adapter and releases, in reverse order of attach, every
 * resource acquired there: taskqueues, event handlers, the interface,
 * the timer, PCI resources, descriptor rings and locks.  The teardown
 * order below is deliberate -- do not reorder casually.
 *
 * Returns 0 on success, EBUSY if VLANs are still configured on us.
 */
static int
ixgbe_detach(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ix_queue *que = adapter->queues;
	u32	ctrl_ext;

	INIT_DEBUGOUT("ixgbe_detach: begin");

#ifdef NET_VLAN
	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}
#endif

	/* Quiesce the hardware before tearing anything down */
	IXGBE_CORE_LOCK(adapter);
	ixgbe_stop(adapter);
	IXGBE_CORE_UNLOCK(adapter);

	/* Drain and free the per-queue deferred-work taskqueues */
	for (int i = 0; i < adapter->num_queues; i++, que++) {
		if (que->tq) {
			taskqueue_drain(que->tq, &que->que_task);
			taskqueue_free(que->tq);
		}
	}

	/* Drain the Link queue */
	if (adapter->tq) {
		taskqueue_drain(adapter->tq, &adapter->link_task);
		taskqueue_drain(adapter->tq, &adapter->mod_task);
		taskqueue_drain(adapter->tq, &adapter->msf_task);
#ifdef IXGBE_FDIR
		taskqueue_drain(adapter->tq, &adapter->fdir_task);
#endif
		taskqueue_free(adapter->tq);
	}

	/* let hardware know driver is unloading */
	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);

	/* Unregister VLAN events */
#ifdef NET_VLAN
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
#endif

	/* Detach the interface from the stack before freeing its backing */
	ether_ifdetach(adapter->ifp);
	callout_stop(&adapter->timer);
	lockuninit(&adapter->core_lock);
#ifdef DEV_NETMAP
	netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
	ixgbe_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(adapter->ifp);

	/* Finally release rings, multicast array, sysctls and locks */
	ixgbe_free_transmit_structures(adapter);
	ixgbe_free_receive_structures(adapter);
	kfree(adapter->mta, M_DEVBUF);
	sysctl_ctx_free(&adapter->sysctl_ctx);
	
	spin_uninit(&adapter->mcast_spin);
	IXGBE_CORE_LOCK_DESTROY(adapter);
	return (0);
}
718
719 /*********************************************************************
720  *
721  *  Shutdown entry point
722  *
723  **********************************************************************/
724
725 static int
726 ixgbe_shutdown(device_t dev)
727 {
728         struct adapter *adapter = device_get_softc(dev);
729         IXGBE_CORE_LOCK(adapter);
730         ixgbe_stop(adapter);
731         IXGBE_CORE_UNLOCK(adapter);
732         return (0);
733 }
734
735
736 /*********************************************************************
737  *  Transmit entry point
738  *
739  *  ixgbe_start is called by the stack to initiate a transmit.
740  *  The driver will remain in this routine as long as there are
741  *  packets to transmit and transmit resources are available.
742  *  In case resources are not available stack is notified and
743  *  the packet is requeued.
744  **********************************************************************/
745
746 static void
747 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
748 {
749         struct mbuf    *m_head;
750         struct adapter *adapter = txr->adapter;
751
752         IXGBE_TX_LOCK_ASSERT(txr);
753
754         if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
755                 return;
756         if (!adapter->link_active)
757                 return;
758
759         while (!ifq_is_empty(&ifp->if_snd)) {
760                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) {
761                         txr->queue_status |= IXGBE_QUEUE_DEPLETED;
762                         break;
763                 }
764
765                 m_head = ifq_dequeue(&ifp->if_snd, NULL);
766                 if (m_head == NULL)
767                         break;
768
769                 if (ixgbe_xmit(txr, &m_head)) {
770 #if 0 /* XXX: prepend to an ALTQ queue ? */
771                         if (m_head != NULL)
772                                 IF_PREPEND(&ifp->if_snd, m_head);
773 #endif
774                         if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
775                                 txr->queue_status |= IXGBE_QUEUE_DEPLETED;
776                         break;
777                 }
778                 /* Send a copy of the frame to the BPF listener */
779                 ETHER_BPF_MTAP(ifp, m_head);
780
781                 /* Set watchdog on */
782                 txr->watchdog_time = ticks;
783                 txr->queue_status = IXGBE_QUEUE_WORKING;
784
785         }
786         return;
787 }
788
789 /*
790  * Legacy TX start - called by the stack, this
791  * always uses the first tx ring, and should
792  * not be used with multiqueue tx enabled.
793  */
794 static void
795 ixgbe_start(struct ifnet *ifp)
796 {
797         struct adapter *adapter = ifp->if_softc;
798         struct tx_ring  *txr = adapter->tx_rings;
799
800         if (ifp->if_flags & IFF_RUNNING) {
801                 IXGBE_TX_LOCK(txr);
802                 ixgbe_start_locked(txr, ifp);
803                 IXGBE_TX_UNLOCK(txr);
804         }
805         return;
806 }
807
#if 0 /* __FreeBSD_version >= 800000 */
/*
** Multiqueue Transmit driver
**
** NOTE(review): this entire region is compiled out.  It is the
** FreeBSD 8+ buf_ring (drbr) multiqueue TX path, retained verbatim
** for reference during the DragonFly port.
*/

/*
 * Multiqueue transmit entry point: pick a TX ring (by flow id when
 * present, otherwise by current CPU), and either transmit directly
 * when the ring lock is free, or enqueue to the ring's buf_ring and
 * kick the queue task.
 */
static int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	int 		i = 0, err = 0;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	if (((txr->queue_status & IXGBE_QUEUE_DEPLETED) == 0) &&
	    IXGBE_TX_TRYLOCK(txr)) {
		err = ixgbe_mq_start_locked(ifp, txr, m);
		IXGBE_TX_UNLOCK(txr);
	} else {
		/* Ring busy or depleted: defer to the queue taskqueue. */
		err = drbr_enqueue(ifp, txr->br, m);
		taskqueue_enqueue(que->tq, &que->que_task);
	}

	return (err);
}

/*
 * Transmit as many frames as possible from the ring's buf_ring (plus
 * the optional mbuf 'm') while descriptors remain.  Called with the
 * TX ring lock held; m == NULL means "just drain the buf_ring".
 */
static int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter  *adapter = txr->adapter;
	struct mbuf     *next;
	int             enqueued, err = 0;

	/* Not running, ring depleted, or no link: just queue the mbuf. */
	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    (txr->queue_status == IXGBE_QUEUE_DEPLETED) ||
	    adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enqueued = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		/* Preserve ordering: go through the buf_ring. */
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enqueued++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
			ixgbe_txeof(txr);
		if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD) {
			/* Still starved after reclaim: mark and stop. */
			txr->queue_status |= IXGBE_QUEUE_DEPLETED;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}

	if (enqueued > 0) {
		/* Set watchdog on */
		txr->queue_status |= IXGBE_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
		ixgbe_txeof(txr);

	return (err);
}

/*
** Flush all ring buffers
*/
static void
ixgbe_qflush(struct ifnet *ifp)
{
	struct adapter  *adapter = ifp->if_softc;
	struct tx_ring  *txr = adapter->tx_rings;
	struct mbuf     *m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IXGBE_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXGBE_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */
920
921 /*********************************************************************
922  *  Ioctl entry point
923  *
924  *  ixgbe_ioctl is called when the user wants to configure the
925  *  interface.
926  *
927  *  return 0 on success, positive on failure
928  **********************************************************************/
929
930 static int
931 ixgbe_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
932 {
933         struct adapter  *adapter = ifp->if_softc;
934         struct ifreq    *ifr = (struct ifreq *) data;
935 #if defined(INET) || defined(INET6)
936         struct ifaddr *ifa = (struct ifaddr *)data;
937         bool            avoid_reset = FALSE;
938 #endif
939         int             error = 0;
940
941         switch (command) {
942
943         case SIOCSIFADDR:
944 #ifdef INET
945                 if (ifa->ifa_addr->sa_family == AF_INET)
946                         avoid_reset = TRUE;
947 #endif
948 #ifdef INET6
949                 if (ifa->ifa_addr->sa_family == AF_INET6)
950                         avoid_reset = TRUE;
951 #endif
952 #if defined(INET) || defined(INET6)
953                 /*
954                 ** Calling init results in link renegotiation,
955                 ** so we avoid doing it when possible.
956                 */
957                 if (avoid_reset) {
958                         ifp->if_flags |= IFF_UP;
959                         if (!(ifp->if_flags & IFF_RUNNING))
960                                 ixgbe_init(adapter);
961                         if (!(ifp->if_flags & IFF_NOARP))
962                                 arp_ifinit(ifp, ifa);
963                 } else
964                         error = ether_ioctl(ifp, command, data);
965 #endif
966                 break;
967         case SIOCSIFMTU:
968                 IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
969                 if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
970                         error = EINVAL;
971                 } else {
972                         IXGBE_CORE_LOCK(adapter);
973                         ifp->if_mtu = ifr->ifr_mtu;
974                         adapter->max_frame_size =
975                                 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
976                         ixgbe_init_locked(adapter);
977                         IXGBE_CORE_UNLOCK(adapter);
978                 }
979                 break;
980         case SIOCSIFFLAGS:
981                 IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
982                 IXGBE_CORE_LOCK(adapter);
983                 if (ifp->if_flags & IFF_UP) {
984                         if ((ifp->if_flags & IFF_RUNNING)) {
985                                 if ((ifp->if_flags ^ adapter->if_flags) &
986                                     (IFF_PROMISC | IFF_ALLMULTI)) {
987                                         ixgbe_set_promisc(adapter);
988                                 }
989                         } else
990                                 ixgbe_init_locked(adapter);
991                 } else
992                         if (ifp->if_flags & IFF_RUNNING)
993                                 ixgbe_stop(adapter);
994                 adapter->if_flags = ifp->if_flags;
995                 IXGBE_CORE_UNLOCK(adapter);
996                 break;
997         case SIOCADDMULTI:
998         case SIOCDELMULTI:
999                 IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
1000                 if (ifp->if_flags & IFF_RUNNING) {
1001                         IXGBE_CORE_LOCK(adapter);
1002                         ixgbe_disable_intr(adapter);
1003                         ixgbe_set_multi(adapter);
1004                         ixgbe_enable_intr(adapter);
1005                         IXGBE_CORE_UNLOCK(adapter);
1006                 }
1007                 break;
1008         case SIOCSIFMEDIA:
1009         case SIOCGIFMEDIA:
1010                 IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
1011                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1012                 break;
1013         case SIOCSIFCAP:
1014         {
1015                 int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1016                 IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
1017                 if (mask & IFCAP_HWCSUM)
1018                         ifp->if_capenable ^= IFCAP_HWCSUM;
1019 #if 0 /* NET_TSO */
1020                 if (mask & IFCAP_TSO4)
1021                         ifp->if_capenable ^= IFCAP_TSO4;
1022                 if (mask & IFCAP_TSO6)
1023                         ifp->if_capenable ^= IFCAP_TSO6;
1024 #endif
1025 #if 0 /* NET_LRO */
1026                 if (mask & IFCAP_LRO)
1027                         ifp->if_capenable ^= IFCAP_LRO;
1028 #endif
1029                 if (mask & IFCAP_VLAN_HWTAGGING)
1030                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1031                 if (mask & IFCAP_VLAN_HWFILTER)
1032                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1033 #if 0 /* NET_TSO */
1034                 if (mask & IFCAP_VLAN_HWTSO)
1035                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1036 #endif
1037                 if (ifp->if_flags & IFF_RUNNING) {
1038                         IXGBE_CORE_LOCK(adapter);
1039                         ixgbe_init_locked(adapter);
1040                         IXGBE_CORE_UNLOCK(adapter);
1041                 }
1042 #if 0
1043                 VLAN_CAPABILITIES(ifp);
1044 #endif
1045                 break;
1046         }
1047
1048         default:
1049                 IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
1050                 error = ether_ioctl(ifp, command, data);
1051                 break;
1052         }
1053
1054         return (error);
1055 }
1056
1057 /*********************************************************************
1058  *  Init entry point
1059  *
1060  *  This routine is used in two ways. It is used by the stack as
1061  *  init entry point in network interface structure. It is also used
1062  *  by the driver as a hw/sw initialization routine to get to a
1063  *  consistent state.
1064  *
1065  *  return 0 on success, positive on failure
1066  **********************************************************************/
1067 #define IXGBE_MHADD_MFS_SHIFT 16
1068
/*
 * Bring the adapter to a fully initialized, running state.  Used both
 * as the if_init path (via ixgbe_init()) and internally to return the
 * hw/sw state to consistency (e.g. after an MTU or capability change).
 * The core lock must be held on entry.
 */
static void
ixgbe_init_locked(struct adapter *adapter)
{
	struct ifnet   *ifp = adapter->ifp;
	device_t        dev = adapter->dev;
	struct ixgbe_hw *hw = &adapter->hw;
	u32             k, txdctl, mhadd, gpie;
	u32             rxdctl, rxctrl;

	KKASSERT(lockstatus(&adapter->core_lock, curthread) != 0);
	INIT_DEBUGOUT("ixgbe_init: begin");
	/* Quiesce the device and stop the watchdog/stats timer first. */
	hw->adapter_stopped = FALSE;
	ixgbe_stop_adapter(hw);
	callout_stop(&adapter->timer);

	/* reprogram the RAR[0] in case user changed it. */
	ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
	      IXGBE_ETH_LENGTH_OF_ADDRESS);
	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
	hw->addr_ctrl.rar_used_count = 1;

	/* Set the various hardware offload abilities */
	ifp->if_hwassist = 0;
#if 0 /* NET_TSO */
	if (ifp->if_capenable & IFCAP_TSO)
		ifp->if_hwassist |= CSUM_TSO;
#endif
	if (ifp->if_capenable & IFCAP_TXCSUM) {
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if 0
		if (hw->mac.type != ixgbe_mac_82598EB)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
	}

	/* Prepare transmit descriptors and buffers */
	if (ixgbe_setup_transmit_structures(adapter)) {
		device_printf(dev,"Could not setup transmit structures\n");
		ixgbe_stop(adapter);
		return;
	}

	ixgbe_init_hw(hw);
	ixgbe_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	ixgbe_set_multi(adapter);

	/*
	** Determine the correct mbuf pool
	** for doing jumbo/headersplit
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else if (adapter->max_frame_size <= 9216)
		adapter->rx_mbuf_sz = MJUM9BYTES;
	else
		adapter->rx_mbuf_sz = MJUM16BYTES;

	/* Prepare receive descriptors and buffers */
	if (ixgbe_setup_receive_structures(adapter)) {
		device_printf(dev,"Could not setup receive structures\n");
		ixgbe_stop(adapter);
		return;
	}

	/* Configure RX settings */
	ixgbe_initialize_receive_units(adapter);

	/* Build up the GPIE (general purpose interrupt enable) value. */
	gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);

	/* Enable Fan Failure Interrupt */
	gpie |= IXGBE_SDP1_GPIEN;

	/* Add for Module detection */
	if (hw->mac.type == ixgbe_mac_82599EB)
		gpie |= IXGBE_SDP2_GPIEN;

	/* Thermal Failure Detection */
	if (hw->mac.type == ixgbe_mac_X540)
		gpie |= IXGBE_SDP0_GPIEN;

	if (adapter->msix > 1) {
		/* Enable Enhanced MSIX mode */
		gpie |= IXGBE_GPIE_MSIX_MODE;
		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
		    IXGBE_GPIE_OCD;
	}
	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);

	/* Set MTU size: program MHADD only for jumbo frames. */
	if (ifp->if_mtu > ETHERMTU) {
		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
		mhadd &= ~IXGBE_MHADD_MFS_MASK;
		mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
	}

	/* Now enable all the queues */

	for (int i = 0; i < adapter->num_queues; i++) {
		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
		txdctl |= IXGBE_TXDCTL_ENABLE;
		/* Set WTHRESH to 8, burst writeback */
		txdctl |= (8 << 16);
		/*
		 * When the internal queue falls below PTHRESH (32),
		 * start prefetching as long as there are at least
		 * HTHRESH (1) buffers ready. The values are taken
		 * from the Intel linux driver 3.8.21.
		 * Prefetching enables tx line rate even with 1 queue.
		 */
		txdctl |= (32 << 0) | (1 << 8);
		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
	}

	for (int i = 0; i < adapter->num_queues; i++) {
		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
		if (hw->mac.type == ixgbe_mac_82598EB) {
			/*
			** PTHRESH = 21
			** HTHRESH = 4
			** WTHRESH = 8
			*/
			rxdctl &= ~0x3FFFFF;
			rxdctl |= 0x080420;
		}
		rxdctl |= IXGBE_RXDCTL_ENABLE;
		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
		/* Poll up to ~10ms for the ring-enable bit to latch. */
		for (k = 0; k < 10; k++) {
			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
			    IXGBE_RXDCTL_ENABLE)
				break;
			else
				msec_delay(1);
		}
		wmb();
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, we must preserve the buffers made
		 * available to userspace before the if_init()
		 * (this is true by default on the TX side, because
		 * init makes all buffers available to userspace).
		 *
		 * netmap_reset() and the device specific routines
		 * (e.g. ixgbe_setup_receive_rings()) map these
		 * buffers at the end of the NIC ring, so here we
		 * must set the RDT (tail) register to make sure
		 * they are not overwritten.
		 *
		 * In this driver the NIC ring starts at RDH = 0,
		 * RDT points to the last slot available for reception (?),
		 * so RDT = num_rx_desc - 1 means the whole ring is available.
		 */
		if (ifp->if_capenable & IFCAP_NETMAP) {
			struct netmap_adapter *na = NA(adapter->ifp);
			struct netmap_kring *kring = &na->rx_rings[i];
			int t = na->num_rx_desc - 1 - kring->nr_hwavail;

			IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
		} else
#endif /* DEV_NETMAP */
		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
	}

	/* Set up VLAN support and filter */
	ixgbe_setup_vlan_hw_support(adapter);

	/* Enable Receive engine */
	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
	if (hw->mac.type == ixgbe_mac_82598EB)
		rxctrl |= IXGBE_RXCTRL_DMBYPS;
	rxctrl |= IXGBE_RXCTRL_RXEN;
	ixgbe_enable_rx_dma(hw, rxctrl);

	/* Restart the 1 Hz watchdog/stats timer. */
	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);

	/* Set up MSI/X routing */
	if (ixgbe_enable_msix)  {
		ixgbe_configure_ivars(adapter);
		/* Set up auto-mask */
		if (hw->mac.type == ixgbe_mac_82598EB)
			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
		else {
			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
		}
	} else {  /* Simple settings for Legacy/MSI */
		ixgbe_set_ivar(adapter, 0, 0, 0);
		ixgbe_set_ivar(adapter, 0, 0, 1);
		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
	}

#ifdef IXGBE_FDIR
	/* Init Flow director */
	if (hw->mac.type != ixgbe_mac_82598EB) {
		u32 hdrm = 32 << fdir_pballoc;

		hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
		ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
	}
#endif

	/*
	** Check on any SFP devices that
	** need to be kick-started
	*/
	if (hw->phy.type == ixgbe_phy_none) {
		int err = hw->phy.ops.identify(hw);
		if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
			device_printf(dev,
			    "Unsupported SFP+ module type was detected.\n");
			return;
		}
	}

	/* Set moderation on the Link interrupt */
	IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);

	/* Config/Enable Link */
	ixgbe_config_link(adapter);

	/* Hardware Packet Buffer & Flow Control setup */
	{
		u32 rxpb, frame, size, tmp;

		frame = adapter->max_frame_size;

		/* Calculate High Water */
		if (hw->mac.type == ixgbe_mac_X540)
			tmp = IXGBE_DV_X540(frame, frame);
		else
			tmp = IXGBE_DV(frame, frame);
		size = IXGBE_BT2KB(tmp);
		rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
		hw->fc.high_water[0] = rxpb - size;

		/* Now calculate Low Water */
		if (hw->mac.type == ixgbe_mac_X540)
			tmp = IXGBE_LOW_DV_X540(frame);
		else
			tmp = IXGBE_LOW_DV(frame);
		hw->fc.low_water[0] = IXGBE_BT2KB(tmp);

		adapter->fc = hw->fc.requested_mode = ixgbe_fc_full;
		hw->fc.pause_time = IXGBE_FC_PAUSE;
		hw->fc.send_xon = TRUE;
	}
	/* Initialize the FC settings */
	ixgbe_start_hw(hw);

	/* And now turn on interrupts */
	ixgbe_enable_intr(adapter);

	/* Now inform the stack we're ready */
	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;

	return;
}
1334
/*
 * Stack-facing init entry point: take the core lock and run the
 * locked initialization routine.
 */
static void
ixgbe_init(void *arg)
{
	struct adapter *sc = arg;

	IXGBE_CORE_LOCK(sc);
	ixgbe_init_locked(sc);
	IXGBE_CORE_UNLOCK(sc);
}
1345
1346
1347 /*
1348 **
1349 ** MSIX Interrupt Handlers and Tasklets
1350 **
1351 */
1352
1353 static inline void
1354 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
1355 {
1356         struct ixgbe_hw *hw = &adapter->hw;
1357         u64     queue = (u64)(1 << vector);
1358         u32     mask;
1359
1360         if (hw->mac.type == ixgbe_mac_82598EB) {
1361                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1362                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
1363         } else {
1364                 mask = (queue & 0xFFFFFFFF);
1365                 if (mask)
1366                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
1367                 mask = (queue >> 32);
1368                 if (mask)
1369                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
1370         }
1371 }
1372
1373 static inline void
1374 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1375 {
1376         struct ixgbe_hw *hw = &adapter->hw;
1377         u64     queue = (u64)(1 << vector);
1378         u32     mask;
1379
1380         if (hw->mac.type == ixgbe_mac_82598EB) {
1381                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1382                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1383         } else {
1384                 mask = (queue & 0xFFFFFFFF);
1385                 if (mask)
1386                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1387                 mask = (queue >> 32);
1388                 if (mask)
1389                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1390         }
1391 }
1392
1393 static inline void
1394 ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
1395 {
1396         u32 mask;
1397
1398         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
1399                 mask = (IXGBE_EIMS_RTX_QUEUE & queues);
1400                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
1401         } else {
1402                 mask = (queues & 0xFFFFFFFF);
1403                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
1404                 mask = (queues >> 32);
1405                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
1406         }
1407 }
1408
1409
1410 static void
1411 ixgbe_handle_que(void *context, int pending)
1412 {
1413         struct ix_queue *que = context;
1414         struct adapter  *adapter = que->adapter;
1415         struct tx_ring  *txr = que->txr;
1416         struct ifnet    *ifp = adapter->ifp;
1417         bool            more;
1418
1419         if (ifp->if_flags & IFF_RUNNING) {
1420                 more = ixgbe_rxeof(que, adapter->rx_process_limit);
1421                 IXGBE_TX_LOCK(txr);
1422                 ixgbe_txeof(txr);
1423 #if 0 /*__FreeBSD_version >= 800000*/
1424                 if (!drbr_empty(ifp, txr->br))
1425                         ixgbe_mq_start_locked(ifp, txr, NULL);
1426 #else
1427                 if (!ifq_is_empty(&ifp->if_snd))
1428                         ixgbe_start_locked(txr, ifp);
1429 #endif
1430                 IXGBE_TX_UNLOCK(txr);
1431                 if (more) {
1432                         taskqueue_enqueue(que->tq, &que->que_task);
1433                         return;
1434                 }
1435         }
1436
1437         /* Reenable this interrupt */
1438         ixgbe_enable_queue(adapter, que->msix);
1439         return;
1440 }
1441
1442
1443 /*********************************************************************
1444  *
1445  *  Legacy Interrupt Service routine
1446  *
1447  **********************************************************************/
1448
/*
 * Legacy (INTx/MSI) interrupt service routine: reads the cause
 * register, cleans RX and the first TX ring inline, and defers any
 * remaining work to the queue task.  Also checks fan-failure and
 * link-change causes.
 */
static void
ixgbe_legacy_irq(void *arg)
{
	struct ix_queue *que = arg;
	struct adapter	*adapter = que->adapter;
	struct ixgbe_hw	*hw = &adapter->hw;
	struct 		tx_ring *txr = adapter->tx_rings;
	bool		more_tx, more_rx;
	u32       	reg_eicr, loop = MAX_LOOP;


	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);

	++que->irqs;
	/* No cause bits set: likely a shared/spurious interrupt. */
	if (reg_eicr == 0) {
		ixgbe_enable_intr(adapter);
		return;
	}

	more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);

	/* Clean the TX ring, retrying up to MAX_LOOP passes. */
	IXGBE_TX_LOCK(txr);
	do {
		more_tx = ixgbe_txeof(txr);
	} while (loop-- && more_tx);
	IXGBE_TX_UNLOCK(txr);

	/* Leftover work goes to the queue taskqueue. */
	if (more_rx || more_tx)
		taskqueue_enqueue(que->tq, &que->que_task);

	/* Check for fan failure */
	if ((hw->phy.media_type == ixgbe_media_type_copper) &&
	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
                device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
		    "REPLACE IMMEDIATELY!!\n");
		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
	}

	/* Link status change */
	if (reg_eicr & IXGBE_EICR_LSC)
		taskqueue_enqueue(adapter->tq, &adapter->link_task);

	ixgbe_enable_intr(adapter);
	return;
}
1494
1495
1496 /*********************************************************************
1497  *
1498  *  MSIX Queue Interrupt Service routine
1499  *
1500  **********************************************************************/
/*
 * Per-queue MSI-X interrupt service routine: masks the queue's
 * vector, cleans RX/TX, then optionally runs Adaptive Interrupt
 * Moderation (AIM) before either deferring remaining work to the
 * queue task or unmasking the vector again.
 */
void
ixgbe_msix_que(void *arg)
{
	struct ix_queue	*que = arg;
	struct adapter  *adapter = que->adapter;
	struct tx_ring	*txr = que->txr;
	struct rx_ring	*rxr = que->rxr;
	bool		more_tx, more_rx;
	u32		newitr = 0;

	ixgbe_disable_queue(adapter, que->msix);
	++que->irqs;

	more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);

	IXGBE_TX_LOCK(txr);
	more_tx = ixgbe_txeof(txr);
	/*
	** Make certain that if the stack 
	** has anything queued the task gets
	** scheduled to handle it.
	*/
#if 0
#if __FreeBSD_version < 800000
	if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd))
#else
	if (!drbr_empty(adapter->ifp, txr->br))
#endif
#endif
	if (!ifq_is_empty(&adapter->ifp->if_snd))
		more_tx = 1;
	IXGBE_TX_UNLOCK(txr);

	/* Do AIM now? */

	if (ixgbe_enable_aim == FALSE)
		goto no_calc;
	/*
	** Do Adaptive Interrupt Moderation:
	**  - Write out last calculated setting
	**  - Calculate based on average size over
	**    the last interval.
	*/
	if (que->eitr_setting)
		IXGBE_WRITE_REG(&adapter->hw,
		    IXGBE_EITR(que->msix), que->eitr_setting);
 
	que->eitr_setting = 0;

	/* Idle, do nothing */
	if ((txr->bytes == 0) && (rxr->bytes == 0))
		goto no_calc;

	/* newitr starts as the average frame size (bytes/packet). */
	if ((txr->bytes) && (txr->packets))
		newitr = txr->bytes/txr->packets;
	if ((rxr->bytes) && (rxr->packets))
		newitr = max(newitr,
		    (rxr->bytes / rxr->packets));
	newitr += 24; /* account for hardware frame, crc */

	/* set an upper boundary */
	newitr = min(newitr, 3000);

	/* Be nice to the mid range */
	if ((newitr > 300) && (newitr < 1200))
		newitr = (newitr / 3);
	else
		newitr = (newitr / 2);

	/*
	 * 82598 packs the interval into both halves of EITR; newer
	 * MACs instead need the counter write-disable bit set.
	 */
	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
		newitr |= newitr << 16;
	else
		newitr |= IXGBE_EITR_CNT_WDIS;
		 
	/* save for next interrupt */
	que->eitr_setting = newitr;

	/* Reset state */
	txr->bytes = 0;
	txr->packets = 0;
	rxr->bytes = 0;
	rxr->packets = 0;

no_calc:
	if (more_tx || more_rx)
		taskqueue_enqueue(que->tq, &que->que_task);
	else /* Reenable this interrupt */
		ixgbe_enable_queue(adapter, que->msix);
	return;
}
1591
1592
/*
 * MSI-X "link"/other-causes vector handler.
 *
 * Reads the pending interrupt causes, acks them, and dispatches the
 * appropriate deferred task: link state change, flow-director reinit
 * (if compiled in), multispeed-fiber (SDP1) or SFP module (SDP2)
 * events.  Also reports ECC, fan-failure (82598AT) and over-temp
 * (X540) conditions directly.  Finally re-arms the OTHER cause.
 */
static void
ixgbe_msix_link(void *arg)
{
        struct adapter  *adapter = arg;
        struct ixgbe_hw *hw = &adapter->hw;
        u32             reg_eicr;

        ++adapter->link_irq;

        /*
         * First get the cause.
         * NOTE(review): this reads EICS rather than EICR — the shared-code
         * drivers use EICS as a readable mirror of the cause register here;
         * confirm against the 82599/X540 datasheet before changing.
         */
        reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
        /* Clear interrupt with write */
        IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);

        /* Link status change */
        if (reg_eicr & IXGBE_EICR_LSC)
                taskqueue_enqueue(adapter->tq, &adapter->link_task);

        /* The causes below do not exist on 82598 parts */
        if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
#ifdef IXGBE_FDIR
                if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
                        /* This is probably overkill :) */
                        if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
                                return;
                        /* Disable the interrupt */
                        IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
                        taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
                } else
#endif
                if (reg_eicr & IXGBE_EICR_ECC) {
                        device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
                            "Please Reboot!!\n");
                        IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
                } else

                /* SDP1: multispeed fiber needs link re-tuning */
                if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
                        /* Clear the interrupt */
                        IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
                        taskqueue_enqueue(adapter->tq, &adapter->msf_task);
                /* SDP2: SFP module insertion/removal */
                } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
                        /* Clear the interrupt */
                        IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
                        taskqueue_enqueue(adapter->tq, &adapter->mod_task);
                }
        } 

        /* Check for fan failure (82598AT signals it on SDP1) */
        if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
            (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
                device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
                    "REPLACE IMMEDIATELY!!\n");
                IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
        }

        /* Check for over temp condition (X540 signals it on SDP0) */
        if ((hw->mac.type == ixgbe_mac_X540) &&
            (reg_eicr & IXGBE_EICR_GPI_SDP0)) {
                device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
                    "PHY IS SHUT DOWN!!\n");
                device_printf(adapter->dev, "System shutdown required\n");
                IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0);
        }

        /* Re-enable the OTHER interrupt cause for the next event */
        IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
        return;
}
1659
1660 /*********************************************************************
1661  *
1662  *  Media Ioctl callback
1663  *
1664  *  This routine is called whenever the user queries the status of
1665  *  the interface using ifconfig.
1666  *
1667  **********************************************************************/
1668 static void
1669 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1670 {
1671         struct adapter *adapter = ifp->if_softc;
1672
1673         INIT_DEBUGOUT("ixgbe_media_status: begin");
1674         IXGBE_CORE_LOCK(adapter);
1675         ixgbe_update_link_status(adapter);
1676
1677         ifmr->ifm_status = IFM_AVALID;
1678         ifmr->ifm_active = IFM_ETHER;
1679
1680         if (!adapter->link_active) {
1681                 IXGBE_CORE_UNLOCK(adapter);
1682                 return;
1683         }
1684
1685         ifmr->ifm_status |= IFM_ACTIVE;
1686
1687         switch (adapter->link_speed) {
1688                 case IXGBE_LINK_SPEED_100_FULL:
1689                         ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1690                         break;
1691                 case IXGBE_LINK_SPEED_1GB_FULL:
1692                         ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1693                         break;
1694                 case IXGBE_LINK_SPEED_10GB_FULL:
1695                         ifmr->ifm_active |= adapter->optics | IFM_FDX;
1696                         break;
1697         }
1698
1699         IXGBE_CORE_UNLOCK(adapter);
1700
1701         return;
1702 }
1703
1704 /*********************************************************************
1705  *
1706  *  Media Ioctl callback
1707  *
1708  *  This routine is called when the user changes speed/duplex using
1709  *  media/mediopt option with ifconfig.
1710  *
1711  **********************************************************************/
1712 static int
1713 ixgbe_media_change(struct ifnet * ifp)
1714 {
1715         struct adapter *adapter = ifp->if_softc;
1716         struct ifmedia *ifm = &adapter->media;
1717
1718         INIT_DEBUGOUT("ixgbe_media_change: begin");
1719
1720         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1721                 return (EINVAL);
1722
1723         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1724         case IFM_AUTO:
1725                 adapter->hw.phy.autoneg_advertised =
1726                     IXGBE_LINK_SPEED_100_FULL |
1727                     IXGBE_LINK_SPEED_1GB_FULL |
1728                     IXGBE_LINK_SPEED_10GB_FULL;
1729                 break;
1730         default:
1731                 device_printf(adapter->dev, "Only auto media type\n");
1732                 return (EINVAL);
1733         }
1734
1735         return (0);
1736 }
1737
/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *      - return 0 on success, positive on failure
 *
 *  On success the mbuf is owned by the ring (freed at completion);
 *  on hard failure the mbuf is freed and *m_headp set to NULL; on
 *  ENOMEM / descriptor shortage the mbuf is left intact so the
 *  caller can retry later.
 *
 **********************************************************************/

static int
ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
        struct adapter  *adapter = txr->adapter;
        u32             olinfo_status = 0, cmd_type_len;
        u32             paylen = 0;
        int             i, j, error, nsegs;
        int             first, last = 0;
        struct mbuf     *m_head;
        bus_dma_segment_t segs[1];
        bus_dmamap_t    map;
        struct ixgbe_tx_buf *txbuf;
        union ixgbe_adv_tx_desc *txd = NULL;

        m_head = *m_headp;

        /* Basic descriptor defines */
        cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
            IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);

        if (m_head->m_flags & M_VLANTAG)
                cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;

        /*
         * Important to capture the first descriptor
         * used because it will contain the index of
         * the one we tell the hardware to report back
         */
        first = txr->next_avail_desc;
        txbuf = &txr->tx_buffers[first];
        map = txbuf->map;

        /*
         * Map the packet for DMA.
         * NOTE(review): maxsegs is 1 here (segs[1]); multi-fragment
         * chains rely on the EFBIG/m_defrag path below — confirm this
         * matches the tag created at ring allocation.
         */
        error = bus_dmamap_load_mbuf_segment(txr->txtag, map,
            *m_headp, segs, 1, &nsegs, BUS_DMA_NOWAIT);

        if (error == EFBIG) {
                struct mbuf *m;

                /* Too many fragments: linearize the chain and retry once */
                m = m_defrag(*m_headp, MB_DONTWAIT);
                if (m == NULL) {
                        adapter->mbuf_defrag_failed++;
                        m_freem(*m_headp);
                        *m_headp = NULL;
                        return (ENOBUFS);
                }
                *m_headp = m;

                /* Try it again */
                error = bus_dmamap_load_mbuf_segment(txr->txtag, map,
                    *m_headp, segs, 1, &nsegs, BUS_DMA_NOWAIT);

                if (error == ENOMEM) {
                        /* Transient shortage: keep mbuf for caller retry */
                        adapter->no_tx_dma_setup++;
                        return (error);
                } else if (error != 0) {
                        adapter->no_tx_dma_setup++;
                        m_freem(*m_headp);
                        *m_headp = NULL;
                        return (error);
                }
        } else if (error == ENOMEM) {
                /* Transient shortage: keep mbuf for caller retry */
                adapter->no_tx_dma_setup++;
                return (error);
        } else if (error != 0) {
                adapter->no_tx_dma_setup++;
                m_freem(*m_headp);
                *m_headp = NULL;
                return (error);
        }

        /* Make certain there are enough descriptors */
        if (nsegs > txr->tx_avail - 2) {
                txr->no_desc_avail++;
                error = ENOBUFS;
                goto xmit_fail;
        }
        m_head = *m_headp;

        /*
        ** Set up the appropriate offload context
        ** this becomes the first descriptor of 
        ** a packet.
        */
#if 0 /* NET_TSO */
        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
                if (ixgbe_tso_setup(txr, m_head, &paylen, &olinfo_status)) {
                        cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
                        olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
                        olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
                        ++adapter->tso_tx;
                } else
                        return (ENXIO);
        } else if (ixgbe_tx_ctx_setup(txr, m_head))
#endif
        if (ixgbe_tx_ctx_setup(txr, m_head))
                olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;

#ifdef IXGBE_IEEE1588
        /* This is changing soon to an mtag detection */
        if (we detect this mbuf has a TSTAMP mtag)
                cmd_type_len |= IXGBE_ADVTXD_MAC_TSTAMP;
#endif

#ifdef IXGBE_FDIR
        /* Do the flow director magic */
        if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
                ++txr->atr_count;
                if (txr->atr_count >= atr_sample_rate) {
                        ixgbe_atr(txr, m_head);
                        txr->atr_count = 0;
                }
        }
#endif
        /* Record payload length (paylen is only set on the TSO path) */
        if (paylen == 0)
                olinfo_status |= m_head->m_pkthdr.len <<
                    IXGBE_ADVTXD_PAYLEN_SHIFT;

        /* Fill one advanced TX descriptor per DMA segment */
        i = txr->next_avail_desc;
        for (j = 0; j < nsegs; j++) {
                bus_size_t seglen;
                bus_addr_t segaddr;

                txbuf = &txr->tx_buffers[i];
                txd = &txr->tx_base[i];
                seglen = segs[j].ds_len;
                segaddr = htole64(segs[j].ds_addr);

                txd->read.buffer_addr = segaddr;
                txd->read.cmd_type_len = htole32(txr->txd_cmd |
                    cmd_type_len |seglen);
                txd->read.olinfo_status = htole32(olinfo_status);
                last = i; /* descriptor that will get completion IRQ */

                if (++i == adapter->num_tx_desc)
                        i = 0;

                txbuf->m_head = NULL;
                txbuf->eop_index = -1;
        }

        /* Mark the final descriptor: end of packet, report status */
        txd->read.cmd_type_len |=
            htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
        txr->tx_avail -= nsegs;
        txr->next_avail_desc = i;

        /* mbuf is owned by (and freed from) the last buffer */
        txbuf->m_head = m_head;
        /* Swap the dma map between the first and last descriptor */
        txr->tx_buffers[first].map = txbuf->map;
        txbuf->map = map;
        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

        /* Set the index of the descriptor that will be marked done */
        txbuf = &txr->tx_buffers[first];
        txbuf->eop_index = last;

        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
        /*
         * Advance the Transmit Descriptor Tail (Tdt), this tells the
         * hardware that this frame is available to transmit.
         */
        ++txr->total_packets;
        IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);

        return (0);

xmit_fail:
        /* Unload the map but keep the mbuf so the caller can retry */
        bus_dmamap_unload(txr->txtag, txbuf->map);
        return (error);

}
1921
1922 static void
1923 ixgbe_set_promisc(struct adapter *adapter)
1924 {
1925         u_int32_t       reg_rctl;
1926         struct ifnet   *ifp = adapter->ifp;
1927
1928         reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1929         reg_rctl &= (~IXGBE_FCTRL_UPE);
1930         reg_rctl &= (~IXGBE_FCTRL_MPE);
1931         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1932
1933         if (ifp->if_flags & IFF_PROMISC) {
1934                 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1935                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1936         } else if (ifp->if_flags & IFF_ALLMULTI) {
1937                 reg_rctl |= IXGBE_FCTRL_MPE;
1938                 reg_rctl &= ~IXGBE_FCTRL_UPE;
1939                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1940         }
1941         return;
1942 }
1943
1944
1945 /*********************************************************************
1946  *  Multicast Update
1947  *
1948  *  This routine is called whenever multicast address list is updated.
1949  *
1950  **********************************************************************/
1951 #define IXGBE_RAR_ENTRIES 16
1952
1953 static void
1954 ixgbe_set_multi(struct adapter *adapter)
1955 {
1956         u32     fctrl;
1957         u8      *mta;
1958         u8      *update_ptr;
1959         struct  ifmultiaddr *ifma;
1960         int     mcnt = 0;
1961         struct ifnet   *ifp = adapter->ifp;
1962
1963         IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
1964
1965         mta = adapter->mta;
1966         bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1967             MAX_NUM_MULTICAST_ADDRESSES);
1968
1969         fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1970         fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1971         if (ifp->if_flags & IFF_PROMISC)
1972                 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1973         else if (ifp->if_flags & IFF_ALLMULTI) {
1974                 fctrl |= IXGBE_FCTRL_MPE;
1975                 fctrl &= ~IXGBE_FCTRL_UPE;
1976         } else
1977                 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1978         
1979         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
1980
1981         spin_lock(&adapter->mcast_spin);
1982         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1983                 if (ifma->ifma_addr->sa_family != AF_LINK)
1984                         continue;
1985                 bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
1986                     &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1987                     IXGBE_ETH_LENGTH_OF_ADDRESS);
1988                 mcnt++;
1989         }
1990         spin_unlock(&adapter->mcast_spin);
1991
1992         update_ptr = mta;
1993         ixgbe_update_mc_addr_list(&adapter->hw,
1994             update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
1995
1996         return;
1997 }
1998
1999 /*
2000  * This is an iterator function now needed by the multicast
2001  * shared code. It simply feeds the shared code routine the
2002  * addresses in the array of ixgbe_set_multi() one by one.
2003  */
2004 static u8 *
2005 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
2006 {
2007         u8 *addr = *update_ptr;
2008         u8 *newptr;
2009         *vmdq = 0;
2010
2011         newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
2012         *update_ptr = newptr;
2013         return addr;
2014 }
2015
2016
/*********************************************************************
 *  Timer routine
 *
 *  Runs once per second under the core lock: checks link status,
 *  updates statistics, manages the stack OACTIVE flag and runs the
 *  per-queue watchdog check; resets the adapter if every queue is
 *  hung.
 *
 **********************************************************************/

static void
ixgbe_local_timer(void *arg)
{
        struct adapter  *adapter = arg;
        device_t        dev = adapter->dev;
        struct ifnet    *ifp = adapter->ifp;
        struct ix_queue *que = adapter->queues;
        struct tx_ring  *txr = adapter->tx_rings;
        int             hung, busy, paused;

        lockmgr(&adapter->core_lock, LK_EXCLUSIVE);
        KKASSERT(lockstatus(&adapter->core_lock, curthread) != 0);
        hung = busy = paused = 0;

        /* Check for pluggable optics */
        if (adapter->sfp_probe)
                if (!ixgbe_sfp_probe(adapter))
                        goto out; /* Nothing to do */

        ixgbe_update_link_status(adapter);
        ixgbe_update_stats_counters(adapter);

        /*
         * If the interface has been paused (TX off due to flow control)
         * then don't do the watchdog check
         */
        if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
                paused = 1;

        /*
        ** Check the TX queues status
        **      - central locked handling of OACTIVE
        **      - watchdog only if all queues show hung
        */          
        for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
                if ((txr->queue_status & IXGBE_QUEUE_HUNG) &&
                    (paused == 0))
                        ++hung;
                if (txr->queue_status & IXGBE_QUEUE_DEPLETED)
                        ++busy;
                /* Kick the taskqueue for any non-idle queue */
                if ((txr->queue_status & IXGBE_QUEUE_IDLE) == 0)
                        taskqueue_enqueue(que->tq, &que->que_task);
        }
        /* Only truly watchdog if all queues show hung */
        if (hung == adapter->num_queues)
                goto watchdog;
        /* Only turn off the stack flow when ALL are depleted */
        if (busy == adapter->num_queues)
                ifp->if_flags |= IFF_OACTIVE;
        else if ((ifp->if_flags & IFF_OACTIVE) &&
            (busy < adapter->num_queues))
                ifp->if_flags &= ~IFF_OACTIVE;

out:
        ixgbe_rearm_queues(adapter, adapter->que_mask);
        callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
        lockmgr(&adapter->core_lock, LK_RELEASE);
        return;

watchdog:
        /*
         * NOTE(review): the loop above advanced txr past the last ring,
         * so the queue printed below may not be the hung one — confirm.
         */
        device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
        device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
            IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
            IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
        device_printf(dev,"TX(%d) desc avail = %d,"
            "Next TX to Clean = %d\n",
            txr->me, txr->tx_avail, txr->next_to_clean);
        adapter->ifp->if_flags &= ~IFF_RUNNING;
        adapter->watchdog_events++;
        /* Note: the callout is NOT rearmed here; ixgbe_init_locked does it */
        ixgbe_init_locked(adapter);

        lockmgr(&adapter->core_lock, LK_RELEASE);
}
2098
2099 /*
2100 ** Note: this routine updates the OS on the link state
2101 **      the real check of the hardware only happens with
2102 **      a link interrupt.
2103 */
2104 static void
2105 ixgbe_update_link_status(struct adapter *adapter)
2106 {
2107         struct ifnet    *ifp = adapter->ifp;
2108         struct tx_ring *txr = adapter->tx_rings;
2109         device_t dev = adapter->dev;
2110
2111
2112         if (adapter->link_up){ 
2113                 if (adapter->link_active == FALSE) {
2114                         if (bootverbose)
2115                                 device_printf(dev,"Link is up %d Gbps %s \n",
2116                                     ((adapter->link_speed == 128)? 10:1),
2117                                     "Full Duplex");
2118                         adapter->link_active = TRUE;
2119                         /* Update any Flow Control changes */
2120                         ixgbe_fc_enable(&adapter->hw);
2121                         ifp->if_link_state = LINK_STATE_UP;
2122                         if_link_state_change(ifp);
2123                 }
2124         } else { /* Link down */
2125                 if (adapter->link_active == TRUE) {
2126                         if (bootverbose)
2127                                 device_printf(dev,"Link is Down\n");
2128                         ifp->if_link_state = LINK_STATE_DOWN;
2129                         if_link_state_change(ifp);
2130                         adapter->link_active = FALSE;
2131                         for (int i = 0; i < adapter->num_queues;
2132                             i++, txr++)
2133                                 txr->queue_status = IXGBE_QUEUE_IDLE;
2134                 }
2135         }
2136
2137         return;
2138 }
2139
2140
/*********************************************************************
 *
 *  This routine disables all traffic on the adapter by issuing a
 *  global reset on the MAC and deallocates TX/RX buffers.
 *
 *  Must be called with the core lock held.
 *
 **********************************************************************/

static void
ixgbe_stop(void *arg)
{
        struct ifnet   *ifp;
        struct adapter *adapter = arg;
        struct ixgbe_hw *hw = &adapter->hw;
        ifp = adapter->ifp;

        KKASSERT(lockstatus(&adapter->core_lock, curthread) != 0);

        INIT_DEBUGOUT("ixgbe_stop: begin\n");
        ixgbe_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Let the stack know...*/
        ifp->if_flags &= ~IFF_RUNNING;
        ifp->if_flags |= IFF_OACTIVE;

        ixgbe_reset_hw(hw);
        /* Clear the stopped flag so ixgbe_stop_adapter() does real work */
        hw->adapter_stopped = FALSE;
        ixgbe_stop_adapter(hw);
        /* Turn off the laser */
        if (hw->phy.multispeed_fiber)
                ixgbe_disable_tx_laser(hw);

        /* reprogram the RAR[0] in case user changed it. */
        ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);

        return;
}
2178
2179
2180 /*********************************************************************
2181  *
2182  *  Determine hardware revision.
2183  *
2184  **********************************************************************/
2185 static void
2186 ixgbe_identify_hardware(struct adapter *adapter)
2187 {
2188         device_t        dev = adapter->dev;
2189         struct ixgbe_hw *hw = &adapter->hw;
2190
2191         /* Save off the information about this board */
2192         hw->vendor_id = pci_get_vendor(dev);
2193         hw->device_id = pci_get_device(dev);
2194         hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
2195         hw->subsystem_vendor_id =
2196             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2197         hw->subsystem_device_id =
2198             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2199
2200         /* We need this here to set the num_segs below */
2201         ixgbe_set_mac_type(hw);
2202
2203         /* Pick up the 82599 and VF settings */
2204         if (hw->mac.type != ixgbe_mac_82598EB) {
2205                 hw->phy.smart_speed = ixgbe_smart_speed;
2206                 adapter->num_segs = IXGBE_82599_SCATTER;
2207         } else
2208                 adapter->num_segs = IXGBE_82598_SCATTER;
2209
2210         return;
2211 }
2212
2213 /*********************************************************************
2214  *
2215  *  Determine optic type
2216  *
2217  **********************************************************************/
2218 static void
2219 ixgbe_setup_optics(struct adapter *adapter)
2220 {
2221         struct ixgbe_hw *hw = &adapter->hw;
2222         int             layer;
2223         
2224         layer = ixgbe_get_supported_physical_layer(hw);
2225
2226         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2227                 adapter->optics = IFM_10G_T;
2228                 return;
2229         }
2230
2231         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2232                 adapter->optics = IFM_1000_T;
2233                 return;
2234         }
2235
2236         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2237             IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2238                 adapter->optics = IFM_10G_LR;
2239                 return;
2240         }
2241
2242         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2243                 adapter->optics = IFM_10G_SR;
2244                 return;
2245         }
2246
2247         if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2248                 adapter->optics = IFM_10G_TWINAX;
2249                 return;
2250         }
2251
2252         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2253             IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2254                 adapter->optics = IFM_10G_CX4;
2255                 return;
2256         }
2257
2258         /* If we get here just set the default */
2259         adapter->optics = IFM_ETHER | IFM_AUTO;
2260         return;
2261 }
2262
/*********************************************************************
 *
 *  Setup the Legacy or MSI Interrupt handler
 *
 *  Allocates a single interrupt resource (MSI if available, else
 *  legacy INTx), creates the per-queue and link taskqueues, and
 *  installs ixgbe_legacy_irq as the handler.
 *
 **********************************************************************/
static int
ixgbe_allocate_legacy(struct adapter *adapter)
{
        device_t dev = adapter->dev;
        struct          ix_queue *que = adapter->queues;
        int error, rid = 0;
        unsigned int intr_flags;

        /* MSI RID at 1 */
        if (adapter->msix == 1)
                rid = 1;

        /* Try allocating a MSI interrupt first */
        adapter->intr_type = pci_alloc_1intr(dev, ixgbe_msi_enable,
                &rid, &intr_flags);

        /* We allocate a single interrupt resource */
        adapter->res = bus_alloc_resource_any(dev,
            SYS_RES_IRQ, &rid, intr_flags);
        if (adapter->res == NULL) {
                device_printf(dev, "Unable to allocate bus resource: "
                    "interrupt\n");
                return (ENXIO);
        }

        /*
         * Try allocating a fast interrupt and the associated deferred
         * processing contexts.
         */
        TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
        que->tq = taskqueue_create("ixgbe_que", M_NOWAIT,
            taskqueue_thread_enqueue, &que->tq);
        taskqueue_start_threads(&que->tq, 1, PI_NET, -1, "%s ixq",
            device_get_nameunit(adapter->dev));

        /* Tasklets for Link, SFP and Multispeed Fiber */
        TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
        TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
        TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
#ifdef IXGBE_FDIR
        TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
#endif
        adapter->tq = taskqueue_create("ixgbe_link", M_NOWAIT,
            taskqueue_thread_enqueue, &adapter->tq);
        taskqueue_start_threads(&adapter->tq, 1, PI_NET, -1, "%s linkq",
            device_get_nameunit(adapter->dev));

        if ((error = bus_setup_intr(dev, adapter->res, INTR_MPSAFE,
            ixgbe_legacy_irq, que, &adapter->tag, &adapter->serializer)) != 0) {
                /*
                 * NOTE(review): adapter->res and the MSI allocation are not
                 * released here — presumably the detach path cleans them up;
                 * verify against the caller's error handling.
                 */
                device_printf(dev, "Failed to register fast interrupt "
                    "handler: %d\n", error);
                taskqueue_free(que->tq);
                taskqueue_free(adapter->tq);
                que->tq = NULL;
                adapter->tq = NULL;
                return (error);
        }
        /* For simplicity in the handlers */
        adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;

        return (0);
}
2330
2331
2332 /*********************************************************************
2333  *
2334  *  Setup MSIX Interrupt resources and handlers 
2335  *
2336  **********************************************************************/
2337 static int
2338 ixgbe_allocate_msix(struct adapter *adapter)
2339 {
2340         device_t        dev = adapter->dev;
2341         struct          ix_queue *que = adapter->queues;
2342         int             error, rid, vector = 0;
2343
2344         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2345                 rid = vector + 1;
2346                 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2347                     RF_SHAREABLE | RF_ACTIVE);
2348                 if (que->res == NULL) {
2349                         device_printf(dev,"Unable to allocate"
2350                             " bus resource: que interrupt [%d]\n", vector);
2351                         return (ENXIO);
2352                 }
2353                 /* Set the handler function */
2354                 error = bus_setup_intr(dev, que->res, INTR_MPSAFE,
2355                     ixgbe_msix_que, que, &que->tag, &que->serializer);
2356                 if (error) {
2357                         que->res = NULL;
2358                         device_printf(dev, "Failed to register QUE handler");
2359                         return (error);
2360                 }
2361 #if 0 /* __FreeBSD_version >= 800504 */
2362                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2363 #endif
2364                 que->msix = vector;
2365                 adapter->que_mask |= (u64)(1 << que->msix);
2366                 /*
2367                 ** Bind the msix vector, and thus the
2368                 ** ring to the corresponding cpu.
2369                 */
2370 #if 0 /* XXX */
2371                 if (adapter->num_queues > 1)
2372                         bus_bind_intr(dev, que->res, i);
2373 #endif
2374
2375                 TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2376                 que->tq = taskqueue_create("ixgbe_que", M_NOWAIT,
2377                     taskqueue_thread_enqueue, &que->tq);
2378                 taskqueue_start_threads(&que->tq, 1, PI_NET, -1, "%s que",
2379                     device_get_nameunit(adapter->dev));
2380         }
2381
2382         /* and Link */
2383         rid = vector + 1;
2384         adapter->res = bus_alloc_resource_any(dev,
2385             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2386         if (!adapter->res) {
2387                 device_printf(dev,"Unable to allocate"
2388             " bus resource: Link interrupt [%d]\n", rid);
2389                 return (ENXIO);
2390         }
2391         /* Set the link handler function */
2392         error = bus_setup_intr(dev, adapter->res, INTR_MPSAFE,
2393             ixgbe_msix_link, adapter, &adapter->tag, &adapter->serializer);
2394         if (error) {
2395                 adapter->res = NULL;
2396                 device_printf(dev, "Failed to register LINK handler");
2397                 return (error);
2398         }
2399 #if 0 /* __FreeBSD_version >= 800504 */
2400         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2401 #endif
2402         adapter->linkvec = vector;
2403         /* Tasklets for Link, SFP and Multispeed Fiber */
2404         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2405         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2406         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2407 #ifdef IXGBE_FDIR
2408         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2409 #endif
2410         adapter->tq = taskqueue_create("ixgbe_link", M_NOWAIT,
2411             taskqueue_thread_enqueue, &adapter->tq);
2412         taskqueue_start_threads(&adapter->tq, 1, PI_NET, -1, "%s linkq",
2413             device_get_nameunit(adapter->dev));
2414
2415         return (0);
2416 }
2417
#if 0	/* HAVE_MSIX */
/*
 * Setup Either MSI/X or MSI
 *
 * NOTE(review): this whole function is compiled out on this port
 * (HAVE_MSIX is not defined); it is kept as reference from the
 * FreeBSD driver.  Returns the number of vectors allocated, or 0
 * to force the caller into Legacy interrupt setup.
 */
static int
ixgbe_setup_msix(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int rid, want, queues, msgs;

	/* Override by tuneable */
	if (ixgbe_enable_msix == 0)
		goto msi;

	/* First try MSI/X: the table BAR differs by MAC generation */
	rid = PCIR_BAR(MSIX_82598_BAR);
	adapter->msix_mem = bus_alloc_resource_any(dev,
	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (!adapter->msix_mem) {
		rid += 4;	/* 82599 maps in higher BAR */
		adapter->msix_mem = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
	}
	if (!adapter->msix_mem) {
		/* May not be enabled */
		device_printf(adapter->dev,
		    "Unable to map MSIX table \n");
		goto msi;
	}

	msgs = pci_msix_count(dev);
	if (msgs == 0) { /* system has msix disabled */
		bus_release_resource(dev, SYS_RES_MEMORY,
		    rid, adapter->msix_mem);
		adapter->msix_mem = NULL;
		goto msi;
	}

	/* Figure out a reasonable auto config value */
	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;

	if (ixgbe_num_queues != 0)
		queues = ixgbe_num_queues;
	/* Set max queues to 8 when autoconfiguring */
	else if ((ixgbe_num_queues == 0) && (queues > 8))
		queues = 8;

	/*
	** Want one vector (RX/TX pair) per queue
	** plus an additional for Link.
	*/
	want = queues + 1;
	if (msgs >= want)
		msgs = want;
	else {
		device_printf(adapter->dev,
		    "MSIX Configuration Problem, "
		    "%d vectors but %d queues wanted!\n",
		    msgs, want);
		return (0); /* Will go to Legacy setup */
	}
	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
		device_printf(adapter->dev,
		    "Using MSIX interrupts with %d vectors\n", msgs);
		adapter->num_queues = queues;
		return (msgs);
	}
msi:
	/* MSI/X unavailable or disabled: fall back to MSI, then Legacy */
	msgs = pci_msi_count(dev);
	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
		device_printf(adapter->dev,"Using an MSI interrupt\n");
	else
		device_printf(adapter->dev,"Using a Legacy interrupt\n");
	return (msgs);
}
#endif
2494
2495
2496 static int
2497 ixgbe_allocate_pci_resources(struct adapter *adapter)
2498 {
2499         int             rid;
2500         device_t        dev = adapter->dev;
2501
2502         rid = PCIR_BAR(0);
2503         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2504             &rid, RF_ACTIVE);
2505
2506         if (!(adapter->pci_mem)) {
2507                 device_printf(dev,"Unable to allocate bus resource: memory\n");
2508                 return (ENXIO);
2509         }
2510
2511         adapter->osdep.mem_bus_space_tag =
2512                 rman_get_bustag(adapter->pci_mem);
2513         adapter->osdep.mem_bus_space_handle =
2514                 rman_get_bushandle(adapter->pci_mem);
2515         adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
2516
2517         /* Legacy defaults */
2518         adapter->num_queues = 1;
2519         adapter->hw.back = &adapter->osdep;
2520
2521         /*
2522         ** Now setup MSI or MSI/X, should
2523         ** return us the number of supported
2524         ** vectors. (Will be 1 for MSI)
2525         */
2526 #if 0   /* HAVE_MSIX */
2527         adapter->msix = ixgbe_setup_msix(adapter);
2528 #endif
2529         return (0);
2530 }
2531
2532 static void
2533 ixgbe_free_pci_resources(struct adapter * adapter)
2534 {
2535         struct          ix_queue *que = adapter->queues;
2536         device_t        dev = adapter->dev;
2537         int             rid, memrid;
2538
2539         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2540                 memrid = PCIR_BAR(MSIX_82598_BAR);
2541         else
2542                 memrid = PCIR_BAR(MSIX_82599_BAR);
2543
2544         /*
2545         ** There is a slight possibility of a failure mode
2546         ** in attach that will result in entering this function
2547         ** before interrupt resources have been initialized, and
2548         ** in that case we do not want to execute the loops below
2549         ** We can detect this reliably by the state of the adapter
2550         ** res pointer.
2551         */
2552         if (adapter->res == NULL)
2553                 goto mem;
2554
2555         /*
2556         **  Release all msix queue resources:
2557         */
2558         for (int i = 0; i < adapter->num_queues; i++, que++) {
2559                 rid = que->msix + 1;
2560                 if (que->tag != NULL) {
2561                         bus_teardown_intr(dev, que->res, que->tag);
2562                         que->tag = NULL;
2563                 }
2564                 if (que->res != NULL)
2565                         bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2566         }
2567
2568
2569         /* Clean the Legacy or Link interrupt last */
2570         if (adapter->linkvec) /* we are doing MSIX */
2571                 rid = adapter->linkvec + 1;
2572         else
2573                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2574
2575         if (adapter->tag != NULL) {
2576                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2577                 adapter->tag = NULL;
2578         }
2579         if (adapter->res != NULL)
2580                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2581         if (adapter->intr_type == PCI_INTR_TYPE_MSI)
2582                 pci_release_msi(adapter->dev);
2583
2584 mem:
2585         if (adapter->msix)
2586                 pci_release_msi(dev);
2587
2588         if (adapter->msix_mem != NULL)
2589                 bus_release_resource(dev, SYS_RES_MEMORY,
2590                     memrid, adapter->msix_mem);
2591
2592         if (adapter->pci_mem != NULL)
2593                 bus_release_resource(dev, SYS_RES_MEMORY,
2594                     PCIR_BAR(0), adapter->pci_mem);
2595
2596         return;
2597 }
2598
/*********************************************************************
 *
 *  Setup networking device structure and register an interface.
 *
 *  Allocates the ifnet, wires up the driver entry points, declares
 *  hardware capabilities, attaches the ethernet layer and registers
 *  the supported media types.  Returns 0 on success, -1 if the ifnet
 *  cannot be allocated.
 *
 **********************************************************************/
static int
ixgbe_setup_interface(device_t dev, struct adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;
	struct ifnet   *ifp;

	INIT_DEBUGOUT("ixgbe_setup_interface: begin");

	ifp = adapter->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not allocate ifnet structure\n");
		return (-1);
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	/* Baudrate is reported in bits/sec; 1Gb default until link is up */
	ifp->if_baudrate = 1000000000;
	ifp->if_init = ixgbe_init;
	ifp->if_softc = adapter;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = ixgbe_ioctl;
	ifp->if_start = ixgbe_start;
#if 0 /* __FreeBSD_version >= 800000 */
	ifp->if_transmit = ixgbe_mq_start;
	ifp->if_qflush = ixgbe_qflush;
#endif
	/* Leave a little slack so the ring never completely fills */
	ifp->if_snd.ifq_maxlen = adapter->num_tx_desc - 2;

	ether_ifattach(ifp, adapter->hw.mac.addr, NULL);

	/* if_mtu is valid only after ether_ifattach() */
	adapter->max_frame_size =
	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;

	/*
	 * Tell the upper layer(s) we support long frames.
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);

#if 0 /* NET_TSO */
	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM;
#endif
	/* TSO/LRO capability bits stay disabled on this port (see #if 0s) */
	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
#if 0 /* NET_TSO */
			     |  IFCAP_VLAN_HWTSO
#endif
			     |  IFCAP_VLAN_MTU;
	/* Everything advertised is enabled by default... */
	ifp->if_capenable = ifp->if_capabilities;

	/* Don't enable LRO by default */
#if 0 /* NET_LRO */
	ifp->if_capabilities |= IFCAP_LRO;
#endif

	/*
	** Don't turn this on by default, if vlans are
	** created on another pseudo device (eg. lagg)
	** then vlan events are not passed thru, breaking
	** operation, but with HW FILTER off it works. If
	** using vlans directly on the ixgbe driver you can
	** enable this and get full hardware tag filtering.
	*/
	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;

	/*
	 * Specify the media types supported by this adapter and register
	 * callbacks to update media and link information
	 */
	ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
		     ixgbe_media_status);
	ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
	ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
	/* 82598AT also has a copper 1000baseT PHY option */
	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
		ifmedia_add(&adapter->media,
		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
		ifmedia_add(&adapter->media,
		    IFM_ETHER | IFM_1000_T, 0, NULL);
	}
	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);

	return (0);
}
2686
2687 static void
2688 ixgbe_config_link(struct adapter *adapter)
2689 {
2690         struct ixgbe_hw *hw = &adapter->hw;
2691         u32     autoneg, err = 0;
2692         bool    sfp, negotiate;
2693
2694         sfp = ixgbe_is_sfp(hw);
2695
2696         if (sfp) { 
2697                 if (hw->phy.multispeed_fiber) {
2698                         hw->mac.ops.setup_sfp(hw);
2699                         ixgbe_enable_tx_laser(hw);
2700                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
2701                 } else
2702                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
2703         } else {
2704                 if (hw->mac.ops.check_link)
2705                         err = ixgbe_check_link(hw, &autoneg,
2706                             &adapter->link_up, FALSE);
2707                 if (err)
2708                         goto out;
2709                 autoneg = hw->phy.autoneg_advertised;
2710                 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2711                         err  = hw->mac.ops.get_link_capabilities(hw,
2712                             &autoneg, &negotiate);
2713                 if (err)
2714                         goto out;
2715                 if (hw->mac.ops.setup_link)
2716                         err = hw->mac.ops.setup_link(hw, autoneg,
2717                             negotiate, adapter->link_up);
2718         }
2719 out:
2720         return;
2721 }
2722
2723 /********************************************************************
2724  * Manage DMA'able memory.
2725  *******************************************************************/
2726 static void
2727 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2728 {
2729         if (error)
2730                 return;
2731         *(bus_addr_t *) arg = segs->ds_addr;
2732         return;
2733 }
2734
/*
 * Allocate and map a contiguous DMA region of `size` bytes for
 * descriptor rings.  On success fills in dma->dma_tag/dma_map/
 * dma_vaddr/dma_paddr/dma_size and returns 0; on failure unwinds
 * whatever was created (goto-chain) and returns the bus_dma error.
 * Callers release with ixgbe_dma_free().
 */
static int
ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
		struct ixgbe_dma_alloc *dma, int mapflags)
{
	device_t dev = adapter->dev;
	int		r;

	/* One tag per allocation: a single segment of exactly `size` */
	r = bus_dma_tag_create(NULL,	/* parent */
			       DBA_ALIGN, 0,	/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,	/* filter, filterarg */
			       size,	/* maxsize */
			       1,	/* nsegments */
			       size,	/* maxsegsize */
			       BUS_DMA_ALLOCNOW,	/* flags */
			       &dma->dma_tag);
	if (r != 0) {
		device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
		       "error %u\n", r);
		goto fail_0;
	}
	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
			     BUS_DMA_NOWAIT, &dma->dma_map);
	if (r != 0) {
		device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
		       "error %u\n", r);
		goto fail_1;
	}
	/* ixgbe_dmamap_cb records the bus address into dma_paddr */
	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
			    size,
			    ixgbe_dmamap_cb,
			    &dma->dma_paddr,
			    mapflags | BUS_DMA_NOWAIT);
	if (r != 0) {
		device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
		       "error %u\n", r);
		goto fail_2;
	}
	dma->dma_size = size;
	return (0);
fail_2:
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
	bus_dma_tag_destroy(dma->dma_tag);
fail_0:
	/* Mark the struct invalid so a stray ixgbe_dma_free() is visible */
	dma->dma_map = NULL;
	dma->dma_tag = NULL;
	return (r);
}
2785
/*
 * Release a region created by ixgbe_dma_malloc().  Teardown order is
 * fixed: sync (make device writes visible), unload the map, free the
 * memory, then destroy the tag.
 */
static void
ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
{
	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
	bus_dma_tag_destroy(dma->dma_tag);
}
2795
2796
/*********************************************************************
 *
 *  Allocate memory for the transmit and receive rings, and then
 *  the descriptors associated with each, called only once at attach.
 *
 *  On failure the txconf/rxconf counters drive a partial unwind of
 *  whatever descriptor DMA memory was already allocated before the
 *  top-level arrays are freed.  Returns IXGBE_SUCCESS (0) or ENOMEM.
 *
 **********************************************************************/
static int
ixgbe_allocate_queues(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	struct rx_ring	*rxr;
	int rsize, tsize, error = IXGBE_SUCCESS;
	int txconf = 0, rxconf = 0;

	/* First allocate the top level queue structs */
	if (!(adapter->queues =
	    (struct ix_queue *) kmalloc(sizeof(struct ix_queue) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate queue memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* First allocate the TX ring struct memory */
	if (!(adapter->tx_rings =
	    (struct tx_ring *) kmalloc(sizeof(struct tx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate TX ring memory\n");
		error = ENOMEM;
		goto tx_fail;
	}

	/* Next allocate the RX */
	if (!(adapter->rx_rings =
	    (struct rx_ring *) kmalloc(sizeof(struct rx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate RX ring memory\n");
		error = ENOMEM;
		goto rx_fail;
	}

	/* For the ring itself: descriptor area rounded to DBA_ALIGN */
	tsize = roundup2(adapter->num_tx_desc *
	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);

	/*
	 * Now set up the TX queues, txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * undo memory gracefully
	 */
	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
		/* Set up some basics */
		txr = &adapter->tx_rings[i];
		txr->adapter = adapter;
		txr->me = i;

		/* Initialize the TX side lock */
		ksnprintf(txr->lock_name, sizeof(txr->lock_name), "%s:tx(%d)",
		    device_get_nameunit(dev), txr->me);
		lockinit(&txr->tx_lock, txr->lock_name, 0, LK_CANRECURSE);

		if (ixgbe_dma_malloc(adapter, tsize,
			&txr->txdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate TX Descriptor memory\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
		bzero((void *)txr->tx_base, tsize);

		/* Now allocate transmit buffers for the ring */
		if (ixgbe_allocate_transmit_buffers(txr)) {
			device_printf(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#if 0 /* __FreeBSD_version >= 800000 */
		/* Allocate a buf ring */
		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
		    M_WAITOK, &txr->tx_mtx);
		if (txr->br == NULL) {
			device_printf(dev,
			    "Critical Failure setting up buf ring\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#endif
	}

	/*
	 * Next the RX queues...
	 */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		/* Set up some basics */
		rxr->adapter = adapter;
		rxr->me = i;

		/* Initialize the RX side lock */
		ksnprintf(rxr->lock_name, sizeof(rxr->lock_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		lockinit(&rxr->rx_lock, rxr->lock_name, 0, LK_CANRECURSE);

		if (ixgbe_dma_malloc(adapter, rsize,
			&rxr->rxdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate RxDescriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring*/
		if (ixgbe_allocate_receive_buffers(rxr)) {
			device_printf(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	/*
	** Finally set up the queue holding structs
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];
	}

	return (0);

err_rx_desc:
	/* Undo only the rings whose DMA memory was actually allocated */
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		ixgbe_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		ixgbe_dma_free(adapter, &txr->txdma);
	kfree(adapter->rx_rings, M_DEVBUF);
rx_fail:
	kfree(adapter->tx_rings, M_DEVBUF);
tx_fail:
	kfree(adapter->queues, M_DEVBUF);
fail:
	return (error);
}
2951
/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 *  Creates the per-ring DMA tag, the tx_buffers array, and one DMA
 *  map per descriptor.  Returns 0 or a bus_dma/ENOMEM error; on any
 *  failure ALL transmit structures are torn down.
 *
 **********************************************************************/
static int
ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	device_t dev = adapter->dev;
	struct ixgbe_tx_buf *txbuf;
	int error, i;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(
			       NULL,	/* parent */
			       1, 0,		/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       IXGBE_TSO_SIZE,		/* maxsize */
			       adapter->num_segs,	/* nsegments */
			       PAGE_SIZE,		/* maxsegsize */
			       0,			/* flags */
			       &txr->txtag))) {
		device_printf(dev,"Unable to allocate TX DMA tag\n");
		goto fail;
	}

	/* One ixgbe_tx_buf per TX descriptor, zeroed */
	if (!(txr->tx_buffers =
	    (struct ixgbe_tx_buf *) kmalloc(sizeof(struct ixgbe_tx_buf) *
	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer dma maps */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}

	return 0;
fail:
	/* We free all, it handles case where we are in the middle */
	ixgbe_free_transmit_structures(adapter);
	return (error);
}
3009
/*********************************************************************
 *
 *  Initialize a transmit ring.
 *
 *  Called under reset/init: clears the descriptor area, resets the
 *  ring indices, frees any mbufs still attached from a prior run and
 *  syncs the descriptor DMA map.  The whole body runs under the TX
 *  ring lock.
 *
 **********************************************************************/
static void
ixgbe_setup_transmit_ring(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *txbuf;
	int i;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	/* Clear the old ring contents */
	IXGBE_TX_LOCK(txr);
#ifdef DEV_NETMAP
	/*
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	 */
	slot = netmap_reset(na, NR_TX, txr->me, 0);
#endif /* DEV_NETMAP */
	bzero((void *)txr->tx_base,
	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
	/* Reset indices */
	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;

	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * Slots in the netmap ring (indexed by "si") are
		 * kring->nkr_hwofs positions "ahead" wrt the
		 * corresponding slot in the NIC ring. In some drivers
		 * (not here) nkr_hwofs can be negative. Function
		 * netmap_idx_n2k() handles wraparounds properly.
		 */
		if (slot) {
			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
		}
#endif /* DEV_NETMAP */
		/* Clear the EOP index (-1 = no pending EOP descriptor) */
		txbuf->eop_index = -1;
	}

#ifdef IXGBE_FDIR
	/* Set the rate at which we sample packets */
	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
		txr->atr_sample = atr_sample_rate;
#endif

	/* Set number of descriptors available */
	txr->tx_avail = adapter->num_tx_desc;

	/* Push the cleared descriptor ring out before the HW sees it */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXGBE_TX_UNLOCK(txr);
}
3084
3085 /*********************************************************************
3086  *
3087  *  Initialize all transmit rings.
3088  *
3089  **********************************************************************/
3090 static int
3091 ixgbe_setup_transmit_structures(struct adapter *adapter)
3092 {
3093         struct tx_ring *txr = adapter->tx_rings;
3094
3095         for (int i = 0; i < adapter->num_queues; i++, txr++)
3096                 ixgbe_setup_transmit_ring(txr);
3097
3098         return (0);
3099 }
3100
/*********************************************************************
 *
 *  Enable transmit unit.
 *
 *  Programs each ring's base/length/head/tail registers, disables
 *  descriptor write-back relaxed ordering, and on 82599/X540 enables
 *  the TX DMA engine and sets the MTQC queue layout.
 *
 **********************************************************************/
static void
ixgbe_initialize_transmit_units(struct adapter *adapter)
{
	struct tx_ring	*txr = adapter->tx_rings;
	struct ixgbe_hw	*hw = &adapter->hw;

	/* Setup the Base and Length of the Tx Descriptor Ring */

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		u64	tdba = txr->txdma.dma_paddr;
		u32	txctrl;

		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
		       (tdba & 0x00000000ffffffffULL));
		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
		/*
		 * TDLEN is in bytes.  NOTE(review): the ring holds
		 * union ixgbe_adv_tx_desc; this relies on the legacy
		 * and advanced descriptors being the same size --
		 * confirm against the hardware headers.
		 */
		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
		    adapter->num_tx_desc * sizeof(struct ixgbe_legacy_tx_desc));

		/* Setup the HW Tx Head and Tail descriptor pointers */
		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);

		/* Setup Transmit Descriptor Cmd Settings */
		txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
		txr->queue_status = IXGBE_QUEUE_IDLE;

		/* Disable Head Writeback: the TXCTRL register moved on 82599+ */
		switch (hw->mac.type) {
		case ixgbe_mac_82598EB:
			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
			break;
		case ixgbe_mac_82599EB:
		case ixgbe_mac_X540:
		default:
			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
			break;
		}
		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
		switch (hw->mac.type) {
		case ixgbe_mac_82598EB:
			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
			break;
		case ixgbe_mac_82599EB:
		case ixgbe_mac_X540:
		default:
			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
			break;
		}

	}

	if (hw->mac.type != ixgbe_mac_82598EB) {
		u32 dmatxctl, rttdcs;
		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
		dmatxctl |= IXGBE_DMATXCTL_TE;
		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
		/* Disable arbiter to set MTQC */
		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
		rttdcs |= IXGBE_RTTDCS_ARBDIS;
		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
		/* Re-enable the arbiter after MTQC is programmed */
		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
	}

	return;
}
3173
3174 /*********************************************************************
3175  *
3176  *  Free all transmit rings.
3177  *
3178  **********************************************************************/
3179 static void
3180 ixgbe_free_transmit_structures(struct adapter *adapter)
3181 {
3182         struct tx_ring *txr = adapter->tx_rings;
3183
3184         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3185                 IXGBE_TX_LOCK(txr);
3186                 ixgbe_free_transmit_buffers(txr);
3187                 ixgbe_dma_free(adapter, &txr->txdma);
3188                 IXGBE_TX_UNLOCK(txr);
3189                 IXGBE_TX_LOCK_DESTROY(txr);
3190         }
3191         kfree(adapter->tx_rings, M_DEVBUF);
3192 }
3193
3194 /*********************************************************************
3195  *
3196  *  Free transmit ring related data structures.
3197  *
3198  **********************************************************************/
3199 static void
3200 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3201 {
3202         struct adapter *adapter = txr->adapter;
3203         struct ixgbe_tx_buf *tx_buffer;
3204         int             i;
3205
3206         INIT_DEBUGOUT("free_transmit_ring: begin");
3207
3208         if (txr->tx_buffers == NULL)
3209                 return;
3210
3211         tx_buffer = txr->tx_buffers;
3212         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3213                 if (tx_buffer->m_head != NULL) {
3214                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3215                             BUS_DMASYNC_POSTWRITE);
3216                         bus_dmamap_unload(txr->txtag,
3217                             tx_buffer->map);
3218                         m_freem(tx_buffer->m_head);
3219                         tx_buffer->m_head = NULL;
3220                         if (tx_buffer->map != NULL) {
3221                                 bus_dmamap_destroy(txr->txtag,
3222                                     tx_buffer->map);
3223                                 tx_buffer->map = NULL;
3224                         }
3225                 } else if (tx_buffer->map != NULL) {
3226                         bus_dmamap_unload(txr->txtag,
3227                             tx_buffer->map);
3228                         bus_dmamap_destroy(txr->txtag,
3229                             tx_buffer->map);
3230                         tx_buffer->map = NULL;
3231                 }
3232         }
3233 #if 0 /* __FreeBSD_version >= 800000 */
3234         if (txr->br != NULL)
3235                 buf_ring_free(txr->br, M_DEVBUF);
3236 #endif
3237         if (txr->tx_buffers != NULL) {
3238                 kfree(txr->tx_buffers, M_DEVBUF);
3239                 txr->tx_buffers = NULL;
3240         }
3241         if (txr->txtag != NULL) {
3242                 bus_dma_tag_destroy(txr->txtag);
3243                 txr->txtag = NULL;
3244         }
3245         return;
3246 }
3247
3248 /*********************************************************************
3249  *
3250  *  Advanced Context Descriptor setup for VLAN or CSUM
3251  *
3252  **********************************************************************/
3253
3254 static bool
3255 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3256 {
3257         struct adapter *adapter = txr->adapter;
3258         struct ixgbe_adv_tx_context_desc *TXD;
3259         struct ixgbe_tx_buf        *tx_buffer;
3260         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3261         struct ether_vlan_header *eh;
3262         struct ip *ip;
3263         struct ip6_hdr *ip6;
3264         int  ehdrlen, ip_hlen = 0;
3265         u16     etype;
3266         u8      ipproto = 0;
3267         bool    offload = TRUE;
3268         int ctxd = txr->next_avail_desc;
3269 #ifdef NET_VLAN
3270         u16 vtag = 0;
3271 #endif
3272
3273
3274         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3275                 offload = FALSE;
3276
3277         tx_buffer = &txr->tx_buffers[ctxd];
3278         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3279
3280         /*
3281         ** In advanced descriptors the vlan tag must 
3282         ** be placed into the descriptor itself.
3283         */
3284 #ifdef NET_VLAN
3285         if (mp->m_flags & M_VLANTAG) {
3286                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3287                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3288         } else if (offload == FALSE)
3289                 return FALSE;
3290 #endif
3291
3292         /*
3293          * Determine where frame payload starts.
3294          * Jump over vlan headers if already present,
3295          * helpful for QinQ too.
3296          */
3297         eh = mtod(mp, struct ether_vlan_header *);
3298         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3299                 etype = ntohs(eh->evl_proto);
3300                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3301         } else {
3302                 etype = ntohs(eh->evl_encap_proto);
3303                 ehdrlen = ETHER_HDR_LEN;
3304         }
3305
3306         /* Set the ether header length */
3307         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3308
3309         switch (etype) {
3310                 case ETHERTYPE_IP:
3311                         ip = (struct ip *)(mp->m_data + ehdrlen);
3312                         ip_hlen = ip->ip_hl << 2;
3313                         ipproto = ip->ip_p;
3314                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3315                         break;
3316                 case ETHERTYPE_IPV6:
3317                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3318                         ip_hlen = sizeof(struct ip6_hdr);
3319                         /* XXX-BZ this will go badly in case of ext hdrs. */
3320                         ipproto = ip6->ip6_nxt;
3321                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3322                         break;
3323                 default:
3324                         offload = FALSE;
3325                         break;
3326         }
3327
3328         vlan_macip_lens |= ip_hlen;
3329         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3330
3331         switch (ipproto) {
3332                 case IPPROTO_TCP:
3333                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3334                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3335                         break;
3336
3337                 case IPPROTO_UDP:
3338                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3339                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3340                         break;
3341
3342 #if 0
3343                 case IPPROTO_SCTP:
3344                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3345                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
3346                         break;
3347 #endif
3348                 default:
3349                         offload = FALSE;
3350                         break;
3351         }
3352
3353         /* Now copy bits into descriptor */
3354         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3355         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3356         TXD->seqnum_seed = htole32(0);
3357         TXD->mss_l4len_idx = htole32(0);
3358
3359         tx_buffer->m_head = NULL;
3360         tx_buffer->eop_index = -1;
3361
3362         /* We've consumed the first desc, adjust counters */
3363         if (++ctxd == adapter->num_tx_desc)
3364                 ctxd = 0;
3365         txr->next_avail_desc = ctxd;
3366         --txr->tx_avail;
3367
3368         return (offload);
3369 }
3370
3371 /**********************************************************************
3372  *
3373  *  Setup work for hardware segmentation offload (TSO) on
3374  *  adapters using advanced tx descriptors
3375  *
3376  **********************************************************************/
#if 0   /* NET_TSO */
/*
 * Build the advanced context descriptor for a TSO (TCP segmentation
 * offload) packet: computes header lengths, seeds the TCP pseudo
 * checksum, and fills in MSS/L4LEN.  Consumes one descriptor slot.
 * Returns FALSE when the headers cannot be validated in the first
 * mbuf, TRUE on success.  Currently compiled out (no NET_TSO).
 */
static bool
ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *paylen,
    u32 *olinfo_status)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_adv_tx_context_desc *TXD;
	struct ixgbe_tx_buf        *tx_buffer;
#ifdef NET_VLAN
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	u16 vtag = 0, eh_type;
#else
	u16 eh_type;
	u32 type_tucmd_mlhl = 0;
#endif
	u32 mss_l4len_idx = 0, len;
	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
#ifdef INET
	struct ip *ip;
#endif
	struct tcphdr *th;


	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		eh_type = eh->evl_proto;
	} else {
		ehdrlen = ETHER_HDR_LEN;
		eh_type = eh->evl_encap_proto;
	}

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	len = ehdrlen + sizeof(struct tcphdr);
	switch (ntohs(eh_type)) {
#ifdef INET6
	case ETHERTYPE_IPV6:
		if (mp->m_len < len + sizeof(struct ip6_hdr))
			return FALSE;
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return FALSE;
		ip_hlen = sizeof(struct ip6_hdr);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		/* Seed the pseudo-header checksum; hardware finishes it. */
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		break;
#endif
#ifdef INET
	case ETHERTYPE_IP:
		if (mp->m_len < len + sizeof(struct ip))
			return FALSE;
		ip = (struct ip *)(mp->m_data + ehdrlen);
		if (ip->ip_p != IPPROTO_TCP)
			return FALSE;
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		/* Tell transmit desc to also do IPv4 checksum. */
		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
		break;
#endif
	default:
		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
		    __func__, ntohs(eh_type));
		break;
	}

	ctxd = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[ctxd];
	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

	tcp_hlen = th->th_off << 2;

	/* This is used in the transmit desc in encap */
	*paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;

	/* VLAN MACLEN IPLEN */
#ifdef NET_VLAN
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= ip_hlen;
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
#endif
	/*
	 * NOTE(review): when NET_VLAN is undefined the MACLEN/IPLEN
	 * fields of TXD->vlan_macip_lens are never written at all,
	 * which looks incorrect for TSO — confirm before enabling
	 * this code path.
	 */

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);

	/* MSS L4LEN IDX */
	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);
	tx_buffer->m_head = NULL;
	tx_buffer->eop_index = -1;

	/* Advance ring state: one context descriptor consumed. */
	if (++ctxd == adapter->num_tx_desc)
		ctxd = 0;

	txr->tx_avail--;
	txr->next_avail_desc = ctxd;
	return TRUE;
}
#endif
3500
#ifdef IXGBE_FDIR
/*
** Flow Director packet parser: derives the hash inputs needed to
** install a signature filter for this flow, so receive traffic for
** the same connection lands on the cpu that transmitted it.  Only
** invoked on a sampled subset (IXGBE_FDIR_RATE) of packets to bound
** the cost.  IPv4 TCP/UDP only.
*/
static void
ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
{
	struct adapter			*adapter = txr->adapter;
	struct ix_queue			*que;
	struct ip			*ip;
	struct tcphdr			*th;
	struct udphdr			*uh;
	struct ether_vlan_header	*eh;
	union ixgbe_atr_hash_dword	input = {.dword = 0}; 
	union ixgbe_atr_hash_dword	common = {.dword = 0}; 
	int				hdrlen, iplen;
	u16				etype;

	/* Locate the IP header, stepping over a VLAN shim if present. */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		hdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
	} else {
		hdrlen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;
	}

	/* Only handling IPv4 */
	if (etype != htons(ETHERTYPE_IP))
		return;

	ip = (struct ip *)(mp->m_data + hdrlen);
	iplen = ip->ip_hl << 2;

	/* Fold the L4 ports into the hash; non-TCP/UDP is ignored. */
	if (ip->ip_p == IPPROTO_TCP) {
		th = (struct tcphdr *)((caddr_t)ip + iplen);
		/* src and dst are inverted */
		common.port.dst ^= th->th_sport;
		common.port.src ^= th->th_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
	} else if (ip->ip_p == IPPROTO_UDP) {
		uh = (struct udphdr *)((caddr_t)ip + iplen);
		/* src and dst are inverted */
		common.port.dst ^= uh->uh_sport;
		common.port.src ^= uh->uh_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
	} else {
		return;
	}

	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
	if (mp->m_pkthdr.ether_vtag)
		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
	else
		common.flex_bytes ^= etype;
	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;

	/*
	** This assumes the Rx queue and Tx
	** queue are bound to the same CPU
	*/
	que = &adapter->queues[txr->me];
	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
	    input, common, que->msix);
}
#endif /* IXGBE_FDIR */
3576
3577 /**********************************************************************
3578  *
3579  *  Examine each tx_buffer in the used queue. If the hardware is done
3580  *  processing the packet then free associated resources. The
3581  *  tx_buffer is put back on the free queue.
3582  *
3583  **********************************************************************/
/*
 * Reclaim completed transmit descriptors: walk from next_to_clean,
 * freeing each packet whose EOP descriptor has the DD (done) bit
 * set, and return buffers/descriptors to the free pool.  Returns
 * TRUE if work may remain, FALSE when the ring is idle (or in the
 * netmap path, always FALSE).  Caller must hold the TX ring lock.
 */
static bool
ixgbe_txeof(struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;
	u32	first, last, done, processed;
	struct ixgbe_tx_buf *tx_buffer;
	struct ixgbe_legacy_tx_desc *tx_desc, *eop_desc;

	/* Lock must already be held by the caller. */
	KKASSERT(lockstatus(&txr->tx_lock, curthread) != 0);

#ifdef DEV_NETMAP
	if (ifp->if_capenable & IFCAP_NETMAP) {
		struct netmap_adapter *na = NA(ifp);
		struct netmap_kring *kring = &na->tx_rings[txr->me];

		tx_desc = (struct ixgbe_legacy_tx_desc *)txr->tx_base;

		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
		    BUS_DMASYNC_POSTREAD);
		/*
		 * In netmap mode, all the work is done in the context
		 * of the client thread. Interrupt handlers only wake up
		 * clients, which may be sleeping on individual rings
		 * or on a global resource for all rings.
		 * To implement tx interrupt mitigation, we wake up the client
		 * thread roughly every half ring, even if the NIC interrupts
		 * more frequently. This is implemented as follows:
		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
		 *   the slot that should wake up the thread (nkr_num_slots
		 *   means the user thread should not be woken up);
		 * - the driver ignores tx interrupts unless netmap_mitigate=0
		 *   or the slot has the DD bit set.
		 *
		 * When the driver has separate locks, we need to
		 * release and re-acquire txlock to avoid deadlocks.
		 * XXX see if we can find a better way.
		 */
		if (!netmap_mitigate ||
		    (kring->nr_kflags < kring->nkr_num_slots &&
		     tx_desc[kring->nr_kflags].upper.fields.status & IXGBE_TXD_STAT_DD)) {
			kring->nr_kflags = kring->nkr_num_slots;
			selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
			/* Drop the ring lock before taking the core lock
			 * to preserve lock ordering (see comment above). */
			IXGBE_TX_UNLOCK(txr);
			IXGBE_CORE_LOCK(adapter);
			selwakeuppri(&na->tx_si, PI_NET);
			IXGBE_CORE_UNLOCK(adapter);
			IXGBE_TX_LOCK(txr);
		}
		return FALSE;
	}
#endif /* DEV_NETMAP */

	/* Nothing outstanding: mark the queue idle and bail. */
	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->queue_status = IXGBE_QUEUE_IDLE;
		return FALSE;
	}

	processed = 0;
	first = txr->next_to_clean;
	tx_buffer = &txr->tx_buffers[first];
	/* For cleanup we just use legacy struct */
	tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
	/*
	 * NOTE(review): 'last' is u32, so this comparison against the
	 * -1 "no EOP" sentinel relies on the usual arithmetic
	 * conversion to 0xffffffff — verify eop_index's declared type
	 * makes the round-trip exact.
	 */
	last = tx_buffer->eop_index;
	if (last == -1)
		return FALSE;
	eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];

	/*
	** Get the index of the first descriptor
	** BEYOND the EOP and call that 'done'.
	** I do this so the comparison in the
	** inner while loop below can be simple
	*/
	if (++last == adapter->num_tx_desc) last = 0;
	done = last;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);
	/*
	** Only the EOP descriptor of a packet now has the DD
	** bit set, this is what we look for...
	*/
	while (eop_desc->upper.fields.status & IXGBE_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			/* Zero the descriptor so stale DD bits can't
			 * be misread on the next pass. */
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			++txr->tx_avail;
			++processed;

			if (tx_buffer->m_head) {
				txr->bytes +=
				    tx_buffer->m_head->m_pkthdr.len;
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
				/*
				 * NOTE(review): the map handle is
				 * dropped here without
				 * bus_dmamap_destroy(), and
				 * ixgbe_free_transmit_buffers() skips
				 * NULL maps — looks like a dmamap
				 * leak; confirm the map lifecycle.
				 */
				tx_buffer->map = NULL;
			}
			tx_buffer->eop_index = -1;
			/* Progress made: reset the watchdog timestamp. */
			txr->watchdog_time = ticks;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
			tx_desc =
			    (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
		}
		++txr->packets;
		++ifp->if_opackets;
		/* See if there is more work now */
		last = tx_buffer->eop_index;
		if (last != -1) {
			eop_desc =
			    (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
			/* Get next done point */
			if (++last == adapter->num_tx_desc) last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	** Watchdog calculation, we know there's
	** work outstanding or the first return
	** would have been taken, so none processed
	** for too long indicates a hang.
	*/
	if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
		txr->queue_status = IXGBE_QUEUE_HUNG;

	/* With a minimum free clear the depleted state bit.  */
	if (txr->tx_avail > IXGBE_TX_CLEANUP_THRESHOLD)
		txr->queue_status &= ~IXGBE_QUEUE_DEPLETED;

	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->queue_status = IXGBE_QUEUE_IDLE;
		return (FALSE);
	}

	return TRUE;
}
3736
3737 /*********************************************************************
3738  *
3739  *  Refresh mbuf buffers for RX descriptor rings
3740  *   - now keeps its own state so discards due to resource
3741  *     exhaustion are unnecessary, if an mbuf cannot be obtained
3742  *     it just returns, keeping its placeholder, thus it can simply
3743  *     be recalled to try again.
3744  *
3745  **********************************************************************/
3746 static void
3747 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3748 {
3749         struct adapter          *adapter = rxr->adapter;
3750         bus_dma_segment_t       hseg[1];
3751         bus_dma_segment_t       pseg[1];
3752         struct ixgbe_rx_buf     *rxbuf;
3753         struct mbuf             *mh, *mp;
3754         int                     i, j, nsegs, error;
3755         bool                    refreshed = FALSE;
3756
3757         i = j = rxr->next_to_refresh;
3758         /* Control the loop with one beyond */
3759         if (++j == adapter->num_rx_desc)
3760                 j = 0;
3761
3762         while (j != limit) {
3763                 rxbuf = &rxr->rx_buffers[i];
3764                 if (rxr->hdr_split == FALSE)
3765                         goto no_split;
3766
3767                 if (rxbuf->m_head == NULL) {
3768                         mh = m_gethdr(MB_DONTWAIT, MT_DATA);
3769                         if (mh == NULL)
3770                                 goto update;
3771                 } else
3772                         mh = rxbuf->m_head;
3773
3774                 mh->m_pkthdr.len = mh->m_len = MHLEN;
3775                 mh->m_len = MHLEN;
3776                 mh->m_flags |= M_PKTHDR;
3777                 /* Get the memory mapping */
3778                 error = bus_dmamap_load_mbuf_segment(rxr->htag,
3779                     rxbuf->hmap, mh, hseg, 1, &nsegs, BUS_DMA_NOWAIT);
3780                 if (error != 0) {
3781                         kprintf("Refresh mbufs: hdr dmamap load"
3782                             " failure - %d\n", error);
3783                         m_free(mh);
3784                         rxbuf->m_head = NULL;
3785                         goto update;
3786                 }
3787                 rxbuf->m_head = mh;
3788                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3789                     BUS_DMASYNC_PREREAD);
3790                 rxr->rx_base[i].read.hdr_addr =
3791                     htole64(hseg[0].ds_addr);
3792
3793 no_split:
3794                 if (rxbuf->m_pack == NULL) {
3795                         mp = m_getjcl(MB_DONTWAIT, MT_DATA,
3796                             M_PKTHDR, adapter->rx_mbuf_sz);
3797                         if (mp == NULL)
3798                                 goto update;
3799                 } else
3800                         mp = rxbuf->m_pack;
3801
3802                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3803                 /* Get the memory mapping */
3804                 error = bus_dmamap_load_mbuf_segment(rxr->ptag,
3805                     rxbuf->pmap, mp, pseg, 1, &nsegs, BUS_DMA_NOWAIT);
3806                 if (error != 0) {
3807                         kprintf("Refresh mbufs: payload dmamap load"
3808                             " failure - %d\n", error);
3809                         m_free(mp);
3810                         rxbuf->m_pack = NULL;
3811                         goto update;
3812                 }
3813                 rxbuf->m_pack = mp;
3814                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3815                     BUS_DMASYNC_PREREAD);
3816                 rxr->rx_base[i].read.pkt_addr =
3817                     htole64(pseg[0].ds_addr);
3818
3819                 refreshed = TRUE;
3820                 /* Next is precalculated */
3821                 i = j;
3822                 rxr->next_to_refresh = i;
3823                 if (++j == adapter->num_rx_desc)
3824                         j = 0;
3825         }
3826 update:
3827         if (refreshed) /* Update hardware tail index */
3828                 IXGBE_WRITE_REG(&adapter->hw,
3829                     IXGBE_RDT(rxr->me), rxr->next_to_refresh);
3830         return;
3831 }
3832
3833 /*********************************************************************
3834  *
3835  *  Allocate memory for rx_buffer structures. Since we use one
3836  *  rx_buffer per received packet, the maximum number of rx_buffer's
3837  *  that we'll need is equal to the number of receive descriptors
3838  *  that we've allocated.
3839  *
3840  **********************************************************************/
3841 static int
3842 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
3843 {
3844         struct  adapter         *adapter = rxr->adapter;
3845         device_t                dev = adapter->dev;
3846         struct ixgbe_rx_buf     *rxbuf;
3847         int                     i, bsize, error;
3848
3849         bsize = sizeof(struct ixgbe_rx_buf) * adapter->num_rx_desc;
3850         if (!(rxr->rx_buffers =
3851             (struct ixgbe_rx_buf *) kmalloc(bsize,
3852             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3853                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3854                 error = ENOMEM;
3855                 goto fail;
3856         }
3857
3858         if ((error = bus_dma_tag_create(NULL,   /* parent */
3859                                    1, 0,        /* alignment, bounds */
3860                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3861                                    BUS_SPACE_MAXADDR,   /* highaddr */
3862                                    NULL, NULL,          /* filter, filterarg */
3863                                    MSIZE,               /* maxsize */
3864                                    1,                   /* nsegments */
3865                                    MSIZE,               /* maxsegsize */
3866                                    0,                   /* flags */
3867                                    &rxr->htag))) {
3868                 device_printf(dev, "Unable to create RX DMA tag\n");
3869                 goto fail;
3870         }
3871
3872         if ((error = bus_dma_tag_create(NULL,   /* parent */
3873                                    1, 0,        /* alignment, bounds */
3874                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3875                                    BUS_SPACE_MAXADDR,   /* highaddr */
3876                                    NULL, NULL,          /* filter, filterarg */
3877                                    MJUM16BYTES,         /* maxsize */
3878                                    1,                   /* nsegments */
3879                                    MJUM16BYTES,         /* maxsegsize */
3880                                    0,                   /* flags */
3881                                    &rxr->ptag))) {
3882                 device_printf(dev, "Unable to create RX DMA tag\n");
3883                 goto fail;
3884         }
3885
3886         for (i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3887                 rxbuf = &rxr->rx_buffers[i];
3888                 error = bus_dmamap_create(rxr->htag,
3889                     BUS_DMA_NOWAIT, &rxbuf->hmap);
3890                 if (error) {
3891                         device_printf(dev, "Unable to create RX head map\n");
3892                         goto fail;
3893                 }
3894                 error = bus_dmamap_create(rxr->ptag,
3895                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3896                 if (error) {
3897                         device_printf(dev, "Unable to create RX pkt map\n");
3898                         goto fail;
3899                 }
3900         }
3901
3902         return (0);
3903
3904 fail:
3905         /* Frees all, but can handle partial completion */
3906         ixgbe_free_receive_structures(adapter);
3907         return (error);
3908 }
3909
3910 /*
3911 ** Used to detect a descriptor that has
3912 ** been merged by Hardware RSC.
3913 */
3914 static inline u32
3915 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
3916 {
3917         return (le32toh(rx->wb.lower.lo_dword.data) &
3918             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
3919 }
3920
3921 /*********************************************************************
3922  *
3923  *  Initialize Hardware RSC (LRO) feature on 82599
3924  *  for an RX ring, this is toggled by the LRO capability
3925  *  even though it is transparent to the stack.
3926  *
3927  **********************************************************************/
#if 0   /* NET_LRO */
static void
ixgbe_setup_hw_rsc(struct rx_ring *rxr)
{
	struct	adapter		*adapter = rxr->adapter;
	struct	ixgbe_hw	*hw = &adapter->hw;
	u32			rscctrl, rdrxctl;

	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
#ifdef DEV_NETMAP /* crcstrip is optional in netmap */
	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
#endif /* DEV_NETMAP */
	/*
	 * NOTE(review): with DEV_NETMAP defined, the unbraced if above
	 * guards only this one statement — the intent appears to be
	 * "skip CRC stripping when netmap asked for it"; confirm the
	 * condition polarity before re-enabling this code.
	 */
	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);

	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
	rscctrl |= IXGBE_RSCCTL_RSCEN;
	/*
	** Limit the total number of descriptors that
	** can be combined, so it does not exceed 64K
	*/
	if (adapter->rx_mbuf_sz == MCLBYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
	else if (adapter->rx_mbuf_sz == MJUM9BYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
	else  /* Using 16K cluster */
		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;

	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);

	/* Enable TCP header recognition */
	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
	    IXGBE_PSRTYPE_TCPHDR));

	/* Disable RSC for ACK packets */
	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));

	/* Mark the ring so the receive path knows RSC is active. */
	rxr->hw_rsc = TRUE;
}
#endif
3974
3975 static void     
3976 ixgbe_free_receive_ring(struct rx_ring *rxr)
3977
3978         struct  adapter         *adapter;
3979         struct ixgbe_rx_buf       *rxbuf;
3980         int i;
3981
3982         adapter = rxr->adapter;
3983         for (i = 0; i < adapter->num_rx_desc; i++) {
3984                 rxbuf = &rxr->rx_buffers[i];
3985                 if (rxbuf->m_head != NULL) {
3986                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3987                             BUS_DMASYNC_POSTREAD);
3988                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3989                         rxbuf->m_head->m_flags |= M_PKTHDR;
3990                         m_freem(rxbuf->m_head);
3991                 }
3992                 if (rxbuf->m_pack != NULL) {
3993                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3994                             BUS_DMASYNC_POSTREAD);
3995                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3996                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3997                         m_freem(rxbuf->m_pack);
3998                 }
3999                 rxbuf->m_head = NULL;
4000                 rxbuf->m_pack = NULL;
4001         }
4002 }
4003
4004
4005 /*********************************************************************
4006  *
4007  *  Initialize a receive ring and its buffers.
4008  *
4009  **********