/*
 * Copyright (c) 2001-2014, Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_ifpoll.h"

#include <sys/param.h>
#include <sys/endian.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/serialize.h>
#include <sys/serialize2.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>

#include <net/ethernet.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/ifq_var.h>
#include <net/if_ringmap.h>
#include <net/toeplitz.h>
#include <net/toeplitz2.h>
#include <net/vlan/if_vlan_var.h>
#include <net/vlan/if_vlan_ether.h>
#include <net/if_poll.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>

#include <bus/pci/pcivar.h>
#include <bus/pci/pcireg.h>

#include <dev/netif/ix/ixgbe_api.h>
#include <dev/netif/ix/if_ix.h>

#define IX_IFM_DEFAULT	(IFM_ETHER | IFM_AUTO)

#ifdef IX_RSS_DEBUG
#define IX_RSS_DPRINTF(sc, lvl, fmt, ...) \
do { \
	if (sc->rss_debug >= lvl) \
		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
} while (0)
#else	/* !IX_RSS_DEBUG */
#define IX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
#endif	/* IX_RSS_DEBUG */

#define IX_NAME		"Intel(R) PRO/10GbE "
#define IX_DEVICE(id) \
	{ IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_##id, IX_NAME #id }
#define IX_DEVICE_NULL	{ 0, 0, NULL }

static struct ix_device {
	uint16_t	vid;
	uint16_t	did;
	const char	*desc;
} ix_devices[] = {
	IX_DEVICE(82598AF_DUAL_PORT),
	IX_DEVICE(82598AF_SINGLE_PORT),
	IX_DEVICE(82598EB_CX4),
	IX_DEVICE(82598_DA_DUAL_PORT),
	IX_DEVICE(82598_CX4_DUAL_PORT),
	IX_DEVICE(82598EB_XF_LR),
	IX_DEVICE(82598_SR_DUAL_PORT_EM),
	IX_DEVICE(82598EB_SFP_LOM),
	IX_DEVICE(82599_KX4),
	IX_DEVICE(82599_KX4_MEZZ),
	IX_DEVICE(82599_SFP),
	IX_DEVICE(82599_XAUI_LOM),
	IX_DEVICE(82599_CX4),
	IX_DEVICE(82599_T3_LOM),
	IX_DEVICE(82599_COMBO_BACKPLANE),
	IX_DEVICE(82599_BACKPLANE_FCOE),
	IX_DEVICE(82599_SFP_SF2),
	IX_DEVICE(82599_SFP_FCOE),
	IX_DEVICE(82599EN_SFP),
	IX_DEVICE(82599_SFP_SF_QP),
	IX_DEVICE(82599_QSFP_SF_QP),
	IX_DEVICE(X550EM_X_KR),
	IX_DEVICE(X550EM_X_KX4),
	IX_DEVICE(X550EM_X_10G_T),

	/* required last entry */
	IX_DEVICE_NULL
};

static int	ix_probe(device_t);
static int	ix_attach(device_t);
static int	ix_detach(device_t);
static int	ix_shutdown(device_t);

static void	ix_serialize(struct ifnet *, enum ifnet_serialize);
static void	ix_deserialize(struct ifnet *, enum ifnet_serialize);
static int	ix_tryserialize(struct ifnet *, enum ifnet_serialize);
#ifdef INVARIANTS
static void	ix_serialize_assert(struct ifnet *, enum ifnet_serialize,
		    boolean_t);
#endif

static void	ix_start(struct ifnet *, struct ifaltq_subque *);
static void	ix_watchdog(struct ifaltq_subque *);
static int	ix_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static void	ix_init(void *);
static void	ix_stop(struct ix_softc *);
static void	ix_media_status(struct ifnet *, struct ifmediareq *);
static int	ix_media_change(struct ifnet *);
static void	ix_timer(void *);

#ifdef IFPOLL_ENABLE
static void	ix_npoll(struct ifnet *, struct ifpoll_info *);
static void	ix_npoll_rx(struct ifnet *, void *, int);
static void	ix_npoll_rx_direct(struct ifnet *, void *, int);
static void	ix_npoll_tx(struct ifnet *, void *, int);
static void	ix_npoll_status(struct ifnet *);
#endif

static void	ix_add_sysctl(struct ix_softc *);
static void	ix_add_intr_rate_sysctl(struct ix_softc *, int,
		    const char *, int (*)(SYSCTL_HANDLER_ARGS), const char *);
static int	ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_tx_nmbuf(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_txd(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_rxd(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int);
static int	ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS);

static void	ix_add_hw_stats(struct ix_softc *);

static void	ix_watchdog_reset(struct ix_softc *);
static void	ix_watchdog_task(void *, int);
static void	ix_sync_netisr(struct ix_softc *, int);
static void	ix_slot_info(struct ix_softc *);
static int	ix_alloc_rings(struct ix_softc *);
static void	ix_free_rings(struct ix_softc *);
static void	ix_setup_ifp(struct ix_softc *);
static void	ix_setup_serialize(struct ix_softc *);
static void	ix_set_ring_inuse(struct ix_softc *, boolean_t);
static void	ix_set_timer_cpuid(struct ix_softc *, boolean_t);
static void	ix_update_stats(struct ix_softc *);

static void	ix_set_promisc(struct ix_softc *);
static void	ix_set_multi(struct ix_softc *);
static void	ix_set_vlan(struct ix_softc *);
static uint8_t	*ix_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
static enum ixgbe_fc_mode ix_ifmedia2fc(int);
static const char *ix_ifmedia2str(int);
static const char *ix_fc2str(enum ixgbe_fc_mode);

static void	ix_get_txring_cnt(const struct ix_softc *, int *, int *);
static int	ix_get_txring_inuse(const struct ix_softc *, boolean_t);
static void	ix_init_tx_ring(struct ix_tx_ring *);
static void	ix_free_tx_ring(struct ix_tx_ring *);
static int	ix_create_tx_ring(struct ix_tx_ring *);
static void	ix_destroy_tx_ring(struct ix_tx_ring *, int);
static void	ix_init_tx_unit(struct ix_softc *);
static int	ix_encap(struct ix_tx_ring *, struct mbuf **,
		    uint16_t *, int *);
static int	ix_tx_ctx_setup(struct ix_tx_ring *,
		    const struct mbuf *, uint32_t *, uint32_t *);
static int	ix_tso_ctx_setup(struct ix_tx_ring *,
		    const struct mbuf *, uint32_t *, uint32_t *);
static void	ix_txeof(struct ix_tx_ring *, int);
static void	ix_txgc(struct ix_tx_ring *);
static void	ix_txgc_timer(void *);

static void	ix_get_rxring_cnt(const struct ix_softc *, int *, int *);
static int	ix_get_rxring_inuse(const struct ix_softc *, boolean_t);
static int	ix_init_rx_ring(struct ix_rx_ring *);
static void	ix_free_rx_ring(struct ix_rx_ring *);
static int	ix_create_rx_ring(struct ix_rx_ring *);
static void	ix_destroy_rx_ring(struct ix_rx_ring *, int);
static void	ix_init_rx_unit(struct ix_softc *, boolean_t);
static void	ix_setup_hw_rsc(struct ix_rx_ring *);
static int	ix_newbuf(struct ix_rx_ring *, int, boolean_t);
static void	ix_rxeof(struct ix_rx_ring *, int);
static void	ix_rx_discard(struct ix_rx_ring *, int, boolean_t);
static void	ix_enable_rx_drop(struct ix_softc *);
static void	ix_disable_rx_drop(struct ix_softc *);

static void	ix_alloc_msix(struct ix_softc *);
static void	ix_free_msix(struct ix_softc *, boolean_t);
static void	ix_setup_msix_eims(const struct ix_softc *, int,
		    uint32_t *, uint32_t *);
static int	ix_alloc_intr(struct ix_softc *);
static void	ix_free_intr(struct ix_softc *);
static int	ix_setup_intr(struct ix_softc *);
static void	ix_teardown_intr(struct ix_softc *, int);
static void	ix_enable_intr(struct ix_softc *);
static void	ix_disable_intr(struct ix_softc *);
static void	ix_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t);
static void	ix_set_eitr(struct ix_softc *, int, int);
static void	ix_intr_status(struct ix_softc *, uint32_t);
static void	ix_intr(void *);
static void	ix_msix_rxtx(void *);
static void	ix_msix_rx(void *);
static void	ix_msix_tx(void *);
static void	ix_msix_status(void *);

static void	ix_config_link(struct ix_softc *);
static boolean_t ix_sfp_probe(struct ix_softc *);
static boolean_t ix_is_sfp(const struct ixgbe_hw *);
static void	ix_update_link_status(struct ix_softc *);
static void	ix_handle_link(struct ix_softc *);
static void	ix_handle_mod(struct ix_softc *);
static void	ix_handle_msf(struct ix_softc *);
static void	ix_handle_phy(struct ix_softc *);
static int	ix_powerdown(struct ix_softc *);
static void	ix_config_flowctrl(struct ix_softc *);
static void	ix_config_dmac(struct ix_softc *);
static void	ix_init_media(struct ix_softc *);

/* XXX Missing shared code prototype */
extern void	ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *);

static device_method_t ix_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		ix_probe),
	DEVMETHOD(device_attach,	ix_attach),
	DEVMETHOD(device_detach,	ix_detach),
	DEVMETHOD(device_shutdown,	ix_shutdown),
	DEVMETHOD_END
};

static driver_t ix_driver = {
	"ix",
	ix_methods,
	sizeof(struct ix_softc)
};

static devclass_t ix_devclass;

DECLARE_DUMMY_MODULE(if_ix);
DRIVER_MODULE(if_ix, pci, ix_driver, ix_devclass, NULL, NULL);

static int	ix_msi_enable = 1;
static int	ix_msix_enable = 1;
static int	ix_rxr = 0;
static int	ix_txr = 0;
static int	ix_txd = IX_PERF_TXD;
static int	ix_rxd = IX_PERF_RXD;
static int	ix_unsupported_sfp = 0;
static int	ix_direct_input = 1;
static char	ix_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_NONE;

TUNABLE_INT("hw.ix.msi.enable", &ix_msi_enable);
TUNABLE_INT("hw.ix.msix.enable", &ix_msix_enable);
TUNABLE_INT("hw.ix.rxr", &ix_rxr);
TUNABLE_INT("hw.ix.txr", &ix_txr);
TUNABLE_INT("hw.ix.txd", &ix_txd);
TUNABLE_INT("hw.ix.rxd", &ix_rxd);
TUNABLE_INT("hw.ix.unsupported_sfp", &ix_unsupported_sfp);
TUNABLE_STR("hw.ix.flow_ctrl", ix_flowctrl, sizeof(ix_flowctrl));
TUNABLE_INT("hw.ix.direct_input", &ix_direct_input);
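
/*
 * The tunables above are read once at module load.  A sketch of how they
 * might be set from /boot/loader.conf (the values and the flow control
 * string are illustrative assumptions; the accepted flow control strings
 * are whatever ifmedia_str2ethfc() parses):
 *
 *	hw.ix.msix.enable=1	# prefer MSI-X vectors
 *	hw.ix.rxr=4		# cap RX ring count (0 = autoconfigure)
 *	hw.ix.txd=2048		# TX descriptors per ring
 *	hw.ix.flow_ctrl=full	# default flow control mode
 *
 * Per-device overrides go through device_getenv_int()/_string() with the
 * same leaf names ("rxr", "txd", "flow_ctrl", ...), as used in
 * ix_get_rxring_cnt() and ix_attach() below.
 */
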
/*
 * Smart speed setting, default to on.  This only works
 * as a compile option right now as it is set during attach;
 * set this to 'ixgbe_smart_speed_off' to disable.
 */
static const enum ixgbe_smart_speed ix_smart_speed =
    ixgbe_smart_speed_on;
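
/*
 * TX "garbage collection" overview (summarizing the code below):
 * tx_running is reset to IX_TX_RUNNING by the transmit and txeof paths
 * and decayed by IX_TX_RUNNING_DEC from the per-ring tx_gc_timer
 * callout.  Once it reaches zero while completed mbufs are still held
 * (tx_nmbuf != 0), ix_txgc() reclaims them by polling the hardware
 * head pointer, so buffers are not pinned indefinitely on an idle ring
 * that no longer generates TX completion interrupts.
 */
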
static __inline void
ix_try_txgc(struct ix_tx_ring *txr, int8_t dec)
{
	if (txr->tx_running > 0) {
		txr->tx_running -= dec;
		if (txr->tx_running <= 0 && txr->tx_nmbuf &&
		    txr->tx_avail < txr->tx_ndesc &&
		    txr->tx_avail + txr->tx_intr_nsegs > txr->tx_ndesc)
			ix_txgc(txr);
	}
}

static void
ix_txgc_timer(void *xtxr)
{
	struct ix_tx_ring *txr = xtxr;
	struct ifnet *ifp = &txr->tx_sc->arpcom.ac_if;

	if ((ifp->if_flags & (IFF_RUNNING | IFF_UP | IFF_NPOLLING)) !=
	    (IFF_RUNNING | IFF_UP))
		return;

	if (!lwkt_serialize_try(&txr->tx_serialize))
		goto done;

	if ((ifp->if_flags & (IFF_RUNNING | IFF_UP | IFF_NPOLLING)) !=
	    (IFF_RUNNING | IFF_UP)) {
		lwkt_serialize_exit(&txr->tx_serialize);
		return;
	}
	ix_try_txgc(txr, IX_TX_RUNNING_DEC);

	lwkt_serialize_exit(&txr->tx_serialize);
done:
	callout_reset(&txr->tx_gc_timer, 1, ix_txgc_timer, txr);
}

static __inline void
ix_tx_intr(struct ix_tx_ring *txr, int hdr)
{
	ix_txeof(txr, hdr);
	if (!ifsq_is_empty(txr->tx_ifsq))
		ifsq_devstart(txr->tx_ifsq);
}

static __inline void
ix_free_txbuf(struct ix_tx_ring *txr, struct ix_tx_buf *txbuf)
{
	KKASSERT(txbuf->m_head != NULL);
	KKASSERT(txr->tx_nmbuf > 0);
	txr->tx_nmbuf--;

	bus_dmamap_unload(txr->tx_tag, txbuf->map);
	m_freem(txbuf->m_head);
	txbuf->m_head = NULL;
}

static int
ix_probe(device_t dev)
{
	const struct ix_device *d;
	uint16_t vid, did;

	vid = pci_get_vendor(dev);
	did = pci_get_device(dev);

	for (d = ix_devices; d->desc != NULL; ++d) {
		if (vid == d->vid && did == d->did) {
			device_set_desc(dev, d->desc);
			return 0;
		}
	}
	return ENXIO;
}

static void
ix_get_rxring_cnt(const struct ix_softc *sc, int *ring_cnt, int *ring_cntmax)
{
	switch (sc->hw.mac.type) {
	case ixgbe_mac_X550:
	case ixgbe_mac_X550EM_x:
	case ixgbe_mac_X550EM_a:
		*ring_cntmax = IX_MAX_RXRING_X550;
		break;

	default:
		*ring_cntmax = IX_MAX_RXRING;
		break;
	}
	*ring_cnt = device_getenv_int(sc->dev, "rxr", ix_rxr);
}

static void
ix_get_txring_cnt(const struct ix_softc *sc, int *ring_cnt, int *ring_cntmax)
{
	switch (sc->hw.mac.type) {
	case ixgbe_mac_82598EB:
		*ring_cntmax = IX_MAX_TXRING_82598;
		break;

	case ixgbe_mac_82599EB:
		*ring_cntmax = IX_MAX_TXRING_82599;
		break;

	case ixgbe_mac_X540:
		*ring_cntmax = IX_MAX_TXRING_X540;
		break;

	case ixgbe_mac_X550:
	case ixgbe_mac_X550EM_x:
	case ixgbe_mac_X550EM_a:
		*ring_cntmax = IX_MAX_TXRING_X550;
		break;

	default:
		*ring_cntmax = IX_MAX_TXRING;
		break;
	}
	*ring_cnt = device_getenv_int(sc->dev, "txr", ix_txr);
}
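
/*
 * Ring-count negotiation recap: the helpers above clamp the tunable or
 * per-device request to a per-MAC-generation maximum; ix_attach() then
 * hands both requests to if_ringmap_alloc() and if_ringmap_match() so
 * the final RX and TX ring counts end up compatible with each other
 * and with the available CPUs.
 */
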
static int
ix_attach(device_t dev)
{
	struct ix_softc *sc = device_get_softc(dev);
	struct ixgbe_hw *hw = &sc->hw;
	int error, ring_cnt, ring_cntmax;
	uint16_t csum;
	uint32_t ctrl_ext;
	char flowctrl[IFM_ETH_FC_STRLEN];

	sc->dev = sc->osdep.dev = dev;

	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
	    device_get_unit(dev));
	ifmedia_init(&sc->media, IFM_IMASK | IFM_ETH_FCMASK,
	    ix_media_change, ix_media_status);

	/* Save frame size */
	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	sc->direct_input = ix_direct_input;
	TASK_INIT(&sc->wdog_task, 0, ix_watchdog_task, sc);

	callout_init_mp(&sc->timer);
	lwkt_serialize_init(&sc->main_serialize);

	/*
	 * Save off the information about this board
	 */
	hw->vendor_id = pci_get_vendor(dev);
	hw->device_id = pci_get_device(dev);
	hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
	hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);

	ixgbe_set_mac_type(hw);

	/* Pick up the smart speed setting on 82599 and later */
	if (hw->mac.type != ixgbe_mac_82598EB)
		hw->phy.smart_speed = ix_smart_speed;

	/* Enable bus mastering */
	pci_enable_busmaster(dev);

	sc->mem_rid = PCIR_BAR(0);
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->mem_rid, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "Unable to allocate bus resource: memory\n");
		error = ENXIO;
		goto failed;
	}

	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);

	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;
	sc->hw.back = &sc->osdep;

	/*
	 * Configure total supported RX/TX ring count
	 */
	ix_get_rxring_cnt(sc, &ring_cnt, &ring_cntmax);
	sc->rx_rmap = if_ringmap_alloc(dev, ring_cnt, ring_cntmax);
	ix_get_txring_cnt(sc, &ring_cnt, &ring_cntmax);
	sc->tx_rmap = if_ringmap_alloc(dev, ring_cnt, ring_cntmax);
	if_ringmap_match(dev, sc->rx_rmap, sc->tx_rmap);

	sc->rx_ring_cnt = if_ringmap_count(sc->rx_rmap);
	sc->rx_ring_inuse = sc->rx_ring_cnt;
	sc->tx_ring_cnt = if_ringmap_count(sc->tx_rmap);
	sc->tx_ring_inuse = sc->tx_ring_cnt;

	/* Allocate TX/RX rings */
	error = ix_alloc_rings(sc);
	if (error)
		goto failed;

	/* Allocate interrupt */
	error = ix_alloc_intr(sc);
	if (error)
		goto failed;

	/* Set up serializers */
	ix_setup_serialize(sc);

	/* Allocate multicast array memory. */
	sc->mta = kmalloc(IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR,
	    M_DEVBUF, M_WAITOK);

	/* Initialize the shared code */
	hw->allow_unsupported_sfp = ix_unsupported_sfp;
	error = ixgbe_init_shared_code(hw);
	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
		/*
		 * No optics in this port; ask timer routine
		 * to probe for later insertion.
		 */
		sc->sfp_probe = TRUE;
		error = 0;
	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
		device_printf(dev, "Unsupported SFP+ module detected!\n");
		error = EIO;
		goto failed;
	} else if (error) {
		device_printf(dev, "Unable to initialize the shared code\n");
		error = EIO;
		goto failed;
	}

	/* Make sure we have a good EEPROM before we read from it */
	if (ixgbe_validate_eeprom_checksum(&sc->hw, &csum) < 0) {
		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
		error = EIO;
		goto failed;
	}

	error = ixgbe_init_hw(hw);
	if (error == IXGBE_ERR_EEPROM_VERSION) {
		device_printf(dev, "Pre-production device detected\n");
	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
		device_printf(dev, "Unsupported SFP+ Module\n");
		error = EIO;
		goto failed;
	} else if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
		device_printf(dev, "No SFP+ Module found\n");
	}

	sc->ifm_media = IX_IFM_DEFAULT;
	/* Get default flow control settings */
	device_getenv_string(dev, "flow_ctrl", flowctrl, sizeof(flowctrl),
	    ix_flowctrl);
	sc->ifm_media |= ifmedia_str2ethfc(flowctrl);
	sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;

	/* Setup OS specific network interface */
	ix_setup_ifp(sc);

	/* Add sysctl tree */
	ix_add_sysctl(sc);

	error = ix_setup_intr(sc);
	if (error) {
		ether_ifdetach(&sc->arpcom.ac_if);
		goto failed;
	}

	/* Initialize statistics */
	ix_update_stats(sc);

	/* Check PCIE slot type/speed/width */
	ix_slot_info(sc);

	/* Save initial wake up filter configuration */
	sc->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC);

	/* Let hardware know driver is loaded */
	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);

	return 0;
failed:
	ix_detach(dev);
	return error;
}

static int
ix_detach(device_t dev)
{
	struct ix_softc *sc = device_get_softc(dev);

	if (device_is_attached(dev)) {
		struct ifnet *ifp = &sc->arpcom.ac_if;
		uint32_t ctrl_ext;

		ix_sync_netisr(sc, IFF_UP);
		taskqueue_drain(taskqueue_thread[0], &sc->wdog_task);

		ifnet_serialize_all(ifp);

		ix_powerdown(sc);
		ix_teardown_intr(sc, sc->intr_cnt);

		ifnet_deserialize_all(ifp);

		callout_terminate(&sc->timer);
		ether_ifdetach(ifp);

		/* Let hardware know driver is unloading */
		ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
		ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
		IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
	}

	ifmedia_removeall(&sc->media);
	bus_generic_detach(dev);

	ix_free_intr(sc);

	if (sc->msix_mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
		    sc->msix_mem_res);
	}
	if (sc->mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
		    sc->mem_res);
	}

	ix_free_rings(sc);

	if (sc->mta != NULL)
		kfree(sc->mta, M_DEVBUF);
	if (sc->serializes != NULL)
		kfree(sc->serializes, M_DEVBUF);

	if (sc->rx_rmap != NULL)
		if_ringmap_free(sc->rx_rmap);
	if (sc->rx_rmap_intr != NULL)
		if_ringmap_free(sc->rx_rmap_intr);
	if (sc->tx_rmap != NULL)
		if_ringmap_free(sc->tx_rmap);
	if (sc->tx_rmap_intr != NULL)
		if_ringmap_free(sc->tx_rmap_intr);

	return 0;
}

static int
ix_shutdown(device_t dev)
{
	struct ix_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;

	ix_sync_netisr(sc, IFF_UP);
	taskqueue_drain(taskqueue_thread[0], &sc->wdog_task);

	ifnet_serialize_all(ifp);
	ix_powerdown(sc);
	ifnet_deserialize_all(ifp);

	return 0;
}

static void
ix_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	struct ix_softc *sc = ifp->if_softc;
	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
	int idx = -1;
	uint16_t nsegs;

	KKASSERT(txr->tx_ifsq == ifsq);
	ASSERT_SERIALIZED(&txr->tx_serialize);

	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
		return;

	if (!sc->link_active || (txr->tx_flags & IX_TXFLAG_ENABLED) == 0) {
		ifsq_purge(ifsq);
		return;
	}

	while (!ifsq_is_empty(ifsq)) {
		struct mbuf *m_head;

		if (txr->tx_avail <= IX_MAX_SCATTER + IX_TX_RESERVED) {
			ifsq_set_oactive(ifsq);
			txr->tx_watchdog.wd_timer = 5;
			break;
		}

		m_head = ifsq_dequeue(ifsq);
		if (m_head == NULL)
			break;

		if (ix_encap(txr, &m_head, &nsegs, &idx)) {
			IFNET_STAT_INC(ifp, oerrors, 1);
			continue;
		}

		/*
		 * TX interrupts are aggressively aggregated, so increasing
		 * opackets at TX interrupt time will make the opackets
		 * statistics vastly inaccurate; we do the opackets increment
		 * now.
		 */
		IFNET_STAT_INC(ifp, opackets, 1);

		if (nsegs >= txr->tx_wreg_nsegs) {
			IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
			nsegs = 0;
			idx = -1;
		}

		ETHER_BPF_MTAP(ifp, m_head);
	}
	if (idx >= 0)
		IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
	txr->tx_running = IX_TX_RUNNING;
}
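
/*
 * Doorbell batching note for ix_start() above: the TDT register write
 * is the expensive per-packet cost, so it is deferred until at least
 * tx_wreg_nsegs descriptors have been queued, with one final write at
 * loop exit for any remainder.  A single doorbell may therefore cover
 * several packets.
 */
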
static int
ix_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
{
	struct ix_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *) data;
	int error = 0, mask, reinit;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	switch (command) {
	case SIOCSIFMTU:
		if (ifr->ifr_mtu > IX_MAX_MTU) {
			error = EINVAL;
		} else {
			ifp->if_mtu = ifr->ifr_mtu;
			sc->max_frame_size = ifp->if_mtu + IX_MTU_HDR;
			ix_init(sc);
		}
		break;

	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_flags & IFF_RUNNING) {
				if ((ifp->if_flags ^ sc->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI))
					ix_set_promisc(sc);
			} else {
				ix_init(sc);
			}
		} else if (ifp->if_flags & IFF_RUNNING) {
			ix_stop(sc);
		}
		sc->if_flags = ifp->if_flags;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		if (ifp->if_flags & IFF_RUNNING) {
			ix_disable_intr(sc);
			ix_set_multi(sc);
#ifdef IFPOLL_ENABLE
			if ((ifp->if_flags & IFF_NPOLLING) == 0)
#endif
				ix_enable_intr(sc);
		}
		break;

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
		break;

	case SIOCSIFCAP:
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_RXCSUM) {
			ifp->if_capenable ^= IFCAP_RXCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			if (ifp->if_capenable & IFCAP_TXCSUM)
				ifp->if_hwassist |= CSUM_OFFLOAD;
			else
				ifp->if_hwassist &= ~CSUM_OFFLOAD;
		}
		if (mask & IFCAP_TSO) {
			ifp->if_capenable ^= IFCAP_TSO;
			if (ifp->if_capenable & IFCAP_TSO)
				ifp->if_hwassist |= CSUM_TSO;
			else
				ifp->if_hwassist &= ~CSUM_TSO;
		}
		if (mask & IFCAP_RSS)
			ifp->if_capenable ^= IFCAP_RSS;
		if (reinit && (ifp->if_flags & IFF_RUNNING))
			ix_init(sc);
		break;

	case SIOCGI2C:
	{
		struct ixgbe_hw *hw = &sc->hw;
		struct ixgbe_i2c_req i2c;

		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
		if (error)
			break;
		/* Only the 0xA0/0xA2 SFP+ addresses are valid */
		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
			error = EINVAL;
			break;
		}
		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
		    i2c.dev_addr, i2c.data);
		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
		break;
	}

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}
	return error;
}

#define IXGBE_MHADD_MFS_SHIFT	16

static void
ix_init(void *xsc)
{
	struct ix_softc *sc = xsc;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct ixgbe_hw *hw = &sc->hw;
	uint32_t gpie, rxctrl;
	int i, error;
	boolean_t polling;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	ix_stop(sc);

	polling = FALSE;
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		polling = TRUE;
#endif

	/* Configure # of used RX/TX rings */
	ix_set_ring_inuse(sc, polling);
	ifq_set_subq_divisor(&ifp->if_snd, sc->tx_ring_inuse);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
	hw->addr_ctrl.rar_used_count = 1;

	/* Prepare transmit descriptors and buffers */
	for (i = 0; i < sc->tx_ring_inuse; ++i)
		ix_init_tx_ring(&sc->tx_rings[i]);

	ix_init_tx_unit(sc);

	/* Setup Multicast table */
	ix_set_multi(sc);

	/* Prepare receive descriptors and buffers */
	for (i = 0; i < sc->rx_ring_inuse; ++i) {
		error = ix_init_rx_ring(&sc->rx_rings[i]);
		if (error) {
			if_printf(ifp, "Could not initialize RX ring%d\n", i);
			ix_stop(sc);
			return;
		}
	}

	/* Configure RX settings */
	ix_init_rx_unit(sc, polling);

	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);

	/* Enable Fan Failure Interrupt */
	gpie |= IXGBE_SDP1_GPIEN_BY_MAC(hw);

	/* Add for Module detection */
	if (hw->mac.type == ixgbe_mac_82599EB)
		gpie |= IXGBE_SDP2_GPIEN;

	/*
	 * Thermal Failure Detection (X540)
	 * Link Detection (X552)
	 */
	if (hw->mac.type == ixgbe_mac_X540 ||
	    hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
	    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
		gpie |= IXGBE_SDP0_GPIEN_X540;

	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
		/* Enable Enhanced MSIX mode */
		gpie |= IXGBE_GPIE_MSIX_MODE;
		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
		    IXGBE_GPIE_OCD;
	}
	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);

	/* Set MTU size */
	if (ifp->if_mtu > ETHERMTU) {
		uint32_t mhadd;

		/* aka IXGBE_MAXFRS on 82599 and newer */
		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
		mhadd &= ~IXGBE_MHADD_MFS_MASK;
		mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
	}

	/* Enable TX rings */
	for (i = 0; i < sc->tx_ring_inuse; ++i) {
		uint32_t txdctl;

		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
		txdctl |= IXGBE_TXDCTL_ENABLE;

		/*
		 * Set WTHRESH to 0, since TX head write-back is used
		 */
		txdctl &= ~(0x7f << 16);

		/*
		 * When the internal queue falls below PTHRESH (32),
		 * start prefetching as long as there are at least
		 * HTHRESH (1) buffers ready. The values are taken
		 * from the Intel linux driver 3.8.21.
		 * Prefetching enables tx line rate even with 1 queue.
		 */
		txdctl |= (32 << 0) | (1 << 8);
		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
	}

	/* Enable RX rings */
	for (i = 0; i < sc->rx_ring_inuse; ++i) {
		uint32_t rxdctl;
		int k;

		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
		if (hw->mac.type == ixgbe_mac_82598EB) {
			/*
			 * PTHRESH = 21
			 * HTHRESH = 4
			 * WTHRESH = 8
			 */
			rxdctl &= ~0x3FFFFF;
			rxdctl |= 0x080420;
		}
		rxdctl |= IXGBE_RXDCTL_ENABLE;
		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
		for (k = 0; k < 10; ++k) {
			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
			    IXGBE_RXDCTL_ENABLE)
				break;
			else
				msec_delay(1);
		}

		IXGBE_WRITE_REG(hw, IXGBE_RDT(i),
		    sc->rx_rings[0].rx_ndesc - 1);
	}

	/* Enable Receive engine */
	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
	if (hw->mac.type == ixgbe_mac_82598EB)
		rxctrl |= IXGBE_RXCTRL_DMBYPS;
	rxctrl |= IXGBE_RXCTRL_RXEN;
	ixgbe_enable_rx_dma(hw, rxctrl);

	for (i = 0; i < sc->tx_ring_inuse; ++i) {
		const struct ix_tx_ring *txr = &sc->tx_rings[i];

		if (txr->tx_intr_vec >= 0) {
			ix_set_ivar(sc, i, txr->tx_intr_vec, 1);
		} else if (!polling) {
			/*
			 * An unconfigured TX interrupt vector could only
			 * happen when MSI-X is used.
			 */
			KASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX,
			    ("TX intr vector is not set"));
			if (bootverbose)
				if_printf(ifp, "IVAR skips TX ring %d\n", i);
		}
	}
	for (i = 0; i < sc->rx_ring_inuse; ++i) {
		const struct ix_rx_ring *rxr = &sc->rx_rings[i];

		if (polling && rxr->rx_intr_vec < 0)
			continue;

		KKASSERT(rxr->rx_intr_vec >= 0);
		ix_set_ivar(sc, i, rxr->rx_intr_vec, 0);
		if (rxr->rx_txr != NULL) {
			/*
			 * Piggyback the TX ring interrupt onto the RX
			 * ring interrupt vector.
			 */
			KASSERT(rxr->rx_txr->tx_intr_vec < 0,
			    ("piggybacked TX ring configured intr vector"));
			ix_set_ivar(sc, rxr->rx_txr->tx_idx,
			    rxr->rx_intr_vec, 1);
			if (bootverbose) {
				if_printf(ifp, "IVAR RX ring %d piggybacks "
				    "TX ring %u\n", i, rxr->rx_txr->tx_idx);
			}
		}
	}
	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
		/* Set up status MSI-X vector; it is using fixed entry 1 */
		ix_set_ivar(sc, 1, sc->sts_msix_vec, -1);

		/* Set up auto-mask for TX and RX rings */
		if (hw->mac.type == ixgbe_mac_82598EB) {
			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
		} else {
			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
		}
	} else {
		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
	}
	for (i = 0; i < sc->intr_cnt; ++i)
		ix_set_eitr(sc, i, sc->intr_data[i].intr_rate);

	/*
	 * Check on any SFP devices that need to be kick-started
	 */
	if (hw->phy.type == ixgbe_phy_none) {
		error = hw->phy.ops.identify(hw);
		if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
			device_printf(sc->dev,
			    "Unsupported SFP+ module type was detected.\n");
			return;
		}
	}

	/* Config/Enable Link */
	ix_config_link(sc);

	/* Hardware Packet Buffer & Flow Control setup */
	ix_config_flowctrl(sc);

	/* Initialize the FC settings */
	ixgbe_start_hw(hw);

	/* Set up VLAN support and filter */
	ix_set_vlan(sc);

	/* Setup DMA Coalescing */
	ix_config_dmac(sc);

	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (polling)
		ix_disable_intr(sc);
	else
		ix_enable_intr(sc);

	ifp->if_flags |= IFF_RUNNING;
	for (i = 0; i < sc->tx_ring_inuse; ++i) {
		struct ix_tx_ring *txr = &sc->tx_rings[i];

		ifsq_clr_oactive(txr->tx_ifsq);
		ifsq_watchdog_start(&txr->tx_watchdog);

		if (!polling) {
			callout_reset_bycpu(&txr->tx_gc_timer, 1,
			    ix_txgc_timer, txr, txr->tx_intr_cpuid);
		}
	}

	ix_set_timer_cpuid(sc, polling);
	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
}
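
/*
 * MSI-X vector layout programmed by ix_init() above: each RX and TX
 * ring is bound to a vector through the IVAR tables via ix_set_ivar();
 * a TX ring without a vector of its own piggybacks on its paired RX
 * ring's vector; and the link/status interrupt uses the fixed entry
 * programmed with type -1 above, which selects the "other causes"
 * IVAR table.
 */
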
static void
ix_intr(void *xsc)
{
	struct ix_softc *sc = xsc;
	struct ixgbe_hw *hw = &sc->hw;
	uint32_t eicr;

	ASSERT_SERIALIZED(&sc->main_serialize);

	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
	if (eicr == 0) {
		IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
		return;
	}

	if (eicr & IX_RX0_INTR_MASK) {
		struct ix_rx_ring *rxr = &sc->rx_rings[0];

		lwkt_serialize_enter(&rxr->rx_serialize);
		ix_rxeof(rxr, -1);
		lwkt_serialize_exit(&rxr->rx_serialize);
	}
	if (eicr & IX_RX1_INTR_MASK) {
		struct ix_rx_ring *rxr;

		KKASSERT(sc->rx_ring_inuse == IX_MIN_RXRING_RSS);
		rxr = &sc->rx_rings[1];

		lwkt_serialize_enter(&rxr->rx_serialize);
		ix_rxeof(rxr, -1);
		lwkt_serialize_exit(&rxr->rx_serialize);
	}

	if (eicr & IX_TX_INTR_MASK) {
		struct ix_tx_ring *txr = &sc->tx_rings[0];

		lwkt_serialize_enter(&txr->tx_serialize);
		ix_tx_intr(txr, *(txr->tx_hdr));
		lwkt_serialize_exit(&txr->tx_serialize);
	}

	if (__predict_false(eicr & IX_EICR_STATUS))
		ix_intr_status(sc, eicr);

	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
}
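
/*
 * In the legacy-interrupt handler above, the EICR read clears the
 * asserted causes and, together with the EIAM auto-mask set up in
 * ix_init(), leaves them masked while the rings are serviced; the
 * final EIMS write re-arms the interrupt on the way out.
 */
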
static void
ix_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct ix_softc *sc = ifp->if_softc;
	struct ifmedia *ifm = &sc->media;
	uint32_t layer;

	ix_update_link_status(sc);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!sc->link_active) {
		if (IFM_SUBTYPE(ifm->ifm_media) != IFM_AUTO)
			ifmr->ifm_active |= ifm->ifm_media;
		else
			ifmr->ifm_active |= IFM_NONE;
		return;
	}
	ifmr->ifm_status |= IFM_ACTIVE;

	layer = ixgbe_get_supported_physical_layer(&sc->hw);

	if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) ||
	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) ||
	    (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX)) {
		switch (sc->link_speed) {
		case IXGBE_LINK_SPEED_10GB_FULL:
			ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
			break;
		case IXGBE_LINK_SPEED_1GB_FULL:
			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
			break;
		case IXGBE_LINK_SPEED_100_FULL:
			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
			break;
		}
	} else if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
		switch (sc->link_speed) {
		case IXGBE_LINK_SPEED_10GB_FULL:
			ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX;
			break;
		}
	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
		switch (sc->link_speed) {
		case IXGBE_LINK_SPEED_10GB_FULL:
			ifmr->ifm_active |= IFM_10G_LR | IFM_FDX;
			break;
		case IXGBE_LINK_SPEED_1GB_FULL:
			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
			break;
		}
	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
		switch (sc->link_speed) {
		case IXGBE_LINK_SPEED_10GB_FULL:
			ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX;
			break;
		case IXGBE_LINK_SPEED_1GB_FULL:
			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
			break;
		}
	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) ||
	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX)) {
		switch (sc->link_speed) {
		case IXGBE_LINK_SPEED_10GB_FULL:
			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
			break;
		case IXGBE_LINK_SPEED_1GB_FULL:
			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
			break;
		}
	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
		switch (sc->link_speed) {
		case IXGBE_LINK_SPEED_10GB_FULL:
			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
			break;
		}
	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
		/*
		 * XXX: These need to use the proper media types once
		 * they are established.
		 */
		switch (sc->link_speed) {
		case IXGBE_LINK_SPEED_10GB_FULL:
			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
			break;
		case IXGBE_LINK_SPEED_2_5GB_FULL:
			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
			break;
		case IXGBE_LINK_SPEED_1GB_FULL:
			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
			break;
		}
	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) ||
	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX)) {
		/*
		 * XXX: These need to use the proper media types once
		 * they are established.
		 */
		switch (sc->link_speed) {
		case IXGBE_LINK_SPEED_10GB_FULL:
			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
			break;
		case IXGBE_LINK_SPEED_2_5GB_FULL:
			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
			break;
		case IXGBE_LINK_SPEED_1GB_FULL:
			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
			break;
		}
	}

	/* If nothing is recognized... */
	if (IFM_SUBTYPE(ifmr->ifm_active) == 0)
		ifmr->ifm_active |= IFM_NONE;

	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
		ifmr->ifm_active |= (sc->ifm_media & IFM_ETH_FCMASK);

	switch (sc->hw.fc.current_mode) {
	case ixgbe_fc_full:
		ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
		break;
	case ixgbe_fc_rx_pause:
		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
		break;
	case ixgbe_fc_tx_pause:
		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
		break;
	default:
		break;
	}
}

static int
ix_media_change(struct ifnet *ifp)
{
	struct ix_softc *sc = ifp->if_softc;
	struct ifmedia *ifm = &sc->media;
	struct ixgbe_hw *hw = &sc->hw;

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return EINVAL;

	if (hw->phy.media_type == ixgbe_media_type_backplane ||
	    hw->mac.ops.setup_link == NULL) {
		if ((ifm->ifm_media ^ sc->ifm_media) & ~IFM_ETH_FCMASK) {
			/* Only flow control setting changes are allowed */
			return (EOPNOTSUPP);
		}
	}

	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
		break;

	case IFM_10G_SR:	/* XXX also KR */
	case IFM_10G_CX4:	/* XXX also KX4 */
	case IFM_10G_TWINAX:
		sc->advspeed = IXGBE_LINK_SPEED_10GB_FULL;
		break;

	case IFM_1000_CX:	/* XXX is KX */
		sc->advspeed = IXGBE_LINK_SPEED_1GB_FULL;
		break;

	case IFM_100_TX:
		sc->advspeed = IXGBE_LINK_SPEED_100_FULL;
		break;

	default:
		if_printf(ifp, "Invalid media type %d!\n",
		    IFM_SUBTYPE(ifm->ifm_media));
		return EINVAL;
	}
	sc->ifm_media = ifm->ifm_media;

	if (hw->mac.ops.setup_link != NULL) {
		hw->mac.autotry_restart = TRUE;
		hw->mac.ops.setup_link(hw, sc->advspeed, TRUE);
	}

	if (ifp->if_flags & IFF_RUNNING)
		ix_init(sc);
	return 0;
}

static int
ix_tso_pullup(struct mbuf **mp)
{
	int hoff, iphlen, thoff;
	struct mbuf *m;

	m = *mp;
	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;
	hoff = m->m_pkthdr.csum_lhlen;

	KASSERT(iphlen > 0, ("invalid ip hlen"));
	KASSERT(thoff > 0, ("invalid tcp hlen"));
	KASSERT(hoff > 0, ("invalid ether hlen"));

	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
		m = m_pullup(m, hoff + iphlen + thoff);
		if (m == NULL) {
			*mp = NULL;
			return ENOBUFS;
		}
		*mp = m;
	}
	return 0;
}

static int
ix_encap(struct ix_tx_ring *txr, struct mbuf **m_headp,
    uint16_t *segs_used, int *idx)
{
	uint32_t olinfo_status = 0, cmd_type_len, cmd_rs = 0;
	int i, j, error, nsegs, first, maxsegs;
	struct mbuf *m_head = *m_headp;
	bus_dma_segment_t segs[IX_MAX_SCATTER];
	bus_dmamap_t map;
	struct ix_tx_buf *txbuf;
	union ixgbe_adv_tx_desc *txd = NULL;

	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		error = ix_tso_pullup(m_headp);
		if (__predict_false(error))
			return error;
		m_head = *m_headp;
	}

	/* Basic descriptor defines */
	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);

	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->tx_next_avail;
	txbuf = &txr->tx_buf[first];
	map = txbuf->map;

	/*
	 * Map the packet for DMA.
	 */
	maxsegs = txr->tx_avail - IX_TX_RESERVED;
	if (maxsegs > IX_MAX_SCATTER)
		maxsegs = IX_MAX_SCATTER;

	error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
	    segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
	if (__predict_false(error)) {
		m_freem(*m_headp);
		*m_headp = NULL;
		return error;
	}
	bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);

	m_head = *m_headp;

	/*
	 * Set up the appropriate offload context if requested,
	 * this may consume one TX descriptor.
	 */
	if (ix_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status)) {
		(*segs_used)++;
		txr->tx_nsegs++;
	}

	*segs_used += nsegs;
	txr->tx_nsegs += nsegs;
	if (txr->tx_nsegs >= txr->tx_intr_nsegs) {
		/*
		 * Report Status (RS) is turned on every intr_nsegs
		 * descriptors (roughly).
		 */
		txr->tx_nsegs = 0;
		cmd_rs = IXGBE_TXD_CMD_RS;
	}

	i = txr->tx_next_avail;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;
		uint64_t segaddr;

		txbuf = &txr->tx_buf[i];
		txd = &txr->tx_base[i];
		seglen = segs[j].ds_len;
		segaddr = htole64(segs[j].ds_addr);

		txd->read.buffer_addr = segaddr;
		txd->read.cmd_type_len = htole32(IXGBE_TXD_CMD_IFCS |
		    cmd_type_len | seglen);
		txd->read.olinfo_status = htole32(olinfo_status);

		if (++i == txr->tx_ndesc)
			i = 0;
	}
	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | cmd_rs);

	txr->tx_avail -= nsegs;
	txr->tx_next_avail = i;
	txr->tx_nmbuf++;

	txbuf->m_head = m_head;
	txr->tx_buf[first].map = txbuf->map;
	txbuf->map = map;

	/*
	 * Defer TDT updating, until enough descriptors are setup
	 */
	*idx = i;

	return 0;
}
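
/*
 * Completion accounting used by ix_encap() above: the RS (Report
 * Status) bit is requested only about once every tx_intr_nsegs
 * descriptors instead of per packet.  Combined with TX head
 * write-back (see ix_init_tx_unit()), completed descriptors are
 * learned from the DMA'd head index in ix_txeof() without a register
 * read per packet.
 */
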
static void
ix_set_promisc(struct ix_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	uint32_t reg_rctl;
	int mcnt = 0;

	reg_rctl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
	reg_rctl &= ~IXGBE_FCTRL_UPE;
	if (ifp->if_flags & IFF_ALLMULTI) {
		mcnt = IX_MAX_MCASTADDR;
	} else {
		struct ifmultiaddr *ifma;

		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			if (ifma->ifma_addr->sa_family != AF_LINK)
				continue;
			if (mcnt == IX_MAX_MCASTADDR)
				break;
			mcnt++;
		}
	}
	if (mcnt < IX_MAX_MCASTADDR)
		reg_rctl &= ~IXGBE_FCTRL_MPE;
	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);

	if (ifp->if_flags & IFF_PROMISC) {
		reg_rctl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		reg_rctl |= IXGBE_FCTRL_MPE;
		reg_rctl &= ~IXGBE_FCTRL_UPE;
		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
	}
}

static void
ix_set_multi(struct ix_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct ifmultiaddr *ifma;
	uint32_t fctrl;
	uint8_t *mta;
	int mcnt = 0;

	mta = sc->mta;
	bzero(mta, IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR);

	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		if (mcnt == IX_MAX_MCASTADDR)
			break;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
		    IXGBE_ETH_LENGTH_OF_ADDRESS);
		mcnt++;
	}

	fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
	if (ifp->if_flags & IFF_PROMISC) {
		fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
	} else if (mcnt >= IX_MAX_MCASTADDR || (ifp->if_flags & IFF_ALLMULTI)) {
		fctrl |= IXGBE_FCTRL_MPE;
		fctrl &= ~IXGBE_FCTRL_UPE;
	} else {
		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
	}
	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);

	if (mcnt < IX_MAX_MCASTADDR) {
		ixgbe_update_mc_addr_list(&sc->hw,
		    mta, mcnt, ix_mc_array_itr, TRUE);
	}
}

/*
 * This is an iterator function now needed by the multicast
 * shared code. It simply feeds the shared code routine the
 * addresses in the array of ix_set_multi() one by one.
 */
static uint8_t *
ix_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq)
{
	uint8_t *addr = *update_ptr;
	uint8_t *newptr;

	*vmdq = 0;

	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
	*update_ptr = newptr;
	return addr;
}

static void
ix_timer(void *arg)
{
	struct ix_softc *sc = arg;

	lwkt_serialize_enter(&sc->main_serialize);

	if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0) {
		lwkt_serialize_exit(&sc->main_serialize);
		return;
	}

	/* Check for pluggable optics */
	if (sc->sfp_probe) {
		if (!ix_sfp_probe(sc))
			goto done; /* Nothing to do */
	}

	ix_update_link_status(sc);
	ix_update_stats(sc);

done:
	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
	lwkt_serialize_exit(&sc->main_serialize);
}

static void
ix_update_link_status(struct ix_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;

	if (sc->link_up) {
		if (sc->link_active == FALSE) {
			if (bootverbose) {
				if_printf(ifp, "Link is up %d Gbps %s\n",
				    sc->link_speed == 128 ? 10 : 1,
				    "Full Duplex");
			}

			/*
			 * Update any Flow Control changes
			 */
			ixgbe_fc_enable(&sc->hw);
			/* MUST after ixgbe_fc_enable() */
			if (sc->rx_ring_inuse > 1) {
				switch (sc->hw.fc.current_mode) {
				case ixgbe_fc_rx_pause:
				case ixgbe_fc_tx_pause:
				case ixgbe_fc_full:
					ix_disable_rx_drop(sc);
					break;

				default:
					ix_enable_rx_drop(sc);
					break;
				}
			}

			/* Update DMA coalescing config */
			ix_config_dmac(sc);

			sc->link_active = TRUE;

			ifp->if_link_state = LINK_STATE_UP;
			if_link_state_change(ifp);
		}
	} else { /* Link down */
		if (sc->link_active == TRUE) {
			if (bootverbose)
				if_printf(ifp, "Link is Down\n");
			ifp->if_link_state = LINK_STATE_DOWN;
			if_link_state_change(ifp);

			sc->link_active = FALSE;
		}
	}
}
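
/*
 * Rationale for the RX-drop toggling above: with multiple RX rings
 * and no PAUSE frames in use, letting the MAC drop on a full ring
 * keeps one stalled ring from head-of-line blocking the others, so
 * drop is enabled; when flow control is active the MAC should emit
 * PAUSE frames instead of dropping, so drop is disabled.
 */
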
static void
ix_stop(struct ix_softc *sc)
{
	struct ixgbe_hw *hw = &sc->hw;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	ix_disable_intr(sc);
	callout_stop(&sc->timer);

	ifp->if_flags &= ~IFF_RUNNING;
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		struct ix_tx_ring *txr = &sc->tx_rings[i];

		ifsq_clr_oactive(txr->tx_ifsq);
		ifsq_watchdog_stop(&txr->tx_watchdog);
		txr->tx_flags &= ~IX_TXFLAG_ENABLED;

		txr->tx_running = 0;
		callout_stop(&txr->tx_gc_timer);
	}

	hw->adapter_stopped = FALSE;
	ixgbe_stop_adapter(hw);
	if (hw->mac.type == ixgbe_mac_82599EB)
		ixgbe_stop_mac_link_on_d3_82599(hw);
	/* Turn off the laser - noop with no optics */
	ixgbe_disable_tx_laser(hw);

	/* Update the stack */
	sc->link_up = FALSE;
	ix_update_link_status(sc);

	/* Reprogram the RAR[0] in case user changed it. */
	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);

	for (i = 0; i < sc->tx_ring_cnt; ++i)
		ix_free_tx_ring(&sc->tx_rings[i]);

	for (i = 0; i < sc->rx_ring_cnt; ++i)
		ix_free_rx_ring(&sc->rx_rings[i]);
}

static void
ix_setup_ifp(struct ix_softc *sc)
{
	struct ixgbe_hw *hw = &sc->hw;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ifp->if_baudrate = IF_Gbps(10UL);

	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = ix_init;
	ifp->if_ioctl = ix_ioctl;
	ifp->if_start = ix_start;
	ifp->if_serialize = ix_serialize;
	ifp->if_deserialize = ix_deserialize;
	ifp->if_tryserialize = ix_tryserialize;
#ifdef INVARIANTS
	ifp->if_serialize_assert = ix_serialize_assert;
#endif
#ifdef IFPOLL_ENABLE
	ifp->if_npoll = ix_npoll;
#endif

	/* Increase TSO burst length */
	ifp->if_tsolen = (8 * ETHERMTU);

	ifp->if_nmbclusters = sc->rx_ring_cnt * sc->rx_rings[0].rx_ndesc;
	ifp->if_nmbjclusters = ifp->if_nmbclusters;

	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].tx_ndesc - 2);
	ifq_set_ready(&ifp->if_snd);
	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);

	ifp->if_mapsubq = ifq_mapsubq_modulo;
	ifq_set_subq_divisor(&ifp->if_snd, 1);

	ether_ifattach(ifp, hw->mac.addr, NULL);

	ifp->if_capabilities =
	    IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	if (IX_ENABLE_HWRSS(sc))
		ifp->if_capabilities |= IFCAP_RSS;
	ifp->if_capenable = ifp->if_capabilities;
	ifp->if_hwassist = CSUM_OFFLOAD | CSUM_TSO;

	/*
	 * Tell the upper layer(s) we support long frames.
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);

	/* Setup TX rings and subqueues */
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
		struct ix_tx_ring *txr = &sc->tx_rings[i];

		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
		ifsq_set_priv(ifsq, txr);
		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
		txr->tx_ifsq = ifsq;

		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, ix_watchdog);
	}

	/* Specify the media types supported by this adapter */
	ix_init_media(sc);
}

static boolean_t
ix_is_sfp(const struct ixgbe_hw *hw)
{
	switch (hw->phy.type) {
	case ixgbe_phy_sfp_avago:
	case ixgbe_phy_sfp_ftl:
	case ixgbe_phy_sfp_intel:
	case ixgbe_phy_sfp_unknown:
	case ixgbe_phy_sfp_passive_tyco:
	case ixgbe_phy_sfp_passive_unknown:
	case ixgbe_phy_qsfp_passive_unknown:
	case ixgbe_phy_qsfp_active_unknown:
	case ixgbe_phy_qsfp_intel:
	case ixgbe_phy_qsfp_unknown:
		return TRUE;
	default:
		return FALSE;
	}
}

static void
ix_config_link(struct ix_softc *sc)
{
	struct ixgbe_hw *hw = &sc->hw;
	boolean_t sfp;

	sfp = ix_is_sfp(hw);
	if (sfp) {
		if (hw->phy.multispeed_fiber) {
			hw->mac.ops.setup_sfp(hw);
			ixgbe_enable_tx_laser(hw);
			ix_handle_msf(sc);
		} else {
			ix_handle_mod(sc);
		}
	} else {
		uint32_t autoneg, err = 0;

		if (hw->mac.ops.check_link != NULL) {
			err = ixgbe_check_link(hw, &sc->link_speed,
			    &sc->link_up, FALSE);
			if (err)
				return;
		}

		if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
			autoneg = sc->advspeed;
		else
			autoneg = hw->phy.autoneg_advertised;
		if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
			boolean_t negotiate;

			err = hw->mac.ops.get_link_capabilities(hw,
			    &autoneg, &negotiate);
			if (err)
				return;
		}

		if (hw->mac.ops.setup_link != NULL) {
			err = hw->mac.ops.setup_link(hw,
			    autoneg, sc->link_up);
		}
	}
}

static int
ix_alloc_rings(struct ix_softc *sc)
{
	int error, i;

	/*
	 * Create top level busdma tag
	 */
	error = bus_dma_tag_create(NULL, 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
	    &sc->parent_tag);
	if (error) {
		device_printf(sc->dev, "could not create top level DMA tag\n");
		return error;
	}

	/*
	 * Allocate TX descriptor rings and buffers
	 */
	sc->tx_rings = kmalloc_cachealign(
	    sizeof(struct ix_tx_ring) * sc->tx_ring_cnt,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < sc->tx_ring_cnt; ++i) {
		struct ix_tx_ring *txr = &sc->tx_rings[i];

		txr->tx_sc = sc;
		txr->tx_idx = i;
		txr->tx_intr_vec = -1;
		txr->tx_intr_cpuid = -1;
		lwkt_serialize_init(&txr->tx_serialize);
		callout_init_mp(&txr->tx_gc_timer);

		error = ix_create_tx_ring(txr);
		if (error)
			return error;
	}

	/*
	 * Allocate RX descriptor rings and buffers
	 */
	sc->rx_rings = kmalloc_cachealign(
	    sizeof(struct ix_rx_ring) * sc->rx_ring_cnt,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < sc->rx_ring_cnt; ++i) {
		struct ix_rx_ring *rxr = &sc->rx_rings[i];

		rxr->rx_sc = sc;
		rxr->rx_intr_vec = -1;
		lwkt_serialize_init(&rxr->rx_serialize);

		error = ix_create_rx_ring(rxr);
		if (error)
			return error;
	}

	return 0;
}

static int
ix_create_tx_ring(struct ix_tx_ring *txr)
{
	int error, i, tsize, ntxd;

	/*
	 * Validate number of transmit descriptors. It must not exceed
	 * hardware maximum, and must be multiple of IX_DBA_ALIGN.
	 */
	ntxd = device_getenv_int(txr->tx_sc->dev, "txd", ix_txd);
	if (((ntxd * sizeof(union ixgbe_adv_tx_desc)) % IX_DBA_ALIGN) != 0 ||
	    ntxd < IX_MIN_TXD || ntxd > IX_MAX_TXD) {
		device_printf(txr->tx_sc->dev,
		    "Using %d TX descriptors instead of %d!\n",
		    IX_DEF_TXD, ntxd);
		txr->tx_ndesc = IX_DEF_TXD;
	} else {
		txr->tx_ndesc = ntxd;
	}

	/*
	 * Allocate TX head write-back buffer
	 */
	txr->tx_hdr = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
	    &txr->tx_hdr_dtag, &txr->tx_hdr_map, &txr->tx_hdr_paddr);
	if (txr->tx_hdr == NULL) {
		device_printf(txr->tx_sc->dev,
		    "Unable to allocate TX head write-back buffer\n");
		return ENOMEM;
	}

	/*
	 * Allocate TX descriptor ring
	 */
	tsize = roundup2(txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc),
	    IX_DBA_ALIGN);
	txr->tx_base = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
	    IX_DBA_ALIGN, tsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
	    &txr->tx_base_dtag, &txr->tx_base_map, &txr->tx_base_paddr);
	if (txr->tx_base == NULL) {
		device_printf(txr->tx_sc->dev,
		    "Unable to allocate TX Descriptor memory\n");
		return ENOMEM;
	}

	tsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_tx_buf) * txr->tx_ndesc);
	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Create DMA tag for TX buffers
	 */
	error = bus_dma_tag_create(txr->tx_sc->parent_tag,
	    1, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    IX_TSO_SIZE,	/* maxsize */
	    IX_MAX_SCATTER,	/* nsegments */
	    PAGE_SIZE,		/* maxsegsize */
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
	    BUS_DMA_ONEBPAGE,	/* flags */
	    &txr->tx_tag);
	if (error) {
		device_printf(txr->tx_sc->dev,
		    "Unable to allocate TX DMA tag\n");
		kfree(txr->tx_buf, M_DEVBUF);
		txr->tx_buf = NULL;
		return error;
	}

	/*
	 * Create DMA maps for TX buffers
	 */
	for (i = 0; i < txr->tx_ndesc; ++i) {
		struct ix_tx_buf *txbuf = &txr->tx_buf[i];

		error = bus_dmamap_create(txr->tx_tag,
		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
		if (error) {
			device_printf(txr->tx_sc->dev,
			    "Unable to create TX DMA map\n");
			ix_destroy_tx_ring(txr, i);
			return error;
		}
	}

	/*
	 * Initialize various watermarks
	 */
	txr->tx_wreg_nsegs = IX_DEF_TXWREG_NSEGS;
	txr->tx_intr_nsegs = txr->tx_ndesc / 16;

	return 0;
}
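
/*
 * The watermarks initialized above: tx_wreg_nsegs batches TDT doorbell
 * writes in ix_start(), while tx_intr_nsegs (one sixteenth of the ring
 * by default) spaces out RS completion requests in ix_encap().  Both
 * trade a little completion latency for fewer register writes and
 * fewer TX interrupts.
 */
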
static void
ix_destroy_tx_ring(struct ix_tx_ring *txr, int ndesc)
{
	int i;

	if (txr->tx_hdr != NULL) {
		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_map);
		bus_dmamem_free(txr->tx_hdr_dtag,
		    __DEVOLATILE(void *, txr->tx_hdr), txr->tx_hdr_map);
		bus_dma_tag_destroy(txr->tx_hdr_dtag);
		txr->tx_hdr = NULL;
	}

	if (txr->tx_base != NULL) {
		bus_dmamap_unload(txr->tx_base_dtag, txr->tx_base_map);
		bus_dmamem_free(txr->tx_base_dtag, txr->tx_base,
		    txr->tx_base_map);
		bus_dma_tag_destroy(txr->tx_base_dtag);
		txr->tx_base = NULL;
	}

	if (txr->tx_buf == NULL)
		return;

	for (i = 0; i < ndesc; ++i) {
		struct ix_tx_buf *txbuf = &txr->tx_buf[i];

		KKASSERT(txbuf->m_head == NULL);
		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
	}
	bus_dma_tag_destroy(txr->tx_tag);

	kfree(txr->tx_buf, M_DEVBUF);
	txr->tx_buf = NULL;
}

static void
ix_init_tx_ring(struct ix_tx_ring *txr)
{
	/* Clear the old ring contents */
	bzero(txr->tx_base, sizeof(union ixgbe_adv_tx_desc) * txr->tx_ndesc);

	/* Clear TX head write-back buffer */
	*(txr->tx_hdr) = 0;

	/* Reset indices */
	txr->tx_next_avail = 0;
	txr->tx_next_clean = 0;
	txr->tx_nsegs = 0;
	txr->tx_running = 0;

	/* Set number of descriptors available */
	txr->tx_avail = txr->tx_ndesc;

	/* Enable this TX ring */
	txr->tx_flags |= IX_TXFLAG_ENABLED;
}

static void
ix_init_tx_unit(struct ix_softc *sc)
{
	struct ixgbe_hw *hw = &sc->hw;
	int i;

	/*
	 * Setup the Base and Length of the Tx Descriptor Ring
	 */
	for (i = 0; i < sc->tx_ring_inuse; ++i) {
		struct ix_tx_ring *txr = &sc->tx_rings[i];
		uint64_t tdba = txr->tx_base_paddr;
		uint64_t hdr_paddr = txr->tx_hdr_paddr;
		uint32_t txctrl;

		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (uint32_t)tdba);
		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (uint32_t)(tdba >> 32));
		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
		    txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc));

		/* Setup the HW Tx Head and Tail descriptor pointers */
		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);

		/* Disable TX head write-back relax ordering */
		switch (hw->mac.type) {
		case ixgbe_mac_82598EB:
			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
			break;
		case ixgbe_mac_82599EB:
		case ixgbe_mac_X540:
		default:
			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
			break;
		}
		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
		switch (hw->mac.type) {
		case ixgbe_mac_82598EB:
			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
			break;
		case ixgbe_mac_82599EB:
		case ixgbe_mac_X540:
		default:
			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
			break;
		}

		/* Enable TX head write-back */
		IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(i),
		    (uint32_t)(hdr_paddr >> 32));
		IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(i),
		    ((uint32_t)hdr_paddr) | IXGBE_TDWBAL_HEAD_WB_ENABLE);
	}

	if (hw->mac.type != ixgbe_mac_82598EB) {
		uint32_t dmatxctl, rttdcs;

		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
		dmatxctl |= IXGBE_DMATXCTL_TE;
		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);

		/* Disable arbiter to set MTQC */
		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
		rttdcs |= IXGBE_RTTDCS_ARBDIS;
		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);

		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);

		/* Re-enable arbiter */
		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
	}
}
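
/*
 * TX head write-back, enabled per ring above via TDWBAL/TDWBAH:
 * instead of the driver polling TDH, the MAC DMAs the current head
 * index into the ring's tx_hdr buffer.  That is why WTHRESH is kept
 * at 0 in ix_init() and why ix_txeof() can consume *(txr->tx_hdr)
 * directly without touching a device register.
 */
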
static int
ix_tx_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
    uint32_t *cmd_type_len, uint32_t *olinfo_status)
{
	struct ixgbe_adv_tx_context_desc *TXD;
	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	int ehdrlen, ip_hlen = 0, ctxd;
	boolean_t offload = TRUE;

	/* First check if TSO is to be used */
	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
		return ix_tso_ctx_setup(txr, mp,
		    cmd_type_len, olinfo_status);
	}

	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
		offload = FALSE;

	/* Indicate the whole packet as payload when not doing TSO */
	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;

	/*
	 * In advanced descriptors the vlan tag must be placed into the
	 * context descriptor. Hence we need to make one even if not
	 * doing checksum offloads.
	 */
	if (mp->m_flags & M_VLANTAG) {
		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
		    IXGBE_ADVTXD_VLAN_SHIFT;
	} else if (!offload) {
		/* No TX descriptor is consumed */
		return 0;
	}

	/* Set the ether header length */
	ehdrlen = mp->m_pkthdr.csum_lhlen;
	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;

	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		ip_hlen = mp->m_pkthdr.csum_iphlen;
		KASSERT(ip_hlen > 0, ("invalid ip hlen"));
	}
	vlan_macip_lens |= ip_hlen;

	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
	if (mp->m_pkthdr.csum_flags & CSUM_TCP)
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
	else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;

	if (mp->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;

	/* Now ready a context descriptor */
	ctxd = txr->tx_next_avail;
	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(0);

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == txr->tx_ndesc)
		ctxd = 0;
	txr->tx_next_avail = ctxd;
	txr->tx_avail--;

	/* One TX descriptor is consumed */
	return 1;
}
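
/*
 * Offload context recap for ix_tx_ctx_setup() above: checksum and VLAN
 * parameters travel in a separate "context" descriptor (DTYP_CTXT)
 * placed ahead of the data descriptors, so the function returns 1 when
 * a ring slot was consumed and 0 when the packet needs no context at
 * all; ix_encap() folds that into its segment accounting.
 */
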
static int
ix_tso_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
    uint32_t *cmd_type_len, uint32_t *olinfo_status)
{
	struct ixgbe_adv_tx_context_desc *TXD;
	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	uint32_t mss_l4len_idx = 0, paylen;
	int ctxd, ehdrlen, ip_hlen, tcp_hlen;

	ehdrlen = mp->m_pkthdr.csum_lhlen;
	KASSERT(ehdrlen > 0, ("invalid ether hlen"));

	ip_hlen = mp->m_pkthdr.csum_iphlen;
	KASSERT(ip_hlen > 0, ("invalid ip hlen"));

	tcp_hlen = mp->m_pkthdr.csum_thlen;
	KASSERT(tcp_hlen > 0, ("invalid tcp hlen"));

	ctxd = txr->tx_next_avail;
	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];

	if (mp->m_flags & M_VLANTAG) {
		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
		    IXGBE_ADVTXD_VLAN_SHIFT;
	}
	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= ip_hlen;
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);

	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);

	if (++ctxd == txr->tx_ndesc)
		ctxd = 0;

	txr->tx_next_avail = ctxd;
	txr->tx_avail--;

	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;

	/* This is used in the transmit desc in encap */
	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;

	*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;

	/* One TX descriptor is consumed */
	return 1;
}

static void
ix_txeof(struct ix_tx_ring *txr, int hdr)
{
	int first, avail;

	if (txr->tx_avail == txr->tx_ndesc)
		return;

	first = txr->tx_next_clean;
	if (first == hdr)
		return;

	avail = txr->tx_avail;
	while (first != hdr) {
		struct ix_tx_buf *txbuf = &txr->tx_buf[first];

		KKASSERT(avail < txr->tx_ndesc);
		++avail;

		if (txbuf->m_head != NULL)
			ix_free_txbuf(txr, txbuf);
		if (++first == txr->tx_ndesc)
			first = 0;
	}
	txr->tx_next_clean = first;
	txr->tx_avail = avail;

	if (txr->tx_avail > IX_MAX_SCATTER + IX_TX_RESERVED) {
		ifsq_clr_oactive(txr->tx_ifsq);
		txr->tx_watchdog.wd_timer = 0;
	}
	txr->tx_running = IX_TX_RUNNING;
}
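
/*
 * ix_txeof() above trusts the head index DMA'd into tx_hdr by head
 * write-back; ix_txgc() below is its fallback twin for idle rings and
 * reads TDH from the chip instead, since no completion write-back is
 * guaranteed once RS requests stop flowing.
 */
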
static void
ix_txgc(struct ix_tx_ring *txr)
{
	int first, avail, hdr;

	if (txr->tx_avail == txr->tx_ndesc)
		return;

	hdr = IXGBE_READ_REG(&txr->tx_sc->hw, IXGBE_TDH(txr->tx_idx));
	first = txr->tx_next_clean;
	if (first == hdr)
		return;

	avail = txr->tx_avail;
	while (first != hdr) {
		struct ix_tx_buf *txbuf = &txr->tx_buf[first];

		KKASSERT(avail < txr->tx_ndesc);
		++avail;

		if (txbuf->m_head != NULL)
			ix_free_txbuf(txr, txbuf);
		if (++first == txr->tx_ndesc)
			first = 0;
	}
	txr->tx_next_clean = first;
	txr->tx_avail = avail;
	txr->tx_running = IX_TX_RUNNING;
}

2357 ix_create_rx_ring(struct ix_rx_ring *rxr)
2359 int i, rsize, error, nrxd;
2362 * Validate number of receive descriptors. It must not exceed
2363 * hardware maximum, and must be multiple of IX_DBA_ALIGN.
2365 nrxd = device_getenv_int(rxr->rx_sc->dev, "rxd", ix_rxd);
2366 if (((nrxd * sizeof(union ixgbe_adv_rx_desc)) % IX_DBA_ALIGN) != 0 ||
2367 nrxd < IX_MIN_RXD || nrxd > IX_MAX_RXD) {
2368 device_printf(rxr->rx_sc->dev,
2369 "Using %d RX descriptors instead of %d!\n",
2371 rxr->rx_ndesc = IX_DEF_RXD;
2373 rxr->rx_ndesc = nrxd;
2377 * Allocate RX descriptor ring
2379 rsize = roundup2(rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc),
2381 rxr->rx_base = bus_dmamem_coherent_any(rxr->rx_sc->parent_tag,
2382 IX_DBA_ALIGN, rsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
2383 &rxr->rx_base_dtag, &rxr->rx_base_map, &rxr->rx_base_paddr);
2384 if (rxr->rx_base == NULL) {
2385 device_printf(rxr->rx_sc->dev,
2386 "Unable to allocate TX Descriptor memory\n");
2390 rsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_rx_buf) * rxr->rx_ndesc);
2391 rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO);
2394 * Create DMA tag for RX buffers
2396 error = bus_dma_tag_create(rxr->rx_sc->parent_tag,
2397 1, 0, /* alignment, bounds */
2398 BUS_SPACE_MAXADDR, /* lowaddr */
2399 BUS_SPACE_MAXADDR, /* highaddr */
2400 NULL, NULL, /* filter, filterarg */
2401 PAGE_SIZE, /* maxsize */
2403 PAGE_SIZE, /* maxsegsize */
2404 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
2407 device_printf(rxr->rx_sc->dev,
2408 "Unable to create RX DMA tag\n");
2409 kfree(rxr->rx_buf, M_DEVBUF);
2415 * Create spare DMA map for RX buffers
2417 error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
2420 device_printf(rxr->rx_sc->dev,
2421 "Unable to create spare RX DMA map\n");
2422 bus_dma_tag_destroy(rxr->rx_tag);
2423 kfree(rxr->rx_buf, M_DEVBUF);
2429 * Create DMA maps for RX buffers
2431 for (i = 0; i < rxr->rx_ndesc; ++i) {
2432 struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2434 error = bus_dmamap_create(rxr->rx_tag,
2435 BUS_DMA_WAITOK, &rxbuf->map);
2437 device_printf(rxr->rx_sc->dev,
2438 "Unable to create RX dma map\n");
2439 ix_destroy_rx_ring(rxr, i);
2445	 * Initialize various watermarks
2447 rxr->rx_wreg_nsegs = IX_DEF_RXWREG_NSEGS;
2453 ix_destroy_rx_ring(struct ix_rx_ring *rxr, int ndesc)
2457 if (rxr->rx_base != NULL) {
2458 bus_dmamap_unload(rxr->rx_base_dtag, rxr->rx_base_map);
2459 bus_dmamem_free(rxr->rx_base_dtag, rxr->rx_base,
2461 bus_dma_tag_destroy(rxr->rx_base_dtag);
2462 rxr->rx_base = NULL;
2465 if (rxr->rx_buf == NULL)
2468 for (i = 0; i < ndesc; ++i) {
2469 struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2471 KKASSERT(rxbuf->m_head == NULL);
2472 bus_dmamap_destroy(rxr->rx_tag, rxbuf->map);
2474 bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap);
2475 bus_dma_tag_destroy(rxr->rx_tag);
2477 kfree(rxr->rx_buf, M_DEVBUF);
2482 ** Used to detect a descriptor that has
2483 ** been merged by Hardware RSC.
2485 static __inline uint32_t
2486 ix_rsc_count(union ixgbe_adv_rx_desc *rx)
2488 return (le32toh(rx->wb.lower.lo_dword.data) &
2489 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
2493 /*********************************************************************
2495 * Initialize the hardware RSC (LRO) feature on 82599 for an RX ring.
2496 * It is toggled by the LRO capability even though it is transparent
2497 * to the stack.
2499 * NOTE: Since this hardware feature only works with IPv4 and our
2500 * testing has shown software LRO to be as effective, it is disabled
2501 * by default.
2503 **********************************************************************/
2505 ix_setup_hw_rsc(struct ix_rx_ring *rxr)
2507 struct ix_softc *sc = rxr->rx_sc;
2508 struct ixgbe_hw *hw = &sc->hw;
2509 uint32_t rscctrl, rdrxctl;
2512 /* If turning LRO/RSC off we need to disable it */
2513 if ((sc->arpcom.ac_if.if_capenable & IFCAP_LRO) == 0) {
2514 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2515 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
2520 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2521 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2522 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2523 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
2524 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2526 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2527 rscctrl |= IXGBE_RSCCTL_RSCEN;
2529	** Limit the total number of descriptors that
2530	** can be combined, so the merged size does not exceed 64KB.
2532 if (rxr->mbuf_sz == MCLBYTES)
2533 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
2534 else if (rxr->mbuf_sz == MJUMPAGESIZE)
2535 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
2536 else if (rxr->mbuf_sz == MJUM9BYTES)
2537 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
2538 else /* Using 16K cluster */
2539 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
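	/*
	 * For example, assuming the usual cluster sizes: 2KB clusters
	 * with MAXDESC_16 cap a merged frame at 16 * 2KB = 32KB, and
	 * 9KB clusters with MAXDESC_4 at 4 * 9KB = 36KB, both safely
	 * under the 64KB ceiling mentioned above.
	 */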
2541 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
2543 /* Enable TCP header recognition */
2544 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
2545 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
2546 IXGBE_PSRTYPE_TCPHDR));
2548 /* Disable RSC for ACK packets */
2549 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
2550 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
2557 ix_init_rx_ring(struct ix_rx_ring *rxr)
2561 /* Clear the ring contents */
2562 bzero(rxr->rx_base, rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2564 /* XXX we need JUMPAGESIZE for RSC too */
2565 if (rxr->rx_sc->max_frame_size <= MCLBYTES)
2566 rxr->rx_mbuf_sz = MCLBYTES;
2568 rxr->rx_mbuf_sz = MJUMPAGESIZE;
2570 /* Now replenish the mbufs */
2571 for (i = 0; i < rxr->rx_ndesc; ++i) {
2574 error = ix_newbuf(rxr, i, TRUE);
2579 /* Setup our descriptor indices */
2580 rxr->rx_next_check = 0;
2581 rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2585 ** Now set up the LRO interface:
2587 if (ixgbe_rsc_enable)
2588 ix_setup_hw_rsc(rxr);
2594 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
2596 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
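/*
 * The SRRCTL packet buffer size field is expressed in units of
 * (1 << IXGBE_SRRCTL_BSIZEPKT_SHIFT) bytes; BSIZEPKT_ROUNDUP rounds a
 * cluster size up to the next unit.  E.g. assuming the usual shift of
 * 10 (1KB units), a 2048-byte cluster becomes (2048 + 1023) >> 10 = 2.
 */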
2599 ix_init_rx_unit(struct ix_softc *sc, boolean_t polling)
2601 struct ixgbe_hw *hw = &sc->hw;
2602 struct ifnet *ifp = &sc->arpcom.ac_if;
2603 uint32_t bufsz, fctrl, rxcsum, hlreg;
2607	 * Make sure receives are disabled while setting up the descriptor rings
2609 ixgbe_disable_rx(hw);
2611 /* Enable broadcasts */
2612 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2613 fctrl |= IXGBE_FCTRL_BAM;
2614 if (hw->mac.type == ixgbe_mac_82598EB) {
2615 fctrl |= IXGBE_FCTRL_DPF;
2616 fctrl |= IXGBE_FCTRL_PMCF;
2618 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2620 /* Set for Jumbo Frames? */
2621 hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2622 if (ifp->if_mtu > ETHERMTU)
2623 hlreg |= IXGBE_HLREG0_JUMBOEN;
2625 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
2626 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2628 KKASSERT(sc->rx_rings[0].rx_mbuf_sz >= MCLBYTES);
2629 bufsz = (sc->rx_rings[0].rx_mbuf_sz + BSIZEPKT_ROUNDUP) >>
2630 IXGBE_SRRCTL_BSIZEPKT_SHIFT;
2632 for (i = 0; i < sc->rx_ring_inuse; ++i) {
2633 struct ix_rx_ring *rxr = &sc->rx_rings[i];
2634 uint64_t rdba = rxr->rx_base_paddr;
2637 /* Setup the Base and Length of the Rx Descriptor Ring */
2638 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (uint32_t)rdba);
2639 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (uint32_t)(rdba >> 32));
2640 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2641 rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2644 * Set up the SRRCTL register
2646 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
2648 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
2649 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
2651 srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2652 if (sc->rx_ring_inuse > 1) {
2653			/* See the comment near ix_enable_rx_drop() */
2655 (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
2656 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
2657 if (i == 0 && bootverbose) {
2658 if_printf(ifp, "flow control %s, "
2659 "disable RX drop\n",
2660 ix_ifmedia2str(sc->ifm_media));
2663 srrctl |= IXGBE_SRRCTL_DROP_EN;
2664 if (i == 0 && bootverbose) {
2665 if_printf(ifp, "flow control %s, "
2667 ix_ifmedia2str(sc->ifm_media));
2671 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2673 /* Setup the HW Rx Head and Tail Descriptor Pointers */
2674 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2675 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2678 if (sc->hw.mac.type != ixgbe_mac_82598EB)
2679 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 0);
2681 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2686 if (sc->rx_ring_inuse > 1) {
2687 uint8_t key[IX_NRSSRK * IX_RSSRK_SIZE];
2688 const struct if_ringmap *rm;
2689 int j, r, nreta, table_nent;
2693		 * When we reach here, RSS has already been disabled
2694		 * in ix_stop(), so we can safely configure the RSS key
2695		 * and redirect table.
2701 toeplitz_get_key(key, sizeof(key));
2702 for (i = 0; i < IX_NRSSRK; ++i) {
2705 rssrk = IX_RSSRK_VAL(key, i);
2706 IX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n",
2709 IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rssrk);
2713 * Configure RSS redirect table.
2716 /* Table size will differ based on MAC */
2717 switch (hw->mac.type) {
2718 case ixgbe_mac_X550:
2719 case ixgbe_mac_X550EM_x:
2720 case ixgbe_mac_X550EM_a:
2721 nreta = IX_NRETA_X550;
2728 table_nent = nreta * IX_RETA_SIZE;
2729 KASSERT(table_nent <= IX_RDRTABLE_SIZE,
2730 ("invalid RETA count %d", nreta));
2734 rm = sc->rx_rmap_intr;
2735 if_ringmap_rdrtable(rm, sc->rdr_table, table_nent);
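		/*
		 * Each 32-bit RETA register packs four one-byte ring
		 * indices, least significant byte first; e.g. consecutive
		 * rdr_table entries {1, 0, 3, 2} would be written out as
		 * 0x02030001.
		 */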
2738 for (j = 0; j < nreta; ++j) {
2741 for (i = 0; i < IX_RETA_SIZE; ++i) {
2744 q = sc->rdr_table[r];
2745 KASSERT(q < sc->rx_ring_inuse,
2746 ("invalid RX ring index %d", q));
2747 reta |= q << (8 * i);
2750 IX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2752 IXGBE_WRITE_REG(hw, IXGBE_RETA(j), reta);
2754 IXGBE_WRITE_REG(hw, IXGBE_ERETA(j - IX_NRETA),
2760 * Enable multiple receive queues.
2761 * Enable IPv4 RSS standard hash functions.
2763 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
2765 IXGBE_MRQC_RSS_FIELD_IPV4 |
2766 IXGBE_MRQC_RSS_FIELD_IPV4_TCP);
2770		 * PCSD must be enabled to enable multiple receive queues.
2773 rxcsum |= IXGBE_RXCSUM_PCSD;
2776 if (ifp->if_capenable & IFCAP_RXCSUM)
2777 rxcsum |= IXGBE_RXCSUM_PCSD;
2779 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2782 static __inline void
2783 ix_rx_refresh(struct ix_rx_ring *rxr, int i)
2786 i = rxr->rx_ndesc - 1;
2787 IXGBE_WRITE_REG(&rxr->rx_sc->hw, IXGBE_RDT(rxr->rx_idx), i);
2790 static __inline void
2791 ix_rxcsum(uint32_t staterr, struct mbuf *mp, uint32_t ptype)
2794 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_IPV4_EX)) == 0) {
2799 if ((staterr & (IXGBE_RXD_STAT_IPCS | IXGBE_RXDADV_ERR_IPE)) ==
2800 IXGBE_RXD_STAT_IPCS)
2801 mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
2804 (IXGBE_RXDADV_PKTTYPE_TCP | IXGBE_RXDADV_PKTTYPE_UDP)) == 0) {
2806 * - Neither TCP nor UDP
2812 if ((staterr & (IXGBE_RXD_STAT_L4CS | IXGBE_RXDADV_ERR_TCPE)) ==
2813 IXGBE_RXD_STAT_L4CS) {
2814 mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2815 CSUM_FRAG_NOT_CHECKED;
2816 mp->m_pkthdr.csum_data = htons(0xffff);
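		/*
		 * A csum_data of 0xffff together with CSUM_DATA_VALID |
		 * CSUM_PSEUDO_HDR tells the stack that the L4 checksum,
		 * pseudo header included, was verified in full, so it
		 * has nothing left to fold.
		 */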
2820 static __inline struct pktinfo *
2821 ix_rssinfo(struct mbuf *m, struct pktinfo *pi,
2822 uint32_t hash, uint32_t hashtype, uint32_t ptype)
2825 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2826 pi->pi_netisr = NETISR_IP;
2828 pi->pi_l3proto = IPPROTO_TCP;
2831 case IXGBE_RXDADV_RSSTYPE_IPV4:
2832 if ((ptype & IXGBE_RXDADV_PKTTYPE_UDP) == 0) {
2833			/* Not UDP, or an IP fragment */
2836 pi->pi_netisr = NETISR_IP;
2838 pi->pi_l3proto = IPPROTO_UDP;
2845 m_sethash(m, toeplitz_hash(hash));
2849 static __inline void
2850 ix_setup_rxdesc(union ixgbe_adv_rx_desc *rxd, const struct ix_rx_buf *rxbuf)
2852 rxd->read.pkt_addr = htole64(rxbuf->paddr);
2853 rxd->wb.upper.status_error = 0;
2857 ix_rx_discard(struct ix_rx_ring *rxr, int i, boolean_t eop)
2859 struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2862 * XXX discard may not be correct
2865 IFNET_STAT_INC(&rxr->rx_sc->arpcom.ac_if, ierrors, 1);
2866 rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2868 rxr->rx_flags |= IX_RXRING_FLAG_DISC;
2870 if (rxbuf->fmp != NULL) {
2871 m_freem(rxbuf->fmp);
2875 ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
2879 ix_rxeof(struct ix_rx_ring *rxr, int count)
2881 struct ifnet *ifp = &rxr->rx_sc->arpcom.ac_if;
2882 int i, nsegs = 0, cpuid = mycpuid;
2884 i = rxr->rx_next_check;
2885 while (count != 0) {
2886 struct ix_rx_buf *rxbuf, *nbuf = NULL;
2887 union ixgbe_adv_rx_desc *cur;
2888 struct mbuf *sendmp = NULL, *mp;
2889 struct pktinfo *pi = NULL, pi0;
2890 uint32_t rsc = 0, ptype, staterr, hash, hashtype;
2894 cur = &rxr->rx_base[i];
2895 staterr = le32toh(cur->wb.upper.status_error);
2897 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
2901 rxbuf = &rxr->rx_buf[i];
2904 len = le16toh(cur->wb.upper.length);
2905 ptype = le32toh(cur->wb.lower.lo_dword.data) &
2906 IXGBE_RXDADV_PKTTYPE_MASK;
2907 hash = le32toh(cur->wb.lower.hi_dword.rss);
2908 hashtype = le32toh(cur->wb.lower.lo_dword.data) &
2909 IXGBE_RXDADV_RSSTYPE_MASK;
2911 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
2916 * Make sure bad packets are discarded
2918 if ((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
2919 (rxr->rx_flags & IX_RXRING_FLAG_DISC)) {
2920 ix_rx_discard(rxr, i, eop);
2924 bus_dmamap_sync(rxr->rx_tag, rxbuf->map, BUS_DMASYNC_POSTREAD);
2925 if (ix_newbuf(rxr, i, FALSE) != 0) {
2926 ix_rx_discard(rxr, i, eop);
2931		 * On the 82599, which supports hardware LRO, merged packets
2932		 * need not be fragmented across sequential descriptors;
2933		 * instead the next descriptor is indicated in bits of the
2934		 * current descriptor.  This also means that we may process
2935		 * more than one packet at a time, something that has never
2936		 * been true before; it required eliminating the global chain
2937		 * pointers in favor of keeping the chain head in the buffer
2938		 * struct, as done here.
2944 * Figure out the next descriptor
2947 if (rxr->rx_flags & IX_RXRING_FLAG_LRO)
2948 rsc = ix_rsc_count(cur);
2949 if (rsc) { /* Get hardware index */
2951 IXGBE_RXDADV_NEXTP_MASK) >>
2952 IXGBE_RXDADV_NEXTP_SHIFT);
2953 } else { /* Just sequential */
2955 if (nextp == rxr->rx_ndesc)
2958 nbuf = &rxr->rx_buf[nextp];
2964 * Rather than using the fmp/lmp global pointers
2965 * we now keep the head of a packet chain in the
2966 * buffer struct and pass this along from one
2967 * descriptor to the next, until we get EOP.
2969 if (rxbuf->fmp == NULL) {
2970 mp->m_pkthdr.len = len;
2974 rxbuf->fmp->m_pkthdr.len += len;
2975 rxbuf->lmp->m_next = mp;
2981 * Not the last fragment of this frame,
2982 * pass this fragment list on
2984 nbuf->fmp = rxbuf->fmp;
2985 nbuf->lmp = rxbuf->lmp;
2990 sendmp = rxbuf->fmp;
2992 sendmp->m_pkthdr.rcvif = ifp;
2993 IFNET_STAT_INC(ifp, ipackets, 1);
2998 /* Process vlan info */
2999 if (staterr & IXGBE_RXD_STAT_VP) {
3000 sendmp->m_pkthdr.ether_vlantag =
3001 le16toh(cur->wb.upper.vlan);
3002 sendmp->m_flags |= M_VLANTAG;
3004 if (ifp->if_capenable & IFCAP_RXCSUM)
3005 ix_rxcsum(staterr, sendmp, ptype);
3006 if (ifp->if_capenable & IFCAP_RSS) {
3007 pi = ix_rssinfo(sendmp, &pi0,
3008 hash, hashtype, ptype);
3014 /* Advance our pointers to the next descriptor. */
3015 if (++i == rxr->rx_ndesc)
3019 ifp->if_input(ifp, sendmp, pi, cpuid);
3021 if (nsegs >= rxr->rx_wreg_nsegs) {
3022 ix_rx_refresh(rxr, i);
3026 rxr->rx_next_check = i;
3029 ix_rx_refresh(rxr, i);
3033 ix_set_vlan(struct ix_softc *sc)
3035 struct ixgbe_hw *hw = &sc->hw;
3038 if (hw->mac.type == ixgbe_mac_82598EB) {
3039 ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3040 ctrl |= IXGBE_VLNCTRL_VME;
3041 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
3046 * On 82599 and later chips the VLAN enable is
3047 * per queue in RXDCTL
3049 for (i = 0; i < sc->rx_ring_inuse; ++i) {
3050 ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
3051 ctrl |= IXGBE_RXDCTL_VME;
3052 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
3058 ix_enable_intr(struct ix_softc *sc)
3060 struct ixgbe_hw *hw = &sc->hw;
3064 for (i = 0; i < sc->intr_cnt; ++i)
3065 lwkt_serialize_handler_enable(sc->intr_data[i].intr_serialize);
3067 sc->intr_mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
3069 /* Enable Fan Failure detection */
3070 if (hw->device_id == IXGBE_DEV_ID_82598AT)
3071 sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
3073 switch (hw->mac.type) {
3074 case ixgbe_mac_82599EB:
3075 sc->intr_mask |= IXGBE_EIMS_ECC;
3076 /* Temperature sensor on some adapters */
3077 sc->intr_mask |= IXGBE_EIMS_GPI_SDP0;
3078 /* SFP+ (RX_LOS_N & MOD_ABS_N) */
3079 sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
3080 sc->intr_mask |= IXGBE_EIMS_GPI_SDP2;
3083 case ixgbe_mac_X540:
3084 sc->intr_mask |= IXGBE_EIMS_ECC;
3085 /* Detect if Thermal Sensor is enabled */
3086 fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
3087 if (fwsm & IXGBE_FWSM_TS_ENABLED)
3088 sc->intr_mask |= IXGBE_EIMS_TS;
3091 case ixgbe_mac_X550:
3092 case ixgbe_mac_X550EM_a:
3093 case ixgbe_mac_X550EM_x:
3094 sc->intr_mask |= IXGBE_EIMS_ECC;
3095 /* MAC thermal sensor is automatically enabled */
3096 sc->intr_mask |= IXGBE_EIMS_TS;
3097 /* Some devices use SDP0 for important information */
3098 if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
3099 hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
3100 sc->intr_mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw);
3106 /* With MSI-X we use auto clear for RX and TX rings */
3107 if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3109 * There are no EIAC1/EIAC2 for newer chips; the related
3110 * bits for TX and RX rings > 16 are always auto clear.
3112 * XXX which bits? There are _no_ documented EICR1 and
3113 * EICR2 at all; only EICR.
3115 IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE);
3117 sc->intr_mask |= IX_TX_INTR_MASK | IX_RX0_INTR_MASK;
3119 KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3120 if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
3121 sc->intr_mask |= IX_RX1_INTR_MASK;
3124 IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
3127 * Enable RX and TX rings for MSI-X
3129 if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3130 for (i = 0; i < sc->tx_ring_inuse; ++i) {
3131 const struct ix_tx_ring *txr = &sc->tx_rings[i];
3133 if (txr->tx_intr_vec >= 0) {
3134 IXGBE_WRITE_REG(hw, txr->tx_eims,
3138 for (i = 0; i < sc->rx_ring_inuse; ++i) {
3139 const struct ix_rx_ring *rxr = &sc->rx_rings[i];
3141 KKASSERT(rxr->rx_intr_vec >= 0);
3142 IXGBE_WRITE_REG(hw, rxr->rx_eims, rxr->rx_eims_val);
3146 IXGBE_WRITE_FLUSH(hw);
3150 ix_disable_intr(struct ix_softc *sc)
3154 if (sc->intr_type == PCI_INTR_TYPE_MSIX)
3155 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0);
3157 if (sc->hw.mac.type == ixgbe_mac_82598EB) {
3158 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0);
3160 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000);
3161 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0);
3162 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0);
3164 IXGBE_WRITE_FLUSH(&sc->hw);
3166 for (i = 0; i < sc->intr_cnt; ++i)
3167 lwkt_serialize_handler_disable(sc->intr_data[i].intr_serialize);
3171 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, uint32_t reg)
3173 return pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
3178 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, uint32_t reg, uint16_t value)
3180 pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
3185 ix_slot_info(struct ix_softc *sc)
3187 struct ixgbe_hw *hw = &sc->hw;
3188 device_t dev = sc->dev;
3189 struct ixgbe_mac_info *mac = &hw->mac;
3193 /* For most devices simply call the shared code routine */
3194 if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
3195 ixgbe_get_bus_info(hw);
3196 /* These devices don't use PCI-E */
3197 if (hw->mac.type == ixgbe_mac_X550EM_x ||
3198 hw->mac.type == ixgbe_mac_X550EM_a)
3204	 * For the quad-port adapter we need to parse back up the
3205	 * PCI tree to find the speed of the expansion slot into
3206	 * which this adapter is plugged, which takes a bit more work.
3208 dev = device_get_parent(device_get_parent(dev));
3210 device_printf(dev, "parent pcib = %x,%x,%x\n",
3211 pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
3213 dev = device_get_parent(device_get_parent(dev));
3215 device_printf(dev, "slot pcib = %x,%x,%x\n",
3216 pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
3218 /* Now get the PCI Express Capabilities offset */
3219 offset = pci_get_pciecap_ptr(dev);
3220 /* ...and read the Link Status Register */
3221 link = pci_read_config(dev, offset + PCIER_LINKSTAT, 2);
3222 switch (link & IXGBE_PCI_LINK_WIDTH) {
3223 case IXGBE_PCI_LINK_WIDTH_1:
3224 hw->bus.width = ixgbe_bus_width_pcie_x1;
3226 case IXGBE_PCI_LINK_WIDTH_2:
3227 hw->bus.width = ixgbe_bus_width_pcie_x2;
3229 case IXGBE_PCI_LINK_WIDTH_4:
3230 hw->bus.width = ixgbe_bus_width_pcie_x4;
3232 case IXGBE_PCI_LINK_WIDTH_8:
3233 hw->bus.width = ixgbe_bus_width_pcie_x8;
3236 hw->bus.width = ixgbe_bus_width_unknown;
3240 switch (link & IXGBE_PCI_LINK_SPEED) {
3241 case IXGBE_PCI_LINK_SPEED_2500:
3242 hw->bus.speed = ixgbe_bus_speed_2500;
3244 case IXGBE_PCI_LINK_SPEED_5000:
3245 hw->bus.speed = ixgbe_bus_speed_5000;
3247 case IXGBE_PCI_LINK_SPEED_8000:
3248 hw->bus.speed = ixgbe_bus_speed_8000;
3251 hw->bus.speed = ixgbe_bus_speed_unknown;
3255 mac->ops.set_lan_id(hw);
3258 device_printf(dev, "PCI Express Bus: Speed %s %s\n",
3259 hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" :
3260 hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
3261 hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown",
3262 hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
3263 hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
3264 hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : "Unknown");
3266 if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP &&
3267 hw->bus.width <= ixgbe_bus_width_pcie_x4 &&
3268 hw->bus.speed == ixgbe_bus_speed_2500) {
3269 device_printf(dev, "For optimal performance a x8 "
3270 "PCIE, or x4 PCIE Gen2 slot is required.\n");
3271 } else if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP &&
3272 hw->bus.width <= ixgbe_bus_width_pcie_x8 &&
3273 hw->bus.speed < ixgbe_bus_speed_8000) {
3274 device_printf(dev, "For optimal performance a x8 "
3275 "PCIE Gen3 slot is required.\n");
3280 * TODO: this comment is incorrect
3282 * Set up the correct IVAR register for a particular MSI-X interrupt:
3283 * - entry is the register array entry
3284 * - vector is the MSI-X vector for this queue
3285 * - type is RX/TX/MISC
3288 ix_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector,
3291 struct ixgbe_hw *hw = &sc->hw;
3292 uint32_t ivar, index;
3294 vector |= IXGBE_IVAR_ALLOC_VAL;
3296 switch (hw->mac.type) {
3297 case ixgbe_mac_82598EB:
3299 entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3301 entry += (type * 64);
3302 index = (entry >> 2) & 0x1F;
3303 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3304 ivar &= ~(0xFF << (8 * (entry & 0x3)));
3305 ivar |= (vector << (8 * (entry & 0x3)));
3306 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3309 case ixgbe_mac_82599EB:
3310 case ixgbe_mac_X540:
3311 case ixgbe_mac_X550:
3312 case ixgbe_mac_X550EM_a:
3313 case ixgbe_mac_X550EM_x:
3314 if (type == -1) { /* MISC IVAR */
3315 index = (entry & 1) * 8;
3316 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3317 ivar &= ~(0xFF << index);
3318 ivar |= (vector << index);
3319 IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3320 } else { /* RX/TX IVARS */
3321 index = (16 * (entry & 1)) + (8 * type);
3322 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3323 ivar &= ~(0xFF << index);
3324 ivar |= (vector << index);
3325 IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
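			/*
			 * E.g. for RX queue entry 5 (type 0):
			 * index = 16 * (5 & 1) + 8 * 0 = 16, so the vector
			 * byte lands in bits 23:16 of IVAR(5 >> 1) = IVAR(2);
			 * each IVAR register thus covers the RX and TX causes
			 * of two queues, one byte per cause.
			 */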
3334 ix_sfp_probe(struct ix_softc *sc)
3336 struct ixgbe_hw *hw = &sc->hw;
3338 if (hw->phy.type == ixgbe_phy_nl &&
3339 hw->phy.sfp_type == ixgbe_sfp_type_not_present) {
3342 ret = hw->phy.ops.identify_sfp(hw);
3346 ret = hw->phy.ops.reset(hw);
3347 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3348 if_printf(&sc->arpcom.ac_if,
3349 "Unsupported SFP+ module detected! "
3350 "Reload driver with supported module.\n");
3351 sc->sfp_probe = FALSE;
3354 if_printf(&sc->arpcom.ac_if, "SFP+ module detected!\n");
3356 /* We now have supported optics */
3357 sc->sfp_probe = FALSE;
3365 ix_handle_link(struct ix_softc *sc)
3367 ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
3368 ix_update_link_status(sc);
3372 * Handling SFP module
3375 ix_handle_mod(struct ix_softc *sc)
3377 struct ixgbe_hw *hw = &sc->hw;
3380 err = hw->phy.ops.identify_sfp(hw);
3381 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3382 if_printf(&sc->arpcom.ac_if,
3383 "Unsupported SFP+ module type was detected.\n");
3386 err = hw->mac.ops.setup_sfp(hw);
3387 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3388 if_printf(&sc->arpcom.ac_if,
3389 "Setup failure - unsupported SFP+ module type.\n");
3396 * Handling MSF (multispeed fiber)
3399 ix_handle_msf(struct ix_softc *sc)
3401 struct ixgbe_hw *hw = &sc->hw;
3404 hw->phy.ops.identify_sfp(hw);
3407 if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
3408 autoneg = sc->advspeed;
3410 autoneg = hw->phy.autoneg_advertised;
3411 if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
3414 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
3416 if (hw->mac.ops.setup_link != NULL)
3417 hw->mac.ops.setup_link(hw, autoneg, TRUE);
3421 ix_handle_phy(struct ix_softc *sc)
3423 struct ixgbe_hw *hw = &sc->hw;
3426 error = hw->phy.ops.handle_lasi(hw);
3427 if (error == IXGBE_ERR_OVERTEMP) {
3428 if_printf(&sc->arpcom.ac_if,
3429 "CRITICAL: EXTERNAL PHY OVER TEMP!! "
3430 "PHY will downshift to lower power state!\n");
3432 if_printf(&sc->arpcom.ac_if,
3433 "Error handling LASI interrupt: %d\n", error);
3438 ix_update_stats(struct ix_softc *sc)
3440 struct ifnet *ifp = &sc->arpcom.ac_if;
3441 struct ixgbe_hw *hw = &sc->hw;
3442 uint32_t missed_rx = 0, bprc, lxon, lxoff, total;
3443 uint64_t total_missed_rx = 0;
3446 sc->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
3447 sc->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
3448 sc->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
3449 sc->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
3451 for (i = 0; i < 16; i++) {
3452 sc->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
3453 sc->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
3454 sc->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
3456 sc->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
3457 sc->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
3458 sc->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
3460 /* Hardware workaround, gprc counts missed packets */
3461 sc->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
3462 sc->stats.gprc -= missed_rx;
3464 if (hw->mac.type != ixgbe_mac_82598EB) {
3465 sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
3466 ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
3467 sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
3468 ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
3469 sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
3470 ((uint64_t)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
3471 sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
3472 sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
3474 sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
3475 sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
3476 /* 82598 only has a counter in the high register */
3477 sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
3478 sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
3479 sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
3483 * Workaround: mprc hardware is incorrectly counting
3484 * broadcasts, so for now we subtract those.
3486 bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
3487 sc->stats.bprc += bprc;
3488 sc->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
3489 if (hw->mac.type == ixgbe_mac_82598EB)
3490 sc->stats.mprc -= bprc;
3492 sc->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
3493 sc->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
3494 sc->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
3495 sc->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
3496 sc->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
3497 sc->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
3499 lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
3500 sc->stats.lxontxc += lxon;
3501 lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
3502 sc->stats.lxofftxc += lxoff;
3503 total = lxon + lxoff;
3505 sc->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
3506 sc->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
3507 sc->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
3508 sc->stats.gptc -= total;
3509 sc->stats.mptc -= total;
3510 sc->stats.ptc64 -= total;
3511 sc->stats.gotc -= total * ETHER_MIN_LEN;
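	/*
	 * Transmitted flow control (pause) frames are minimum-sized
	 * multicast frames that the MAC also counts as good traffic,
	 * so the lxon + lxoff total is backed out of gptc/mptc/ptc64
	 * and, at ETHER_MIN_LEN bytes apiece, out of gotc as well.
	 */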
3513 sc->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
3514 sc->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
3515 sc->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
3516 sc->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
3517 sc->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
3518 sc->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
3519 sc->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
3520 sc->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
3521 sc->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
3522 sc->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
3523 sc->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
3524 sc->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
3525 sc->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
3526 sc->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
3527 sc->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
3528 sc->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
3529 sc->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
3530 sc->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
3531	/* Only read FCoE registers on 82599 and newer */
3532 if (hw->mac.type != ixgbe_mac_82598EB) {
3533 sc->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
3534 sc->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
3535 sc->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
3536 sc->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
3537 sc->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
3541 IFNET_STAT_SET(ifp, iqdrops, total_missed_rx);
3542 IFNET_STAT_SET(ifp, ierrors, sc->stats.crcerrs + sc->stats.rlec);
3547 * Add sysctl variables, one per statistic, to the system.
3550 ix_add_hw_stats(struct ix_softc *sc)
3553 device_t dev = sc->dev;
3555 struct ix_tx_ring *txr = sc->tx_rings;
3556 struct ix_rx_ring *rxr = sc->rx_rings;
3558 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
3559 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
3560 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
3561 struct ixgbe_hw_stats *stats = &sc->stats;
3563 struct sysctl_oid *stat_node, *queue_node;
3564 struct sysctl_oid_list *stat_list, *queue_list;
3566 #define QUEUE_NAME_LEN 32
3567 char namebuf[QUEUE_NAME_LEN];
3569	/* MAC stats get their own sub node */
3571 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
3572 CTLFLAG_RD, NULL, "MAC Statistics");
3573 stat_list = SYSCTL_CHILDREN(stat_node);
3575 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
3576 CTLFLAG_RD, &stats->crcerrs,
3578 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
3579 CTLFLAG_RD, &stats->illerrc,
3580 "Illegal Byte Errors");
3581 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
3582 CTLFLAG_RD, &stats->errbc,
3584 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
3585 CTLFLAG_RD, &stats->mspdc,
3586 "MAC Short Packets Discarded");
3587 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
3588 CTLFLAG_RD, &stats->mlfc,
3589 "MAC Local Faults");
3590 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
3591 CTLFLAG_RD, &stats->mrfc,
3592 "MAC Remote Faults");
3593 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
3594 CTLFLAG_RD, &stats->rlec,
3595 "Receive Length Errors");
3597 /* Flow Control stats */
3598 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
3599 CTLFLAG_RD, &stats->lxontxc,
3600 "Link XON Transmitted");
3601 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
3602 CTLFLAG_RD, &stats->lxonrxc,
3603 "Link XON Received");
3604 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
3605 CTLFLAG_RD, &stats->lxofftxc,
3606 "Link XOFF Transmitted");
3607 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
3608 CTLFLAG_RD, &stats->lxoffrxc,
3609 "Link XOFF Received");
3611 /* Packet Reception Stats */
3612 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
3613 CTLFLAG_RD, &stats->tor,
3614 "Total Octets Received");
3615 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
3616 CTLFLAG_RD, &stats->gorc,
3617 "Good Octets Received");
3618 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
3619 CTLFLAG_RD, &stats->tpr,
3620 "Total Packets Received");
3621 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
3622 CTLFLAG_RD, &stats->gprc,
3623 "Good Packets Received");
3624 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
3625 CTLFLAG_RD, &stats->mprc,
3626 "Multicast Packets Received");
3627 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
3628 CTLFLAG_RD, &stats->bprc,
3629 "Broadcast Packets Received");
3630 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
3631 CTLFLAG_RD, &stats->prc64,
3632 "64 byte frames received ");
3633 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
3634 CTLFLAG_RD, &stats->prc127,
3635 "65-127 byte frames received");
3636 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
3637 CTLFLAG_RD, &stats->prc255,
3638 "128-255 byte frames received");
3639 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
3640 CTLFLAG_RD, &stats->prc511,
3641 "256-511 byte frames received");
3642 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
3643 CTLFLAG_RD, &stats->prc1023,
3644 "512-1023 byte frames received");
3645 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
3646 CTLFLAG_RD, &stats->prc1522,
3647 "1023-1522 byte frames received");
3648 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
3649 CTLFLAG_RD, &stats->ruc,
3650 "Receive Undersized");
3651 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
3652 CTLFLAG_RD, &stats->rfc,
3653 "Fragmented Packets Received ");
3654 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
3655 CTLFLAG_RD, &stats->roc,
3656 "Oversized Packets Received");
3657 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
3658 CTLFLAG_RD, &stats->rjc,
3660 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
3661 CTLFLAG_RD, &stats->mngprc,
3662 "Management Packets Received");
3663 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
3664 CTLFLAG_RD, &stats->mngptc,
3665 "Management Packets Dropped");
3666 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
3667 CTLFLAG_RD, &stats->xec,
3670 /* Packet Transmission Stats */
3671 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
3672 CTLFLAG_RD, &stats->gotc,
3673 "Good Octets Transmitted");
3674 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
3675 CTLFLAG_RD, &stats->tpt,
3676 "Total Packets Transmitted");
3677 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
3678 CTLFLAG_RD, &stats->gptc,
3679 "Good Packets Transmitted");
3680 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
3681 CTLFLAG_RD, &stats->bptc,
3682 "Broadcast Packets Transmitted");
3683 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
3684 CTLFLAG_RD, &stats->mptc,
3685 "Multicast Packets Transmitted");
3686 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
3687 CTLFLAG_RD, &stats->mngptc,
3688 "Management Packets Transmitted");
3689 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
3690 CTLFLAG_RD, &stats->ptc64,
3691 "64 byte frames transmitted ");
3692 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
3693 CTLFLAG_RD, &stats->ptc127,
3694 "65-127 byte frames transmitted");
3695 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
3696 CTLFLAG_RD, &stats->ptc255,
3697 "128-255 byte frames transmitted");
3698 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
3699 CTLFLAG_RD, &stats->ptc511,
3700 "256-511 byte frames transmitted");
3701 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
3702 CTLFLAG_RD, &stats->ptc1023,
3703 "512-1023 byte frames transmitted");
3704 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
3705 CTLFLAG_RD, &stats->ptc1522,
3706 "1024-1522 byte frames transmitted");
3711 * Enable the hardware to drop packets when the buffer is full.
3712 * This is useful when multiple RX rings are used, so that no
3713 * single RX ring being full stalls the entire RX engine. We
3714 * only enable this when multiple RX rings are used and when
3715 * flow control is disabled.
3718 ix_enable_rx_drop(struct ix_softc *sc)
3720 struct ixgbe_hw *hw = &sc->hw;
3724 if_printf(&sc->arpcom.ac_if,
3725 "flow control %s, enable RX drop\n",
3726 ix_fc2str(sc->hw.fc.current_mode));
3729 for (i = 0; i < sc->rx_ring_inuse; ++i) {
3730 uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3732 srrctl |= IXGBE_SRRCTL_DROP_EN;
3733 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3738 ix_disable_rx_drop(struct ix_softc *sc)
3740 struct ixgbe_hw *hw = &sc->hw;
3744 if_printf(&sc->arpcom.ac_if,
3745 "flow control %s, disable RX drop\n",
3746 ix_fc2str(sc->hw.fc.current_mode));
3749 for (i = 0; i < sc->rx_ring_inuse; ++i) {
3750 uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3752 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
3753 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3758 ix_setup_serialize(struct ix_softc *sc)
3762 /* Main + RX + TX */
3763 sc->nserialize = 1 + sc->rx_ring_cnt + sc->tx_ring_cnt;
3765 kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
3766 M_DEVBUF, M_WAITOK | M_ZERO);
3771 * NOTE: Order is critical
3774 KKASSERT(i < sc->nserialize);
3775 sc->serializes[i++] = &sc->main_serialize;
3777 for (j = 0; j < sc->rx_ring_cnt; ++j) {
3778 KKASSERT(i < sc->nserialize);
3779 sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
3782 for (j = 0; j < sc->tx_ring_cnt; ++j) {
3783 KKASSERT(i < sc->nserialize);
3784 sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
3787 KKASSERT(i == sc->nserialize);
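	/*
	 * E.g. with 4 RX rings and 2 TX rings the resulting array is
	 * { main, rx0..rx3, tx0, tx1 } and nserialize is 7; the
	 * serialize enter/exit helpers below rely on this ordering.
	 */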
3791 ix_alloc_intr(struct ix_softc *sc)
3793 struct ix_intr_data *intr;
3794 struct ix_tx_ring *txr;
3799 if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3800 ix_set_ring_inuse(sc, FALSE);
3805 * Reset some settings changed by ix_alloc_msix().
3807 if (sc->rx_rmap_intr != NULL) {
3808 if_ringmap_free(sc->rx_rmap_intr);
3809 sc->rx_rmap_intr = NULL;
3811 if (sc->tx_rmap_intr != NULL) {
3812 if_ringmap_free(sc->tx_rmap_intr);
3813 sc->tx_rmap_intr = NULL;
3815 if (sc->intr_data != NULL) {
3816 kfree(sc->intr_data, M_DEVBUF);
3817 sc->intr_data = NULL;
3819 for (i = 0; i < sc->tx_ring_cnt; ++i) {
3820 txr = &sc->tx_rings[i];
3821 txr->tx_intr_vec = -1;
3822 txr->tx_intr_cpuid = -1;
3824 for (i = 0; i < sc->rx_ring_cnt; ++i) {
3825 struct ix_rx_ring *rxr = &sc->rx_rings[i];
3827 rxr->rx_intr_vec = -1;
3832 sc->intr_data = kmalloc(sizeof(struct ix_intr_data), M_DEVBUF,
3834 intr = &sc->intr_data[0];
3837 * Allocate MSI/legacy interrupt resource
3839 sc->intr_type = pci_alloc_1intr(sc->dev, ix_msi_enable,
3840 &intr->intr_rid, &intr_flags);
3842 intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
3843 &intr->intr_rid, intr_flags);
3844 if (intr->intr_res == NULL) {
3845 device_printf(sc->dev, "Unable to allocate bus resource: "
3850 intr->intr_serialize = &sc->main_serialize;
3851 intr->intr_cpuid = rman_get_cpuid(intr->intr_res);
3852 intr->intr_func = ix_intr;
3853 intr->intr_funcarg = sc;
3854 intr->intr_rate = IX_INTR_RATE;
3855 intr->intr_use = IX_INTR_USE_RXTX;
3857 sc->tx_rings[0].tx_intr_vec = IX_TX_INTR_VEC;
3858 sc->tx_rings[0].tx_intr_cpuid = intr->intr_cpuid;
3860 sc->rx_rings[0].rx_intr_vec = IX_RX0_INTR_VEC;
3862 ix_set_ring_inuse(sc, FALSE);
3864 KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3865 if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS) {
3866 sc->rx_rings[1].rx_intr_vec = IX_RX1_INTR_VEC;
3869 * Allocate RX ring map for RSS setup.
3871 sc->rx_rmap_intr = if_ringmap_alloc(sc->dev,
3872 IX_MIN_RXRING_RSS, IX_MIN_RXRING_RSS);
3873 KASSERT(if_ringmap_count(sc->rx_rmap_intr) ==
3874 sc->rx_ring_inuse, ("RX ring inuse mismatch"));
3877 for (i = 0; i < sc->tx_ring_cnt; ++i) {
3878 txr = &sc->tx_rings[i];
3879 if (txr->tx_intr_cpuid < 0)
3880 txr->tx_intr_cpuid = 0;
3886 ix_free_intr(struct ix_softc *sc)
3888 if (sc->intr_data == NULL)
3891 if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
3892 struct ix_intr_data *intr = &sc->intr_data[0];
3894 KKASSERT(sc->intr_cnt == 1);
3895 if (intr->intr_res != NULL) {
3896 bus_release_resource(sc->dev, SYS_RES_IRQ,
3897 intr->intr_rid, intr->intr_res);
3899 if (sc->intr_type == PCI_INTR_TYPE_MSI)
3900 pci_release_msi(sc->dev);
3902 kfree(sc->intr_data, M_DEVBUF);
3904 ix_free_msix(sc, TRUE);
3909 ix_set_ring_inuse(struct ix_softc *sc, boolean_t polling)
3911 sc->rx_ring_inuse = ix_get_rxring_inuse(sc, polling);
3912 sc->tx_ring_inuse = ix_get_txring_inuse(sc, polling);
3914 if_printf(&sc->arpcom.ac_if,
3915 "RX rings %d/%d, TX rings %d/%d\n",
3916 sc->rx_ring_inuse, sc->rx_ring_cnt,
3917 sc->tx_ring_inuse, sc->tx_ring_cnt);
3922 ix_get_rxring_inuse(const struct ix_softc *sc, boolean_t polling)
3924 if (!IX_ENABLE_HWRSS(sc))
3928 return sc->rx_ring_cnt;
3929 else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3930 return IX_MIN_RXRING_RSS;
3932 return sc->rx_ring_msix;
3936 ix_get_txring_inuse(const struct ix_softc *sc, boolean_t polling)
3938 if (!IX_ENABLE_HWTSS(sc))
3942 return sc->tx_ring_cnt;
3943 else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3946 return sc->tx_ring_msix;
3950 ix_setup_intr(struct ix_softc *sc)
3954 for (i = 0; i < sc->intr_cnt; ++i) {
3955 struct ix_intr_data *intr = &sc->intr_data[i];
3958 error = bus_setup_intr_descr(sc->dev, intr->intr_res,
3959 INTR_MPSAFE, intr->intr_func, intr->intr_funcarg,
3960 &intr->intr_hand, intr->intr_serialize, intr->intr_desc);
3962 device_printf(sc->dev, "can't setup %dth intr\n", i);
3963 ix_teardown_intr(sc, i);
3971 ix_teardown_intr(struct ix_softc *sc, int intr_cnt)
3975 if (sc->intr_data == NULL)
3978 for (i = 0; i < intr_cnt; ++i) {
3979 struct ix_intr_data *intr = &sc->intr_data[i];
3981 bus_teardown_intr(sc->dev, intr->intr_res, intr->intr_hand);
3986 ix_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
3988 struct ix_softc *sc = ifp->if_softc;
3990 ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
3994 ix_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3996 struct ix_softc *sc = ifp->if_softc;
3998 ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
4002 ix_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4004 struct ix_softc *sc = ifp->if_softc;
4006 return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
4012 ix_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
4013 boolean_t serialized)
4015 struct ix_softc *sc = ifp->if_softc;
4017 ifnet_serialize_array_assert(sc->serializes, sc->nserialize, slz,
4021 #endif /* INVARIANTS */
4024 ix_free_rings(struct ix_softc *sc)
4028 if (sc->tx_rings != NULL) {
4029 for (i = 0; i < sc->tx_ring_cnt; ++i) {
4030 struct ix_tx_ring *txr = &sc->tx_rings[i];
4032 ix_destroy_tx_ring(txr, txr->tx_ndesc);
4034 kfree(sc->tx_rings, M_DEVBUF);
4037 if (sc->rx_rings != NULL) {
4038 for (i = 0; i < sc->rx_ring_cnt; ++i) {
4039 struct ix_rx_ring *rxr = &sc->rx_rings[i];
4041 ix_destroy_rx_ring(rxr, rxr->rx_ndesc);
4043 kfree(sc->rx_rings, M_DEVBUF);
4046 if (sc->parent_tag != NULL)
4047 bus_dma_tag_destroy(sc->parent_tag);
4051 ix_watchdog_reset(struct ix_softc *sc)
4055 ASSERT_IFNET_SERIALIZED_ALL(&sc->arpcom.ac_if);
4057 for (i = 0; i < sc->tx_ring_inuse; ++i)
4058 ifsq_devstart_sched(sc->tx_rings[i].tx_ifsq);
4062 ix_sync_netisr(struct ix_softc *sc, int flags)
4064 struct ifnet *ifp = &sc->arpcom.ac_if;
4066 ifnet_serialize_all(ifp);
4067 if (ifp->if_flags & IFF_RUNNING) {
4068 ifp->if_flags &= ~(IFF_RUNNING | flags);
4070 ifnet_deserialize_all(ifp);
4073 ifnet_deserialize_all(ifp);
4075	/* Make sure that polling has stopped. */
4076 netmsg_service_sync();
4080 ix_watchdog_task(void *xsc, int pending __unused)
4082 struct ix_softc *sc = xsc;
4083 struct ifnet *ifp = &sc->arpcom.ac_if;
4085 ix_sync_netisr(sc, 0);
4087 ifnet_serialize_all(ifp);
4088 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == IFF_UP)
4089 ix_watchdog_reset(sc);
4090 ifnet_deserialize_all(ifp);
4094 ix_watchdog(struct ifaltq_subque *ifsq)
4096 struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
4097 struct ifnet *ifp = ifsq_get_ifp(ifsq);
4098 struct ix_softc *sc = ifp->if_softc;
4100 KKASSERT(txr->tx_ifsq == ifsq);
4101 ASSERT_IFNET_SERIALIZED_ALL(ifp);
4104 * If the interface has been paused then don't do the watchdog check
4106 if (IXGBE_READ_REG(&sc->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) {
4107 txr->tx_watchdog.wd_timer = 5;
4111 if_printf(ifp, "Watchdog timeout -- resetting\n");
4112 if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->tx_idx,
4113 IXGBE_READ_REG(&sc->hw, IXGBE_TDH(txr->tx_idx)),
4114 IXGBE_READ_REG(&sc->hw, IXGBE_TDT(txr->tx_idx)));
4115 if_printf(ifp, "TX(%d) desc avail = %d, next TX to Clean = %d\n",
4116 txr->tx_idx, txr->tx_avail, txr->tx_next_clean);
4118 if ((ifp->if_flags & (IFF_IDIRECT | IFF_NPOLLING | IFF_RUNNING)) ==
4119 (IFF_IDIRECT | IFF_NPOLLING | IFF_RUNNING))
4120 taskqueue_enqueue(taskqueue_thread[0], &sc->wdog_task);
4122 ix_watchdog_reset(sc);
4126 ix_free_tx_ring(struct ix_tx_ring *txr)
4130 for (i = 0; i < txr->tx_ndesc; ++i) {
4131 struct ix_tx_buf *txbuf = &txr->tx_buf[i];
4133 if (txbuf->m_head != NULL)
4134 ix_free_txbuf(txr, txbuf);
4139 ix_free_rx_ring(struct ix_rx_ring *rxr)
4143 for (i = 0; i < rxr->rx_ndesc; ++i) {
4144 struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
4146 if (rxbuf->fmp != NULL) {
4147 m_freem(rxbuf->fmp);
4151 KKASSERT(rxbuf->lmp == NULL);
4153 if (rxbuf->m_head != NULL) {
4154 bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
4155 m_freem(rxbuf->m_head);
4156 rxbuf->m_head = NULL;
4162 ix_newbuf(struct ix_rx_ring *rxr, int i, boolean_t wait)
4165 bus_dma_segment_t seg;
4167 struct ix_rx_buf *rxbuf;
4168 int flags, error, nseg;
4171 if (__predict_false(wait))
4174 m = m_getjcl(flags, MT_DATA, M_PKTHDR, rxr->rx_mbuf_sz);
4177 if_printf(&rxr->rx_sc->arpcom.ac_if,
4178 "Unable to allocate RX mbuf\n");
4182 m->m_len = m->m_pkthdr.len = rxr->rx_mbuf_sz;
4184 error = bus_dmamap_load_mbuf_segment(rxr->rx_tag,
4185 rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT);
4189 if_printf(&rxr->rx_sc->arpcom.ac_if,
4190 "Unable to load RX mbuf\n");
4195 rxbuf = &rxr->rx_buf[i];
4196 if (rxbuf->m_head != NULL)
4197 bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
4200 rxbuf->map = rxr->rx_sparemap;
4201 rxr->rx_sparemap = map;
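	/*
	 * The just-loaded spare map becomes this buffer's map and the
	 * old map becomes the new spare: DMA maps are recycled rather
	 * than created and destroyed per packet, and a failed load
	 * above leaves the previous mbuf and mapping intact.
	 */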
4204 rxbuf->paddr = seg.ds_addr;
4206 ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4211 ix_add_sysctl(struct ix_softc *sc)
4213 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
4214 struct sysctl_oid *tree = device_get_sysctl_tree(sc->dev);
4218 SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4219 OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
4220 SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4221 OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
4222 "# of RX rings used");
4223 SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4224 OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
4225 SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4226 OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
4227 "# of TX rings used");
4228 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4229 OID_AUTO, "rxd", CTLTYPE_INT | CTLFLAG_RD,
4230 sc, 0, ix_sysctl_rxd, "I",
4232 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4233 OID_AUTO, "txd", CTLTYPE_INT | CTLFLAG_RD,
4234 sc, 0, ix_sysctl_txd, "I",
4236 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4237 OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4238 sc, 0, ix_sysctl_tx_wreg_nsegs, "I",
4239 "# of segments sent before write to hardware register");
4240 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4241 OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4242 sc, 0, ix_sysctl_rx_wreg_nsegs, "I",
4243 "# of received segments sent before write to hardware register");
4244 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4245 OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4246 sc, 0, ix_sysctl_tx_intr_nsegs, "I",
4247 "# of segments per TX interrupt");
4248 SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4249 OID_AUTO, "direct_input", CTLFLAG_RW, &sc->direct_input, 0,
4250 "Enable direct input");
4251 if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
4252 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4253 OID_AUTO, "tx_msix_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4254 sc->tx_rmap_intr, 0, if_ringmap_cpumap_sysctl, "I",
4255 "TX MSI-X CPU map");
4256 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4257 OID_AUTO, "rx_msix_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4258 sc->rx_rmap_intr, 0, if_ringmap_cpumap_sysctl, "I",
4259 "RX MSI-X CPU map");
4261 #ifdef IFPOLL_ENABLE
4262 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4263 OID_AUTO, "tx_poll_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4264 sc->tx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
4265 "TX polling CPU map");
4266 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4267 OID_AUTO, "rx_poll_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4268 sc->rx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
4269 "RX polling CPU map");
4272 #define IX_ADD_INTR_RATE_SYSCTL(sc, use, name) \
4274 ix_add_intr_rate_sysctl(sc, IX_INTR_USE_##use, #name, \
4275 ix_sysctl_##name, #use " interrupt rate"); \
4278 IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate);
4279 IX_ADD_INTR_RATE_SYSCTL(sc, RX, rx_intr_rate);
4280 IX_ADD_INTR_RATE_SYSCTL(sc, TX, tx_intr_rate);
4281 IX_ADD_INTR_RATE_SYSCTL(sc, STATUS, sts_intr_rate);
4283 #undef IX_ADD_INTR_RATE_SYSCTL
4286 SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4287 OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
4289 for (i = 0; i < sc->rx_ring_cnt; ++i) {
4290 ksnprintf(node, sizeof(node), "rx%d_pkt", i);
4291 SYSCTL_ADD_ULONG(ctx,
4292 SYSCTL_CHILDREN(tree), OID_AUTO, node,
4293 CTLFLAG_RW, &sc->rx_rings[i].rx_pkts, "RXed packets");
4296 for (i = 0; i < sc->tx_ring_cnt; ++i) {
4297 struct ix_tx_ring *txr = &sc->tx_rings[i];
4299 ksnprintf(node, sizeof(node), "tx%d_nmbuf", i);
4300 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, node,
4301 CTLTYPE_INT | CTLFLAG_RD, txr, 0, ix_sysctl_tx_nmbuf, "I",
4302 "# of pending TX mbufs");
4304 ksnprintf(node, sizeof(node), "tx%d_gc", i);
4305 SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, node,
4306 CTLFLAG_RW, &txr->tx_gc, "# of TX desc GC");
4310 ix_add_hw_stats(sc);
4316 ix_sysctl_tx_nmbuf(SYSCTL_HANDLER_ARGS)
4318 struct ix_tx_ring *txr = (void *)arg1;
4321 nmbuf = txr->tx_nmbuf;
4322 return (sysctl_handle_int(oidp, &nmbuf, 0, req));
4326 ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4328 struct ix_softc *sc = (void *)arg1;
4329 struct ifnet *ifp = &sc->arpcom.ac_if;
4330 int error, nsegs, i;
4332 nsegs = sc->tx_rings[0].tx_wreg_nsegs;
4333 error = sysctl_handle_int(oidp, &nsegs, 0, req);
4334 if (error || req->newptr == NULL)
4339 ifnet_serialize_all(ifp);
4340 for (i = 0; i < sc->tx_ring_cnt; ++i)
4341 sc->tx_rings[i].tx_wreg_nsegs = nsegs;
4342 ifnet_deserialize_all(ifp);
4348 ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4350 struct ix_softc *sc = (void *)arg1;
4351 struct ifnet *ifp = &sc->arpcom.ac_if;
4352 int error, nsegs, i;
4354 nsegs = sc->rx_rings[0].rx_wreg_nsegs;
4355 error = sysctl_handle_int(oidp, &nsegs, 0, req);
4356 if (error || req->newptr == NULL)
4361 ifnet_serialize_all(ifp);
4362 for (i = 0; i < sc->rx_ring_cnt; ++i)
4363 sc->rx_rings[i].rx_wreg_nsegs = nsegs;
4364 ifnet_deserialize_all(ifp);
4370 ix_sysctl_txd(SYSCTL_HANDLER_ARGS)
4372 struct ix_softc *sc = (void *)arg1;
4375 txd = sc->tx_rings[0].tx_ndesc;
4376 return sysctl_handle_int(oidp, &txd, 0, req);
4380 ix_sysctl_rxd(SYSCTL_HANDLER_ARGS)
4382 struct ix_softc *sc = (void *)arg1;
4385 rxd = sc->rx_rings[0].rx_ndesc;
4386 return sysctl_handle_int(oidp, &rxd, 0, req);
4390 ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS)
4392 struct ix_softc *sc = (void *)arg1;
4393 struct ifnet *ifp = &sc->arpcom.ac_if;
4394 struct ix_tx_ring *txr = &sc->tx_rings[0];
4397 nsegs = txr->tx_intr_nsegs;
4398 error = sysctl_handle_int(oidp, &nsegs, 0, req);
4399 if (error || req->newptr == NULL)
4404 ifnet_serialize_all(ifp);
4406 if (nsegs >= txr->tx_ndesc - IX_MAX_SCATTER - IX_TX_RESERVED) {
4412 for (i = 0; i < sc->tx_ring_cnt; ++i)
4413 sc->tx_rings[i].tx_intr_nsegs = nsegs;
4416 ifnet_deserialize_all(ifp);
4422 ix_set_eitr(struct ix_softc *sc, int idx, int rate)
4424 uint32_t eitr, eitr_intvl;
4426 eitr = IXGBE_READ_REG(&sc->hw, IXGBE_EITR(idx));
4427 eitr_intvl = 1000000000 / 256 / rate;
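	/*
	 * The arithmetic here treats one EITR interval unit as 256ns;
	 * e.g. rate = 8000 interrupts/s gives
	 * 1000000000 / 256 / 8000 = 488 units, i.e. roughly 125us
	 * between interrupts.
	 */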
4429 if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4430 eitr &= ~IX_EITR_INTVL_MASK_82598;
4431 if (eitr_intvl == 0)
4433 else if (eitr_intvl > IX_EITR_INTVL_MASK_82598)
4434 eitr_intvl = IX_EITR_INTVL_MASK_82598;
4436 eitr &= ~IX_EITR_INTVL_MASK;
4438 eitr_intvl &= ~IX_EITR_INTVL_RSVD_MASK;
4439 if (eitr_intvl == 0)
4440 eitr_intvl = IX_EITR_INTVL_MIN;
4441 else if (eitr_intvl > IX_EITR_INTVL_MAX)
4442 eitr_intvl = IX_EITR_INTVL_MAX;
4446 IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(idx), eitr);
4450 ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS)
4452 return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RXTX);
4456 ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS)
4458 return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RX);
4462 ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS)
4464 return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_TX);
4468 ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS)
4470 return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_STATUS);
4474 ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int use)
4476 struct ix_softc *sc = (void *)arg1;
4477 struct ifnet *ifp = &sc->arpcom.ac_if;
4481 for (i = 0; i < sc->intr_cnt; ++i) {
4482 if (sc->intr_data[i].intr_use == use) {
4483 rate = sc->intr_data[i].intr_rate;
4488 error = sysctl_handle_int(oidp, &rate, 0, req);
4489 if (error || req->newptr == NULL)
4494 ifnet_serialize_all(ifp);
4496 for (i = 0; i < sc->intr_cnt; ++i) {
4497 if (sc->intr_data[i].intr_use == use) {
4498 sc->intr_data[i].intr_rate = rate;
4499 if (ifp->if_flags & IFF_RUNNING)
4500 ix_set_eitr(sc, i, rate);
4504 ifnet_deserialize_all(ifp);
4510 ix_add_intr_rate_sysctl(struct ix_softc *sc, int use,
4511 const char *name, int (*handler)(SYSCTL_HANDLER_ARGS), const char *desc)
4515 for (i = 0; i < sc->intr_cnt; ++i) {
4516 if (sc->intr_data[i].intr_use == use) {
4517 SYSCTL_ADD_PROC(device_get_sysctl_ctx(sc->dev),
4518 SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
4519 OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
4520 sc, 0, handler, "I", desc);
4527 ix_set_timer_cpuid(struct ix_softc *sc, boolean_t polling)
4529 if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
4530 sc->timer_cpuid = 0; /* XXX fixed */
4532 sc->timer_cpuid = rman_get_cpuid(sc->intr_data[0].intr_res);
4536 ix_alloc_msix(struct ix_softc *sc)
4538 int msix_enable, msix_cnt, msix_ring, alloc_cnt;
4539 struct ix_intr_data *intr;
4541 int ring_cnt, ring_cntmax;
4542 boolean_t setup = FALSE;
4544 msix_enable = ix_msix_enable;
4546 * Don't enable MSI-X on 82598 by default, see:
4547 * 82598 specification update errata #38
4549 if (sc->hw.mac.type == ixgbe_mac_82598EB)
4551 msix_enable = device_getenv_int(sc->dev, "msix.enable", msix_enable);
4555 msix_cnt = pci_msix_count(sc->dev);
4556 #ifdef IX_MSIX_DEBUG
4557 msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
4559 if (msix_cnt <= 1) {
4560		/* A single-vector MSI-X model does not make sense. */
4565 * Make sure that we don't break interrupt related registers
4566 * (EIMS, etc) limitation.
4568 if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4569 if (msix_cnt > IX_MAX_MSIX_82598)
4570 msix_cnt = IX_MAX_MSIX_82598;
4572 if (msix_cnt > IX_MAX_MSIX)
4573 msix_cnt = IX_MAX_MSIX;
4576 device_printf(sc->dev, "MSI-X count %d\n", msix_cnt);
4577 msix_ring = msix_cnt - 1; /* -1 for status */
4580 * Configure # of RX/TX rings usable by MSI-X.
4582 ix_get_rxring_cnt(sc, &ring_cnt, &ring_cntmax);
4583 if (ring_cntmax > msix_ring)
4584 ring_cntmax = msix_ring;
4585 sc->rx_rmap_intr = if_ringmap_alloc(sc->dev, ring_cnt, ring_cntmax);
4587 ix_get_txring_cnt(sc, &ring_cnt, &ring_cntmax);
4588 if (ring_cntmax > msix_ring)
4589 ring_cntmax = msix_ring;
4590 sc->tx_rmap_intr = if_ringmap_alloc(sc->dev, ring_cnt, ring_cntmax);
4592 if_ringmap_match(sc->dev, sc->rx_rmap_intr, sc->tx_rmap_intr);
4593 sc->rx_ring_msix = if_ringmap_count(sc->rx_rmap_intr);
4594 KASSERT(sc->rx_ring_msix <= sc->rx_ring_cnt,
4595 ("total RX ring count %d, MSI-X RX ring count %d",
4596 sc->rx_ring_cnt, sc->rx_ring_msix));
4597 sc->tx_ring_msix = if_ringmap_count(sc->tx_rmap_intr);
4598 KASSERT(sc->tx_ring_msix <= sc->tx_ring_cnt,
4599 ("total TX ring count %d, MSI-X TX ring count %d",
4600 sc->tx_ring_cnt, sc->tx_ring_msix));
4603 * Aggregate TX/RX MSI-X
4605 ring_cntmax = sc->rx_ring_msix;
4606 if (ring_cntmax < sc->tx_ring_msix)
4607 ring_cntmax = sc->tx_ring_msix;
4608 KASSERT(ring_cntmax <= msix_ring,
4609 ("invalid ring count max %d, MSI-X count for rings %d",
4610 ring_cntmax, msix_ring));
4612 alloc_cnt = ring_cntmax + 1; /* +1 for status */
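	/*
	 * E.g. with 8 RX vectors, each piggybacking a TX ring,
	 * ring_cntmax is 8 and alloc_cnt = 9 (8 ring vectors plus
	 * the status vector).
	 */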
4614 device_printf(sc->dev, "MSI-X alloc %d, "
4615 "RX ring %d, TX ring %d\n", alloc_cnt,
4616 sc->rx_ring_msix, sc->tx_ring_msix);
4619 sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82598);
4620 sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4621 &sc->msix_mem_rid, RF_ACTIVE);
4622 if (sc->msix_mem_res == NULL) {
4623 sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82599);
4624 sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
4625 SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
4626 if (sc->msix_mem_res == NULL) {
4627 device_printf(sc->dev, "Unable to map MSI-X table\n");
4632 sc->intr_cnt = alloc_cnt;
4633 sc->intr_data = kmalloc(sizeof(struct ix_intr_data) * sc->intr_cnt,
4634 M_DEVBUF, M_WAITOK | M_ZERO);
4635 for (x = 0; x < sc->intr_cnt; ++x) {
4636 intr = &sc->intr_data[x];
4637 intr->intr_rid = -1;
4638 intr->intr_rate = IX_INTR_RATE;
	for (i = 0; i < sc->rx_ring_msix; ++i) {
		struct ix_rx_ring *rxr = &sc->rx_rings[i];
		struct ix_tx_ring *txr = NULL;
		int cpuid, j;

		KKASSERT(x < sc->intr_cnt);
		rxr->rx_intr_vec = x;
		ix_setup_msix_eims(sc, x,
		    &rxr->rx_eims, &rxr->rx_eims_val);

		cpuid = if_ringmap_cpumap(sc->rx_rmap_intr, i);

		/*
		 * Try finding TX ring to piggyback.
		 */
		for (j = 0; j < sc->tx_ring_msix; ++j) {
			if (cpuid ==
			    if_ringmap_cpumap(sc->tx_rmap_intr, j)) {
				txr = &sc->tx_rings[j];
				KKASSERT(txr->tx_intr_cpuid < 0);
				break;
			}
		}
		rxr->rx_txr = txr;

		intr = &sc->intr_data[x++];
		intr->intr_serialize = &rxr->rx_serialize;
		if (txr != NULL) {
			ksnprintf(intr->intr_desc0,
			    sizeof(intr->intr_desc0), "%s rx%dtx%d",
			    device_get_nameunit(sc->dev), i, txr->tx_idx);
			intr->intr_use = IX_INTR_USE_RXTX;
			intr->intr_func = ix_msix_rxtx;
		} else {
			ksnprintf(intr->intr_desc0,
			    sizeof(intr->intr_desc0), "%s rx%d",
			    device_get_nameunit(sc->dev), i);
			intr->intr_rate = IX_MSIX_RX_RATE;
			intr->intr_use = IX_INTR_USE_RX;
			intr->intr_func = ix_msix_rx;
		}
		intr->intr_funcarg = rxr;
		intr->intr_cpuid = cpuid;
		KKASSERT(intr->intr_cpuid < netisr_ncpus);
		intr->intr_desc = intr->intr_desc0;

		if (txr != NULL) {
			txr->tx_intr_cpuid = intr->intr_cpuid;
			/* NOTE: Leave TX ring's intr_vec negative. */
		}
	}
	for (i = 0; i < sc->tx_ring_msix; ++i) {
		struct ix_tx_ring *txr = &sc->tx_rings[i];

		if (txr->tx_intr_cpuid >= 0) {
			/* Piggybacked by an RX ring. */
			continue;
		}

		KKASSERT(x < sc->intr_cnt);
		txr->tx_intr_vec = x;
		ix_setup_msix_eims(sc, x, &txr->tx_eims, &txr->tx_eims_val);

		intr = &sc->intr_data[x++];
		intr->intr_serialize = &txr->tx_serialize;
		intr->intr_rate = IX_MSIX_TX_RATE;
		intr->intr_use = IX_INTR_USE_TX;
		intr->intr_func = ix_msix_tx;
		intr->intr_funcarg = txr;
		intr->intr_cpuid = if_ringmap_cpumap(sc->tx_rmap_intr, i);
		KKASSERT(intr->intr_cpuid < netisr_ncpus);
		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s tx%d",
		    device_get_nameunit(sc->dev), i);
		intr->intr_desc = intr->intr_desc0;

		txr->tx_intr_cpuid = intr->intr_cpuid;
	}
	/*
	 * Status MSI-X
	 */
	KKASSERT(x < sc->intr_cnt);
	sc->sts_msix_vec = x;

	intr = &sc->intr_data[x++];

	intr->intr_serialize = &sc->main_serialize;
	intr->intr_func = ix_msix_status;
	intr->intr_funcarg = sc;
	intr->intr_cpuid = 0;
	intr->intr_use = IX_INTR_USE_STATUS;

	ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s sts",
	    device_get_nameunit(sc->dev));
	intr->intr_desc = intr->intr_desc0;

	KKASSERT(x == sc->intr_cnt);
	error = pci_setup_msix(sc->dev);
	if (error) {
		device_printf(sc->dev, "Setup MSI-X failed\n");
		goto back;
	}
	setup = TRUE;

	for (i = 0; i < sc->intr_cnt; ++i) {
		intr = &sc->intr_data[i];

		error = pci_alloc_msix_vector(sc->dev, i, &intr->intr_rid,
		    intr->intr_cpuid);
		if (error) {
			device_printf(sc->dev,
			    "Unable to allocate MSI-X %d on cpu%d\n", i,
			    intr->intr_cpuid);
			goto back;
		}

		intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
		    &intr->intr_rid, RF_ACTIVE);
		if (intr->intr_res == NULL) {
			device_printf(sc->dev,
			    "Unable to allocate MSI-X %d resource\n", i);
			error = ENOMEM;
			goto back;
		}
	}

	pci_enable_msix(sc->dev);
	sc->intr_type = PCI_INTR_TYPE_MSIX;
	return;

back:
	ix_free_msix(sc, setup);
}

static void
ix_free_msix(struct ix_softc *sc, boolean_t setup)
{
	int i;

	KKASSERT(sc->intr_cnt > 1);

	for (i = 0; i < sc->intr_cnt; ++i) {
		struct ix_intr_data *intr = &sc->intr_data[i];

		if (intr->intr_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    intr->intr_rid, intr->intr_res);
		}
		if (intr->intr_rid >= 0)
			pci_release_msix_vector(sc->dev, intr->intr_rid);
	}
	if (setup)
		pci_teardown_msix(sc->dev);

	sc->intr_cnt = 0;
	kfree(sc->intr_data, M_DEVBUF);
	sc->intr_data = NULL;
}

static void
ix_msix_rx(void *xrxr)
{
	struct ix_rx_ring *rxr = xrxr;

	ASSERT_SERIALIZED(&rxr->rx_serialize);

	ix_rxeof(rxr, -1);
	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
}

static void
ix_msix_tx(void *xtxr)
{
	struct ix_tx_ring *txr = xtxr;

	ASSERT_SERIALIZED(&txr->tx_serialize);

	ix_tx_intr(txr, *(txr->tx_hdr));
	IXGBE_WRITE_REG(&txr->tx_sc->hw, txr->tx_eims, txr->tx_eims_val);
}

static void
ix_msix_rxtx(void *xrxr)
{
	struct ix_rx_ring *rxr = xrxr;
	struct ix_tx_ring *txr;
	uint32_t hdr;

	ASSERT_SERIALIZED(&rxr->rx_serialize);

	ix_rxeof(rxr, -1);

	/*
	 * NOTE:
	 * Since tx_next_clean is only changed by ix_txeof(),
	 * which is called only in the interrupt handler, the
	 * check w/o holding the TX serializer is MPSAFE.
	 */
	txr = rxr->rx_txr;
	hdr = *(txr->tx_hdr);
	if (hdr != txr->tx_next_clean) {
		lwkt_serialize_enter(&txr->tx_serialize);
		ix_tx_intr(txr, hdr);
		lwkt_serialize_exit(&txr->tx_serialize);
	}

	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
}

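/*
 * NOTE: Sharing one vector between an RX ring and the TX ring mapped to
 * the same CPU (wired up in ix_alloc_msix() above) halves the vector
 * demand of the common symmetric RX/TX configuration; the cheap tx_hdr
 * snapshot above avoids taking the TX serializer when there is nothing
 * to clean.
 */
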
static void
ix_intr_status(struct ix_softc *sc, uint32_t eicr)
{
	struct ixgbe_hw *hw = &sc->hw;

	/* Link status change */
	if (eicr & IXGBE_EICR_LSC)
		ix_handle_link(sc);

	if (hw->mac.type != ixgbe_mac_82598EB) {
		if (eicr & IXGBE_EICR_ECC)
			if_printf(&sc->arpcom.ac_if, "ECC ERROR!! Reboot!!\n");

		/* Check for over temp condition */
		if (eicr & IXGBE_EICR_TS) {
			if_printf(&sc->arpcom.ac_if, "CRITICAL: OVER TEMP!! "
			    "PHY IS SHUT DOWN!! Shutdown!!\n");
		}
	}

	if (ix_is_sfp(hw)) {
		uint32_t mod_mask;

		/* Pluggable optics-related interrupt */
		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP)
			mod_mask = IXGBE_EICR_GPI_SDP0_X540;
		else
			mod_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw);
		if (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))
			ix_handle_msf(sc);
		else if (eicr & mod_mask)
			ix_handle_mod(sc);
	}

	/* Check for fan failure */
	if (hw->device_id == IXGBE_DEV_ID_82598AT &&
	    (eicr & IXGBE_EICR_GPI_SDP1))
		if_printf(&sc->arpcom.ac_if, "FAN FAILURE!! Replace!!\n");

	/* External PHY interrupt */
	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
	    (eicr & IXGBE_EICR_GPI_SDP0_X540))
		ix_handle_phy(sc);
}

static void
ix_msix_status(void *xsc)
{
	struct ix_softc *sc = xsc;
	uint32_t eicr;

	ASSERT_SERIALIZED(&sc->main_serialize);

	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
	ix_intr_status(sc, eicr);

	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, sc->intr_mask);
}

static void
ix_setup_msix_eims(const struct ix_softc *sc, int x,
    uint32_t *eims, uint32_t *eims_val)
{
	if (x < 32) {
		if (sc->hw.mac.type == ixgbe_mac_82598EB) {
			KASSERT(x < IX_MAX_MSIX_82598,
			    ("%s: invalid vector %d for 82598",
			     device_get_nameunit(sc->dev), x));
			*eims = IXGBE_EIMS;
		} else {
			*eims = IXGBE_EIMS_EX(0);
		}
		*eims_val = 1 << x;
	} else {
		KASSERT(x < IX_MAX_MSIX, ("%s: invalid vector %d",
		    device_get_nameunit(sc->dev), x));
		KASSERT(sc->hw.mac.type != ixgbe_mac_82598EB,
		    ("%s: invalid vector %d for 82598",
		     device_get_nameunit(sc->dev), x));
		*eims = IXGBE_EIMS_EX(1);
		*eims_val = 1 << (x - 32);
	}
}

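/*
 * Worked example: on non-82598 MACs the MSI-X interrupt mask is split
 * across two 32-bit registers, so vector 3 maps to IXGBE_EIMS_EX(0)
 * with mask 1 << 3, while vector 35 maps to IXGBE_EIMS_EX(1) with mask
 * 1 << (35 - 32).  The 82598 only implements the single EIMS register,
 * hence the tighter IX_MAX_MSIX_82598 bound asserted above.
 */
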
#ifdef IFPOLL_ENABLE

static void
ix_npoll_status(struct ifnet *ifp)
{
	struct ix_softc *sc = ifp->if_softc;
	uint32_t eicr;

	ASSERT_SERIALIZED(&sc->main_serialize);

	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
	ix_intr_status(sc, eicr);
}

static void
ix_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused)
{
	struct ix_tx_ring *txr = arg;

	ASSERT_SERIALIZED(&txr->tx_serialize);

	ix_tx_intr(txr, *(txr->tx_hdr));
	ix_try_txgc(txr, 1);
}

static void
ix_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle)
{
	struct ix_rx_ring *rxr = arg;

	ASSERT_SERIALIZED(&rxr->rx_serialize);
	ix_rxeof(rxr, cycle);
}

static void
ix_npoll_rx_direct(struct ifnet *ifp __unused, void *arg, int cycle)
{
	struct ix_rx_ring *rxr = arg;

	ASSERT_NOT_SERIALIZED(&rxr->rx_serialize);
	ix_rxeof(rxr, cycle);
}

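/*
 * NOTE: In direct input mode (IFF_IDIRECT, selected in ix_npoll() below
 * when sc->direct_input is set), the RX poll callback is invoked without
 * the per-ring serializer held, which is why this variant asserts
 * ASSERT_NOT_SERIALIZED and is registered with a NULL serializer.
 */
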
static void
ix_npoll(struct ifnet *ifp, struct ifpoll_info *info)
{
	struct ix_softc *sc = ifp->if_softc;
	int i, txr_cnt, rxr_cnt, idirect;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	idirect = sc->direct_input;
	cpu_ccfence();

	if (info) {
		int cpu;

		info->ifpi_status.status_func = ix_npoll_status;
		info->ifpi_status.serializer = &sc->main_serialize;

		txr_cnt = ix_get_txring_inuse(sc, TRUE);
		for (i = 0; i < txr_cnt; ++i) {
			struct ix_tx_ring *txr = &sc->tx_rings[i];

			cpu = if_ringmap_cpumap(sc->tx_rmap, i);
			KKASSERT(cpu < netisr_ncpus);
			info->ifpi_tx[cpu].poll_func = ix_npoll_tx;
			info->ifpi_tx[cpu].arg = txr;
			info->ifpi_tx[cpu].serializer = &txr->tx_serialize;
			ifsq_set_cpuid(txr->tx_ifsq, cpu);
		}

		rxr_cnt = ix_get_rxring_inuse(sc, TRUE);
		for (i = 0; i < rxr_cnt; ++i) {
			struct ix_rx_ring *rxr = &sc->rx_rings[i];

			cpu = if_ringmap_cpumap(sc->rx_rmap, i);
			KKASSERT(cpu < netisr_ncpus);
			info->ifpi_rx[cpu].arg = rxr;
			if (idirect) {
				info->ifpi_rx[cpu].poll_func =
				    ix_npoll_rx_direct;
				info->ifpi_rx[cpu].serializer = NULL;
			} else {
				info->ifpi_rx[cpu].poll_func = ix_npoll_rx;
				info->ifpi_rx[cpu].serializer =
				    &rxr->rx_serialize;
			}
		}
		if (idirect)
			ifp->if_flags |= IFF_IDIRECT;
	} else {
		ifp->if_flags &= ~IFF_IDIRECT;
		for (i = 0; i < sc->tx_ring_cnt; ++i) {
			struct ix_tx_ring *txr = &sc->tx_rings[i];

			ifsq_set_cpuid(txr->tx_ifsq, txr->tx_intr_cpuid);
		}
	}
	if (ifp->if_flags & IFF_RUNNING)
		ix_init(sc);
}

#endif /* IFPOLL_ENABLE */

static enum ixgbe_fc_mode
ix_ifmedia2fc(int ifm)
{
	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);

	switch (fc_opt) {
	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
		return ixgbe_fc_full;

	case IFM_ETH_RXPAUSE:
		return ixgbe_fc_rx_pause;

	case IFM_ETH_TXPAUSE:
		return ixgbe_fc_tx_pause;

	default:
		return ixgbe_fc_none;
	}
}

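/*
 * E.g. "ifconfig ix0 media autoselect mediaopt rxpause,txpause" sets both
 * pause bits in the media word, which the switch above translates into
 * ixgbe_fc_full as the requested flow control mode.
 */
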
static const char *
ix_ifmedia2str(int ifm)
{
	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);

	switch (fc_opt) {
	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
		return IFM_ETH_FC_FULL;

	case IFM_ETH_RXPAUSE:
		return IFM_ETH_FC_RXPAUSE;

	case IFM_ETH_TXPAUSE:
		return IFM_ETH_FC_TXPAUSE;

	default:
		return IFM_ETH_FC_NONE;
	}
}

static const char *
ix_fc2str(enum ixgbe_fc_mode fc)
{
	switch (fc) {
	case ixgbe_fc_full:
		return IFM_ETH_FC_FULL;

	case ixgbe_fc_rx_pause:
		return IFM_ETH_FC_RXPAUSE;

	case ixgbe_fc_tx_pause:
		return IFM_ETH_FC_TXPAUSE;

	default:
		return IFM_ETH_FC_NONE;
	}
}

static void
ix_powerdown(struct ix_softc *sc)
{
	struct ixgbe_hw *hw = &sc->hw;
	int error;

	/* Limit power management flow to X550EM baseT */
	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
	    hw->phy.ops.enter_lplu) {
		/* Turn off support for APM wakeup. (Using ACPI instead) */
		IXGBE_WRITE_REG(hw, IXGBE_GRC,
		    IXGBE_READ_REG(hw, IXGBE_GRC) & ~(uint32_t)2);

		/*
		 * Clear Wake Up Status register to prevent any previous wakeup
		 * events from waking us up immediately after we suspend.
		 */
		IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff);

		/*
		 * Program the Wakeup Filter Control register with user filter
		 * settings
		 */
		IXGBE_WRITE_REG(hw, IXGBE_WUFC, sc->wufc);

		/* Enable wakeups and power management in Wakeup Control */
		IXGBE_WRITE_REG(hw, IXGBE_WUC,
		    IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN);

		/* X550EM baseT adapters need a special LPLU flow */
		hw->phy.reset_disable = true;
		ix_stop(sc);
		error = hw->phy.ops.enter_lplu(hw);
		if (error) {
			if_printf(&sc->arpcom.ac_if,
			    "Error entering LPLU: %d\n", error);
		}
		hw->phy.reset_disable = false;
	} else {
		/* Just stop for other adapters */
		ix_stop(sc);
	}
}

static void
ix_config_flowctrl(struct ix_softc *sc)
{
	struct ixgbe_hw *hw = &sc->hw;
	uint32_t rxpb, frame, size, tmp;

	frame = sc->max_frame_size;

	/* Calculate High Water */
	switch (hw->mac.type) {
	case ixgbe_mac_X540:
	case ixgbe_mac_X550:
	case ixgbe_mac_X550EM_a:
	case ixgbe_mac_X550EM_x:
		tmp = IXGBE_DV_X540(frame, frame);
		break;
	default:
		tmp = IXGBE_DV(frame, frame);
		break;
	}
	size = IXGBE_BT2KB(tmp);
	rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
	hw->fc.high_water[0] = rxpb - size;

	/* Now calculate Low Water */
	switch (hw->mac.type) {
	case ixgbe_mac_X540:
	case ixgbe_mac_X550:
	case ixgbe_mac_X550EM_a:
	case ixgbe_mac_X550EM_x:
		tmp = IXGBE_LOW_DV_X540(frame);
		break;
	default:
		tmp = IXGBE_LOW_DV(frame);
		break;
	}
	hw->fc.low_water[0] = IXGBE_BT2KB(tmp);

	hw->fc.requested_mode = ix_ifmedia2fc(sc->ifm_media);
	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
		hw->fc.disable_fc_autoneg = TRUE;
	else
		hw->fc.disable_fc_autoneg = FALSE;
	hw->fc.pause_time = IX_FC_PAUSE;
	hw->fc.send_xon = TRUE;
}

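/*
 * NOTE: The watermarks above are kept in KB units: IXGBE_RXPBSIZE(0)
 * reads the size of RX packet buffer 0 (">> 10" converts bytes to KB),
 * the IXGBE_DV*()/IXGBE_LOW_DV*() macros compute the worst-case delay
 * allowance for a max-sized frame, and IXGBE_BT2KB() converts that
 * allowance to KB.  High water thus leaves one delay allowance of
 * headroom at the top of the packet buffer before XOFF is sent.
 */
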
static void
ix_config_dmac(struct ix_softc *sc)
{
	struct ixgbe_hw *hw = &sc->hw;
	struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config;

	if (hw->mac.type < ixgbe_mac_X550 || !hw->mac.ops.dmac_config)
		return;

	if ((dcfg->watchdog_timer ^ sc->dmac) ||
	    (dcfg->link_speed ^ sc->link_speed)) {
		dcfg->watchdog_timer = sc->dmac;
		dcfg->fcoe_en = false;
		dcfg->link_speed = sc->link_speed;
		dcfg->num_tcs = 1;

		if (bootverbose) {
			if_printf(&sc->arpcom.ac_if, "dmac settings: "
			    "watchdog %d, link speed %d\n",
			    dcfg->watchdog_timer, dcfg->link_speed);
		}

		hw->mac.ops.dmac_config(hw);
	}
}

static void
ix_init_media(struct ix_softc *sc)
{
	struct ixgbe_hw *hw = &sc->hw;
	int layer, msf_ifm = IFM_NONE;

	ifmedia_removeall(&sc->media);

	layer = ixgbe_get_supported_physical_layer(hw);

	/*
	 * Media types with matching DragonFlyBSD media defines
	 */
	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_T | IFM_FDX,
		    0, NULL);
	}
	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
		    0, NULL);
	}
	if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) {
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
		    0, NULL);
		/* No half-duplex support */
	}

	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LR | IFM_FDX,
		    0, NULL);
		msf_ifm = IFM_1000_LX;
	}
	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LRM | IFM_FDX,
		    0, NULL);
		msf_ifm = IFM_1000_LX;
	}
	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
		    0, NULL);
		msf_ifm = IFM_1000_SX;
	}

	/* Add media for multispeed fiber */
	if (ix_is_sfp(hw) && hw->phy.multispeed_fiber && msf_ifm != IFM_NONE) {
		ixgbe_link_speed linkcap;
		bool autoneg;

		hw->mac.ops.get_link_capabilities(hw, &linkcap, &autoneg);
		if (linkcap & IXGBE_LINK_SPEED_1GB_FULL)
			ifmedia_add_nodup(&sc->media,
			    IFM_ETHER | msf_ifm | IFM_FDX, 0, NULL);
	}

	if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
		ifmedia_add_nodup(&sc->media,
		    IFM_ETHER | IFM_10G_TWINAX | IFM_FDX, 0, NULL);
	}
	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
		    0, NULL);
	}
	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
		    0, NULL);
	}

	/*
	 * XXX Other (no matching DragonFlyBSD media type):
	 * To work around this, we'll assign these completely
	 * inappropriate media types.
	 */
	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKR\n");
		if_printf(&sc->arpcom.ac_if, "10GbaseKR mapped to 10GbaseSR\n");
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
		    0, NULL);
	}
	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) {
		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKX4\n");
		if_printf(&sc->arpcom.ac_if,
		    "10GbaseKX4 mapped to 10GbaseCX4\n");
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
		    0, NULL);
	}
	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) {
		if_printf(&sc->arpcom.ac_if, "Media supported: 1000baseKX\n");
		if_printf(&sc->arpcom.ac_if,
		    "1000baseKX mapped to 1000baseCX\n");
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_CX | IFM_FDX,
		    0, NULL);
	}
	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) {
		/* Someday, someone will care about you... */
		if_printf(&sc->arpcom.ac_if,
		    "Media supported: 1000baseBX, ignored\n");
	}

	/* XXX we probably don't need this */
	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
		ifmedia_add_nodup(&sc->media,
		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
	}

	ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);

	if (ifmedia_tryset(&sc->media, sc->ifm_media)) {
		int flowctrl = (sc->ifm_media & IFM_ETH_FCMASK);

		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
		sc->ifm_media = IX_IFM_DEFAULT | flowctrl;
		ifmedia_set(&sc->media, sc->ifm_media);
	}
}
5329 ifmedia_set(&sc->media, sc->ifm_media);