/*
 * Copyright (c) 2001-2014, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Intel Corporation nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include "opt_ifpoll.h"

#include <sys/param.h>
#include <sys/endian.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/serialize.h>
#include <sys/serialize2.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/ethernet.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/ifq_var.h>
#include <net/toeplitz.h>
#include <net/toeplitz2.h>
#include <net/vlan/if_vlan_var.h>
#include <net/vlan/if_vlan_ether.h>
#include <net/if_poll.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>

#include <bus/pci/pcivar.h>
#include <bus/pci/pcireg.h>

#include <dev/netif/ix/ixgbe_api.h>
#include <dev/netif/ix/if_ix.h>

#define IX_IFM_DEFAULT		(IFM_ETHER | IFM_AUTO)
#ifdef IX_RSS_DEBUG
#define IX_RSS_DPRINTF(sc, lvl, fmt, ...) \
do { \
	if (sc->rss_debug >= lvl) \
		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
} while (0)
#else	/* !IX_RSS_DEBUG */
#define IX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
#endif	/* IX_RSS_DEBUG */
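/*
 * Example use (a sketch, not taken from the original source): log which
 * RX ring a packet hashed to at debug level 1; "ring" is a hypothetical
 * local variable.
 *
 *	IX_RSS_DPRINTF(sc, 1, "packet hashed to RX ring %d\n", ring);
 *
 * The do/while(0) wrapper makes the macro behave as a single statement
 * inside unbraced if/else bodies.
 */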
#define IX_NAME			"Intel(R) PRO/10GbE "
#define IX_DEVICE(id) \
	{ IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_##id, IX_NAME #id }
#define IX_DEVICE_NULL		{ 0, 0, NULL }
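/*
 * For example, IX_DEVICE(82599_SFP) expands to:
 *
 *	{ IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP,
 *	  "Intel(R) PRO/10GbE 82599_SFP" }
 *
 * i.e. the token is pasted onto the device-ID macro name and is also
 * stringified into the probe description.
 */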
static struct ix_device {
	uint16_t	vid;
	uint16_t	did;
	const char	*desc;
} ix_devices[] = {
	IX_DEVICE(82598AF_DUAL_PORT),
	IX_DEVICE(82598AF_SINGLE_PORT),
	IX_DEVICE(82598EB_CX4),
	IX_DEVICE(82598_DA_DUAL_PORT),
	IX_DEVICE(82598_CX4_DUAL_PORT),
	IX_DEVICE(82598EB_XF_LR),
	IX_DEVICE(82598_SR_DUAL_PORT_EM),
	IX_DEVICE(82598EB_SFP_LOM),
	IX_DEVICE(82599_KX4),
	IX_DEVICE(82599_KX4_MEZZ),
	IX_DEVICE(82599_SFP),
	IX_DEVICE(82599_XAUI_LOM),
	IX_DEVICE(82599_CX4),
	IX_DEVICE(82599_T3_LOM),
	IX_DEVICE(82599_COMBO_BACKPLANE),
	IX_DEVICE(82599_BACKPLANE_FCOE),
	IX_DEVICE(82599_SFP_SF2),
	IX_DEVICE(82599_SFP_FCOE),
	IX_DEVICE(82599EN_SFP),
	IX_DEVICE(82599_SFP_SF_QP),
	IX_DEVICE(82599_QSFP_SF_QP),
	IX_DEVICE(X550EM_X_KR),
	IX_DEVICE(X550EM_X_KX4),
	IX_DEVICE(X550EM_X_10G_T),

	/* required last entry */
	IX_DEVICE_NULL
};
static int	ix_probe(device_t);
static int	ix_attach(device_t);
static int	ix_detach(device_t);
static int	ix_shutdown(device_t);

static void	ix_serialize(struct ifnet *, enum ifnet_serialize);
static void	ix_deserialize(struct ifnet *, enum ifnet_serialize);
static int	ix_tryserialize(struct ifnet *, enum ifnet_serialize);
static void	ix_serialize_assert(struct ifnet *, enum ifnet_serialize,
		    boolean_t);

static void	ix_start(struct ifnet *, struct ifaltq_subque *);
static void	ix_watchdog(struct ifaltq_subque *);
static int	ix_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static void	ix_init(void *);
static void	ix_stop(struct ix_softc *);
static void	ix_media_status(struct ifnet *, struct ifmediareq *);
static int	ix_media_change(struct ifnet *);
static void	ix_timer(void *);

static void	ix_npoll(struct ifnet *, struct ifpoll_info *);
static void	ix_npoll_rx(struct ifnet *, void *, int);
static void	ix_npoll_tx(struct ifnet *, void *, int);
static void	ix_npoll_status(struct ifnet *);

static void	ix_add_sysctl(struct ix_softc *);
static void	ix_add_intr_rate_sysctl(struct ix_softc *, int,
		    const char *, int (*)(SYSCTL_HANDLER_ARGS), const char *);
static int	ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_txd(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_rxd(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int);
static int	ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS);
static void	ix_add_hw_stats(struct ix_softc *);
static int	ix_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS);
static int	ix_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS);

static void	ix_slot_info(struct ix_softc *);
static int	ix_alloc_rings(struct ix_softc *);
static void	ix_free_rings(struct ix_softc *);
static void	ix_setup_ifp(struct ix_softc *);
static void	ix_setup_serialize(struct ix_softc *);
static void	ix_set_ring_inuse(struct ix_softc *, boolean_t);
static void	ix_set_timer_cpuid(struct ix_softc *, boolean_t);
static void	ix_update_stats(struct ix_softc *);

static void	ix_set_promisc(struct ix_softc *);
static void	ix_set_multi(struct ix_softc *);
static void	ix_set_vlan(struct ix_softc *);
static uint8_t	*ix_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
static enum ixgbe_fc_mode ix_ifmedia2fc(int);
static const char *ix_ifmedia2str(int);
static const char *ix_fc2str(enum ixgbe_fc_mode);

static int	ix_get_txring_inuse(const struct ix_softc *, boolean_t);
static void	ix_init_tx_ring(struct ix_tx_ring *);
static void	ix_free_tx_ring(struct ix_tx_ring *);
static int	ix_create_tx_ring(struct ix_tx_ring *);
static void	ix_destroy_tx_ring(struct ix_tx_ring *, int);
static void	ix_init_tx_unit(struct ix_softc *);
static int	ix_encap(struct ix_tx_ring *, struct mbuf **,
		    uint16_t *, int *);
static int	ix_tx_ctx_setup(struct ix_tx_ring *,
		    const struct mbuf *, uint32_t *, uint32_t *);
static int	ix_tso_ctx_setup(struct ix_tx_ring *,
		    const struct mbuf *, uint32_t *, uint32_t *);
static void	ix_txeof(struct ix_tx_ring *, int);

static int	ix_get_rxring_inuse(const struct ix_softc *, boolean_t);
static int	ix_init_rx_ring(struct ix_rx_ring *);
static void	ix_free_rx_ring(struct ix_rx_ring *);
static int	ix_create_rx_ring(struct ix_rx_ring *);
static void	ix_destroy_rx_ring(struct ix_rx_ring *, int);
static void	ix_init_rx_unit(struct ix_softc *);
static void	ix_setup_hw_rsc(struct ix_rx_ring *);
static int	ix_newbuf(struct ix_rx_ring *, int, boolean_t);
static void	ix_rxeof(struct ix_rx_ring *, int);
static void	ix_rx_discard(struct ix_rx_ring *, int, boolean_t);
static void	ix_enable_rx_drop(struct ix_softc *);
static void	ix_disable_rx_drop(struct ix_softc *);

static void	ix_alloc_msix(struct ix_softc *);
static void	ix_free_msix(struct ix_softc *, boolean_t);
static void	ix_conf_rx_msix(struct ix_softc *, int, int *, int);
static void	ix_conf_tx_msix(struct ix_softc *, int, int *, int);
static void	ix_setup_msix_eims(const struct ix_softc *, int,
		    uint32_t *, uint32_t *);
static int	ix_alloc_intr(struct ix_softc *);
static void	ix_free_intr(struct ix_softc *);
static int	ix_setup_intr(struct ix_softc *);
static void	ix_teardown_intr(struct ix_softc *, int);
static void	ix_enable_intr(struct ix_softc *);
static void	ix_disable_intr(struct ix_softc *);
static void	ix_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t);
static void	ix_set_eitr(struct ix_softc *, int, int);
static void	ix_intr_status(struct ix_softc *, uint32_t);
static void	ix_intr(void *);
static void	ix_msix_rxtx(void *);
static void	ix_msix_rx(void *);
static void	ix_msix_tx(void *);
static void	ix_msix_status(void *);

static void	ix_config_link(struct ix_softc *);
static boolean_t ix_sfp_probe(struct ix_softc *);
static boolean_t ix_is_sfp(const struct ixgbe_hw *);
static void	ix_update_link_status(struct ix_softc *);
static void	ix_handle_link(struct ix_softc *);
static void	ix_handle_mod(struct ix_softc *);
static void	ix_handle_msf(struct ix_softc *);
static void	ix_handle_phy(struct ix_softc *);
static int	ix_powerdown(struct ix_softc *);
static void	ix_config_flowctrl(struct ix_softc *);
static void	ix_config_dmac(struct ix_softc *);
static void	ix_init_media(struct ix_softc *);

/* XXX Missing shared code prototype */
extern void	ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *);
static device_method_t ix_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		ix_probe),
	DEVMETHOD(device_attach,	ix_attach),
	DEVMETHOD(device_detach,	ix_detach),
	DEVMETHOD(device_shutdown,	ix_shutdown),
	DEVMETHOD_END
};

static driver_t ix_driver = {
	"ix",
	ix_methods,
	sizeof(struct ix_softc)
};

static devclass_t ix_devclass;

DECLARE_DUMMY_MODULE(if_ix);
DRIVER_MODULE(if_ix, pci, ix_driver, ix_devclass, NULL, NULL);
static int	ix_msi_enable = 1;
static int	ix_msix_enable = 1;
static int	ix_msix_agg_rxtx = 1;
static int	ix_rxr = 0;
static int	ix_txr = 0;
static int	ix_txd = IX_PERF_TXD;
static int	ix_rxd = IX_PERF_RXD;
static int	ix_unsupported_sfp = 0;
static char	ix_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_FULL;

TUNABLE_INT("hw.ix.msi.enable", &ix_msi_enable);
TUNABLE_INT("hw.ix.msix.enable", &ix_msix_enable);
TUNABLE_INT("hw.ix.msix.agg_rxtx", &ix_msix_agg_rxtx);
TUNABLE_INT("hw.ix.rxr", &ix_rxr);
TUNABLE_INT("hw.ix.txr", &ix_txr);
TUNABLE_INT("hw.ix.txd", &ix_txd);
TUNABLE_INT("hw.ix.rxd", &ix_rxd);
TUNABLE_INT("hw.ix.unsupported_sfp", &ix_unsupported_sfp);
TUNABLE_STR("hw.ix.flow_ctrl", ix_flowctrl, sizeof(ix_flowctrl));
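/*
 * Example /boot/loader.conf settings for these tunables (a sketch; the
 * values shown are illustrative, not recommendations):
 *
 *	hw.ix.msix.enable="1"	# prefer MSI-X when available
 *	hw.ix.rxr="4"		# 4 RX rings (0 = driver default)
 *	hw.ix.txd="2048"	# TX descriptors per ring
 *
 * The flow-control string is parsed by ifmedia_str2ethfc() during
 * attach, so valid values are whatever that routine accepts.
 */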
/*
 * Smart speed setting, default to on.  This only works as a
 * compile-time option right now, since it is set during attach;
 * set this to 'ixgbe_smart_speed_off' to disable.
 */
static const enum ixgbe_smart_speed ix_smart_speed =
    ixgbe_smart_speed_on;
ix_probe(device_t dev)
const struct ix_device *d;
vid = pci_get_vendor(dev);
did = pci_get_device(dev);
for (d = ix_devices; d->desc != NULL; ++d) {
if (vid == d->vid && did == d->did) {
device_set_desc(dev, d->desc);

ix_attach(device_t dev)
struct ix_softc *sc = device_get_softc(dev);
int error, ring_cnt_max;
int offset, offset_def;
char flowctrl[IFM_ETH_FC_STRLEN];
sc->dev = sc->osdep.dev = dev;
if_initname(&sc->arpcom.ac_if, device_get_name(dev),
    device_get_unit(dev));
ifmedia_init(&sc->media, IFM_IMASK | IFM_ETH_FCMASK,
    ix_media_change, ix_media_status);
/* Save frame size */
sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
callout_init_mp(&sc->timer);
lwkt_serialize_init(&sc->main_serialize);
/*
 * Save off the information about this board
 */
hw->vendor_id = pci_get_vendor(dev);
hw->device_id = pci_get_device(dev);
hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
ixgbe_set_mac_type(hw);
/* Pick up the 82599 */
if (hw->mac.type != ixgbe_mac_82598EB)
hw->phy.smart_speed = ix_smart_speed;
/* Enable bus mastering */
pci_enable_busmaster(dev);
sc->mem_rid = PCIR_BAR(0);
sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
    &sc->mem_rid, RF_ACTIVE);
if (sc->mem_res == NULL) {
device_printf(dev, "Unable to allocate bus resource: memory\n");
sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);
sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;
sc->hw.back = &sc->osdep;
/*
 * Configure total supported RX/TX ring count
 */
sc->rx_ring_cnt = device_getenv_int(dev, "rxr", ix_rxr);
sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, IX_MAX_RXRING);
sc->rx_ring_inuse = sc->rx_ring_cnt;
switch (hw->mac.type) {
case ixgbe_mac_82598EB:
ring_cnt_max = IX_MAX_TXRING_82598;
case ixgbe_mac_82599EB:
ring_cnt_max = IX_MAX_TXRING_82599;
ring_cnt_max = IX_MAX_TXRING_X540;
sc->tx_ring_cnt = device_getenv_int(dev, "txr", ix_txr);
sc->tx_ring_cnt = if_ring_count2(sc->tx_ring_cnt, ring_cnt_max);
sc->tx_ring_inuse = sc->tx_ring_cnt;
/* Allocate TX/RX rings */
error = ix_alloc_rings(sc);
/*
 * NPOLLING RX CPU offset
 */
if (sc->rx_ring_cnt == ncpus2) {
offset_def = (sc->rx_ring_cnt * device_get_unit(dev)) % ncpus2;
offset = device_getenv_int(dev, "npoll.rxoff", offset_def);
if (offset >= ncpus2 ||
    offset % sc->rx_ring_cnt != 0) {
device_printf(dev, "invalid npoll.rxoff %d, use %d\n",
sc->rx_npoll_off = offset;
/*
 * NPOLLING TX CPU offset
 */
if (sc->tx_ring_cnt == ncpus2) {
offset_def = (sc->tx_ring_cnt * device_get_unit(dev)) % ncpus2;
offset = device_getenv_int(dev, "npoll.txoff", offset_def);
if (offset >= ncpus2 ||
    offset % sc->tx_ring_cnt != 0) {
device_printf(dev, "invalid npoll.txoff %d, use %d\n",
sc->tx_npoll_off = offset;
/* Allocate interrupt */
error = ix_alloc_intr(sc);
/* Setup serializes */
ix_setup_serialize(sc);
/* Allocate multicast array memory. */
sc->mta = kmalloc(IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR,
/* Initialize the shared code */
hw->allow_unsupported_sfp = ix_unsupported_sfp;
error = ixgbe_init_shared_code(hw);
if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
/*
 * No optics in this port; ask timer routine
 * to probe for later insertion.
 */
sc->sfp_probe = TRUE;
} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
device_printf(dev, "Unsupported SFP+ module detected!\n");
device_printf(dev, "Unable to initialize the shared code\n");
/* Make sure we have a good EEPROM before we read from it */
if (ixgbe_validate_eeprom_checksum(&sc->hw, &csum) < 0) {
device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
error = ixgbe_init_hw(hw);
if (error == IXGBE_ERR_EEPROM_VERSION) {
device_printf(dev, "Pre-production device detected\n");
} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
device_printf(dev, "Unsupported SFP+ Module\n");
} else if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
device_printf(dev, "No SFP+ Module found\n");
sc->ifm_media = IX_IFM_DEFAULT;
/* Get default flow control settings */
device_getenv_string(dev, "flow_ctrl", flowctrl, sizeof(flowctrl),
sc->ifm_media |= ifmedia_str2ethfc(flowctrl);
sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
/* Setup OS specific network interface */
/* Add sysctl tree */
error = ix_setup_intr(sc);
ether_ifdetach(&sc->arpcom.ac_if);
/* Initialize statistics */
/* Check PCIE slot type/speed/width */
/* Save initial wake up filter configuration */
sc->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC);
/* Let hardware know driver is loaded */
ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);

ix_detach(device_t dev)
struct ix_softc *sc = device_get_softc(dev);
if (device_is_attached(dev)) {
struct ifnet *ifp = &sc->arpcom.ac_if;
ifnet_serialize_all(ifp);
ix_teardown_intr(sc, sc->intr_cnt);
ifnet_deserialize_all(ifp);
callout_terminate(&sc->timer);
/* Let hardware know driver is unloading */
ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
ifmedia_removeall(&sc->media);
bus_generic_detach(dev);
if (sc->msix_mem_res != NULL) {
bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
if (sc->mem_res != NULL) {
bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
kfree(sc->mta, M_DEVBUF);
if (sc->serializes != NULL)
kfree(sc->serializes, M_DEVBUF);

ix_shutdown(device_t dev)
struct ix_softc *sc = device_get_softc(dev);
struct ifnet *ifp = &sc->arpcom.ac_if;
ifnet_serialize_all(ifp);
ifnet_deserialize_all(ifp);

ix_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
struct ix_softc *sc = ifp->if_softc;
struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
KKASSERT(txr->tx_ifsq == ifsq);
ASSERT_SERIALIZED(&txr->tx_serialize);
if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
if (!sc->link_active || (txr->tx_flags & IX_TXFLAG_ENABLED) == 0) {
while (!ifsq_is_empty(ifsq)) {
if (txr->tx_avail <= IX_MAX_SCATTER + IX_TX_RESERVED) {
ifsq_set_oactive(ifsq);
txr->tx_watchdog.wd_timer = 5;
m_head = ifsq_dequeue(ifsq);
if (ix_encap(txr, &m_head, &nsegs, &idx)) {
IFNET_STAT_INC(ifp, oerrors, 1);
/*
 * TX interrupts are aggressively aggregated, so increasing
 * opackets at TX interrupt time would make the opackets
 * statistics vastly inaccurate; we do the opackets increment
 * now.
 */
IFNET_STAT_INC(ifp, opackets, 1);
if (nsegs >= txr->tx_wreg_nsegs) {
IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
ETHER_BPF_MTAP(ifp, m_head);
IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);

ix_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
struct ix_softc *sc = ifp->if_softc;
struct ifreq *ifr = (struct ifreq *) data;
int error = 0, mask, reinit;
ASSERT_IFNET_SERIALIZED_ALL(ifp);
if (ifr->ifr_mtu > IX_MAX_MTU) {
ifp->if_mtu = ifr->ifr_mtu;
sc->max_frame_size = ifp->if_mtu + IX_MTU_HDR;
if (ifp->if_flags & IFF_UP) {
if (ifp->if_flags & IFF_RUNNING) {
if ((ifp->if_flags ^ sc->if_flags) &
    (IFF_PROMISC | IFF_ALLMULTI))
} else if (ifp->if_flags & IFF_RUNNING) {
sc->if_flags = ifp->if_flags;
if (ifp->if_flags & IFF_RUNNING) {
if ((ifp->if_flags & IFF_NPOLLING) == 0)
error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
mask = ifr->ifr_reqcap ^ ifp->if_capenable;
if (mask & IFCAP_RXCSUM) {
ifp->if_capenable ^= IFCAP_RXCSUM;
if (mask & IFCAP_VLAN_HWTAGGING) {
ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
if (mask & IFCAP_TXCSUM) {
ifp->if_capenable ^= IFCAP_TXCSUM;
if (ifp->if_capenable & IFCAP_TXCSUM)
ifp->if_hwassist |= CSUM_OFFLOAD;
ifp->if_hwassist &= ~CSUM_OFFLOAD;
if (mask & IFCAP_TSO) {
ifp->if_capenable ^= IFCAP_TSO;
if (ifp->if_capenable & IFCAP_TSO)
ifp->if_hwassist |= CSUM_TSO;
ifp->if_hwassist &= ~CSUM_TSO;
if (mask & IFCAP_RSS)
ifp->if_capenable ^= IFCAP_RSS;
if (reinit && (ifp->if_flags & IFF_RUNNING))
struct ixgbe_i2c_req i2c;
error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
	error = EINVAL;
	break;
}
hw->phy.ops.read_i2c_byte(hw, i2c.offset,
    i2c.dev_addr, i2c.data);
error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
error = ether_ioctl(ifp, command, data);

#define IXGBE_MHADD_MFS_SHIFT	16
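/*
 * The MHADD register keeps its maximum-frame-size (MFS) field in bits
 * 31:16, which is what this shift addresses; the remaining bits are
 * preserved by the read-modify-write in ix_init() below.
 */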
struct ix_softc *sc = xsc;
struct ifnet *ifp = &sc->arpcom.ac_if;
struct ixgbe_hw *hw = &sc->hw;
uint32_t gpie, rxctrl;
ASSERT_IFNET_SERIALIZED_ALL(ifp);
if (ifp->if_flags & IFF_NPOLLING)
/* Configure # of used RX/TX rings */
ix_set_ring_inuse(sc, polling);
ifq_set_subq_mask(&ifp->if_snd, sc->tx_ring_inuse - 1);
/* Get the latest MAC address; the user may have set a LAA */
bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
hw->addr_ctrl.rar_used_count = 1;
/* Prepare transmit descriptors and buffers */
for (i = 0; i < sc->tx_ring_inuse; ++i)
ix_init_tx_ring(&sc->tx_rings[i]);
/* Setup Multicast table */
/* Prepare receive descriptors and buffers */
for (i = 0; i < sc->rx_ring_inuse; ++i) {
error = ix_init_rx_ring(&sc->rx_rings[i]);
if_printf(ifp, "Could not initialize RX ring%d\n", i);
/* Configure RX settings */
gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
/* Enable Fan Failure Interrupt */
gpie |= IXGBE_SDP1_GPIEN_BY_MAC(hw);
/* Add for Module detection */
if (hw->mac.type == ixgbe_mac_82599EB)
gpie |= IXGBE_SDP2_GPIEN;
/*
 * Thermal Failure Detection (X540)
 * Link Detection (X552)
 */
if (hw->mac.type == ixgbe_mac_X540 ||
    hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
gpie |= IXGBE_SDP0_GPIEN_X540;
if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
/* Enable Enhanced MSIX mode */
gpie |= IXGBE_GPIE_MSIX_MODE;
gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
if (ifp->if_mtu > ETHERMTU) {
/* aka IXGBE_MAXFRS on 82599 and newer */
mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
mhadd &= ~IXGBE_MHADD_MFS_MASK;
mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
for (i = 0; i < sc->tx_ring_inuse; ++i) {
txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
txdctl |= IXGBE_TXDCTL_ENABLE;
/*
 * Set WTHRESH to 0, since TX head write-back is used
 */
txdctl &= ~(0x7f << 16);
/*
 * When the internal queue falls below PTHRESH (32),
 * start prefetching as long as there are at least
 * HTHRESH (1) buffers ready.  The values are taken
 * from the Intel linux driver 3.8.21.
 * Prefetching enables tx line rate even with 1 queue.
 */
txdctl |= (32 << 0) | (1 << 8);
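/*
 * TXDCTL field layout on these MACs: PTHRESH lives in bits 6:0,
 * HTHRESH in bits 14:8 and WTHRESH in bits 22:16, so the OR above
 * yields PTHRESH=32 and HTHRESH=1 while WTHRESH stays 0 (cleared by
 * the mask above).
 */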
IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
for (i = 0; i < sc->rx_ring_inuse; ++i) {
rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
if (hw->mac.type == ixgbe_mac_82598EB) {
rxdctl |= IXGBE_RXDCTL_ENABLE;
IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
for (k = 0; k < 10; ++k) {
if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
IXGBE_WRITE_REG(hw, IXGBE_RDT(i),
    sc->rx_rings[0].rx_ndesc - 1);
/* Enable Receive engine */
rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
if (hw->mac.type == ixgbe_mac_82598EB)
rxctrl |= IXGBE_RXCTRL_DMBYPS;
rxctrl |= IXGBE_RXCTRL_RXEN;
ixgbe_enable_rx_dma(hw, rxctrl);
for (i = 0; i < sc->tx_ring_inuse; ++i) {
const struct ix_tx_ring *txr = &sc->tx_rings[i];
if (txr->tx_intr_vec >= 0) {
ix_set_ivar(sc, i, txr->tx_intr_vec, 1);
/*
 * An unconfigured TX interrupt vector can only
 * happen when MSI-X is used.
 */
KASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX,
    ("TX intr vector is not set"));
KASSERT(i < sc->rx_ring_inuse,
    ("invalid TX ring %d, no piggyback RX ring", i));
KASSERT(sc->rx_rings[i].rx_txr == txr,
    ("RX ring %d piggybacked TX ring mismatch", i));
if_printf(ifp, "IVAR skips TX ring %d\n", i);
for (i = 0; i < sc->rx_ring_inuse; ++i) {
const struct ix_rx_ring *rxr = &sc->rx_rings[i];
KKASSERT(rxr->rx_intr_vec >= 0);
ix_set_ivar(sc, i, rxr->rx_intr_vec, 0);
if (rxr->rx_txr != NULL) {
/*
 * Piggyback the TX ring interrupt onto the RX
 * ring interrupt vector.
 */
KASSERT(rxr->rx_txr->tx_intr_vec < 0,
    ("piggybacked TX ring configured intr vector"));
KASSERT(rxr->rx_txr->tx_idx == i,
    ("RX ring %d piggybacked TX ring %u",
    i, rxr->rx_txr->tx_idx));
ix_set_ivar(sc, i, rxr->rx_intr_vec, 1);
if_printf(ifp, "IVAR RX ring %d piggybacks "
    "TX ring %u\n", i, rxr->rx_txr->tx_idx);
if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
/* Set up status MSI-X vector; it is using fixed entry 1 */
ix_set_ivar(sc, 1, sc->sts_msix_vec, -1);
/* Set up auto-mask for TX and RX rings */
if (hw->mac.type == ixgbe_mac_82598EB) {
IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
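/*
 * Note (not from the original source): on 82599 and later the
 * auto-mask (EIAM) bits are split across two 32-bit EIAM_EX
 * registers, each covering 32 MSI-X vectors; writing all ones lets
 * every ring vector auto-mask itself when its interrupt fires.
 */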
for (i = 0; i < sc->intr_cnt; ++i)
ix_set_eitr(sc, i, sc->intr_data[i].intr_rate);
/*
 * Check on any SFP devices that need to be kick-started
 */
if (hw->phy.type == ixgbe_phy_none) {
error = hw->phy.ops.identify(hw);
if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
    "Unsupported SFP+ module type was detected.\n");
/* Config/Enable Link */
/* Hardware Packet Buffer & Flow Control setup */
ix_config_flowctrl(sc);
/* Initialize the FC settings */
/* Set up VLAN support and filter */
/* Setup DMA Coalescing */
/*
 * Only enable interrupts if we are not polling; make sure
 * they are off otherwise.
 */
ix_disable_intr(sc);
ifp->if_flags |= IFF_RUNNING;
for (i = 0; i < sc->tx_ring_inuse; ++i) {
ifsq_clr_oactive(sc->tx_rings[i].tx_ifsq);
ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog);
ix_set_timer_cpuid(sc, polling);
callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);

struct ix_softc *sc = xsc;
struct ixgbe_hw *hw = &sc->hw;
ASSERT_SERIALIZED(&sc->main_serialize);
eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
if (eicr & IX_RX0_INTR_MASK) {
struct ix_rx_ring *rxr = &sc->rx_rings[0];
lwkt_serialize_enter(&rxr->rx_serialize);
lwkt_serialize_exit(&rxr->rx_serialize);
if (eicr & IX_RX1_INTR_MASK) {
struct ix_rx_ring *rxr;
KKASSERT(sc->rx_ring_inuse == IX_MIN_RXRING_RSS);
rxr = &sc->rx_rings[1];
lwkt_serialize_enter(&rxr->rx_serialize);
lwkt_serialize_exit(&rxr->rx_serialize);
if (eicr & IX_TX_INTR_MASK) {
struct ix_tx_ring *txr = &sc->tx_rings[0];
lwkt_serialize_enter(&txr->tx_serialize);
ix_txeof(txr, *(txr->tx_hdr));
if (!ifsq_is_empty(txr->tx_ifsq))
ifsq_devstart(txr->tx_ifsq);
lwkt_serialize_exit(&txr->tx_serialize);
if (__predict_false(eicr & IX_EICR_STATUS))
ix_intr_status(sc, eicr);
IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);

ix_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
struct ix_softc *sc = ifp->if_softc;
struct ifmedia *ifm = &sc->media;
ix_update_link_status(sc);
ifmr->ifm_status = IFM_AVALID;
ifmr->ifm_active = IFM_ETHER;
if (!sc->link_active) {
if (IFM_SUBTYPE(ifm->ifm_media) != IFM_AUTO)
ifmr->ifm_active |= ifm->ifm_media;
ifmr->ifm_active |= IFM_NONE;
ifmr->ifm_status |= IFM_ACTIVE;
layer = ixgbe_get_supported_physical_layer(&sc->hw);
if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) ||
    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) ||
    (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX)) {
switch (sc->link_speed) {
case IXGBE_LINK_SPEED_10GB_FULL:
ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
case IXGBE_LINK_SPEED_1GB_FULL:
ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
case IXGBE_LINK_SPEED_100_FULL:
ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
} else if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
switch (sc->link_speed) {
case IXGBE_LINK_SPEED_10GB_FULL:
ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX;
} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
switch (sc->link_speed) {
case IXGBE_LINK_SPEED_10GB_FULL:
ifmr->ifm_active |= IFM_10G_LR | IFM_FDX;
case IXGBE_LINK_SPEED_1GB_FULL:
ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
switch (sc->link_speed) {
case IXGBE_LINK_SPEED_10GB_FULL:
ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX;
case IXGBE_LINK_SPEED_1GB_FULL:
ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) ||
    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX)) {
switch (sc->link_speed) {
case IXGBE_LINK_SPEED_10GB_FULL:
ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
case IXGBE_LINK_SPEED_1GB_FULL:
ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
switch (sc->link_speed) {
case IXGBE_LINK_SPEED_10GB_FULL:
ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
/* XXX: use the proper media types once they are available */
switch (sc->link_speed) {
case IXGBE_LINK_SPEED_10GB_FULL:
ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
case IXGBE_LINK_SPEED_2_5GB_FULL:
ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
case IXGBE_LINK_SPEED_1GB_FULL:
ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) ||
    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX)) {
/* XXX: use the proper media types once they are available */
switch (sc->link_speed) {
case IXGBE_LINK_SPEED_10GB_FULL:
ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
case IXGBE_LINK_SPEED_2_5GB_FULL:
ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
case IXGBE_LINK_SPEED_1GB_FULL:
ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
/* If nothing is recognized... */
if (IFM_SUBTYPE(ifmr->ifm_active) == 0)
ifmr->ifm_active |= IFM_NONE;
if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
ifmr->ifm_active |= (sc->ifm_media & IFM_ETH_FCMASK);
switch (sc->hw.fc.current_mode) {
ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
case ixgbe_fc_rx_pause:
ifmr->ifm_active |= IFM_ETH_RXPAUSE;
case ixgbe_fc_tx_pause:
ifmr->ifm_active |= IFM_ETH_TXPAUSE;

ix_media_change(struct ifnet *ifp)
struct ix_softc *sc = ifp->if_softc;
struct ifmedia *ifm = &sc->media;
struct ixgbe_hw *hw = &sc->hw;
if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
if (hw->phy.media_type == ixgbe_media_type_backplane ||
    hw->mac.ops.setup_link == NULL) {
if ((ifm->ifm_media ^ sc->ifm_media) & IFM_ETH_FCMASK) {
/* Only flow control setting changes are allowed */
return (EOPNOTSUPP);
switch (IFM_SUBTYPE(ifm->ifm_media)) {
sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
case IFM_10G_SR:	/* XXX also KR */
case IFM_10G_CX4:	/* XXX also KX4 */
case IFM_10G_TWINAX:
sc->advspeed = IXGBE_LINK_SPEED_10GB_FULL;
case IFM_1000_CX:	/* XXX is KX */
sc->advspeed = IXGBE_LINK_SPEED_1GB_FULL;
sc->advspeed = IXGBE_LINK_SPEED_100_FULL;
if_printf(ifp, "Invalid media type %d!\n",
sc->ifm_media = ifm->ifm_media;
if (hw->mac.ops.setup_link != NULL) {
hw->mac.autotry_restart = TRUE;
hw->mac.ops.setup_link(hw, sc->advspeed, TRUE);
if (ifp->if_flags & IFF_RUNNING)

ix_tso_pullup(struct mbuf **mp)
int hoff, iphlen, thoff;
KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
iphlen = m->m_pkthdr.csum_iphlen;
thoff = m->m_pkthdr.csum_thlen;
hoff = m->m_pkthdr.csum_lhlen;
KASSERT(iphlen > 0, ("invalid ip hlen"));
KASSERT(thoff > 0, ("invalid tcp hlen"));
KASSERT(hoff > 0, ("invalid ether hlen"));
if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
m = m_pullup(m, hoff + iphlen + thoff);

ix_encap(struct ix_tx_ring *txr, struct mbuf **m_headp,
    uint16_t *segs_used, int *idx)
uint32_t olinfo_status = 0, cmd_type_len, cmd_rs = 0;
int i, j, error, nsegs, first, maxsegs;
struct mbuf *m_head = *m_headp;
bus_dma_segment_t segs[IX_MAX_SCATTER];
struct ix_tx_buf *txbuf;
union ixgbe_adv_tx_desc *txd = NULL;
if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
error = ix_tso_pullup(m_headp);
if (__predict_false(error))
/* Basic descriptor defines */
cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
if (m_head->m_flags & M_VLANTAG)
cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
/*
 * Important to capture the first descriptor
 * used because it will contain the index of
 * the one we tell the hardware to report back
 */
first = txr->tx_next_avail;
txbuf = &txr->tx_buf[first];
/*
 * Map the packet for DMA.
 */
maxsegs = txr->tx_avail - IX_TX_RESERVED;
if (maxsegs > IX_MAX_SCATTER)
maxsegs = IX_MAX_SCATTER;
error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
    segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
if (__predict_false(error)) {
bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);
/*
 * Set up the appropriate offload context if requested;
 * this may consume one TX descriptor.
 */
if (ix_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status)) {
*segs_used += nsegs;
txr->tx_nsegs += nsegs;
if (txr->tx_nsegs >= txr->tx_intr_nsegs) {
/*
 * Report Status (RS) is turned on every intr_nsegs
 * descriptors (roughly).
 */
cmd_rs = IXGBE_TXD_CMD_RS;
i = txr->tx_next_avail;
for (j = 0; j < nsegs; j++) {
txbuf = &txr->tx_buf[i];
txd = &txr->tx_base[i];
seglen = segs[j].ds_len;
segaddr = htole64(segs[j].ds_addr);
txd->read.buffer_addr = segaddr;
txd->read.cmd_type_len = htole32(IXGBE_TXD_CMD_IFCS |
    cmd_type_len | seglen);
txd->read.olinfo_status = htole32(olinfo_status);
if (++i == txr->tx_ndesc)
txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | cmd_rs);
txr->tx_avail -= nsegs;
txr->tx_next_avail = i;
txbuf->m_head = m_head;
txr->tx_buf[first].map = txbuf->map;
/*
 * Defer TDT updating until enough descriptors are set up.
 */
ix_set_promisc(struct ix_softc *sc)
struct ifnet *ifp = &sc->arpcom.ac_if;
reg_rctl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
reg_rctl &= ~IXGBE_FCTRL_UPE;
if (ifp->if_flags & IFF_ALLMULTI) {
mcnt = IX_MAX_MCASTADDR;
struct ifmultiaddr *ifma;
TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_LINK)
if (mcnt == IX_MAX_MCASTADDR)
if (mcnt < IX_MAX_MCASTADDR)
reg_rctl &= ~IXGBE_FCTRL_MPE;
IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
if (ifp->if_flags & IFF_PROMISC) {
reg_rctl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
} else if (ifp->if_flags & IFF_ALLMULTI) {
reg_rctl |= IXGBE_FCTRL_MPE;
reg_rctl &= ~IXGBE_FCTRL_UPE;
IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);

ix_set_multi(struct ix_softc *sc)
struct ifnet *ifp = &sc->arpcom.ac_if;
struct ifmultiaddr *ifma;
bzero(mta, IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR);
TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_LINK)
if (mcnt == IX_MAX_MCASTADDR)
bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
    IXGBE_ETH_LENGTH_OF_ADDRESS);
fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
if (ifp->if_flags & IFF_PROMISC) {
fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
} else if (mcnt >= IX_MAX_MCASTADDR || (ifp->if_flags & IFF_ALLMULTI)) {
fctrl |= IXGBE_FCTRL_MPE;
fctrl &= ~IXGBE_FCTRL_UPE;
fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);
if (mcnt < IX_MAX_MCASTADDR) {
ixgbe_update_mc_addr_list(&sc->hw,
    mta, mcnt, ix_mc_array_itr, TRUE);
/*
 * This is an iterator function needed by the multicast shared code.
 * It simply feeds the shared code routine the addresses in the
 * array built by ix_set_multi(), one at a time.
 */
ix_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq)
uint8_t *addr = *update_ptr;
newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
*update_ptr = newptr;

struct ix_softc *sc = arg;
lwkt_serialize_enter(&sc->main_serialize);
if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0) {
lwkt_serialize_exit(&sc->main_serialize);
/* Check for pluggable optics */
if (sc->sfp_probe) {
if (!ix_sfp_probe(sc))
goto done; /* Nothing to do */
ix_update_link_status(sc);
ix_update_stats(sc);
callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
lwkt_serialize_exit(&sc->main_serialize);

ix_update_link_status(struct ix_softc *sc)
struct ifnet *ifp = &sc->arpcom.ac_if;
if (sc->link_active == FALSE) {
if_printf(ifp, "Link is up %d Gbps %s\n",
    sc->link_speed == 128 ? 10 : 1,
/*
 * Update any Flow Control changes
 */
ixgbe_fc_enable(&sc->hw);
/* MUST be done after ixgbe_fc_enable() */
if (sc->rx_ring_inuse > 1) {
switch (sc->hw.fc.current_mode) {
case ixgbe_fc_rx_pause:
case ixgbe_fc_tx_pause:
ix_disable_rx_drop(sc);
ix_enable_rx_drop(sc);
/* Update DMA coalescing config */
sc->link_active = TRUE;
ifp->if_link_state = LINK_STATE_UP;
if_link_state_change(ifp);
} else { /* Link down */
if (sc->link_active == TRUE) {
if_printf(ifp, "Link is Down\n");
ifp->if_link_state = LINK_STATE_DOWN;
if_link_state_change(ifp);
sc->link_active = FALSE;

ix_stop(struct ix_softc *sc)
struct ixgbe_hw *hw = &sc->hw;
struct ifnet *ifp = &sc->arpcom.ac_if;
ASSERT_IFNET_SERIALIZED_ALL(ifp);
ix_disable_intr(sc);
callout_stop(&sc->timer);
ifp->if_flags &= ~IFF_RUNNING;
for (i = 0; i < sc->tx_ring_cnt; ++i) {
struct ix_tx_ring *txr = &sc->tx_rings[i];
ifsq_clr_oactive(txr->tx_ifsq);
ifsq_watchdog_stop(&txr->tx_watchdog);
txr->tx_flags &= ~IX_TXFLAG_ENABLED;
hw->adapter_stopped = FALSE;
ixgbe_stop_adapter(hw);
if (hw->mac.type == ixgbe_mac_82599EB)
ixgbe_stop_mac_link_on_d3_82599(hw);
/* Turn off the laser - noop with no optics */
ixgbe_disable_tx_laser(hw);
/* Update the stack */
sc->link_up = FALSE;
ix_update_link_status(sc);
/* Reprogram the RAR[0] in case user changed it. */
ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
for (i = 0; i < sc->tx_ring_cnt; ++i)
ix_free_tx_ring(&sc->tx_rings[i]);
for (i = 0; i < sc->rx_ring_cnt; ++i)
ix_free_rx_ring(&sc->rx_rings[i]);

ix_setup_ifp(struct ix_softc *sc)
struct ixgbe_hw *hw = &sc->hw;
struct ifnet *ifp = &sc->arpcom.ac_if;
ifp->if_baudrate = IF_Gbps(10UL);
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_init = ix_init;
ifp->if_ioctl = ix_ioctl;
ifp->if_start = ix_start;
ifp->if_serialize = ix_serialize;
ifp->if_deserialize = ix_deserialize;
ifp->if_tryserialize = ix_tryserialize;
ifp->if_serialize_assert = ix_serialize_assert;
#ifdef IFPOLL_ENABLE
ifp->if_npoll = ix_npoll;
/* Increase TSO burst length */
ifp->if_tsolen = (8 * ETHERMTU);
ifp->if_nmbclusters = sc->rx_ring_cnt * sc->rx_rings[0].rx_ndesc;
ifp->if_nmbjclusters = ifp->if_nmbclusters;
ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].tx_ndesc - 2);
ifq_set_ready(&ifp->if_snd);
ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);
ifp->if_mapsubq = ifq_mapsubq_mask;
ifq_set_subq_mask(&ifp->if_snd, 0);
ether_ifattach(ifp, hw->mac.addr, NULL);
ifp->if_capabilities =
    IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
if (IX_ENABLE_HWRSS(sc))
ifp->if_capabilities |= IFCAP_RSS;
ifp->if_capenable = ifp->if_capabilities;
ifp->if_hwassist = CSUM_OFFLOAD | CSUM_TSO;
/*
 * Tell the upper layer(s) we support long frames.
 */
ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
/* Setup TX rings and subqueues */
for (i = 0; i < sc->tx_ring_cnt; ++i) {
struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
struct ix_tx_ring *txr = &sc->tx_rings[i];
ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
ifsq_set_priv(ifsq, txr);
ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
txr->tx_ifsq = ifsq;
ifsq_watchdog_init(&txr->tx_watchdog, ifsq, ix_watchdog);
/* Specify the media types supported by this adapter */

ix_is_sfp(const struct ixgbe_hw *hw)
switch (hw->phy.type) {
case ixgbe_phy_sfp_avago:
case ixgbe_phy_sfp_ftl:
case ixgbe_phy_sfp_intel:
case ixgbe_phy_sfp_unknown:
case ixgbe_phy_sfp_passive_tyco:
case ixgbe_phy_sfp_passive_unknown:
case ixgbe_phy_qsfp_passive_unknown:
case ixgbe_phy_qsfp_active_unknown:
case ixgbe_phy_qsfp_intel:
case ixgbe_phy_qsfp_unknown:

ix_config_link(struct ix_softc *sc)
struct ixgbe_hw *hw = &sc->hw;
sfp = ix_is_sfp(hw);
if (hw->phy.multispeed_fiber) {
hw->mac.ops.setup_sfp(hw);
ixgbe_enable_tx_laser(hw);
uint32_t autoneg, err = 0;
if (hw->mac.ops.check_link != NULL) {
err = ixgbe_check_link(hw, &sc->link_speed,
    &sc->link_up, FALSE);
if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
autoneg = sc->advspeed;
autoneg = hw->phy.autoneg_advertised;
if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
err = hw->mac.ops.get_link_capabilities(hw,
    &autoneg, &negotiate);
if (hw->mac.ops.setup_link != NULL) {
err = hw->mac.ops.setup_link(hw,
    autoneg, sc->link_up);

ix_alloc_rings(struct ix_softc *sc)
/*
 * Create top level busdma tag
 */
error = bus_dma_tag_create(NULL, 1, 0,
    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
device_printf(sc->dev, "could not create top level DMA tag\n");
/*
 * Allocate TX descriptor rings and buffers
 */
sc->tx_rings = kmalloc_cachealign(
    sizeof(struct ix_tx_ring) * sc->tx_ring_cnt,
    M_DEVBUF, M_WAITOK | M_ZERO);
for (i = 0; i < sc->tx_ring_cnt; ++i) {
struct ix_tx_ring *txr = &sc->tx_rings[i];
txr->tx_intr_vec = -1;
lwkt_serialize_init(&txr->tx_serialize);
error = ix_create_tx_ring(txr);
/*
 * Allocate RX descriptor rings and buffers
 */
sc->rx_rings = kmalloc_cachealign(
    sizeof(struct ix_rx_ring) * sc->rx_ring_cnt,
    M_DEVBUF, M_WAITOK | M_ZERO);
for (i = 0; i < sc->rx_ring_cnt; ++i) {
struct ix_rx_ring *rxr = &sc->rx_rings[i];
rxr->rx_intr_vec = -1;
lwkt_serialize_init(&rxr->rx_serialize);
error = ix_create_rx_ring(rxr);

ix_create_tx_ring(struct ix_tx_ring *txr)
int error, i, tsize, ntxd;
/*
 * Validate the number of transmit descriptors.  It must not exceed
 * the hardware maximum and it must be a multiple of IX_DBA_ALIGN.
 */
ntxd = device_getenv_int(txr->tx_sc->dev, "txd", ix_txd);
if (((ntxd * sizeof(union ixgbe_adv_tx_desc)) % IX_DBA_ALIGN) != 0 ||
    ntxd < IX_MIN_TXD || ntxd > IX_MAX_TXD) {
device_printf(txr->tx_sc->dev,
    "Using %d TX descriptors instead of %d!\n",
txr->tx_ndesc = IX_DEF_TXD;
txr->tx_ndesc = ntxd;
/*
 * Allocate TX head write-back buffer
 */
txr->tx_hdr = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
    &txr->tx_hdr_dtag, &txr->tx_hdr_map, &txr->tx_hdr_paddr);
if (txr->tx_hdr == NULL) {
device_printf(txr->tx_sc->dev,
    "Unable to allocate TX head write-back buffer\n");
/*
 * Allocate TX descriptor ring
 */
tsize = roundup2(txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc),
    IX_DBA_ALIGN);
txr->tx_base = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
    IX_DBA_ALIGN, tsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
    &txr->tx_base_dtag, &txr->tx_base_map, &txr->tx_base_paddr);
if (txr->tx_base == NULL) {
device_printf(txr->tx_sc->dev,
    "Unable to allocate TX Descriptor memory\n");
tsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_tx_buf) * txr->tx_ndesc);
txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);
/*
 * Create DMA tag for TX buffers
 */
error = bus_dma_tag_create(txr->tx_sc->parent_tag,
    1, 0,		/* alignment, bounds */
    BUS_SPACE_MAXADDR,	/* lowaddr */
    BUS_SPACE_MAXADDR,	/* highaddr */
    NULL, NULL,		/* filter, filterarg */
    IX_TSO_SIZE,	/* maxsize */
    IX_MAX_SCATTER,	/* nsegments */
    PAGE_SIZE,		/* maxsegsize */
    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
    BUS_DMA_ONEBPAGE,	/* flags */
device_printf(txr->tx_sc->dev,
    "Unable to allocate TX DMA tag\n");
kfree(txr->tx_buf, M_DEVBUF);
/*
 * Create DMA maps for TX buffers
 */
for (i = 0; i < txr->tx_ndesc; ++i) {
struct ix_tx_buf *txbuf = &txr->tx_buf[i];
error = bus_dmamap_create(txr->tx_tag,
    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
device_printf(txr->tx_sc->dev,
    "Unable to create TX DMA map\n");
ix_destroy_tx_ring(txr, i);
/*
 * Initialize various watermarks
 */
txr->tx_wreg_nsegs = IX_DEF_TXWREG_NSEGS;
txr->tx_intr_nsegs = txr->tx_ndesc / 16;
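/*
 * For example, with 1024 TX descriptors this asks ix_encap() to set
 * the RS (Report Status) bit roughly every 64 descriptors, which in
 * turn bounds how stale the TX head write-back can become.
 */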
ix_destroy_tx_ring(struct ix_tx_ring *txr, int ndesc)
if (txr->tx_hdr != NULL) {
bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_map);
bus_dmamem_free(txr->tx_hdr_dtag,
    __DEVOLATILE(void *, txr->tx_hdr), txr->tx_hdr_map);
bus_dma_tag_destroy(txr->tx_hdr_dtag);
if (txr->tx_base != NULL) {
bus_dmamap_unload(txr->tx_base_dtag, txr->tx_base_map);
bus_dmamem_free(txr->tx_base_dtag, txr->tx_base,
bus_dma_tag_destroy(txr->tx_base_dtag);
txr->tx_base = NULL;
if (txr->tx_buf == NULL)
for (i = 0; i < ndesc; ++i) {
struct ix_tx_buf *txbuf = &txr->tx_buf[i];
KKASSERT(txbuf->m_head == NULL);
bus_dmamap_destroy(txr->tx_tag, txbuf->map);
bus_dma_tag_destroy(txr->tx_tag);
kfree(txr->tx_buf, M_DEVBUF);

ix_init_tx_ring(struct ix_tx_ring *txr)
/* Clear the old ring contents */
bzero(txr->tx_base, sizeof(union ixgbe_adv_tx_desc) * txr->tx_ndesc);
/* Clear TX head write-back buffer */
txr->tx_next_avail = 0;
txr->tx_next_clean = 0;
/* Set number of descriptors available */
txr->tx_avail = txr->tx_ndesc;
/* Enable this TX ring */
txr->tx_flags |= IX_TXFLAG_ENABLED;

ix_init_tx_unit(struct ix_softc *sc)
struct ixgbe_hw *hw = &sc->hw;
/*
 * Setup the Base and Length of the Tx Descriptor Ring
 */
for (i = 0; i < sc->tx_ring_inuse; ++i) {
struct ix_tx_ring *txr = &sc->tx_rings[i];
uint64_t tdba = txr->tx_base_paddr;
uint64_t hdr_paddr = txr->tx_hdr_paddr;
IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (uint32_t)tdba);
IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (uint32_t)(tdba >> 32));
IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
    txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc));
/* Setup the HW Tx Head and Tail descriptor pointers */
IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
/* Disable TX head write-back relax ordering */
switch (hw->mac.type) {
case ixgbe_mac_82598EB:
txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
case ixgbe_mac_82599EB:
case ixgbe_mac_X540:
txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
switch (hw->mac.type) {
case ixgbe_mac_82598EB:
IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
case ixgbe_mac_82599EB:
case ixgbe_mac_X540:
IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
/* Enable TX head write-back */
IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(i),
    (uint32_t)(hdr_paddr >> 32));
IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(i),
    ((uint32_t)hdr_paddr) | IXGBE_TDWBAL_HEAD_WB_ENABLE);
if (hw->mac.type != ixgbe_mac_82598EB) {
uint32_t dmatxctl, rttdcs;
dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
dmatxctl |= IXGBE_DMATXCTL_TE;
IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
/* Disable arbiter to set MTQC */
rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
rttdcs |= IXGBE_RTTDCS_ARBDIS;
IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
/* Re-enable arbiter */
rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);

ix_tx_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
    uint32_t *cmd_type_len, uint32_t *olinfo_status)
struct ixgbe_adv_tx_context_desc *TXD;
uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
int ehdrlen, ip_hlen = 0, ctxd;
boolean_t offload = TRUE;
/* First check if TSO is to be used */
if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
return ix_tso_ctx_setup(txr, mp,
    cmd_type_len, olinfo_status);
if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
/* Indicate the whole packet as payload when not doing TSO */
*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
/*
 * In advanced descriptors the vlan tag must be placed into the
 * context descriptor.  Hence we need to make one even if not
 * doing checksum offloads.
 */
if (mp->m_flags & M_VLANTAG) {
vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
    IXGBE_ADVTXD_VLAN_SHIFT;
} else if (!offload) {
/* No TX descriptor is consumed */
/* Set the ether header length */
ehdrlen = mp->m_pkthdr.csum_lhlen;
KASSERT(ehdrlen > 0, ("invalid ether hlen"));
vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
if (mp->m_pkthdr.csum_flags & CSUM_IP) {
*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
ip_hlen = mp->m_pkthdr.csum_iphlen;
KASSERT(ip_hlen > 0, ("invalid ip hlen"));
vlan_macip_lens |= ip_hlen;
type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
if (mp->m_pkthdr.csum_flags & CSUM_TCP)
type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
if (mp->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
/* Now ready a context descriptor */
ctxd = txr->tx_next_avail;
TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
/* Now copy bits into descriptor */
TXD->vlan_macip_lens = htole32(vlan_macip_lens);
TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
TXD->seqnum_seed = htole32(0);
TXD->mss_l4len_idx = htole32(0);
/* We've consumed the first desc, adjust counters */
if (++ctxd == txr->tx_ndesc)
txr->tx_next_avail = ctxd;
/* One TX descriptor is consumed */

ix_tso_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
    uint32_t *cmd_type_len, uint32_t *olinfo_status)
struct ixgbe_adv_tx_context_desc *TXD;
uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
uint32_t mss_l4len_idx = 0, paylen;
int ctxd, ehdrlen, ip_hlen, tcp_hlen;
ehdrlen = mp->m_pkthdr.csum_lhlen;
KASSERT(ehdrlen > 0, ("invalid ether hlen"));
ip_hlen = mp->m_pkthdr.csum_iphlen;
KASSERT(ip_hlen > 0, ("invalid ip hlen"));
tcp_hlen = mp->m_pkthdr.csum_thlen;
KASSERT(tcp_hlen > 0, ("invalid tcp hlen"));
ctxd = txr->tx_next_avail;
TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
if (mp->m_flags & M_VLANTAG) {
vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
    IXGBE_ADVTXD_VLAN_SHIFT;
vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
vlan_macip_lens |= ip_hlen;
TXD->vlan_macip_lens = htole32(vlan_macip_lens);
/* ADV DTYPE TUCMD */
type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
TXD->mss_l4len_idx = htole32(mss_l4len_idx);
TXD->seqnum_seed = htole32(0);
if (++ctxd == txr->tx_ndesc)
txr->tx_next_avail = ctxd;
*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
/* This is used in the transmit desc in encap */
paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
/* One TX descriptor is consumed */

ix_txeof(struct ix_tx_ring *txr, int hdr)
if (txr->tx_avail == txr->tx_ndesc)
first = txr->tx_next_clean;
avail = txr->tx_avail;
while (first != hdr) {
struct ix_tx_buf *txbuf = &txr->tx_buf[first];
if (txbuf->m_head) {
bus_dmamap_unload(txr->tx_tag, txbuf->map);
m_freem(txbuf->m_head);
txbuf->m_head = NULL;
if (++first == txr->tx_ndesc)
txr->tx_next_clean = first;
txr->tx_avail = avail;
if (txr->tx_avail > IX_MAX_SCATTER + IX_TX_RESERVED) {
ifsq_clr_oactive(txr->tx_ifsq);
txr->tx_watchdog.wd_timer = 0;

ix_create_rx_ring(struct ix_rx_ring *rxr)
int i, rsize, error, nrxd;
/*
 * Validate the number of receive descriptors.  It must not exceed
 * the hardware maximum and it must be a multiple of IX_DBA_ALIGN.
 */
nrxd = device_getenv_int(rxr->rx_sc->dev, "rxd", ix_rxd);
if (((nrxd * sizeof(union ixgbe_adv_rx_desc)) % IX_DBA_ALIGN) != 0 ||
    nrxd < IX_MIN_RXD || nrxd > IX_MAX_RXD) {
device_printf(rxr->rx_sc->dev,
    "Using %d RX descriptors instead of %d!\n",
rxr->rx_ndesc = IX_DEF_RXD;
rxr->rx_ndesc = nrxd;
/*
 * Allocate RX descriptor ring
 */
rsize = roundup2(rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc),
    IX_DBA_ALIGN);
rxr->rx_base = bus_dmamem_coherent_any(rxr->rx_sc->parent_tag,
    IX_DBA_ALIGN, rsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
    &rxr->rx_base_dtag, &rxr->rx_base_map, &rxr->rx_base_paddr);
if (rxr->rx_base == NULL) {
device_printf(rxr->rx_sc->dev,
    "Unable to allocate RX Descriptor memory\n");
rsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_rx_buf) * rxr->rx_ndesc);
rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO);
/*
 * Create DMA tag for RX buffers
 */
error = bus_dma_tag_create(rxr->rx_sc->parent_tag,
    1, 0,		/* alignment, bounds */
    BUS_SPACE_MAXADDR,	/* lowaddr */
    BUS_SPACE_MAXADDR,	/* highaddr */
    NULL, NULL,		/* filter, filterarg */
    PAGE_SIZE,		/* maxsize */
    PAGE_SIZE,		/* maxsegsize */
    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,	/* flags */
device_printf(rxr->rx_sc->dev,
    "Unable to create RX DMA tag\n");
kfree(rxr->rx_buf, M_DEVBUF);
/*
 * Create spare DMA map for RX buffers
 */
error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
device_printf(rxr->rx_sc->dev,
    "Unable to create spare RX DMA map\n");
bus_dma_tag_destroy(rxr->rx_tag);
kfree(rxr->rx_buf, M_DEVBUF);
/*
 * Create DMA maps for RX buffers
 */
for (i = 0; i < rxr->rx_ndesc; ++i) {
struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
error = bus_dmamap_create(rxr->rx_tag,
    BUS_DMA_WAITOK, &rxbuf->map);
device_printf(rxr->rx_sc->dev,
    "Unable to create RX dma map\n");
ix_destroy_rx_ring(rxr, i);
/*
 * Initialize various watermarks
 */
rxr->rx_wreg_nsegs = IX_DEF_RXWREG_NSEGS;
ix_destroy_rx_ring(struct ix_rx_ring *rxr, int ndesc)
if (rxr->rx_base != NULL) {
bus_dmamap_unload(rxr->rx_base_dtag, rxr->rx_base_map);
bus_dmamem_free(rxr->rx_base_dtag, rxr->rx_base,
bus_dma_tag_destroy(rxr->rx_base_dtag);
rxr->rx_base = NULL;
if (rxr->rx_buf == NULL)
for (i = 0; i < ndesc; ++i) {
struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
KKASSERT(rxbuf->m_head == NULL);
bus_dmamap_destroy(rxr->rx_tag, rxbuf->map);
bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap);
bus_dma_tag_destroy(rxr->rx_tag);
kfree(rxr->rx_buf, M_DEVBUF);

/*
** Used to detect a descriptor that has
** been merged by Hardware RSC.
*/
static __inline uint32_t
ix_rsc_count(union ixgbe_adv_rx_desc *rx)
return (le32toh(rx->wb.lower.lo_dword.data) &
    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
/*********************************************************************
 *
 * Initialize Hardware RSC (LRO) feature on 82599 for an RX ring;
 * this is toggled by the LRO capability even though it is
 * transparent to the stack.
 *
 * NOTE: since this HW feature only works with IPv4 and our testing
 * has shown soft LRO to be as effective, it is disabled by default.
 *
 **********************************************************************/
2379 ix_setup_hw_rsc(struct ix_rx_ring *rxr)
2381 struct ix_softc *sc = rxr->rx_sc;
2382 struct ixgbe_hw *hw = &sc->hw;
2383 uint32_t rscctrl, rdrxctl;
2386 /* If turning LRO/RSC off we need to disable it */
2387 if ((sc->arpcom.ac_if.if_capenable & IFCAP_LRO) == 0) {
2388 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2389 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
2394 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2395 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2396 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2397 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
2398 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2400 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2401 rscctrl |= IXGBE_RSCCTL_RSCEN;
2403 ** Limit the total number of descriptors that
2404 ** can be combined, so it does not exceed 64K
2406 if (rxr->mbuf_sz == MCLBYTES)
2407 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
2408 else if (rxr->mbuf_sz == MJUMPAGESIZE)
2409 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
2410 else if (rxr->mbuf_sz == MJUM9BYTES)
2411 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
2412 else /* Using 16K cluster */
2413 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
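	/*
	 * Rough arithmetic behind the caps above (illustrative, derived
	 * from the cluster sizes rather than from the data sheet):
	 * MAXDESC_16 with 2KB clusters merges at most 16 * 2KB = 32KB,
	 * MAXDESC_8 with 4KB pages 8 * 4KB = 32KB, MAXDESC_4 with 9KB
	 * clusters 4 * 9KB = 36KB, and MAXDESC_1 with 16KB clusters
	 * 16KB -- all safely below the 64KB RSC limit noted above.
	 */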
2415 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
2417 /* Enable TCP header recognition */
2418 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
2419 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
2420 IXGBE_PSRTYPE_TCPHDR));
2422 /* Disable RSC for ACK packets */
2423 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
2424 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
2431 ix_init_rx_ring(struct ix_rx_ring *rxr)
2435 /* Clear the ring contents */
2436 bzero(rxr->rx_base, rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2438 /* XXX we need JUMPAGESIZE for RSC too */
2439 if (rxr->rx_sc->max_frame_size <= MCLBYTES)
2440 rxr->rx_mbuf_sz = MCLBYTES;
2442 rxr->rx_mbuf_sz = MJUMPAGESIZE;
2444 /* Now replenish the mbufs */
2445 for (i = 0; i < rxr->rx_ndesc; ++i) {
2448 error = ix_newbuf(rxr, i, TRUE);
2453 /* Setup our descriptor indices */
2454 rxr->rx_next_check = 0;
2455 rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2459 ** Now set up the LRO interface:
2461 if (ixgbe_rsc_enable)
2462 ix_setup_hw_rsc(rxr);
2468 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
2470 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
2473 ix_init_rx_unit(struct ix_softc *sc)
2475 struct ixgbe_hw *hw = &sc->hw;
2476 struct ifnet *ifp = &sc->arpcom.ac_if;
2477 uint32_t bufsz, fctrl, rxcsum, hlreg;
2481 * Make sure receives are disabled while setting up the descriptor ring
2483 ixgbe_disable_rx(hw);
2485 /* Enable broadcasts */
2486 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2487 fctrl |= IXGBE_FCTRL_BAM;
2488 if (hw->mac.type == ixgbe_mac_82598EB) {
2489 fctrl |= IXGBE_FCTRL_DPF;
2490 fctrl |= IXGBE_FCTRL_PMCF;
2492 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2494 /* Set for Jumbo Frames? */
2495 hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2496 if (ifp->if_mtu > ETHERMTU)
2497 hlreg |= IXGBE_HLREG0_JUMBOEN;
2499 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
2500 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2502 KKASSERT(sc->rx_rings[0].rx_mbuf_sz >= MCLBYTES);
2503 bufsz = (sc->rx_rings[0].rx_mbuf_sz + BSIZEPKT_ROUNDUP) >>
2504 IXGBE_SRRCTL_BSIZEPKT_SHIFT;
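	/*
	 * Worked example, assuming IXGBE_SRRCTL_BSIZEPKT_SHIFT is 10 as
	 * in the shared ixgbe headers: BSIZEPKT is then in 1KB units and
	 * BSIZEPKT_ROUNDUP rounds up to the next 1KB boundary, so a 2KB
	 * MCLBYTES cluster yields (2048 + 1023) >> 10 = 2 and a 4KB
	 * MJUMPAGESIZE cluster yields 4.
	 */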
2506 for (i = 0; i < sc->rx_ring_inuse; ++i) {
2507 struct ix_rx_ring *rxr = &sc->rx_rings[i];
2508 uint64_t rdba = rxr->rx_base_paddr;
2511 /* Setup the Base and Length of the Rx Descriptor Ring */
2512 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (uint32_t)rdba);
2513 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (uint32_t)(rdba >> 32));
2514 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2515 rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2518 * Set up the SRRCTL register
2520 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
2522 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
2523 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
2525 srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2526 if (sc->rx_ring_inuse > 1) {
2527 /* See the comment near ix_enable_rx_drop() */
2529 (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
2530 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
2531 if (i == 0 && bootverbose) {
2532 if_printf(ifp, "flow control %s, "
2533 "disable RX drop\n",
2534 ix_ifmedia2str(sc->ifm_media));
2537 srrctl |= IXGBE_SRRCTL_DROP_EN;
2538 if (i == 0 && bootverbose) {
2539 if_printf(ifp, "flow control %s, "
2540 "enable RX drop\n",
2541 ix_ifmedia2str(sc->ifm_media));
2545 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2547 /* Setup the HW Rx Head and Tail Descriptor Pointers */
2548 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2549 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2552 if (sc->hw.mac.type != ixgbe_mac_82598EB)
2553 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 0);
2555 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2560 if (IX_ENABLE_HWRSS(sc)) {
2561 uint8_t key[IX_NRSSRK * IX_RSSRK_SIZE];
2566 * When we reach here, RSS has already been disabled
2567 * in ix_stop(), so we can safely configure the RSS key
2568 * and redirect table.
2574 toeplitz_get_key(key, sizeof(key));
2575 for (i = 0; i < IX_NRSSRK; ++i) {
2578 rssrk = IX_RSSRK_VAL(key, i);
2579 IX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n",
2582 IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rssrk);
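		/*
		 * The Toeplitz key is programmed as IX_NRSSRK 32-bit
		 * registers; IX_RSSRK_VAL() is assumed to pack 4
		 * consecutive key bytes per register, so RSSRK(0)
		 * carries key[0..3], RSSRK(1) key[4..7], and so on
		 * across the whole key[] buffer filled above.
		 */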
2585 /* Table size will differ based on MAC */
2586 switch (hw->mac.type) {
2587 case ixgbe_mac_X550:
2588 case ixgbe_mac_X550EM_x:
2589 case ixgbe_mac_X550EM_a:
2590 nreta = IX_NRETA_X550;
2598 * Configure RSS redirect table in following fashion:
2599 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
2602 for (j = 0; j < nreta; ++j) {
2605 for (i = 0; i < IX_RETA_SIZE; ++i) {
2608 q = r % sc->rx_ring_inuse;
2609 reta |= q << (8 * i);
2612 IX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2614 IXGBE_WRITE_REG(hw, IXGBE_RETA(j), reta);
2616 IXGBE_WRITE_REG(hw, IXGBE_ERETA(j - IX_NRETA),
2622 * Enable multiple receive queues.
2623 * Enable IPv4 RSS standard hash functions.
2625 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
2627 IXGBE_MRQC_RSS_FIELD_IPV4 |
2628 IXGBE_MRQC_RSS_FIELD_IPV4_TCP);
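		/*
		 * Worked example of the redirect table packing in the
		 * loop above with rx_ring_inuse == 2: each 32-bit RETA
		 * register holds 4 one-byte entries (as the 8-bit
		 * shifts imply) and q alternates 0,1,0,1, so every
		 * register ends up as 0x01000100.  An RSS hash then
		 * selects its ring as rdr_table[hash & rdr_table_mask],
		 * per the comment above.
		 */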
2632 * PCSD must be enabled to enable multiple
2633 * receive queues.
2635 rxcsum |= IXGBE_RXCSUM_PCSD;
2638 if (ifp->if_capenable & IFCAP_RXCSUM)
2639 rxcsum |= IXGBE_RXCSUM_PCSD;
2641 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2644 static __inline void
2645 ix_rx_refresh(struct ix_rx_ring *rxr, int i)
2648 i = rxr->rx_ndesc - 1;
2649 IXGBE_WRITE_REG(&rxr->rx_sc->hw, IXGBE_RDT(rxr->rx_idx), i);
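	/*
	 * NOTE: as implemented above, RDT is pointed at the descriptor
	 * just before the next one the driver will check; the hardware
	 * only consumes descriptors up to (but not including) RDT, so
	 * it never runs onto a slot that has not been refreshed yet.
	 */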
2652 static __inline void
2653 ix_rxcsum(uint32_t staterr, struct mbuf *mp, uint32_t ptype)
2656 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_IPV4_EX)) == 0) {
2661 if ((staterr & (IXGBE_RXD_STAT_IPCS | IXGBE_RXDADV_ERR_IPE)) ==
2662 IXGBE_RXD_STAT_IPCS)
2663 mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
2666 (IXGBE_RXDADV_PKTTYPE_TCP | IXGBE_RXDADV_PKTTYPE_UDP)) == 0) {
2668 * - Neither TCP nor UDP
2674 if ((staterr & (IXGBE_RXD_STAT_L4CS | IXGBE_RXDADV_ERR_TCPE)) ==
2675 IXGBE_RXD_STAT_L4CS) {
2676 mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2677 CSUM_FRAG_NOT_CHECKED;
2678 mp->m_pkthdr.csum_data = htons(0xffff);
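	/*
	 * To summarize the mapping implemented above:
	 *   IPCS set, IPE clear  -> CSUM_IP_CHECKED | CSUM_IP_VALID
	 *   L4CS set, TCPE clear -> CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
	 *                           CSUM_FRAG_NOT_CHECKED, with
	 *                           csum_data forced to 0xffff.
	 * Non-IPv4 packets and packets that are neither TCP nor UDP
	 * are skipped early and keep their checksum flags untouched.
	 */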
2682 static __inline struct pktinfo *
2683 ix_rssinfo(struct mbuf *m, struct pktinfo *pi,
2684 uint32_t hash, uint32_t hashtype, uint32_t ptype)
2687 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2688 pi->pi_netisr = NETISR_IP;
2690 pi->pi_l3proto = IPPROTO_TCP;
2693 case IXGBE_RXDADV_RSSTYPE_IPV4:
2694 if ((ptype & IXGBE_RXDADV_PKTTYPE_UDP) == 0) {
2695 /* Not UDP, or a fragment */
2698 pi->pi_netisr = NETISR_IP;
2700 pi->pi_l3proto = IPPROTO_UDP;
2707 m->m_flags |= M_HASH;
2708 m->m_pkthdr.hash = toeplitz_hash(hash);
2712 static __inline void
2713 ix_setup_rxdesc(union ixgbe_adv_rx_desc *rxd, const struct ix_rx_buf *rxbuf)
2715 rxd->read.pkt_addr = htole64(rxbuf->paddr);
2716 rxd->wb.upper.status_error = 0;
2720 ix_rx_discard(struct ix_rx_ring *rxr, int i, boolean_t eop)
2722 struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2725 * XXX discard may not be correct
2728 IFNET_STAT_INC(&rxr->rx_sc->arpcom.ac_if, ierrors, 1);
2729 rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2731 rxr->rx_flags |= IX_RXRING_FLAG_DISC;
2733 if (rxbuf->fmp != NULL) {
2734 m_freem(rxbuf->fmp);
2738 ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
2742 ix_rxeof(struct ix_rx_ring *rxr, int count)
2744 struct ifnet *ifp = &rxr->rx_sc->arpcom.ac_if;
2745 int i, nsegs = 0, cpuid = mycpuid;
2747 i = rxr->rx_next_check;
2748 while (count != 0) {
2749 struct ix_rx_buf *rxbuf, *nbuf = NULL;
2750 union ixgbe_adv_rx_desc *cur;
2751 struct mbuf *sendmp = NULL, *mp;
2752 struct pktinfo *pi = NULL, pi0;
2753 uint32_t rsc = 0, ptype, staterr, hash, hashtype;
2757 cur = &rxr->rx_base[i];
2758 staterr = le32toh(cur->wb.upper.status_error);
2760 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
2764 rxbuf = &rxr->rx_buf[i];
2767 len = le16toh(cur->wb.upper.length);
2768 ptype = le32toh(cur->wb.lower.lo_dword.data) &
2769 IXGBE_RXDADV_PKTTYPE_MASK;
2770 hash = le32toh(cur->wb.lower.hi_dword.rss);
2771 hashtype = le32toh(cur->wb.lower.lo_dword.data) &
2772 IXGBE_RXDADV_RSSTYPE_MASK;
2774 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
2779 * Make sure bad packets are discarded
2781 if ((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
2782 (rxr->rx_flags & IX_RXRING_FLAG_DISC)) {
2783 ix_rx_discard(rxr, i, eop);
2787 bus_dmamap_sync(rxr->rx_tag, rxbuf->map, BUS_DMASYNC_POSTREAD);
2788 if (ix_newbuf(rxr, i, FALSE) != 0) {
2789 ix_rx_discard(rxr, i, eop);
2794 * On the 82599, which supports hardware LRO, packet
2795 * fragments need not occupy sequential descriptors;
2796 * instead the next descriptor is indicated in bits
2797 * of the current descriptor. This also means that we
2798 * might process more than one packet at a time, something
2799 * that has never been true before; it required
2800 * eliminating the global chain pointers in favor of what
2801 * we are doing here.
2807 * Figure out the next descriptor
2810 if (rxr->rx_flags & IX_RXRING_FLAG_LRO)
2811 rsc = ix_rsc_count(cur);
2812 if (rsc) { /* Get hardware index */
2814 IXGBE_RXDADV_NEXTP_MASK) >>
2815 IXGBE_RXDADV_NEXTP_SHIFT);
2816 } else { /* Just sequential */
2818 if (nextp == rxr->rx_ndesc)
2821 nbuf = &rxr->rx_buf[nextp];
2827 * Rather than using the fmp/lmp global pointers
2828 * we now keep the head of a packet chain in the
2829 * buffer struct and pass this along from one
2830 * descriptor to the next, until we get EOP.
2832 if (rxbuf->fmp == NULL) {
2833 mp->m_pkthdr.len = len;
2837 rxbuf->fmp->m_pkthdr.len += len;
2838 rxbuf->lmp->m_next = mp;
2844 * Not the last fragment of this frame,
2845 * pass this fragment list on
2847 nbuf->fmp = rxbuf->fmp;
2848 nbuf->lmp = rxbuf->lmp;
2853 sendmp = rxbuf->fmp;
2855 sendmp->m_pkthdr.rcvif = ifp;
2856 IFNET_STAT_INC(ifp, ipackets, 1);
2861 /* Process vlan info */
2862 if (staterr & IXGBE_RXD_STAT_VP) {
2863 sendmp->m_pkthdr.ether_vlantag =
2864 le16toh(cur->wb.upper.vlan);
2865 sendmp->m_flags |= M_VLANTAG;
2867 if (ifp->if_capenable & IFCAP_RXCSUM)
2868 ix_rxcsum(staterr, sendmp, ptype);
2869 if (ifp->if_capenable & IFCAP_RSS) {
2870 pi = ix_rssinfo(sendmp, &pi0,
2871 hash, hashtype, ptype);
2877 /* Advance our pointers to the next descriptor. */
2878 if (++i == rxr->rx_ndesc)
2882 ifp->if_input(ifp, sendmp, pi, cpuid);
2884 if (nsegs >= rxr->rx_wreg_nsegs) {
2885 ix_rx_refresh(rxr, i);
2889 rxr->rx_next_check = i;
2892 ix_rx_refresh(rxr, i);
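	/*
	 * RDT write batching: rather than bumping the tail register for
	 * every replenished descriptor, the loop above defers the write
	 * until rx_wreg_nsegs descriptors have accumulated (tunable via
	 * the rx_wreg_nsegs sysctl), and the call above catches up on
	 * whatever remains once the loop exits.
	 */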
2896 ix_set_vlan(struct ix_softc *sc)
2898 struct ixgbe_hw *hw = &sc->hw;
2901 if (hw->mac.type == ixgbe_mac_82598EB) {
2902 ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2903 ctrl |= IXGBE_VLNCTRL_VME;
2904 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
2909 * On 82599 and later chips the VLAN enable is
2910 * per queue in RXDCTL
2912 for (i = 0; i < sc->rx_ring_inuse; ++i) {
2913 ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
2914 ctrl |= IXGBE_RXDCTL_VME;
2915 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
2921 ix_enable_intr(struct ix_softc *sc)
2923 struct ixgbe_hw *hw = &sc->hw;
2927 for (i = 0; i < sc->intr_cnt; ++i)
2928 lwkt_serialize_handler_enable(sc->intr_data[i].intr_serialize);
2930 sc->intr_mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
2932 /* Enable Fan Failure detection */
2933 if (hw->device_id == IXGBE_DEV_ID_82598AT)
2934 sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2936 switch (hw->mac.type) {
2937 case ixgbe_mac_82599EB:
2938 sc->intr_mask |= IXGBE_EIMS_ECC;
2939 /* Temperature sensor on some adapters */
2940 sc->intr_mask |= IXGBE_EIMS_GPI_SDP0;
2941 /* SFP+ (RX_LOS_N & MOD_ABS_N) */
2942 sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2943 sc->intr_mask |= IXGBE_EIMS_GPI_SDP2;
2946 case ixgbe_mac_X540:
2947 sc->intr_mask |= IXGBE_EIMS_ECC;
2948 /* Detect if Thermal Sensor is enabled */
2949 fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
2950 if (fwsm & IXGBE_FWSM_TS_ENABLED)
2951 sc->intr_mask |= IXGBE_EIMS_TS;
2954 case ixgbe_mac_X550:
2955 case ixgbe_mac_X550EM_a:
2956 case ixgbe_mac_X550EM_x:
2957 sc->intr_mask |= IXGBE_EIMS_ECC;
2958 /* MAC thermal sensor is automatically enabled */
2959 sc->intr_mask |= IXGBE_EIMS_TS;
2960 /* Some devices use SDP0 for important information */
2961 if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
2962 hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
2963 sc->intr_mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw);
2969 /* With MSI-X we use auto clear for RX and TX rings */
2970 if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2972 * There are no EIAC1/EIAC2 for newer chips; the related
2973 * bits for TX and RX rings > 16 are always auto clear.
2975 * XXX which bits? There are _no_ documented EICR1 and
2976 * EICR2 at all; only EICR.
2978 IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE);
2980 sc->intr_mask |= IX_TX_INTR_MASK | IX_RX0_INTR_MASK;
2982 KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
2983 if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
2984 sc->intr_mask |= IX_RX1_INTR_MASK;
2987 IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
2990 * Enable RX and TX rings for MSI-X
2992 if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2993 for (i = 0; i < sc->tx_ring_inuse; ++i) {
2994 const struct ix_tx_ring *txr = &sc->tx_rings[i];
2996 if (txr->tx_intr_vec >= 0) {
2997 IXGBE_WRITE_REG(hw, txr->tx_eims,
3001 for (i = 0; i < sc->rx_ring_inuse; ++i) {
3002 const struct ix_rx_ring *rxr = &sc->rx_rings[i];
3004 KKASSERT(rxr->rx_intr_vec >= 0);
3005 IXGBE_WRITE_REG(hw, rxr->rx_eims, rxr->rx_eims_val);
3009 IXGBE_WRITE_FLUSH(hw);
3013 ix_disable_intr(struct ix_softc *sc)
3017 if (sc->intr_type == PCI_INTR_TYPE_MSIX)
3018 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0);
3020 if (sc->hw.mac.type == ixgbe_mac_82598EB) {
3021 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0);
3023 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000);
3024 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0);
3025 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0);
3027 IXGBE_WRITE_FLUSH(&sc->hw);
3029 for (i = 0; i < sc->intr_cnt; ++i)
3030 lwkt_serialize_handler_disable(sc->intr_data[i].intr_serialize);
3034 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, uint32_t reg)
3036 return pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
3041 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, uint32_t reg, uint16_t value)
3043 pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
3048 ix_slot_info(struct ix_softc *sc)
3050 struct ixgbe_hw *hw = &sc->hw;
3051 device_t dev = sc->dev;
3052 struct ixgbe_mac_info *mac = &hw->mac;
3056 /* For most devices simply call the shared code routine */
3057 if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
3058 ixgbe_get_bus_info(hw);
3059 /* These devices don't use PCI-E */
3060 if (hw->mac.type == ixgbe_mac_X550EM_x ||
3061 hw->mac.type == ixgbe_mac_X550EM_a)
3067 * For the Quad port adapter we need to parse back
3068 * up the PCI tree to find the speed of the expansion
3069 * slot into which this adapter is plugged. A bit more work.
3071 dev = device_get_parent(device_get_parent(dev));
3073 device_printf(dev, "parent pcib = %x,%x,%x\n",
3074 pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
3076 dev = device_get_parent(device_get_parent(dev));
3078 device_printf(dev, "slot pcib = %x,%x,%x\n",
3079 pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
3081 /* Now get the PCI Express Capabilities offset */
3082 offset = pci_get_pciecap_ptr(dev);
3083 /* ...and read the Link Status Register */
3084 link = pci_read_config(dev, offset + PCIER_LINKSTAT, 2);
3085 switch (link & IXGBE_PCI_LINK_WIDTH) {
3086 case IXGBE_PCI_LINK_WIDTH_1:
3087 hw->bus.width = ixgbe_bus_width_pcie_x1;
3089 case IXGBE_PCI_LINK_WIDTH_2:
3090 hw->bus.width = ixgbe_bus_width_pcie_x2;
3092 case IXGBE_PCI_LINK_WIDTH_4:
3093 hw->bus.width = ixgbe_bus_width_pcie_x4;
3095 case IXGBE_PCI_LINK_WIDTH_8:
3096 hw->bus.width = ixgbe_bus_width_pcie_x8;
3099 hw->bus.width = ixgbe_bus_width_unknown;
3103 switch (link & IXGBE_PCI_LINK_SPEED) {
3104 case IXGBE_PCI_LINK_SPEED_2500:
3105 hw->bus.speed = ixgbe_bus_speed_2500;
3107 case IXGBE_PCI_LINK_SPEED_5000:
3108 hw->bus.speed = ixgbe_bus_speed_5000;
3110 case IXGBE_PCI_LINK_SPEED_8000:
3111 hw->bus.speed = ixgbe_bus_speed_8000;
3114 hw->bus.speed = ixgbe_bus_speed_unknown;
3118 mac->ops.set_lan_id(hw);
3121 device_printf(dev, "PCI Express Bus: Speed %s %s\n",
3122 hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" :
3123 hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
3124 hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown",
3125 hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
3126 hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
3127 hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : "Unknown");
3129 if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP &&
3130 hw->bus.width <= ixgbe_bus_width_pcie_x4 &&
3131 hw->bus.speed == ixgbe_bus_speed_2500) {
3132 device_printf(dev, "For optimal performance an x8 "
3133 "PCIE or x4 PCIE Gen2 slot is required.\n");
3134 } else if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP &&
3135 hw->bus.width <= ixgbe_bus_width_pcie_x8 &&
3136 hw->bus.speed < ixgbe_bus_speed_8000) {
3137 device_printf(dev, "For optimal performance an x8 "
3138 "PCIE Gen3 slot is required.\n");
3143 * TODO comment is incorrect
3145 * Setup the correct IVAR register for a particular MSIX interrupt
3146 * - entry is the register array entry
3147 * - vector is the MSIX vector for this queue
3148 * - type is RX/TX/MISC
3151 ix_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector,
3154 struct ixgbe_hw *hw = &sc->hw;
3155 uint32_t ivar, index;
3157 vector |= IXGBE_IVAR_ALLOC_VAL;
3159 switch (hw->mac.type) {
3160 case ixgbe_mac_82598EB:
3162 entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3164 entry += (type * 64);
3165 index = (entry >> 2) & 0x1F;
3166 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3167 ivar &= ~(0xFF << (8 * (entry & 0x3)));
3168 ivar |= (vector << (8 * (entry & 0x3)));
3169 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3172 case ixgbe_mac_82599EB:
3173 case ixgbe_mac_X540:
3174 case ixgbe_mac_X550:
3175 case ixgbe_mac_X550EM_a:
3176 case ixgbe_mac_X550EM_x:
3177 if (type == -1) { /* MISC IVAR */
3178 index = (entry & 1) * 8;
3179 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3180 ivar &= ~(0xFF << index);
3181 ivar |= (vector << index);
3182 IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3183 } else { /* RX/TX IVARS */
3184 index = (16 * (entry & 1)) + (8 * type);
3185 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3186 ivar &= ~(0xFF << index);
3187 ivar |= (vector << index);
3188 IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
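		/*
		 * Layout sketch for the 82599-and-later branch above:
		 * each IVAR register covers two queue entries with four
		 * one-byte vector slots (RX and TX for each).  E.g. RX
		 * queue 5 (entry 5, type 0) computes
		 * index = 16 * (5 & 1) + 8 * 0 = 16 and register
		 * IVAR(5 >> 1) = IVAR(2), so its vector lands in bits
		 * 23:16.  The miscellaneous causes use the separate
		 * IVAR_MISC register.
		 */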
3197 ix_sfp_probe(struct ix_softc *sc)
3199 struct ixgbe_hw *hw = &sc->hw;
3201 if (hw->phy.type == ixgbe_phy_nl &&
3202 hw->phy.sfp_type == ixgbe_sfp_type_not_present) {
3205 ret = hw->phy.ops.identify_sfp(hw);
3209 ret = hw->phy.ops.reset(hw);
3210 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3211 if_printf(&sc->arpcom.ac_if,
3212 "Unsupported SFP+ module detected! "
3213 "Reload driver with supported module.\n");
3214 sc->sfp_probe = FALSE;
3217 if_printf(&sc->arpcom.ac_if, "SFP+ module detected!\n");
3219 /* We now have supported optics */
3220 sc->sfp_probe = FALSE;
3228 ix_handle_link(struct ix_softc *sc)
3230 ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
3231 ix_update_link_status(sc);
3235 * Handling SFP module
3238 ix_handle_mod(struct ix_softc *sc)
3240 struct ixgbe_hw *hw = &sc->hw;
3243 err = hw->phy.ops.identify_sfp(hw);
3244 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3245 if_printf(&sc->arpcom.ac_if,
3246 "Unsupported SFP+ module type was detected.\n");
3249 err = hw->mac.ops.setup_sfp(hw);
3250 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3251 if_printf(&sc->arpcom.ac_if,
3252 "Setup failure - unsupported SFP+ module type.\n");
3259 * Handling MSF (multispeed fiber)
3262 ix_handle_msf(struct ix_softc *sc)
3264 struct ixgbe_hw *hw = &sc->hw;
3267 hw->phy.ops.identify_sfp(hw);
3270 if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
3271 autoneg = sc->advspeed;
3273 autoneg = hw->phy.autoneg_advertised;
3274 if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
3277 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
3279 if (hw->mac.ops.setup_link != NULL)
3280 hw->mac.ops.setup_link(hw, autoneg, TRUE);
3284 ix_handle_phy(struct ix_softc *sc)
3286 struct ixgbe_hw *hw = &sc->hw;
3289 error = hw->phy.ops.handle_lasi(hw);
3290 if (error == IXGBE_ERR_OVERTEMP) {
3291 if_printf(&sc->arpcom.ac_if,
3292 "CRITICAL: EXTERNAL PHY OVER TEMP!! "
3293 "PHY will downshift to lower power state!\n");
3295 if_printf(&sc->arpcom.ac_if,
3296 "Error handling LASI interrupt: %d\n", error);
3301 ix_update_stats(struct ix_softc *sc)
3303 struct ifnet *ifp = &sc->arpcom.ac_if;
3304 struct ixgbe_hw *hw = &sc->hw;
3305 uint32_t missed_rx = 0, bprc, lxon, lxoff, total;
3306 uint64_t total_missed_rx = 0;
3309 sc->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
3310 sc->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
3311 sc->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
3312 sc->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
3314 for (i = 0; i < 16; i++) {
3315 sc->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
3316 sc->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
3317 sc->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
3319 sc->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
3320 sc->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
3321 sc->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
3323 /* Hardware workaround, gprc counts missed packets */
3324 sc->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
3325 sc->stats.gprc -= missed_rx;
3327 if (hw->mac.type != ixgbe_mac_82598EB) {
3328 sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
3329 ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
3330 sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
3331 ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
3332 sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
3333 ((uint64_t)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
3334 sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
3335 sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
3337 sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
3338 sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
3339 /* 82598 only has a counter in the high register */
3340 sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
3341 sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
3342 sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
3346 * Workaround: mprc hardware is incorrectly counting
3347 * broadcasts, so for now we subtract those.
3349 bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
3350 sc->stats.bprc += bprc;
3351 sc->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
3352 if (hw->mac.type == ixgbe_mac_82598EB)
3353 sc->stats.mprc -= bprc;
3355 sc->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
3356 sc->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
3357 sc->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
3358 sc->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
3359 sc->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
3360 sc->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
3362 lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
3363 sc->stats.lxontxc += lxon;
3364 lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
3365 sc->stats.lxofftxc += lxoff;
3366 total = lxon + lxoff;
3368 sc->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
3369 sc->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
3370 sc->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
3371 sc->stats.gptc -= total;
3372 sc->stats.mptc -= total;
3373 sc->stats.ptc64 -= total;
3374 sc->stats.gotc -= total * ETHER_MIN_LEN;
3376 sc->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
3377 sc->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
3378 sc->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
3379 sc->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
3380 sc->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
3381 sc->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
3382 sc->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
3383 sc->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
3384 sc->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
3385 sc->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
3386 sc->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
3387 sc->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
3388 sc->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
3389 sc->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
3390 sc->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
3391 sc->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
3392 sc->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
3393 sc->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
3394 /* FCoE counters do not exist on 82598 */
3395 if (hw->mac.type != ixgbe_mac_82598EB) {
3396 sc->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
3397 sc->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
3398 sc->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
3399 sc->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
3400 sc->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
3404 IFNET_STAT_SET(ifp, iqdrops, total_missed_rx);
3405 IFNET_STAT_SET(ifp, ierrors, sc->stats.crcerrs + sc->stats.rlec);
3410 * Add sysctl variables, one per statistic, to the system.
3413 ix_add_hw_stats(struct ix_softc *sc)
3416 device_t dev = sc->dev;
3418 struct ix_tx_ring *txr = sc->tx_rings;
3419 struct ix_rx_ring *rxr = sc->rx_rings;
3421 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
3422 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
3423 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
3424 struct ixgbe_hw_stats *stats = &sc->stats;
3426 struct sysctl_oid *stat_node, *queue_node;
3427 struct sysctl_oid_list *stat_list, *queue_list;
3429 #define QUEUE_NAME_LEN 32
3430 char namebuf[QUEUE_NAME_LEN];
3432 /* MAC stats get their own sub node */
3434 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
3435 CTLFLAG_RD, NULL, "MAC Statistics");
3436 stat_list = SYSCTL_CHILDREN(stat_node);
3438 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
3439 CTLFLAG_RD, &stats->crcerrs,
3441 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
3442 CTLFLAG_RD, &stats->illerrc,
3443 "Illegal Byte Errors");
3444 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
3445 CTLFLAG_RD, &stats->errbc,
3447 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
3448 CTLFLAG_RD, &stats->mspdc,
3449 "MAC Short Packets Discarded");
3450 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
3451 CTLFLAG_RD, &stats->mlfc,
3452 "MAC Local Faults");
3453 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
3454 CTLFLAG_RD, &stats->mrfc,
3455 "MAC Remote Faults");
3456 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
3457 CTLFLAG_RD, &stats->rlec,
3458 "Receive Length Errors");
3460 /* Flow Control stats */
3461 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
3462 CTLFLAG_RD, &stats->lxontxc,
3463 "Link XON Transmitted");
3464 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
3465 CTLFLAG_RD, &stats->lxonrxc,
3466 "Link XON Received");
3467 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
3468 CTLFLAG_RD, &stats->lxofftxc,
3469 "Link XOFF Transmitted");
3470 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
3471 CTLFLAG_RD, &stats->lxoffrxc,
3472 "Link XOFF Received");
3474 /* Packet Reception Stats */
3475 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
3476 CTLFLAG_RD, &stats->tor,
3477 "Total Octets Received");
3478 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
3479 CTLFLAG_RD, &stats->gorc,
3480 "Good Octets Received");
3481 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
3482 CTLFLAG_RD, &stats->tpr,
3483 "Total Packets Received");
3484 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
3485 CTLFLAG_RD, &stats->gprc,
3486 "Good Packets Received");
3487 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
3488 CTLFLAG_RD, &stats->mprc,
3489 "Multicast Packets Received");
3490 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
3491 CTLFLAG_RD, &stats->bprc,
3492 "Broadcast Packets Received");
3493 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
3494 CTLFLAG_RD, &stats->prc64,
3495 "64 byte frames received ");
3496 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
3497 CTLFLAG_RD, &stats->prc127,
3498 "65-127 byte frames received");
3499 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
3500 CTLFLAG_RD, &stats->prc255,
3501 "128-255 byte frames received");
3502 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
3503 CTLFLAG_RD, &stats->prc511,
3504 "256-511 byte frames received");
3505 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
3506 CTLFLAG_RD, &stats->prc1023,
3507 "512-1023 byte frames received");
3508 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
3509 CTLFLAG_RD, &stats->prc1522,
3510 "1023-1522 byte frames received");
3511 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
3512 CTLFLAG_RD, &stats->ruc,
3513 "Receive Undersized");
3514 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
3515 CTLFLAG_RD, &stats->rfc,
3516 "Fragmented Packets Received ");
3517 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
3518 CTLFLAG_RD, &stats->roc,
3519 "Oversized Packets Received");
3520 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
3521 CTLFLAG_RD, &stats->rjc,
3523 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
3524 CTLFLAG_RD, &stats->mngprc,
3525 "Management Packets Received");
3526 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
3527 CTLFLAG_RD, &stats->mngptc,
3528 "Management Packets Dropped");
3529 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
3530 CTLFLAG_RD, &stats->xec,
3533 /* Packet Transmission Stats */
3534 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
3535 CTLFLAG_RD, &stats->gotc,
3536 "Good Octets Transmitted");
3537 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
3538 CTLFLAG_RD, &stats->tpt,
3539 "Total Packets Transmitted");
3540 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
3541 CTLFLAG_RD, &stats->gptc,
3542 "Good Packets Transmitted");
3543 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
3544 CTLFLAG_RD, &stats->bptc,
3545 "Broadcast Packets Transmitted");
3546 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
3547 CTLFLAG_RD, &stats->mptc,
3548 "Multicast Packets Transmitted");
3549 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
3550 CTLFLAG_RD, &stats->mngptc,
3551 "Management Packets Transmitted");
3552 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
3553 CTLFLAG_RD, &stats->ptc64,
3554 "64 byte frames transmitted ");
3555 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
3556 CTLFLAG_RD, &stats->ptc127,
3557 "65-127 byte frames transmitted");
3558 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
3559 CTLFLAG_RD, &stats->ptc255,
3560 "128-255 byte frames transmitted");
3561 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
3562 CTLFLAG_RD, &stats->ptc511,
3563 "256-511 byte frames transmitted");
3564 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
3565 CTLFLAG_RD, &stats->ptc1023,
3566 "512-1023 byte frames transmitted");
3567 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
3568 CTLFLAG_RD, &stats->ptc1522,
3569 "1024-1522 byte frames transmitted");
3574 * Enable the hardware to drop packets when the buffer is full.
3575 * This is useful when multiple RX rings are used, so that no
3576 * single RX ring being full stalls the entire RX engine. We
3577 * only enable this when multiple RX rings are used and when
3578 * flow control is disabled.
3581 ix_enable_rx_drop(struct ix_softc *sc)
3583 struct ixgbe_hw *hw = &sc->hw;
3587 if_printf(&sc->arpcom.ac_if,
3588 "flow control %s, enable RX drop\n",
3589 ix_fc2str(sc->hw.fc.current_mode));
3592 for (i = 0; i < sc->rx_ring_inuse; ++i) {
3593 uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3595 srrctl |= IXGBE_SRRCTL_DROP_EN;
3596 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3601 ix_disable_rx_drop(struct ix_softc *sc)
3603 struct ixgbe_hw *hw = &sc->hw;
3607 if_printf(&sc->arpcom.ac_if,
3608 "flow control %s, disable RX drop\n",
3609 ix_fc2str(sc->hw.fc.current_mode));
3612 for (i = 0; i < sc->rx_ring_inuse; ++i) {
3613 uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3615 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
3616 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3621 ix_setup_serialize(struct ix_softc *sc)
3625 /* Main + RX + TX */
3626 sc->nserialize = 1 + sc->rx_ring_cnt + sc->tx_ring_cnt;
3628 kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
3629 M_DEVBUF, M_WAITOK | M_ZERO);
3634 * NOTE: Order is critical
3637 KKASSERT(i < sc->nserialize);
3638 sc->serializes[i++] = &sc->main_serialize;
3640 for (j = 0; j < sc->rx_ring_cnt; ++j) {
3641 KKASSERT(i < sc->nserialize);
3642 sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
3645 for (j = 0; j < sc->tx_ring_cnt; ++j) {
3646 KKASSERT(i < sc->nserialize);
3647 sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
3650 KKASSERT(i == sc->nserialize);
3654 ix_alloc_intr(struct ix_softc *sc)
3656 struct ix_intr_data *intr;
3660 if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3661 ix_set_ring_inuse(sc, FALSE);
3665 if (sc->intr_data != NULL)
3666 kfree(sc->intr_data, M_DEVBUF);
3669 sc->intr_data = kmalloc(sizeof(struct ix_intr_data), M_DEVBUF,
3671 intr = &sc->intr_data[0];
3674 * Allocate MSI/legacy interrupt resource
3676 sc->intr_type = pci_alloc_1intr(sc->dev, ix_msi_enable,
3677 &intr->intr_rid, &intr_flags);
3679 intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
3680 &intr->intr_rid, intr_flags);
3681 if (intr->intr_res == NULL) {
3682 device_printf(sc->dev, "Unable to allocate bus resource: "
3687 intr->intr_serialize = &sc->main_serialize;
3688 intr->intr_cpuid = rman_get_cpuid(intr->intr_res);
3689 intr->intr_func = ix_intr;
3690 intr->intr_funcarg = sc;
3691 intr->intr_rate = IX_INTR_RATE;
3692 intr->intr_use = IX_INTR_USE_RXTX;
3694 sc->tx_rings[0].tx_intr_cpuid = intr->intr_cpuid;
3695 sc->tx_rings[0].tx_intr_vec = IX_TX_INTR_VEC;
3697 sc->rx_rings[0].rx_intr_vec = IX_RX0_INTR_VEC;
3699 ix_set_ring_inuse(sc, FALSE);
3701 KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3702 if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
3703 sc->rx_rings[1].rx_intr_vec = IX_RX1_INTR_VEC;
3709 ix_free_intr(struct ix_softc *sc)
3711 if (sc->intr_data == NULL)
3714 if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
3715 struct ix_intr_data *intr = &sc->intr_data[0];
3717 KKASSERT(sc->intr_cnt == 1);
3718 if (intr->intr_res != NULL) {
3719 bus_release_resource(sc->dev, SYS_RES_IRQ,
3720 intr->intr_rid, intr->intr_res);
3722 if (sc->intr_type == PCI_INTR_TYPE_MSI)
3723 pci_release_msi(sc->dev);
3725 kfree(sc->intr_data, M_DEVBUF);
3727 ix_free_msix(sc, TRUE);
3732 ix_set_ring_inuse(struct ix_softc *sc, boolean_t polling)
3734 sc->rx_ring_inuse = ix_get_rxring_inuse(sc, polling);
3735 sc->tx_ring_inuse = ix_get_txring_inuse(sc, polling);
3737 if_printf(&sc->arpcom.ac_if,
3738 "RX rings %d/%d, TX rings %d/%d\n",
3739 sc->rx_ring_inuse, sc->rx_ring_cnt,
3740 sc->tx_ring_inuse, sc->tx_ring_cnt);
3745 ix_get_rxring_inuse(const struct ix_softc *sc, boolean_t polling)
3747 if (!IX_ENABLE_HWRSS(sc))
3751 return sc->rx_ring_cnt;
3752 else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3753 return IX_MIN_RXRING_RSS;
3755 return sc->rx_ring_msix;
3759 ix_get_txring_inuse(const struct ix_softc *sc, boolean_t polling)
3761 if (!IX_ENABLE_HWTSS(sc))
3765 return sc->tx_ring_cnt;
3766 else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3769 return sc->tx_ring_msix;
3773 ix_setup_intr(struct ix_softc *sc)
3777 for (i = 0; i < sc->intr_cnt; ++i) {
3778 struct ix_intr_data *intr = &sc->intr_data[i];
3781 error = bus_setup_intr_descr(sc->dev, intr->intr_res,
3782 INTR_MPSAFE, intr->intr_func, intr->intr_funcarg,
3783 &intr->intr_hand, intr->intr_serialize, intr->intr_desc);
3785 device_printf(sc->dev, "can't setup %dth intr\n", i);
3786 ix_teardown_intr(sc, i);
3794 ix_teardown_intr(struct ix_softc *sc, int intr_cnt)
3798 if (sc->intr_data == NULL)
3801 for (i = 0; i < intr_cnt; ++i) {
3802 struct ix_intr_data *intr = &sc->intr_data[i];
3804 bus_teardown_intr(sc->dev, intr->intr_res, intr->intr_hand);
3809 ix_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
3811 struct ix_softc *sc = ifp->if_softc;
3813 ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
3817 ix_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3819 struct ix_softc *sc = ifp->if_softc;
3821 ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
3825 ix_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3827 struct ix_softc *sc = ifp->if_softc;
3829 return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
3835 ix_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
3836 boolean_t serialized)
3838 struct ix_softc *sc = ifp->if_softc;
3840 ifnet_serialize_array_assert(sc->serializes, sc->nserialize, slz,
3844 #endif /* INVARIANTS */
3847 ix_free_rings(struct ix_softc *sc)
3851 if (sc->tx_rings != NULL) {
3852 for (i = 0; i < sc->tx_ring_cnt; ++i) {
3853 struct ix_tx_ring *txr = &sc->tx_rings[i];
3855 ix_destroy_tx_ring(txr, txr->tx_ndesc);
3857 kfree(sc->tx_rings, M_DEVBUF);
3860 if (sc->rx_rings != NULL) {
3861 for (i = 0; i < sc->rx_ring_cnt; ++i) {
3862 struct ix_rx_ring *rxr = &sc->rx_rings[i];
3864 ix_destroy_rx_ring(rxr, rxr->rx_ndesc);
3866 kfree(sc->rx_rings, M_DEVBUF);
3869 if (sc->parent_tag != NULL)
3870 bus_dma_tag_destroy(sc->parent_tag);
3874 ix_watchdog(struct ifaltq_subque *ifsq)
3876 struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
3877 struct ifnet *ifp = ifsq_get_ifp(ifsq);
3878 struct ix_softc *sc = ifp->if_softc;
3881 KKASSERT(txr->tx_ifsq == ifsq);
3882 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3885 * If the interface has been paused then don't do the watchdog check
3887 if (IXGBE_READ_REG(&sc->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) {
3888 txr->tx_watchdog.wd_timer = 5;
3892 if_printf(ifp, "Watchdog timeout -- resetting\n");
3893 if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->tx_idx,
3894 IXGBE_READ_REG(&sc->hw, IXGBE_TDH(txr->tx_idx)),
3895 IXGBE_READ_REG(&sc->hw, IXGBE_TDT(txr->tx_idx)));
3896 if_printf(ifp, "TX(%d) desc avail = %d, next TX to Clean = %d\n",
3897 txr->tx_idx, txr->tx_avail, txr->tx_next_clean);
3900 for (i = 0; i < sc->tx_ring_inuse; ++i)
3901 ifsq_devstart_sched(sc->tx_rings[i].tx_ifsq);
3905 ix_free_tx_ring(struct ix_tx_ring *txr)
3909 for (i = 0; i < txr->tx_ndesc; ++i) {
3910 struct ix_tx_buf *txbuf = &txr->tx_buf[i];
3912 if (txbuf->m_head != NULL) {
3913 bus_dmamap_unload(txr->tx_tag, txbuf->map);
3914 m_freem(txbuf->m_head);
3915 txbuf->m_head = NULL;
3921 ix_free_rx_ring(struct ix_rx_ring *rxr)
3925 for (i = 0; i < rxr->rx_ndesc; ++i) {
3926 struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
3928 if (rxbuf->fmp != NULL) {
3929 m_freem(rxbuf->fmp);
3933 KKASSERT(rxbuf->lmp == NULL);
3935 if (rxbuf->m_head != NULL) {
3936 bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3937 m_freem(rxbuf->m_head);
3938 rxbuf->m_head = NULL;
3944 ix_newbuf(struct ix_rx_ring *rxr, int i, boolean_t wait)
3947 bus_dma_segment_t seg;
3949 struct ix_rx_buf *rxbuf;
3950 int flags, error, nseg;
3953 if (__predict_false(wait))
3956 m = m_getjcl(flags, MT_DATA, M_PKTHDR, rxr->rx_mbuf_sz);
3959 if_printf(&rxr->rx_sc->arpcom.ac_if,
3960 "Unable to allocate RX mbuf\n");
3964 m->m_len = m->m_pkthdr.len = rxr->rx_mbuf_sz;
3966 error = bus_dmamap_load_mbuf_segment(rxr->rx_tag,
3967 rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT);
3971 if_printf(&rxr->rx_sc->arpcom.ac_if,
3972 "Unable to load RX mbuf\n");
3977 rxbuf = &rxr->rx_buf[i];
3978 if (rxbuf->m_head != NULL)
3979 bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3982 rxbuf->map = rxr->rx_sparemap;
3983 rxr->rx_sparemap = map;
3986 rxbuf->paddr = seg.ds_addr;
3988 ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
3993 ix_add_sysctl(struct ix_softc *sc)
3995 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
3996 struct sysctl_oid *tree = device_get_sysctl_tree(sc->dev);
4002 SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4003 OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
4004 SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4005 OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
4006 "# of RX rings used");
4007 SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4008 OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
4009 SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4010 OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
4011 "# of TX rings used");
4012 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4013 OID_AUTO, "rxd", CTLTYPE_INT | CTLFLAG_RD,
4014 sc, 0, ix_sysctl_rxd, "I",
4016 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4017 OID_AUTO, "txd", CTLTYPE_INT | CTLFLAG_RD,
4018 sc, 0, ix_sysctl_txd, "I",
4020 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4021 OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4022 sc, 0, ix_sysctl_tx_wreg_nsegs, "I",
4023 "# of segments sent before write to hardware register");
4024 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4025 OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4026 sc, 0, ix_sysctl_rx_wreg_nsegs, "I",
4027 "# of received segments sent before write to hardware register");
4028 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4029 OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4030 sc, 0, ix_sysctl_tx_intr_nsegs, "I",
4031 "# of segments per TX interrupt");
4033 #ifdef IFPOLL_ENABLE
4034 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4035 OID_AUTO, "npoll_rxoff", CTLTYPE_INT|CTLFLAG_RW,
4036 sc, 0, ix_sysctl_npoll_rxoff, "I", "NPOLLING RX cpu offset");
4037 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4038 OID_AUTO, "npoll_txoff", CTLTYPE_INT|CTLFLAG_RW,
4039 sc, 0, ix_sysctl_npoll_txoff, "I", "NPOLLING TX cpu offset");
4042 #define IX_ADD_INTR_RATE_SYSCTL(sc, use, name) \
4044 ix_add_intr_rate_sysctl(sc, IX_INTR_USE_##use, #name, \
4045 ix_sysctl_##name, #use " interrupt rate"); \
4048 IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate);
4049 IX_ADD_INTR_RATE_SYSCTL(sc, RX, rx_intr_rate);
4050 IX_ADD_INTR_RATE_SYSCTL(sc, TX, tx_intr_rate);
4051 IX_ADD_INTR_RATE_SYSCTL(sc, STATUS, sts_intr_rate);
4053 #undef IX_ADD_INTR_RATE_SYSCTL
4056 SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4057 OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
4059 for (i = 0; i < sc->rx_ring_cnt; ++i) {
4060 ksnprintf(node, sizeof(node), "rx%d_pkt", i);
4061 SYSCTL_ADD_ULONG(ctx,
4062 SYSCTL_CHILDREN(tree), OID_AUTO, node,
4063 CTLFLAG_RW, &sc->rx_rings[i].rx_pkts, "RXed packets");
4068 ix_add_hw_stats(sc);
4074 ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4076 struct ix_softc *sc = (void *)arg1;
4077 struct ifnet *ifp = &sc->arpcom.ac_if;
4078 int error, nsegs, i;
4080 nsegs = sc->tx_rings[0].tx_wreg_nsegs;
4081 error = sysctl_handle_int(oidp, &nsegs, 0, req);
4082 if (error || req->newptr == NULL)
4087 ifnet_serialize_all(ifp);
4088 for (i = 0; i < sc->tx_ring_cnt; ++i)
4089 sc->tx_rings[i].tx_wreg_nsegs = nsegs;
4090 ifnet_deserialize_all(ifp);
4096 ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4098 struct ix_softc *sc = (void *)arg1;
4099 struct ifnet *ifp = &sc->arpcom.ac_if;
4100 int error, nsegs, i;
4102 nsegs = sc->rx_rings[0].rx_wreg_nsegs;
4103 error = sysctl_handle_int(oidp, &nsegs, 0, req);
4104 if (error || req->newptr == NULL)
4109 ifnet_serialize_all(ifp);
4110 for (i = 0; i < sc->rx_ring_cnt; ++i)
4111 sc->rx_rings[i].rx_wreg_nsegs = nsegs;
4112 ifnet_deserialize_all(ifp);
4118 ix_sysctl_txd(SYSCTL_HANDLER_ARGS)
4120 struct ix_softc *sc = (void *)arg1;
4123 txd = sc->tx_rings[0].tx_ndesc;
4124 return sysctl_handle_int(oidp, &txd, 0, req);
4128 ix_sysctl_rxd(SYSCTL_HANDLER_ARGS)
4130 struct ix_softc *sc = (void *)arg1;
4133 rxd = sc->rx_rings[0].rx_ndesc;
4134 return sysctl_handle_int(oidp, &rxd, 0, req);
4138 ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS)
4140 struct ix_softc *sc = (void *)arg1;
4141 struct ifnet *ifp = &sc->arpcom.ac_if;
4142 struct ix_tx_ring *txr = &sc->tx_rings[0];
4145 nsegs = txr->tx_intr_nsegs;
4146 error = sysctl_handle_int(oidp, &nsegs, 0, req);
4147 if (error || req->newptr == NULL)
4152 ifnet_serialize_all(ifp);
4154 if (nsegs >= txr->tx_ndesc - IX_MAX_SCATTER - IX_TX_RESERVED) {
4160 for (i = 0; i < sc->tx_ring_cnt; ++i)
4161 sc->tx_rings[i].tx_intr_nsegs = nsegs;
4164 ifnet_deserialize_all(ifp);
4170 ix_set_eitr(struct ix_softc *sc, int idx, int rate)
4172 uint32_t eitr, eitr_intvl;
4174 eitr = IXGBE_READ_REG(&sc->hw, IXGBE_EITR(idx));
4175 eitr_intvl = 1000000000 / 256 / rate;
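	/*
	 * Worked example, assuming the EITR interval field counts 256ns
	 * units as the formula above encodes: rate = 8000 interrupts/s
	 * gives eitr_intvl = 1e9 / 256 / 8000 = 488, i.e. ~125us
	 * between interrupts.  On non-82598 parts the low bits are
	 * reserved (IX_EITR_INTVL_RSVD_MASK below), so the effective
	 * granularity is coarser than a single 256ns unit.
	 */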
4177 if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4178 eitr &= ~IX_EITR_INTVL_MASK_82598;
4179 if (eitr_intvl == 0)
4181 else if (eitr_intvl > IX_EITR_INTVL_MASK_82598)
4182 eitr_intvl = IX_EITR_INTVL_MASK_82598;
4184 eitr &= ~IX_EITR_INTVL_MASK;
4186 eitr_intvl &= ~IX_EITR_INTVL_RSVD_MASK;
4187 if (eitr_intvl == 0)
4188 eitr_intvl = IX_EITR_INTVL_MIN;
4189 else if (eitr_intvl > IX_EITR_INTVL_MAX)
4190 eitr_intvl = IX_EITR_INTVL_MAX;
4194 IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(idx), eitr);
4198 ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS)
4200 return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RXTX);
4204 ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS)
4206 return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RX);
4210 ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS)
4212 return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_TX);
4216 ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS)
4218 return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_STATUS);
4222 ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int use)
4224 struct ix_softc *sc = (void *)arg1;
4225 struct ifnet *ifp = &sc->arpcom.ac_if;
4229 for (i = 0; i < sc->intr_cnt; ++i) {
4230 if (sc->intr_data[i].intr_use == use) {
4231 rate = sc->intr_data[i].intr_rate;
4236 error = sysctl_handle_int(oidp, &rate, 0, req);
4237 if (error || req->newptr == NULL)
4242 ifnet_serialize_all(ifp);
4244 for (i = 0; i < sc->intr_cnt; ++i) {
4245 if (sc->intr_data[i].intr_use == use) {
4246 sc->intr_data[i].intr_rate = rate;
4247 if (ifp->if_flags & IFF_RUNNING)
4248 ix_set_eitr(sc, i, rate);
4252 ifnet_deserialize_all(ifp);
4258 ix_add_intr_rate_sysctl(struct ix_softc *sc, int use,
4259 const char *name, int (*handler)(SYSCTL_HANDLER_ARGS), const char *desc)
4263 for (i = 0; i < sc->intr_cnt; ++i) {
4264 if (sc->intr_data[i].intr_use == use) {
4265 SYSCTL_ADD_PROC(device_get_sysctl_ctx(sc->dev),
4266 SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
4267 OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
4268 sc, 0, handler, "I", desc);
4275 ix_set_timer_cpuid(struct ix_softc *sc, boolean_t polling)
4277 if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
4278 sc->timer_cpuid = 0; /* XXX fixed */
4280 sc->timer_cpuid = rman_get_cpuid(sc->intr_data[0].intr_res);
4284 ix_alloc_msix(struct ix_softc *sc)
4286 int msix_enable, msix_cnt, msix_cnt2, alloc_cnt;
4287 struct ix_intr_data *intr;
4289 int offset, offset_def, agg_rxtx, ring_max;
4290 boolean_t aggregate, setup = FALSE;
4292 msix_enable = ix_msix_enable;
4294 * Don't enable MSI-X on 82598 by default, see:
4295 * 82598 specification update errata #38
4297 if (sc->hw.mac.type == ixgbe_mac_82598EB)
4299 msix_enable = device_getenv_int(sc->dev, "msix.enable", msix_enable);
4303 msix_cnt = pci_msix_count(sc->dev);
4304 #ifdef IX_MSIX_DEBUG
4305 msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
4307 if (msix_cnt <= 1) {
4308 /* One MSI-X model does not make sense */
4313 while ((1 << (i + 1)) <= msix_cnt)
4318 device_printf(sc->dev, "MSI-X count %d/%d\n",
4319 msix_cnt2, msix_cnt);
4322 KKASSERT(msix_cnt >= msix_cnt2);
4323 if (msix_cnt == msix_cnt2) {
4324 /* We need at least one MSI-X for link status */
4326 if (msix_cnt2 <= 1) {
4327 /* One MSI-X for RX/TX does not make sense */
4328 device_printf(sc->dev, "not enough MSI-X for TX/RX, "
4329 "MSI-X count %d/%d\n", msix_cnt2, msix_cnt);
4332 KKASSERT(msix_cnt > msix_cnt2);
4335 device_printf(sc->dev, "MSI-X count eq fixup %d/%d\n",
4336 msix_cnt2, msix_cnt);
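		/*
		 * Example of the "eq fixup" above (the halving itself
		 * sits in elided lines, so this is inferred from the
		 * surrounding checks): pci_msix_count() == 16 makes
		 * msix_cnt2 = 16, the largest power of 2 <= msix_cnt;
		 * since the two are equal there is no spare vector for
		 * link status, so msix_cnt2 is cut to 8 ring vectors
		 * and one of the remaining vectors serves status.
		 */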
4341 * Make sure that we don't exceed the limitations of the
4342 * interrupt related registers (EIMS, etc).
4344 * NOTE: msix_cnt > msix_cnt2, when we reach here
4346 if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4347 if (msix_cnt2 > IX_MAX_MSIX_82598)
4348 msix_cnt2 = IX_MAX_MSIX_82598;
4350 if (msix_cnt2 > IX_MAX_MSIX)
4351 msix_cnt2 = IX_MAX_MSIX;
4353 msix_cnt = msix_cnt2 + 1; /* +1 for status */
4356 device_printf(sc->dev, "MSI-X count max fixup %d/%d\n",
4357 msix_cnt2, msix_cnt);
4360 sc->rx_ring_msix = sc->rx_ring_cnt;
4361 if (sc->rx_ring_msix > msix_cnt2)
4362 sc->rx_ring_msix = msix_cnt2;
4364 sc->tx_ring_msix = sc->tx_ring_cnt;
4365 if (sc->tx_ring_msix > msix_cnt2)
4366 sc->tx_ring_msix = msix_cnt2;
4368 ring_max = sc->rx_ring_msix;
4369 if (ring_max < sc->tx_ring_msix)
4370 ring_max = sc->tx_ring_msix;
4372 /* Allow user to force independent RX/TX MSI-X handling */
4373 agg_rxtx = device_getenv_int(sc->dev, "msix.agg_rxtx",
4376 if (!agg_rxtx && msix_cnt >= sc->tx_ring_msix + sc->rx_ring_msix + 1) {
4378 * Independent TX/RX MSI-X
4382 device_printf(sc->dev, "independent TX/RX MSI-X\n");
4383 alloc_cnt = sc->tx_ring_msix + sc->rx_ring_msix;
4386 * Aggregate TX/RX MSI-X
4390 device_printf(sc->dev, "aggregate TX/RX MSI-X\n");
4391 alloc_cnt = msix_cnt2;
4392 if (alloc_cnt > ring_max)
4393 alloc_cnt = ring_max;
4394 KKASSERT(alloc_cnt >= sc->rx_ring_msix &&
4395 alloc_cnt >= sc->tx_ring_msix);
4397 ++alloc_cnt; /* For status */
4400 device_printf(sc->dev, "MSI-X alloc %d, "
4401 "RX ring %d, TX ring %d\n", alloc_cnt,
4402 sc->rx_ring_msix, sc->tx_ring_msix);
4405 sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82598);
4406 sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4407 &sc->msix_mem_rid, RF_ACTIVE);
4408 if (sc->msix_mem_res == NULL) {
4409 sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82599);
4410 sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
4411 SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
4412 if (sc->msix_mem_res == NULL) {
4413 device_printf(sc->dev, "Unable to map MSI-X table\n");
4418 sc->intr_cnt = alloc_cnt;
4419 sc->intr_data = kmalloc(sizeof(struct ix_intr_data) * sc->intr_cnt,
4420 M_DEVBUF, M_WAITOK | M_ZERO);
4421 for (x = 0; x < sc->intr_cnt; ++x) {
4422 intr = &sc->intr_data[x];
4423 intr->intr_rid = -1;
4424 intr->intr_rate = IX_INTR_RATE;
4432 if (sc->rx_ring_msix == ncpus2) {
4435 offset_def = (sc->rx_ring_msix *
4436 device_get_unit(sc->dev)) % ncpus2;
4438 offset = device_getenv_int(sc->dev,
4439 "msix.rxoff", offset_def);
4440 if (offset >= ncpus2 ||
4441 offset % sc->rx_ring_msix != 0) {
4442 device_printf(sc->dev,
4443 "invalid msix.rxoff %d, use %d\n",
4444 offset, offset_def);
4445 offset = offset_def;
4448 ix_conf_rx_msix(sc, 0, &x, offset);
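		/*
		 * The offset logic above spreads the rings of multiple
		 * adapters across CPUs: with 4 RX rings, ncpus2 == 8
		 * and device unit 1, offset_def = (4 * 1) % 8 = 4, so
		 * unit 1's rings land on cpu4..cpu7 while unit 0's
		 * occupy cpu0..cpu3.  msix.rxoff must stay a multiple
		 * of the ring count for this partitioning to hold,
		 * which is what the check above enforces.
		 */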
4453 if (sc->tx_ring_msix == ncpus2) {
4456 offset_def = (sc->tx_ring_msix *
4457 device_get_unit(sc->dev)) % ncpus2;
4459 offset = device_getenv_int(sc->dev,
4460 "msix.txoff", offset_def);
4461 if (offset >= ncpus2 ||
4462 offset % sc->tx_ring_msix != 0) {
4463 device_printf(sc->dev,
4464 "invalid msix.txoff %d, use %d\n",
4465 offset, offset_def);
4466 offset = offset_def;
4469 ix_conf_tx_msix(sc, 0, &x, offset);
4473 ring_agg = sc->rx_ring_msix;
4474 if (ring_agg > sc->tx_ring_msix)
4475 ring_agg = sc->tx_ring_msix;
4477 if (ring_max == ncpus2) {
4480 offset_def = (ring_max * device_get_unit(sc->dev)) %
4483 offset = device_getenv_int(sc->dev, "msix.off",
4485 if (offset >= ncpus2 || offset % ring_max != 0) {
4486 device_printf(sc->dev,
4487 "invalid msix.off %d, use %d\n",
4488 offset, offset_def);
4489 offset = offset_def;
4493 for (i = 0; i < ring_agg; ++i) {
4494 struct ix_tx_ring *txr = &sc->tx_rings[i];
4495 struct ix_rx_ring *rxr = &sc->rx_rings[i];
4497 KKASSERT(x < sc->intr_cnt);
4498 rxr->rx_intr_vec = x;
4499 ix_setup_msix_eims(sc, x,
4500 &rxr->rx_eims, &rxr->rx_eims_val);
4502 /* NOTE: Leave TX ring's intr_vec negative */
4504 intr = &sc->intr_data[x++];
4506 intr->intr_serialize = &rxr->rx_serialize;
4507 intr->intr_func = ix_msix_rxtx;
4508 intr->intr_funcarg = rxr;
4509 intr->intr_use = IX_INTR_USE_RXTX;
4511 intr->intr_cpuid = i + offset;
4512 KKASSERT(intr->intr_cpuid < ncpus2);
4513 txr->tx_intr_cpuid = intr->intr_cpuid;
4515 ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0),
4516 "%s rxtx%d", device_get_nameunit(sc->dev), i);
4517 intr->intr_desc = intr->intr_desc0;
4520 if (ring_agg != ring_max) {
4521 if (ring_max == sc->tx_ring_msix)
4522 ix_conf_tx_msix(sc, i, &x, offset);
4524 ix_conf_rx_msix(sc, i, &x, offset);
4531 KKASSERT(x < sc->intr_cnt);
4532 sc->sts_msix_vec = x;
4534 intr = &sc->intr_data[x++];
4536 intr->intr_serialize = &sc->main_serialize;
4537 intr->intr_func = ix_msix_status;
4538 intr->intr_funcarg = sc;
4539 intr->intr_cpuid = 0;
4540 intr->intr_use = IX_INTR_USE_STATUS;
4542 ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s sts",
4543 device_get_nameunit(sc->dev));
4544 intr->intr_desc = intr->intr_desc0;
4546 KKASSERT(x == sc->intr_cnt);
4548 error = pci_setup_msix(sc->dev);
4550 device_printf(sc->dev, "Setup MSI-X failed\n");
4555 for (i = 0; i < sc->intr_cnt; ++i) {
4556 intr = &sc->intr_data[i];
4558 error = pci_alloc_msix_vector(sc->dev, i, &intr->intr_rid,
4561 device_printf(sc->dev,
4562 "Unable to allocate MSI-X %d on cpu%d\n", i,
4567 intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4568 &intr->intr_rid, RF_ACTIVE);
4569 if (intr->intr_res == NULL) {
4570 device_printf(sc->dev,
4571 "Unable to allocate MSI-X %d resource\n", i);
4577 pci_enable_msix(sc->dev);
4578 sc->intr_type = PCI_INTR_TYPE_MSIX;
4581 ix_free_msix(sc, setup);
4585 ix_free_msix(struct ix_softc *sc, boolean_t setup)
4589 KKASSERT(sc->intr_cnt > 1);
4591 for (i = 0; i < sc->intr_cnt; ++i) {
4592 struct ix_intr_data *intr = &sc->intr_data[i];
4594 if (intr->intr_res != NULL) {
4595 bus_release_resource(sc->dev, SYS_RES_IRQ,
4596 intr->intr_rid, intr->intr_res);
4598 if (intr->intr_rid >= 0)
4599 pci_release_msix_vector(sc->dev, intr->intr_rid);
4602 pci_teardown_msix(sc->dev);
4605 kfree(sc->intr_data, M_DEVBUF);
4606 sc->intr_data = NULL;
4610 ix_conf_rx_msix(struct ix_softc *sc, int i, int *x0, int offset)
4614 for (; i < sc->rx_ring_msix; ++i) {
4615 struct ix_rx_ring *rxr = &sc->rx_rings[i];
4616 struct ix_intr_data *intr;
4618 KKASSERT(x < sc->intr_cnt);
4619 rxr->rx_intr_vec = x;
4620 ix_setup_msix_eims(sc, x, &rxr->rx_eims, &rxr->rx_eims_val);
4622 intr = &sc->intr_data[x++];
4624 intr->intr_serialize = &rxr->rx_serialize;
4625 intr->intr_func = ix_msix_rx;
4626 intr->intr_funcarg = rxr;
4627 intr->intr_rate = IX_MSIX_RX_RATE;
4628 intr->intr_use = IX_INTR_USE_RX;
4630 intr->intr_cpuid = i + offset;
4631 KKASSERT(intr->intr_cpuid < ncpus2);
4633 ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s rx%d",
4634 device_get_nameunit(sc->dev), i);
4635 intr->intr_desc = intr->intr_desc0;
4641 ix_conf_tx_msix(struct ix_softc *sc, int i, int *x0, int offset)
4645 for (; i < sc->tx_ring_msix; ++i) {
4646 struct ix_tx_ring *txr = &sc->tx_rings[i];
4647 struct ix_intr_data *intr;
4649 KKASSERT(x < sc->intr_cnt);
4650 txr->tx_intr_vec = x;
4651 ix_setup_msix_eims(sc, x, &txr->tx_eims, &txr->tx_eims_val);
4653 intr = &sc->intr_data[x++];
4655 intr->intr_serialize = &txr->tx_serialize;
4656 intr->intr_func = ix_msix_tx;
4657 intr->intr_funcarg = txr;
4658 intr->intr_rate = IX_MSIX_TX_RATE;
4659 intr->intr_use = IX_INTR_USE_TX;
4661 intr->intr_cpuid = i + offset;
4662 KKASSERT(intr->intr_cpuid < ncpus2);
4663 txr->tx_intr_cpuid = intr->intr_cpuid;
4665 ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s tx%d",
4666 device_get_nameunit(sc->dev), i);
4667 intr->intr_desc = intr->intr_desc0;
static void
ix_msix_rx(void *xrxr)
{
	struct ix_rx_ring *rxr = xrxr;

	ASSERT_SERIALIZED(&rxr->rx_serialize);

	ix_rxeof(rxr, -1);
	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
}

static void
ix_msix_tx(void *xtxr)
{
	struct ix_tx_ring *txr = xtxr;

	ASSERT_SERIALIZED(&txr->tx_serialize);

	ix_txeof(txr, *(txr->tx_hdr));
	if (!ifsq_is_empty(txr->tx_ifsq))
		ifsq_devstart(txr->tx_ifsq);
	IXGBE_WRITE_REG(&txr->tx_sc->hw, txr->tx_eims, txr->tx_eims_val);
}
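
/*
 * Combined handler, used when an RX ring shares its MSI-X vector with
 * a TX ring (the aggregated ring case).  TX completion is processed
 * only when the hardware-updated TX head writeback (tx_hdr) has moved,
 * so the common RX-only case avoids taking the TX serializer.
 */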
static void
ix_msix_rxtx(void *xrxr)
{
	struct ix_rx_ring *rxr = xrxr;
	struct ix_tx_ring *txr;
	uint32_t hdr;

	ASSERT_SERIALIZED(&rxr->rx_serialize);

	ix_rxeof(rxr, -1);

	/*
	 * NOTE:
	 * Since tx_next_clean is only changed by ix_txeof(),
	 * which is called only in interrupt handler, the
	 * check w/o holding tx serializer is MPSAFE.
	 */
	txr = rxr->rx_txr;
	hdr = *(txr->tx_hdr);
	if (hdr != txr->tx_next_clean) {
		lwkt_serialize_enter(&txr->tx_serialize);
		ix_txeof(txr, hdr);
		if (!ifsq_is_empty(txr->tx_ifsq))
			ifsq_devstart(txr->tx_ifsq);
		lwkt_serialize_exit(&txr->tx_serialize);
	}

	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
}
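
/*
 * Decode the non-ring ("other") causes from EICR: link state change,
 * ECC and over-temperature errors, SFP+ module insertion/multispeed-
 * fiber events, fan failure on 82598AT, and the external PHY interrupt
 * on X550EM baseT.
 */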
static void
ix_intr_status(struct ix_softc *sc, uint32_t eicr)
{
	struct ixgbe_hw *hw = &sc->hw;

	/* Link status change */
	if (eicr & IXGBE_EICR_LSC)
		ix_handle_link(sc);

	if (hw->mac.type != ixgbe_mac_82598EB) {
		if (eicr & IXGBE_EICR_ECC)
			if_printf(&sc->arpcom.ac_if, "ECC ERROR!! Reboot!!\n");

		/* Check for over temp condition */
		if (eicr & IXGBE_EICR_TS) {
			if_printf(&sc->arpcom.ac_if, "CRITICAL: OVER TEMP!! "
			    "PHY IS SHUT DOWN!! Shutdown!!\n");
		}
	}

	if (ix_is_sfp(hw)) {
		uint32_t mod_mask;

		/* Pluggable optics-related interrupt */
		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP)
			mod_mask = IXGBE_EICR_GPI_SDP0_X540;
		else
			mod_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw);
		if (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))
			ix_handle_msf(sc);
		else if (eicr & mod_mask)
			ix_handle_mod(sc);
	}

	/* Check for fan failure */
	if (hw->device_id == IXGBE_DEV_ID_82598AT &&
	    (eicr & IXGBE_EICR_GPI_SDP1))
		if_printf(&sc->arpcom.ac_if, "FAN FAILURE!! Replace!!\n");

	/* External PHY interrupt */
	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
	    (eicr & IXGBE_EICR_GPI_SDP0_X540))
		ix_handle_phy(sc);
}
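
/*
 * Status vector handler: the EICR read clears the latched "other"
 * causes, ix_intr_status() dispatches them, and the final EIMS write
 * re-enables the interrupts recorded in sc->intr_mask.
 */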
static void
ix_msix_status(void *xsc)
{
	struct ix_softc *sc = xsc;
	uint32_t eicr;

	ASSERT_SERIALIZED(&sc->main_serialize);

	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
	ix_intr_status(sc, eicr);

	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, sc->intr_mask);
}

static void
ix_setup_msix_eims(const struct ix_softc *sc, int x,
    uint32_t *eims, uint32_t *eims_val)
{
	if (x < 32) {
		if (sc->hw.mac.type == ixgbe_mac_82598EB) {
			KASSERT(x < IX_MAX_MSIX_82598,
			    ("%s: invalid vector %d for 82598",
			     device_get_nameunit(sc->dev), x));
			*eims = IXGBE_EIMS;
		} else {
			*eims = IXGBE_EIMS_EX(0);
		}
		*eims_val = 1 << x;
	} else {
		KASSERT(x < IX_MAX_MSIX, ("%s: invalid vector %d",
		    device_get_nameunit(sc->dev), x));
		KASSERT(sc->hw.mac.type != ixgbe_mac_82598EB,
		    ("%s: invalid vector %d for 82598",
		     device_get_nameunit(sc->dev), x));
		*eims = IXGBE_EIMS_EX(1);
		*eims_val = 1 << (x - 32);
	}
}
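
/*
 * Worked example of the mapping above: on 82599 and later the vector
 * masks are split across two 32-bit EIMS_EX registers, so vector 5 is
 * re-armed through EIMS_EX(0) with mask 1 << 5, while vector 34 uses
 * EIMS_EX(1) with mask 1 << (34 - 32).  The 82598 only has the single
 * 32-bit EIMS register, which caps it at IX_MAX_MSIX_82598 vectors.
 */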
#ifdef IFPOLL_ENABLE

static void
ix_npoll_status(struct ifnet *ifp)
{
	struct ix_softc *sc = ifp->if_softc;
	uint32_t eicr;

	ASSERT_SERIALIZED(&sc->main_serialize);

	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
	ix_intr_status(sc, eicr);
}

static void
ix_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused)
{
	struct ix_tx_ring *txr = arg;

	ASSERT_SERIALIZED(&txr->tx_serialize);

	ix_txeof(txr, *(txr->tx_hdr));
	if (!ifsq_is_empty(txr->tx_ifsq))
		ifsq_devstart(txr->tx_ifsq);
}

static void
ix_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle)
{
	struct ix_rx_ring *rxr = arg;

	ASSERT_SERIALIZED(&rxr->rx_serialize);

	ix_rxeof(rxr, cycle);
}
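
/*
 * ifpoll (de)registration.  With a non-NULL info, each ring is mapped
 * to a polling CPU (ring index plus the configurable rx/tx offsets);
 * with a NULL info, the driver reverts to interrupt mode.  In either
 * direction the NIC is reinitialized if the number of rings in use
 * would change.
 */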
static void
ix_npoll(struct ifnet *ifp, struct ifpoll_info *info)
{
	struct ix_softc *sc = ifp->if_softc;
	int i, txr_cnt, rxr_cnt;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	if (info) {
		int off;

		info->ifpi_status.status_func = ix_npoll_status;
		info->ifpi_status.serializer = &sc->main_serialize;

		txr_cnt = ix_get_txring_inuse(sc, TRUE);
		off = sc->tx_npoll_off;
		for (i = 0; i < txr_cnt; ++i) {
			struct ix_tx_ring *txr = &sc->tx_rings[i];
			int idx = i + off;

			KKASSERT(idx < ncpus2);
			info->ifpi_tx[idx].poll_func = ix_npoll_tx;
			info->ifpi_tx[idx].arg = txr;
			info->ifpi_tx[idx].serializer = &txr->tx_serialize;
			ifsq_set_cpuid(txr->tx_ifsq, idx);
		}

		rxr_cnt = ix_get_rxring_inuse(sc, TRUE);
		off = sc->rx_npoll_off;
		for (i = 0; i < rxr_cnt; ++i) {
			struct ix_rx_ring *rxr = &sc->rx_rings[i];
			int idx = i + off;

			KKASSERT(idx < ncpus2);
			info->ifpi_rx[idx].poll_func = ix_npoll_rx;
			info->ifpi_rx[idx].arg = rxr;
			info->ifpi_rx[idx].serializer = &rxr->rx_serialize;
		}

		if (ifp->if_flags & IFF_RUNNING) {
			if (rxr_cnt == sc->rx_ring_inuse &&
			    txr_cnt == sc->tx_ring_inuse) {
				ix_set_timer_cpuid(sc, TRUE);
				ix_disable_intr(sc);
			} else {
				ix_init(sc);
			}
		}
	} else {
		for (i = 0; i < sc->tx_ring_cnt; ++i) {
			struct ix_tx_ring *txr = &sc->tx_rings[i];

			ifsq_set_cpuid(txr->tx_ifsq, txr->tx_intr_cpuid);
		}

		if (ifp->if_flags & IFF_RUNNING) {
			txr_cnt = ix_get_txring_inuse(sc, FALSE);
			rxr_cnt = ix_get_rxring_inuse(sc, FALSE);

			if (rxr_cnt == sc->rx_ring_inuse &&
			    txr_cnt == sc->tx_ring_inuse) {
				ix_set_timer_cpuid(sc, FALSE);
				ix_enable_intr(sc);
			} else {
				ix_init(sc);
			}
		}
	}
}
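
/*
 * The npoll offset sysctls accept an offset only if it is
 * non-negative, below ncpus2 and a multiple of the ring count, so
 * that the rings land on an aligned block of CPUs.  For example, with
 * 4 RX rings on a system with ncpus2 == 8, the valid RX offsets are
 * 0 and 4.
 */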
static int
ix_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS)
{
	struct ix_softc *sc = (void *)arg1;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int error, off;

	off = sc->rx_npoll_off;
	error = sysctl_handle_int(oidp, &off, 0, req);
	if (error || req->newptr == NULL)
		return error;
	if (off < 0)
		return EINVAL;

	ifnet_serialize_all(ifp);
	if (off >= ncpus2 || off % sc->rx_ring_cnt != 0) {
		error = EINVAL;
	} else {
		error = 0;
		sc->rx_npoll_off = off;
	}
	ifnet_deserialize_all(ifp);

	return error;
}

static int
ix_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS)
{
	struct ix_softc *sc = (void *)arg1;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int error, off;

	off = sc->tx_npoll_off;
	error = sysctl_handle_int(oidp, &off, 0, req);
	if (error || req->newptr == NULL)
		return error;
	if (off < 0)
		return EINVAL;

	ifnet_serialize_all(ifp);
	if (off >= ncpus2 || off % sc->tx_ring_cnt != 0) {
		error = EINVAL;
	} else {
		error = 0;
		sc->tx_npoll_off = off;
	}
	ifnet_deserialize_all(ifp);

	return error;
}

#endif /* IFPOLL_ENABLE */
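
/*
 * Helpers translating between the ifmedia flow control bits
 * (IFM_ETH_RXPAUSE/IFM_ETH_TXPAUSE) and the ixgbe_fc_mode
 * enumeration, plus the matching ifmedia description strings.
 */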
static enum ixgbe_fc_mode
ix_ifmedia2fc(int ifm)
{
	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);

	switch (fc_opt) {
	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
		return ixgbe_fc_full;

	case IFM_ETH_RXPAUSE:
		return ixgbe_fc_rx_pause;

	case IFM_ETH_TXPAUSE:
		return ixgbe_fc_tx_pause;

	default:
		return ixgbe_fc_none;
	}
}

static const char *
ix_ifmedia2str(int ifm)
{
	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);

	switch (fc_opt) {
	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
		return IFM_ETH_FC_FULL;

	case IFM_ETH_RXPAUSE:
		return IFM_ETH_FC_RXPAUSE;

	case IFM_ETH_TXPAUSE:
		return IFM_ETH_FC_TXPAUSE;

	default:
		return IFM_ETH_FC_NONE;
	}
}

static const char *
ix_fc2str(enum ixgbe_fc_mode fc)
{
	switch (fc) {
	case ixgbe_fc_full:
		return IFM_ETH_FC_FULL;

	case ixgbe_fc_rx_pause:
		return IFM_ETH_FC_RXPAUSE;

	case ixgbe_fc_tx_pause:
		return IFM_ETH_FC_TXPAUSE;

	default:
		return IFM_ETH_FC_NONE;
	}
}
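
/*
 * Power down the adapter for suspend/shutdown.  Only X550EM baseT
 * gets the wake-up register programming and the low power link up
 * (LPLU) sequence; all other adapters are simply stopped.
 */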
static void
ix_powerdown(struct ix_softc *sc)
{
	struct ixgbe_hw *hw = &sc->hw;
	int error;

	/* Limit power management flow to X550EM baseT */
	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
	    hw->phy.ops.enter_lplu) {
		/* Turn off support for APM wakeup. (Using ACPI instead) */
		IXGBE_WRITE_REG(hw, IXGBE_GRC,
		    IXGBE_READ_REG(hw, IXGBE_GRC) & ~(uint32_t)2);

		/*
		 * Clear Wake Up Status register to prevent any previous wakeup
		 * events from waking us up immediately after we suspend.
		 */
		IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff);

		/*
		 * Program the Wakeup Filter Control register with user filter
		 * settings
		 */
		IXGBE_WRITE_REG(hw, IXGBE_WUFC, sc->wufc);

		/* Enable wakeups and power management in Wakeup Control */
		IXGBE_WRITE_REG(hw, IXGBE_WUC,
		    IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN);

		/* X550EM baseT adapters need a special LPLU flow */
		hw->phy.reset_disable = true;
		ix_stop(sc);
		error = hw->phy.ops.enter_lplu(hw);
		if (error) {
			if_printf(&sc->arpcom.ac_if,
			    "Error entering LPLU: %d\n", error);
		}
		hw->phy.reset_disable = false;
	} else {
		/* Just stop for other adapters */
		ix_stop(sc);
	}
}
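
/*
 * The flow control watermarks are derived from the maximum frame
 * size: the delay-value macros (IXGBE_DV/IXGBE_DV_X540 and their
 * LOW_DV counterparts) yield a byte count, IXGBE_BT2KB converts it
 * to KB, and the high watermark is the RX packet buffer size
 * (RXPBSIZE, in KB) minus that delay value.
 */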
static void
ix_config_flowctrl(struct ix_softc *sc)
{
	struct ixgbe_hw *hw = &sc->hw;
	uint32_t rxpb, frame, size, tmp;

	frame = sc->max_frame_size;

	/* Calculate High Water */
	switch (hw->mac.type) {
	case ixgbe_mac_X540:
	case ixgbe_mac_X550:
	case ixgbe_mac_X550EM_a:
	case ixgbe_mac_X550EM_x:
		tmp = IXGBE_DV_X540(frame, frame);
		break;
	default:
		tmp = IXGBE_DV(frame, frame);
		break;
	}
	size = IXGBE_BT2KB(tmp);
	rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
	hw->fc.high_water[0] = rxpb - size;

	/* Now calculate Low Water */
	switch (hw->mac.type) {
	case ixgbe_mac_X540:
	case ixgbe_mac_X550:
	case ixgbe_mac_X550EM_a:
	case ixgbe_mac_X550EM_x:
		tmp = IXGBE_LOW_DV_X540(frame);
		break;
	default:
		tmp = IXGBE_LOW_DV(frame);
		break;
	}
	hw->fc.low_water[0] = IXGBE_BT2KB(tmp);

	hw->fc.requested_mode = ix_ifmedia2fc(sc->ifm_media);
	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
		hw->fc.disable_fc_autoneg = TRUE;
	else
		hw->fc.disable_fc_autoneg = FALSE;
	hw->fc.pause_time = IX_FC_PAUSE;
	hw->fc.send_xon = TRUE;
}
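
/*
 * DMA coalescing is only supported by X550 and newer MACs; push the
 * watchdog timer and link speed to the shared code whenever either
 * changes.
 */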
static void
ix_config_dmac(struct ix_softc *sc)
{
	struct ixgbe_hw *hw = &sc->hw;
	struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config;

	if (hw->mac.type < ixgbe_mac_X550 || !hw->mac.ops.dmac_config)
		return;

	if ((dcfg->watchdog_timer ^ sc->dmac) ||
	    (dcfg->link_speed ^ sc->link_speed)) {
		dcfg->watchdog_timer = sc->dmac;
		dcfg->fcoe_en = false;
		dcfg->link_speed = sc->link_speed;

		if_printf(&sc->arpcom.ac_if, "dmac settings: "
		    "watchdog %d, link speed %d\n",
		    dcfg->watchdog_timer, dcfg->link_speed);

		hw->mac.ops.dmac_config(hw);
	}
}
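
/*
 * Rebuild the ifmedia list from the PHY's supported physical layers.
 * Backplane media (KR/KX4/KX) have no matching DragonFlyBSD media
 * types and are mapped to the closest fiber/copper ones.  If the
 * previously selected media can no longer be set, fall back to
 * autoselect while preserving the flow control setting.
 */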
static void
ix_init_media(struct ix_softc *sc)
{
	struct ixgbe_hw *hw = &sc->hw;
	int layer, msf_ifm = IFM_NONE;

	ifmedia_removeall(&sc->media);

	layer = ixgbe_get_supported_physical_layer(hw);

	/*
	 * Media types with matching DragonFlyBSD media defines
	 */
	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_T | IFM_FDX,
		    0, NULL);
	}
	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
		    0, NULL);
	}
	if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) {
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
		    0, NULL);
		/* No half-duplex support */
	}

	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LR | IFM_FDX,
		    0, NULL);
		msf_ifm = IFM_1000_LX;
	}
	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LRM | IFM_FDX,
		    0, NULL);
		msf_ifm = IFM_1000_LX;
	}
	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
		    0, NULL);
		msf_ifm = IFM_1000_SX;
	}

	/* Add media for multispeed fiber */
	if (ix_is_sfp(hw) && hw->phy.multispeed_fiber && msf_ifm != IFM_NONE) {
		ixgbe_link_speed linkcap;
		bool autoneg;

		hw->mac.ops.get_link_capabilities(hw, &linkcap, &autoneg);
		if (linkcap & IXGBE_LINK_SPEED_1GB_FULL)
			ifmedia_add_nodup(&sc->media,
			    IFM_ETHER | msf_ifm | IFM_FDX, 0, NULL);
	}

	if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
		ifmedia_add_nodup(&sc->media,
		    IFM_ETHER | IFM_10G_TWINAX | IFM_FDX, 0, NULL);
	}
	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
		    0, NULL);
	}
	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
		    0, NULL);
	}

	/*
	 * XXX Other (no matching DragonFlyBSD media type):
	 * To workaround this, we'll assign these completely
	 * inappropriate media types.
	 */
	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKR\n");
		if_printf(&sc->arpcom.ac_if, "10GbaseKR mapped to 10GbaseSR\n");
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
		    0, NULL);
	}
	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) {
		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKX4\n");
		if_printf(&sc->arpcom.ac_if,
		    "10GbaseKX4 mapped to 10GbaseCX4\n");
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
		    0, NULL);
	}
	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) {
		if_printf(&sc->arpcom.ac_if, "Media supported: 1000baseKX\n");
		if_printf(&sc->arpcom.ac_if,
		    "1000baseKX mapped to 1000baseCX\n");
		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_CX | IFM_FDX,
		    0, NULL);
	}
	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) {
		/* Someday, someone will care about you... */
		if_printf(&sc->arpcom.ac_if,
		    "Media supported: 1000baseBX, ignored\n");
	}

	/* XXX we probably don't need this */
	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
		ifmedia_add_nodup(&sc->media,
		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
	}

	ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);

	if (ifmedia_tryset(&sc->media, sc->ifm_media)) {
		int flowctrl = (sc->ifm_media & IFM_ETH_FCMASK);

		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
		sc->ifm_media = IX_IFM_DEFAULT | flowctrl;
		ifmedia_set(&sc->media, sc->ifm_media);
	}
}