1 /******************************************************************************
3 Copyright (c) 2006-2013, Myricom Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 $FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $
30 ***************************************************************************/
32 #include "opt_ifpoll.h"
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/linker.h>
38 #include <sys/firmware.h>
39 #include <sys/endian.h>
40 #include <sys/in_cksum.h>
41 #include <sys/sockio.h>
43 #include <sys/malloc.h>
44 #include <sys/kernel.h>
45 #include <sys/module.h>
46 #include <sys/serialize.h>
47 #include <sys/socket.h>
48 #include <sys/sysctl.h>
51 #include <net/if_arp.h>
52 #include <net/ifq_var.h>
53 #include <net/ethernet.h>
54 #include <net/if_dl.h>
55 #include <net/if_media.h>
56 #include <net/if_poll.h>
60 #include <net/if_types.h>
61 #include <net/vlan/if_vlan_var.h>
63 #include <net/toeplitz.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 #include <netinet/tcp.h>
73 #include <bus/pci/pcireg.h>
74 #include <bus/pci/pcivar.h>
75 #include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */
77 #include <vm/vm.h> /* for pmap_mapdev() */
80 #if defined(__i386__) || defined(__x86_64__)
81 #include <machine/specialreg.h>
84 #include <dev/netif/mxge/mxge_mcp.h>
85 #include <dev/netif/mxge/mcp_gen_header.h>
86 #include <dev/netif/mxge/if_mxge_var.h>
/* Payload room in a "small" RX buffer: a standard mbuf minus the firmware pad. */
88 #define MXGE_RX_SMALL_BUFLEN (MHLEN - MXGEFW_PAD)
/* Length in bytes of the hardware RSS hash key. */
89 #define MXGE_HWRSS_KEYLEN 16
/*
 * Module-wide defaults; each is overridable at boot via the matching
 * hw.mxge.* tunable registered below.
 */
92 static int mxge_nvidia_ecrc_enable = 1;
93 static int mxge_force_firmware = 0;
94 static int mxge_intr_coal_delay = MXGE_INTR_COAL_DELAY;
95 static int mxge_deassert_wait = 1;
96 static int mxge_flow_control = 1;
97 static int mxge_ticks;
98 static int mxge_num_slices = 0;
99 static int mxge_always_promisc = 0;
100 static int mxge_throttle = 0;
101 static int mxge_msi_enable = 1;
102 static int mxge_msix_enable = 1;
103 static int mxge_multi_tx = 1;
105 * Don't use RSS by default, its just too slow
107 static int mxge_use_rss = 0;
/*
 * Firmware image names.  The "p" (ethp/rss_ethp) variants work around
 * unaligned PCIe completion packets; the plain variants assume aligned
 * completions (see mxge_firmware_probe/mxge_select_firmware below).
 */
109 static const char *mxge_fw_unaligned = "mxge_ethp_z8e";
110 static const char *mxge_fw_aligned = "mxge_eth_z8e";
111 static const char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
112 static const char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
/* Loader tunables mapping the hw.mxge.* knobs onto the defaults above. */
114 TUNABLE_INT("hw.mxge.num_slices", &mxge_num_slices);
115 TUNABLE_INT("hw.mxge.flow_control_enabled", &mxge_flow_control);
116 TUNABLE_INT("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay);
117 TUNABLE_INT("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable);
118 TUNABLE_INT("hw.mxge.force_firmware", &mxge_force_firmware);
119 TUNABLE_INT("hw.mxge.deassert_wait", &mxge_deassert_wait);
120 TUNABLE_INT("hw.mxge.ticks", &mxge_ticks);
121 TUNABLE_INT("hw.mxge.always_promisc", &mxge_always_promisc);
122 TUNABLE_INT("hw.mxge.throttle", &mxge_throttle);
123 TUNABLE_INT("hw.mxge.multi_tx", &mxge_multi_tx);
124 TUNABLE_INT("hw.mxge.use_rss", &mxge_use_rss);
125 TUNABLE_INT("hw.mxge.msi.enable", &mxge_msi_enable);
126 TUNABLE_INT("hw.mxge.msix.enable", &mxge_msix_enable);
/* newbus device-interface entry points. */
128 static int mxge_probe(device_t dev);
129 static int mxge_attach(device_t dev);
130 static int mxge_detach(device_t dev);
131 static int mxge_shutdown(device_t dev);
/* Interrupt allocation/setup helpers (bodies not visible in this chunk). */
133 static int mxge_alloc_intr(struct mxge_softc *sc);
134 static void mxge_free_intr(struct mxge_softc *sc);
135 static int mxge_setup_intr(struct mxge_softc *sc);
136 static void mxge_teardown_intr(struct mxge_softc *sc, int cnt);
/* Method table wiring the probe/attach/detach/shutdown handlers above. */
138 static device_method_t mxge_methods[] = {
139 /* Device interface */
140 DEVMETHOD(device_probe, mxge_probe),
141 DEVMETHOD(device_attach, mxge_attach),
142 DEVMETHOD(device_detach, mxge_detach),
143 DEVMETHOD(device_shutdown, mxge_shutdown),
/* Driver descriptor: per-instance softc is a mxge_softc_t. */
147 static driver_t mxge_driver = {
150 sizeof(mxge_softc_t),
153 static devclass_t mxge_devclass;
155 /* Declare ourselves to be a child of the PCI bus.*/
156 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, NULL, NULL);
/* firmware(9) supplies the MCP images; zlib inflates them (see z_alloc/z_free). */
157 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
158 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
/* Firmware/command and lifecycle helpers used throughout this file. */
160 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
161 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
162 static void mxge_close(mxge_softc_t *sc, int down);
163 static int mxge_open(mxge_softc_t *sc);
164 static void mxge_tick(void *arg);
165 static void mxge_watchdog_reset(mxge_softc_t *sc);
166 static void mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice);
/*
 * Device probe: match Myricom 10G PCIe NICs (Z8E / Z8E_9 device IDs) and
 * set a human-readable description based on the PCI revision ID.
 */
169 mxge_probe(device_t dev)
171 if (pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM &&
172 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E ||
173 pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9)) {
174 int rev = pci_get_revid(dev);
177 case MXGE_PCI_REV_Z8E:
178 device_set_desc(dev, "Myri10G-PCIE-8A");
180 case MXGE_PCI_REV_Z8ES:
181 device_set_desc(dev, "Myri10G-PCIE-8B");
/* Unknown revision: still claim the device, but warn. */
184 device_set_desc(dev, "Myri10G-PCIE-8??");
185 device_printf(dev, "Unrecognized rev %d NIC\n", rev);
/*
 * Enable write-combining on the NIC's mapped SRAM window (x86/x86_64 only)
 * via pmap_change_attr(), so PIO copies go out in bursts.
 */
194 mxge_enable_wc(mxge_softc_t *sc)
196 #if defined(__i386__) || defined(__x86_64__)
200 len = rman_get_size(sc->mem_res);
201 pmap_change_attr((vm_offset_t) sc->sram, len / PAGE_SIZE,
202 PAT_WRITE_COMBINING);
/*
 * Allocate coherent, zeroed DMA memory of the requested size and alignment.
 * Logs and returns the bus_dmamem_coherent() error code on failure.
 */
207 mxge_dma_alloc(mxge_softc_t *sc, bus_dmamem_t *dma, size_t bytes,
208 bus_size_t alignment)
/* 4KB-aligned allocations larger than a page get a boundary constraint. */
213 if (bytes > 4096 && alignment == 4096)
218 err = bus_dmamem_coherent(sc->parent_dmat, alignment, boundary,
219 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, bytes,
220 BUS_DMA_WAITOK | BUS_DMA_ZERO, dma);
222 device_printf(sc->dev, "bus_dmamem_coherent failed: %d\n", err);
/* Release DMA memory from mxge_dma_alloc(): unload map, free memory, destroy tag. */
229 mxge_dma_free(bus_dmamem_t *dma)
231 bus_dmamap_unload(dma->dmem_tag, dma->dmem_map);
232 bus_dmamem_free(dma->dmem_tag, dma->dmem_addr, dma->dmem_map);
233 bus_dma_tag_destroy(dma->dmem_tag);
237 * The eeprom strings on the lanaiX have the format
/*
 * Parse the NUL-separated EEPROM strings into the softc: "MAC=" (hex
 * octets), "PC=" product code, and "SN="/"SN2=" serial number, where
 * SN2 takes precedence over SN.
 */
243 mxge_parse_strings(mxge_softc_t *sc)
246 int i, found_mac, found_sn2;
249 ptr = sc->eeprom_strings;
252 while (*ptr != '\0') {
253 if (strncmp(ptr, "MAC=", 4) == 0) {
256 sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
/* Each MAC octet must be exactly two hex digits. */
257 if (endptr - ptr != 2)
266 } else if (strncmp(ptr, "PC=", 3) == 0) {
268 strlcpy(sc->product_code_string, ptr,
269 sizeof(sc->product_code_string));
270 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
272 strlcpy(sc->serial_number_string, ptr,
273 sizeof(sc->serial_number_string));
274 } else if (strncmp(ptr, "SN2=", 4) == 0) {
275 /* SN2 takes precedence over SN */
278 strlcpy(sc->serial_number_string, ptr,
279 sizeof(sc->serial_number_string));
/* Advance past the current NUL-terminated string. */
281 while (*ptr++ != '\0') {}
288 device_printf(sc->dev, "failed to parse eeprom_strings\n");
/*
 * Enable ECRC generation on an upstream NVIDIA (ck804/mcp55) PCIe bridge
 * so the NIC sees 8-byte-aligned completion packets.  Because DragonFly
 * lacks extended (>0xff) config space access, the bridge's extended config
 * registers are reached by pmap_mapdev()'ing the chipset's memory-mapped
 * config window and poking offset 0x178 directly.
 */
292 #if defined(__i386__) || defined(__x86_64__)
295 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
298 unsigned long base, off;
300 device_t pdev, mcp55;
301 uint16_t vendor_id, device_id, word;
302 uintptr_t bus, slot, func, ivend, idev;
/* Honor the hw.mxge.nvidia_ecrc_enable tunable. */
305 if (!mxge_nvidia_ecrc_enable)
308 pdev = device_get_parent(device_get_parent(sc->dev));
310 device_printf(sc->dev, "could not find parent?\n");
313 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
314 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
/* Only NVIDIA (0x10de) bridges are handled. */
316 if (vendor_id != 0x10de)
321 if (device_id == 0x005d) {
322 /* ck804, base address is magic */
324 } else if (device_id >= 0x0374 && device_id <= 0x378) {
325 /* mcp55, base address stored in chipset */
326 mcp55 = pci_find_bsf(0, 0, 0);
328 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
329 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
330 word = pci_read_config(mcp55, 0x90, 2);
331 base = ((unsigned long)word & 0x7ffeU) << 25;
339 * Test below is commented because it is believed that doing
340 * config read/write beyond 0xff will access the config space
341 * for the next larger function. Uncomment this and remove
342 * the hacky pmap_mapdev() way of accessing config space when
343 * DragonFly grows support for extended pcie config space access.
347 * See if we can, by some miracle, access the extended
350 val = pci_read_config(pdev, 0x178, 4);
351 if (val != 0xffffffff) {
353 pci_write_config(pdev, 0x178, val, 4);
358 * Rather than using normal pci config space writes, we must
359 * map the Nvidia config space ourselves. This is because on
360 * opteron/nvidia class machine the 0xe000000 mapping is
361 * handled by the nvidia chipset, that means the internal PCI
362 * device (the on-chip northbridge), or the amd-8131 bridge
363 * and things behind them are not visible by this method.
/* Fetch the bridge's bus/slot/function and IDs to compute its config offset. */
366 BUS_READ_IVAR(device_get_parent(pdev), pdev,
368 BUS_READ_IVAR(device_get_parent(pdev), pdev,
369 PCI_IVAR_SLOT, &slot);
370 BUS_READ_IVAR(device_get_parent(pdev), pdev,
371 PCI_IVAR_FUNCTION, &func);
372 BUS_READ_IVAR(device_get_parent(pdev), pdev,
373 PCI_IVAR_VENDOR, &ivend);
374 BUS_READ_IVAR(device_get_parent(pdev), pdev,
375 PCI_IVAR_DEVICE, &idev);
377 off = base + 0x00100000UL * (unsigned long)bus +
378 0x00001000UL * (unsigned long)(func + 8 * slot);
380 /* map it into the kernel */
381 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
383 device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
386 /* get a pointer to the config space mapped into the kernel */
387 cfgptr = va + (off & PAGE_MASK);
389 /* make sure that we can really access it */
390 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
391 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
/* The mapped window must echo the bridge's vendor/device IDs, else bail. */
392 if (!(vendor_id == ivend && device_id == idev)) {
393 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
394 vendor_id, device_id);
395 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
399 ptr32 = (uint32_t*)(cfgptr + 0x178);
402 if (val == 0xffffffff) {
403 device_printf(sc->dev, "extended mapping failed\n");
404 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
408 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
410 device_printf(sc->dev, "Enabled ECRC on upstream "
411 "Nvidia bridge at %d:%d:%d\n",
412 (int)bus, (int)slot, (int)func);
416 #else /* __i386__ || __x86_64__ */
/* Non-x86 stub: the NVIDIA ECRC hack only exists on x86-class chipsets. */
419 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
421 device_printf(sc->dev, "Nforce 4 chipset on non-x86/x86_64!?!?!\n");
/*
 * Ask the firmware to run DMA read, write, and read+write benchmarks
 * against the dmabench buffer; store the computed throughput in
 * sc->read_dma / sc->write_dma / sc->read_write_dma.  Also used with
 * MXGEFW_CMD_UNALIGNED_TEST to detect unaligned PCIe completions.
 */
427 mxge_dma_test(mxge_softc_t *sc, int test_type)
430 bus_addr_t dmatest_bus = sc->dmabench_dma.dmem_busaddr;
433 const char *test = " ";
436 * Run a small DMA test.
437 * The magic multipliers to the length tell the firmware
438 * to do DMA read, write, or read+write tests. The
439 * results are returned in cmd.data0. The upper 16
440 * bits of the return is the number of transfers completed.
441 * The lower 16 bits is the time in 0.5us ticks that the
442 * transfers took to complete.
445 len = sc->tx_boundary;
/* Read test: data2 multiplier 0x10000. */
447 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
448 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
449 cmd.data2 = len * 0x10000;
450 status = mxge_send_cmd(sc, test_type, &cmd);
455 sc->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);
/* Write test: data2 multiplier 0x1. */
457 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
458 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
459 cmd.data2 = len * 0x1;
460 status = mxge_send_cmd(sc, test_type, &cmd);
465 sc->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);
/* Concurrent read+write test: data2 multiplier 0x10001. */
467 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
468 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
469 cmd.data2 = len * 0x10001;
470 status = mxge_send_cmd(sc, test_type, &cmd);
475 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
476 (cmd.data0 & 0xffff);
/* Benchmark failures are reported; unaligned-test failures are expected. */
479 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) {
480 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
487 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
488 * when the PCI-E Completion packets are aligned on an 8-byte
489 * boundary. Some PCI-E chip sets always align Completion packets; on
490 * the ones that do not, the alignment can be enforced by enabling
491 * ECRC generation (if supported).
493 * When PCI-E Completion packets are not aligned, it is actually more
494 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
496 * If the driver can neither enable ECRC nor verify that it has
497 * already been enabled, then it must use a firmware image which works
498 * around unaligned completion packets (ethp_z8e.dat), and it should
499 * also ensure that it never gives the device a Read-DMA which is
500 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
501 * enabled, then the driver should use the aligned (eth_z8e.dat)
502 * firmware image, and set tx_boundary to 4KB.
/*
 * Try the aligned firmware: verify the PCIe Max Read Request size,
 * load the aligned image, enable ECRC if the chipset allows, then run
 * the unaligned-completion DMA test.  Returns 0 if the aligned image
 * is safe to keep; non-zero means fall back to the "ethp" image.
 */
505 mxge_firmware_probe(mxge_softc_t *sc)
507 device_t dev = sc->dev;
511 sc->tx_boundary = 4096;
514 * Verify the max read request size was set to 4KB
515 * before trying the test with 4KB.
/* NOTE(review): the "®" below is a mangled "&reg" from a bad text
 * extraction/encoding — restore "&reg" when fixing up this file. */
517 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) {
518 pectl = pci_read_config(dev, reg + 0x8, 2);
519 if ((pectl & (5 << 12)) != (5 << 12)) {
520 device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
522 sc->tx_boundary = 2048;
527 * Load the optimized firmware (which assumes aligned PCIe
528 * completions) in order to see if it works on this host.
530 sc->fw_name = mxge_fw_aligned;
531 status = mxge_load_firmware(sc, 1);
536 * Enable ECRC if possible
538 mxge_enable_nvidia_ecrc(sc);
541 * Run a DMA test which watches for unaligned completions and
542 * aborts on the first one seen. Not required on Z8ES or newer.
544 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
547 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
549 return 0; /* keep the aligned firmware */
552 device_printf(dev, "DMA test failed: %d\n", status);
553 if (status == ENOSYS) {
554 device_printf(dev, "Falling back to ethp! "
555 "Please install up to date fw\n");
/*
 * Decide between the aligned and unaligned firmware images.  Honors the
 * force_firmware tunable (and throttling, which forces a choice), takes
 * a shortcut for narrow (<= x4) PCIe links, otherwise probes via
 * mxge_firmware_probe().  Finishes by loading the chosen image.
 */
561 mxge_select_firmware(mxge_softc_t *sc)
564 int force_firmware = mxge_force_firmware;
/* Throttling requires a specific firmware, so it overrides the tunable. */
567 force_firmware = sc->throttle;
569 if (force_firmware != 0) {
570 if (force_firmware == 1)
575 device_printf(sc->dev,
576 "Assuming %s completions (forced)\n",
577 aligned ? "aligned" : "unaligned");
583 * If the PCIe link width is 4 or less, we can use the aligned
584 * firmware and skip any checks
586 if (sc->link_width != 0 && sc->link_width <= 4) {
587 device_printf(sc->dev, "PCIe x%d Link, "
588 "expect reduced performance\n", sc->link_width);
593 if (mxge_firmware_probe(sc) == 0)
/* Aligned: 4KB read-DMA boundary; unaligned: limit to 2KB. */
598 sc->fw_name = mxge_fw_aligned;
599 sc->tx_boundary = 4096;
601 sc->fw_name = mxge_fw_unaligned;
602 sc->tx_boundary = 2048;
604 return mxge_load_firmware(sc, 0);
/*
 * Validate a firmware header: check the MCP type, record the version
 * string for sysctl, and require the major.minor the driver was built
 * against (MXGEFW_VERSION_MAJOR/MINOR).
 */
608 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
610 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
611 if_printf(sc->ifp, "Bad firmware type: 0x%x\n",
612 be32toh(hdr->mcp_type));
616 /* Save firmware version for sysctl */
617 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
619 if_printf(sc->ifp, "firmware id: %s\n", hdr->version);
621 ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
622 &sc->fw_ver_minor, &sc->fw_ver_tiny);
624 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR &&
625 sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
626 if_printf(sc->ifp, "Found firmware version %s\n",
628 if_printf(sc->ifp, "Driver needs %d.%d\n",
629 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
/* zlib allocator callback backed by kernel malloc.
 * NOTE(review): items * size is not checked for overflow — inputs come
 * from zlib's internal bounded requests, but worth confirming. */
636 z_alloc(void *nil, u_int items, u_int size)
638 return kmalloc(items * size, M_TEMP, M_WAITOK);
/* zlib free callback (counterpart of z_alloc). */
642 z_free(void *nil, void *ptr)
/*
 * Fetch the firmware image named in sc->fw_name via firmware(9),
 * inflate it with zlib (the uncompressed size is smuggled in the image's
 * "version" field), validate its header, and PIO-copy it into NIC SRAM
 * in 256-byte chunks at MXGE_FW_OFFSET.
 */
648 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
651 char *inflate_buffer;
652 const struct firmware *fw;
653 const mcp_gen_header_t *hdr;
660 fw = firmware_get(sc->fw_name);
662 if_printf(sc->ifp, "Could not find firmware image %s\n",
667 /* Setup zlib and decompress f/w */
668 bzero(&zs, sizeof(zs));
671 status = inflateInit(&zs);
672 if (status != Z_OK) {
678 * The uncompressed size is stored as the firmware version,
679 * which would otherwise go unused
681 fw_len = (size_t)fw->version;
682 inflate_buffer = kmalloc(fw_len, M_TEMP, M_WAITOK);
683 zs.avail_in = fw->datasize;
684 zs.next_in = __DECONST(char *, fw->data);
685 zs.avail_out = fw_len;
686 zs.next_out = inflate_buffer;
687 status = inflate(&zs, Z_FINISH);
688 if (status != Z_STREAM_END) {
689 if_printf(sc->ifp, "zlib %d\n", status);
691 goto abort_with_buffer;
/* The header pointer stored at MCP_HEADER_PTR_OFFSET must be 4-byte
 * aligned and lie entirely within the inflated image. */
696 htobe32(*(const uint32_t *)(inflate_buffer + MCP_HEADER_PTR_OFFSET));
697 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
698 if_printf(sc->ifp, "Bad firmware file");
700 goto abort_with_buffer;
702 hdr = (const void*)(inflate_buffer + hdr_offset);
704 status = mxge_validate_firmware(sc, hdr);
706 goto abort_with_buffer;
708 /* Copy the inflated firmware to NIC SRAM. */
709 for (i = 0; i < fw_len; i += 256) {
710 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, inflate_buffer + i,
711 min(256U, (unsigned)(fw_len - i)));
720 kfree(inflate_buffer, M_TEMP);
723 firmware_put(fw, FIRMWARE_UNLOAD);
728 * Enable or disable periodic RDMAs from the host to make certain
729 * chipsets resend dropped PCIe messages
/*
 * Build a 6-word doorbell record (confirm addr, confirm data, dummy
 * addr, enable flag) on an 8-byte-aligned stack buffer, PIO it to the
 * MXGEFW_BOOT_DUMMY_RDMA doorbell, then poll sc->cmd for the firmware's
 * 0xffffffff acknowledgement (up to ~20 iterations).
 */
732 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
735 volatile uint32_t *confirm;
736 volatile char *submit;
737 uint32_t *buf, dma_low, dma_high;
/* Round the stack buffer up to an 8-byte boundary. */
740 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
742 /* Clear confirmation addr */
743 confirm = (volatile uint32_t *)sc->cmd;
748 * Send an rdma command to the PCIe engine, and wait for the
749 * response in the confirmation address. The firmware should
750 * write a -1 there to indicate it is alive and well
752 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
753 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);
754 buf[0] = htobe32(dma_high); /* confirm addr MSW */
755 buf[1] = htobe32(dma_low); /* confirm addr LSW */
756 buf[2] = htobe32(0xffffffff); /* confirm data */
757 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.dmem_busaddr);
758 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.dmem_busaddr);
759 buf[3] = htobe32(dma_high); /* dummy addr MSW */
760 buf[4] = htobe32(dma_low); /* dummy addr LSW */
761 buf[5] = htobe32(enable); /* enable? */
763 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
765 mxge_pio_copy(submit, buf, 64);
/* Poll for the firmware's -1 acknowledgement. */
770 while (*confirm != 0xffffffff && i < 20) {
774 if (*confirm != 0xffffffff) {
775 if_printf(sc->ifp, "dummy rdma %s failed (%p = 0x%x)",
776 (enable ? "enable" : "disable"), confirm, *confirm);
/*
 * Issue a single command to the running firmware: marshal data0-2 and
 * the command word big-endian into an 8-byte-aligned buffer, PIO it to
 * the MXGEFW_ETH_CMD mailbox, and poll the DMA'd response for up to
 * ~20 sleep intervals.  Translates firmware status codes into errno
 * values; cmd.data0 carries any result value back to the caller.
 */
781 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
784 char buf_bytes[sizeof(*buf) + 8];
785 volatile mcp_cmd_response_t *response = sc->cmd;
786 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
787 uint32_t dma_low, dma_high;
788 int err, sleep_total = 0;
790 /* Ensure buf is aligned to 8 bytes */
791 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
793 buf->data0 = htobe32(data->data0);
794 buf->data1 = htobe32(data->data1);
795 buf->data2 = htobe32(data->data2);
796 buf->cmd = htobe32(cmd);
797 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
798 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);
800 buf->response_addr.low = htobe32(dma_low);
801 buf->response_addr.high = htobe32(dma_high);
/* Pre-set the result to a sentinel the firmware will overwrite. */
803 response->result = 0xffffffff;
805 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
811 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
813 switch (be32toh(response->result)) {
815 data->data0 = be32toh(response->data);
/* Map firmware error codes onto errno values. */
821 case MXGEFW_CMD_UNKNOWN:
824 case MXGEFW_CMD_ERROR_UNALIGNED:
827 case MXGEFW_CMD_ERROR_BUSY:
830 case MXGEFW_CMD_ERROR_I2C_ABSENT:
834 if_printf(sc->ifp, "command %d failed, result = %d\n",
835 cmd, be32toh(response->result));
843 if_printf(sc->ifp, "command %d timed out result = %d\n",
844 cmd, be32toh(response->result));
/*
 * Adopt the firmware already running on the NIC: locate its header via
 * the pointer at MCP_HEADER_PTR_OFFSET in SRAM, bounce the header into
 * host memory, and validate it.  Also flags the rx-filter bug present
 * in adopted firmware 1.4.4 through 1.4.11.
 */
850 mxge_adopt_running_firmware(mxge_softc_t *sc)
852 struct mcp_gen_header *hdr;
853 const size_t bytes = sizeof(struct mcp_gen_header);
858 * Find running firmware header
861 htobe32(*(volatile uint32_t *)(sc->sram + MCP_HEADER_PTR_OFFSET));
/* Header must be 4-byte aligned and fully inside the SRAM window. */
863 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
864 if_printf(sc->ifp, "Running firmware has bad header offset "
865 "(%zu)\n", hdr_offset);
870 * Copy header of running firmware from SRAM to host memory to
873 hdr = kmalloc(bytes, M_DEVBUF, M_WAITOK);
874 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
875 rman_get_bushandle(sc->mem_res), hdr_offset, (char *)hdr, bytes);
876 status = mxge_validate_firmware(sc, hdr);
877 kfree(hdr, M_DEVBUF);
880 * Check to see if adopted firmware has bug where adopting
881 * it will cause broadcasts to be filtered unless the NIC
882 * is kept in ALLMULTI mode
884 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
885 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
886 sc->adopted_rx_filter_bug = 1;
887 if_printf(sc->ifp, "Adopting fw %d.%d.%d: "
888 "working around rx filter bug\n",
889 sc->fw_ver_major, sc->fw_ver_minor, sc->fw_ver_tiny);
/*
 * Load firmware into the NIC.  First tries mxge_load_firmware_helper();
 * if that fails and 'adopt' is set, falls back to adopting the firmware
 * already running (forcing the unaligned image / 2KB tx boundary on
 * 4KB-boundary configs).  On a fresh load, hands off to the bootstrap
 * MCP via the MXGEFW_BOOT_HANDOFF doorbell and polls for the firmware's
 * 0xffffffff confirmation.
 */
896 mxge_load_firmware(mxge_softc_t *sc, int adopt)
898 volatile uint32_t *confirm;
899 volatile char *submit;
901 uint32_t *buf, size, dma_low, dma_high;
/* Round the stack doorbell buffer up to an 8-byte boundary. */
904 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
906 size = sc->sram_size;
907 status = mxge_load_firmware_helper(sc, &size);
913 * Try to use the currently running firmware, if
916 status = mxge_adopt_running_firmware(sc);
919 "failed to adopt running firmware\n");
922 if_printf(sc->ifp, "Successfully adopted running firmware\n");
924 if (sc->tx_boundary == 4096) {
926 "Using firmware currently running on NIC. "
928 if_printf(sc->ifp, "performance consider loading "
929 "optimized firmware\n");
931 sc->fw_name = mxge_fw_unaligned;
932 sc->tx_boundary = 2048;
936 /* Clear confirmation addr */
937 confirm = (volatile uint32_t *)sc->cmd;
942 * Send a reload command to the bootstrap MCP, and wait for the
943 * response in the confirmation address. The firmware should
944 * write a -1 there to indicate it is alive and well
947 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
948 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);
950 buf[0] = htobe32(dma_high); /* confirm addr MSW */
951 buf[1] = htobe32(dma_low); /* confirm addr LSW */
952 buf[2] = htobe32(0xffffffff); /* confirm data */
955 * FIX: All newest firmware should un-protect the bottom of
956 * the sram before handoff. However, the very first interfaces
957 * do not. Therefore the handoff copy must skip the first 8 bytes
959 /* where the code starts*/
960 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
961 buf[4] = htobe32(size - 8); /* length of code */
962 buf[5] = htobe32(8); /* where to copy to */
963 buf[6] = htobe32(0); /* where to jump to */
965 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
966 mxge_pio_copy(submit, buf, 64);
/* Poll for the firmware's -1 acknowledgement. */
971 while (*confirm != 0xffffffff && i < 20) {
975 if (*confirm != 0xffffffff) {
976 if_printf(sc->ifp,"handoff failed (%p = 0x%x)",
/*
 * Push sc->mac_addr to the firmware: octets 0-3 packed big-endian into
 * data0, octets 4-5 into data1.
 */
984 mxge_update_mac_address(mxge_softc_t *sc)
987 uint8_t *addr = sc->mac_addr;
989 cmd.data0 = (addr[0] << 24) | (addr[1] << 16) |
990 (addr[2] << 8) | addr[3];
991 cmd.data1 = (addr[4] << 8) | (addr[5]);
992 return mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
/* Enable or disable link-level flow control (pause frames) in firmware. */
996 mxge_change_pause(mxge_softc_t *sc, int pause)
1002 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, &cmd);
1004 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, &cmd);
1006 if_printf(sc->ifp, "Failed to set flow control mode\n");
/*
 * Enable or disable promiscuous mode in firmware; the always_promisc
 * tunable forces it on regardless of the request.
 */
1014 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1019 if (mxge_always_promisc)
1023 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, &cmd);
1025 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, &cmd);
1027 if_printf(sc->ifp, "Failed to set promisc mode\n");
/*
 * Reprogram the firmware's multicast filters from the interface's
 * multicast address list.  Filtering is disabled (ALLMULTI) while the
 * list is rewritten, and stays disabled for IFF_ALLMULTI, the adopted
 * rx-filter-bug workaround, or on any command failure.
 */
1031 mxge_set_multicast_list(mxge_softc_t *sc)
1034 struct ifmultiaddr *ifma;
1035 struct ifnet *ifp = sc->ifp;
1038 /* This firmware is known to not support multicast */
1039 if (!sc->fw_multicast_support)
1042 /* Disable multicast filtering while we play with the lists*/
1043 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1045 if_printf(ifp, "Failed MXGEFW_ENABLE_ALLMULTI, "
1046 "error status: %d\n", err);
/* Adopted-firmware rx filter bug: must stay in ALLMULTI. */
1050 if (sc->adopted_rx_filter_bug)
1053 if (ifp->if_flags & IFF_ALLMULTI) {
1054 /* Request to disable multicast filtering, so quit here */
1058 /* Flush all the filters */
1059 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1061 if_printf(ifp, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, "
1062 "error status: %d\n", err);
1067 * Walk the multicast list, and add each address
1069 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1070 if (ifma->ifma_addr->sa_family != AF_LINK)
/* Split the 6-byte link-level address across data0 (4 bytes) and data1. */
1073 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1075 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
1077 cmd.data0 = htonl(cmd.data0);
1078 cmd.data1 = htonl(cmd.data1);
1079 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1081 if_printf(ifp, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
1082 "error status: %d\n", err);
1083 /* Abort, leaving multicast filtering off */
1088 /* Enable multicast filtering */
1089 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1091 if_printf(ifp, "Failed MXGEFW_DISABLE_ALLMULTI, "
1092 "error status: %d\n", err);
/*
 * Return the largest usable MTU: the firmware maximum when page-size
 * jumbo clusters cover it (or when the firmware accepts the
 * ALWAYS_USE_N_BIG_BUFFERS scatter scheme), else limited by
 * MJUMPAGESIZE.
 */
1098 mxge_max_mtu(mxge_softc_t *sc)
1103 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1104 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1106 /* try to set nbufs to see if it we can
1107 use virtually contiguous jumbos */
1109 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1112 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1114 /* otherwise, we're limited to MJUMPAGESIZE */
1115 return MJUMPAGESIZE - MXGEFW_PAD;
/*
 * Full firmware reset and reconfiguration: reset the MCP, restart the
 * dummy-RDMA keepalive, size the interrupt queue, (re)enable RSS
 * slices, exchange per-slice interrupt queue DMA addresses and
 * claim/deassert offsets, run a DMA benchmark, zero all mcp/driver
 * shared per-slice state, and reprogram MAC/promisc/pause/multicast
 * and the optional throttle factor.
 */
1120 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1122 struct mxge_slice_state *ss;
1123 mxge_rx_done_t *rx_done;
1124 volatile uint32_t *irq_claim;
1126 int slice, status, rx_intr_size;
1129 * Try to send a reset command to the card to see if it
1132 memset(&cmd, 0, sizeof (cmd));
1133 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1135 if_printf(sc->ifp, "failed reset\n");
1139 mxge_dummy_rdma(sc, 1);
1142 * Set the intrq size
1143 * XXX assume 4byte mcp_slot
1145 rx_intr_size = sc->rx_intr_slots * sizeof(mcp_slot_t);
1146 cmd.data0 = rx_intr_size;
1147 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1150 * Even though we already know how many slices are supported
1151 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1152 * has magic side effects, and must be called after a reset.
1153 * It must be called prior to calling any RSS related cmds,
1154 * including assigning an interrupt queue for anything but
1155 * slice 0. It must also be called *after*
1156 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1157 * the firmware to compute offsets.
1159 if (sc->num_slices > 1) {
1160 /* Ask the maximum number of slices it supports */
1161 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
1163 if_printf(sc->ifp, "failed to get number of slices\n");
1168 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1169 * to setting up the interrupt queue DMA
1171 cmd.data0 = sc->num_slices;
1172 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1173 if (sc->num_tx_rings > 1)
1174 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1175 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, &cmd);
1177 if_printf(sc->ifp, "failed to set number of slices\n");
1182 if (interrupts_setup) {
1183 /* Now exchange information about interrupts */
1184 for (slice = 0; slice < sc->num_slices; slice++) {
1185 ss = &sc->ss[slice];
1187 rx_done = &ss->rx_data.rx_done;
1188 memset(rx_done->entry, 0, rx_intr_size);
1191 MXGE_LOWPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
1193 MXGE_HIGHPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
1195 status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA,
/* Resolve SRAM offsets for coalescing delay, irq claim and deassert. */
1200 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET,
1202 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1204 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1205 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1207 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
1208 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1211 if_printf(sc->ifp, "failed set interrupt parameters\n");
1215 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1217 /* Run a DMA benchmark */
1218 mxge_dma_test(sc, MXGEFW_DMA_TEST);
1220 for (slice = 0; slice < sc->num_slices; slice++) {
1221 ss = &sc->ss[slice];
/* Each slice owns a pair of claim registers. */
1223 ss->irq_claim = irq_claim + (2 * slice);
1225 /* Reset mcp/driver shared state back to 0 */
1226 ss->rx_data.rx_done.idx = 0;
1229 ss->tx.pkt_done = 0;
1230 ss->tx.queue_active = 0;
1231 ss->tx.activate = 0;
1232 ss->tx.deactivate = 0;
1233 ss->rx_data.rx_big.cnt = 0;
1234 ss->rx_data.rx_small.cnt = 0;
1235 if (ss->fw_stats != NULL)
1236 bzero(ss->fw_stats, sizeof(*ss->fw_stats));
1238 sc->rdma_tags_available = 15;
/* Reapply addressing/filtering state lost across the reset. */
1240 status = mxge_update_mac_address(sc);
1241 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1242 mxge_change_pause(sc, sc->pause);
1243 mxge_set_multicast_list(sc);
1246 cmd.data0 = sc->throttle;
1247 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd))
1248 if_printf(sc->ifp, "can't enable throttle\n");
/*
 * Sysctl handler for hw.mxge.throttle: validate the new value against
 * MXGE_MIN/MAX_THROTTLE and push it to firmware under the interface
 * serializer.
 */
1254 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1259 unsigned int throttle;
1262 throttle = sc->throttle;
1263 err = sysctl_handle_int(oidp, &throttle, arg2, req);
/* No-op if unchanged. */
1267 if (throttle == sc->throttle)
1270 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1273 ifnet_serialize_all(sc->ifp);
1275 cmd.data0 = throttle;
1276 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1278 sc->throttle = throttle;
1280 ifnet_deserialize_all(sc->ifp);
/*
 * Sysctl handler toggling RSS use.  Takes effect under the interface
 * serializer; a running interface is reinitialized to apply it.
 */
1285 mxge_change_use_rss(SYSCTL_HANDLER_ARGS)
1291 use_rss = sc->use_rss;
1292 err = sysctl_handle_int(oidp, &use_rss, arg2, req);
1296 if (use_rss == sc->use_rss)
1299 ifnet_serialize_all(sc->ifp);
1301 sc->use_rss = use_rss;
1302 if (sc->ifp->if_flags & IFF_RUNNING) {
1307 ifnet_deserialize_all(sc->ifp);
/*
 * Sysctl handler for the interrupt coalescing delay (usecs).  Rejects 0
 * and values above 1,000,000; writes the new value big-endian straight
 * into the firmware's SRAM pointer under the interface serializer.
 */
1312 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1315 unsigned int intr_coal_delay;
1319 intr_coal_delay = sc->intr_coal_delay;
1320 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1324 if (intr_coal_delay == sc->intr_coal_delay)
1327 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1330 ifnet_serialize_all(sc->ifp);
1332 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1333 sc->intr_coal_delay = intr_coal_delay;
1335 ifnet_deserialize_all(sc->ifp);
/*
 * Sysctl handler for flow control: delegates the actual firmware change
 * to mxge_change_pause() under the interface serializer.
 */
1340 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1343 unsigned int enabled;
1347 enabled = sc->pause;
1348 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1352 if (enabled == sc->pause)
1355 ifnet_serialize_all(sc->ifp);
1356 err = mxge_change_pause(sc, enabled);
1357 ifnet_deserialize_all(sc->ifp);
/* Sysctl helper: expose a big-endian 32-bit firmware counter as host-order. */
1363 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1369 arg2 = be32toh(*(int *)arg1);
1371 err = sysctl_handle_int(oidp, arg1, arg2, req);
/*
 * Tear down the per-slice sysctl trees and the parent slice tree,
 * nulling each tree pointer so repeated calls are safe.
 */
1377 mxge_rem_sysctls(mxge_softc_t *sc)
1379 if (sc->ss != NULL) {
1380 struct mxge_slice_state *ss;
1383 for (slice = 0; slice < sc->num_slices; slice++) {
1384 ss = &sc->ss[slice];
1385 if (ss->sysctl_tree != NULL) {
1386 sysctl_ctx_free(&ss->sysctl_ctx);
1387 ss->sysctl_tree = NULL;
1392 if (sc->slice_sysctl_tree != NULL) {
1393 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1394 sc->slice_sysctl_tree = NULL;
/*
 * Register all of the driver's sysctl nodes: static device info,
 * tunables (coalescing, throttle, flow control, RSS), the firmware's
 * big-endian statistics block (exported via mxge_handle_be32), and a
 * per-slice "slice.N" subtree of debug counters.
 *
 * Fixes vs. previous revision: three copy-pasted description strings
 * ("flow_control_enabled" described as the coalescing delay,
 * "rx_big_cnt" described as "rx_small_cnt", "tx_pkt_done" described
 * as "tx_done") now describe the right counters.
 */
1399 mxge_add_sysctls(mxge_softc_t *sc)
1401 struct sysctl_ctx_list *ctx;
1402 struct sysctl_oid_list *children;
1404 struct mxge_slice_state *ss;
1408 ctx = device_get_sysctl_ctx(sc->dev);
1409 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
/* Firmware stats block of slice 0 carries the link/error counters. */
1410 fw = sc->ss[0].fw_stats;
1413 * Random information
1415 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
1416 CTLFLAG_RD, &sc->fw_version, 0, "firmware version");
1418 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial_number",
1419 CTLFLAG_RD, &sc->serial_number_string, 0, "serial number");
1421 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "product_code",
1422 CTLFLAG_RD, &sc->product_code_string, 0, "product code");
1424 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcie_link_width",
1425 CTLFLAG_RD, &sc->link_width, 0, "link width");
1427 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary",
1428 CTLFLAG_RD, &sc->tx_boundary, 0, "tx boundary");
1430 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_combine",
1431 CTLFLAG_RD, &sc->wc, 0, "write combining PIO");
1433 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_dma_MBs",
1434 CTLFLAG_RD, &sc->read_dma, 0, "DMA Read speed in MB/s");
1436 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_dma_MBs",
1437 CTLFLAG_RD, &sc->write_dma, 0, "DMA Write speed in MB/s");
1439 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_write_dma_MBs",
1440 CTLFLAG_RD, &sc->read_write_dma, 0,
1441 "DMA concurrent Read/Write speed in MB/s");
1443 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "watchdog_resets",
1444 CTLFLAG_RD, &sc->watchdog_resets, 0,
1445 "Number of times NIC was reset");
1448 * Performance related tunables
1450 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal_delay",
1451 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_intr_coal, "I",
1452 "Interrupt coalescing delay in usecs");
1454 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "throttle",
1455 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_throttle, "I",
1456 "Transmit throttling");
1458 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "flow_control_enabled",
1459 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_flow_control, "I",
1460 "Flow control (pause frame) enabled");
1462 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "use_rss",
1463 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_use_rss, "I",
1466 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "deassert_wait",
1467 CTLFLAG_RW, &mxge_deassert_wait, 0,
1468 "Wait for IRQ line to go low in ihandler");
1471 * Stats block from firmware is in network byte order.
1474 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "link_up",
1475 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 0,
1476 mxge_handle_be32, "I", "link up");
1478 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_tags_available",
1479 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 0,
1480 mxge_handle_be32, "I", "rdma_tags_available");
1482 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_crc32",
1483 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_crc32, 0,
1484 mxge_handle_be32, "I", "dropped_bad_crc32");
1486 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_phy",
1487 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_phy, 0,
1488 mxge_handle_be32, "I", "dropped_bad_phy");
1490 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_error_or_filtered",
1491 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_error_or_filtered, 0,
1492 mxge_handle_be32, "I", "dropped_link_error_or_filtered");
1494 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_overflow",
1495 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 0,
1496 mxge_handle_be32, "I", "dropped_link_overflow");
1498 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_multicast_filtered",
1499 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_multicast_filtered, 0,
1500 mxge_handle_be32, "I", "dropped_multicast_filtered");
1502 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_big_buffer",
1503 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 0,
1504 mxge_handle_be32, "I", "dropped_no_big_buffer");
1506 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_small_buffer",
1507 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_small_buffer, 0,
1508 mxge_handle_be32, "I", "dropped_no_small_buffer");
1510 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_overrun",
1511 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 0,
1512 mxge_handle_be32, "I", "dropped_overrun");
1514 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_pause",
1515 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_pause, 0,
1516 mxge_handle_be32, "I", "dropped_pause");
1518 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_runt",
1519 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 0,
1520 mxge_handle_be32, "I", "dropped_runt");
1522 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_unicast_filtered",
1523 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 0,
1524 mxge_handle_be32, "I", "dropped_unicast_filtered");
1526 /* add counters exported for debugging from all slices */
1527 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1528 sc->slice_sysctl_tree = SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx,
1529 children, OID_AUTO, "slice", CTLFLAG_RD, 0, "");
1530 if (sc->slice_sysctl_tree == NULL) {
1531 device_printf(sc->dev, "can't add slice sysctl node\n");
/* One numbered subtree ("slice.0", "slice.1", ...) per slice. */
1535 for (slice = 0; slice < sc->num_slices; slice++) {
1536 ss = &sc->ss[slice];
1537 sysctl_ctx_init(&ss->sysctl_ctx);
1538 ctx = &ss->sysctl_ctx;
1539 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1540 ksprintf(slice_num, "%d", slice);
1541 ss->sysctl_tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
1542 slice_num, CTLFLAG_RD, 0, "");
1543 if (ss->sysctl_tree == NULL) {
1544 device_printf(sc->dev,
1545 "can't add %d slice sysctl node\n", slice);
1546 return; /* XXX continue? */
1548 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1551 * XXX change to ULONG
1554 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_small_cnt",
1555 CTLFLAG_RD, &ss->rx_data.rx_small.cnt, 0, "rx_small_cnt");
1557 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_big_cnt",
1558 CTLFLAG_RD, &ss->rx_data.rx_big.cnt, 0, "rx_big_cnt");
1560 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_req",
1561 CTLFLAG_RD, &ss->tx.req, 0, "tx_req");
1563 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_done",
1564 CTLFLAG_RD, &ss->tx.done, 0, "tx_done");
1566 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_pkt_done",
1567 CTLFLAG_RD, &ss->tx.pkt_done, 0, "tx_pkt_done");
1569 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_queue_active",
1570 CTLFLAG_RD, &ss->tx.queue_active, 0, "tx_queue_active");
1572 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_activate",
1573 CTLFLAG_RD, &ss->tx.activate, 0, "tx_activate");
1575 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_deactivate",
1576 CTLFLAG_RD, &ss->tx.deactivate, 0, "tx_deactivate");
1581 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1582 * backwards one at a time and handle ring wraps
1584 static __inline void
1585 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1586 mcp_kreq_ether_send_t *src, int cnt)
1588 int idx, starting_slot;
1590 starting_slot = tx->req;
/* Mask wraps the slot index around the ring boundary. */
1593 idx = (starting_slot + cnt) & tx->mask;
1594 mxge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src));
1600 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1601 * at most 32 bytes at a time, so as to avoid involving the software
1602 * pio handler in the nic. We re-write the first segment's flags
1603 * to mark them valid only after writing the entire chain
1605 static __inline void
1606 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt)
1610 volatile uint32_t *dst_ints;
1611 mcp_kreq_ether_send_t *srcp;
1612 volatile mcp_kreq_ether_send_t *dstp, *dst;
1615 idx = tx->req & tx->mask;
/* Remember the real flags; the copy below goes out without them. */
1617 last_flags = src->flags;
1620 dst = dstp = &tx->lanai[idx];
/* Fast path: the chain fits without wrapping the ring. */
1623 if ((idx + cnt) < tx->mask) {
1624 for (i = 0; i < cnt - 1; i += 2) {
1625 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1626 wmb(); /* force write every 32 bytes */
1632 * Submit all but the first request, and ensure
1633 * that it is submitted below
1635 mxge_submit_req_backwards(tx, src, cnt);
1639 /* Submit the first request */
1640 mxge_pio_copy(dstp, srcp, sizeof(*src));
1641 wmb(); /* barrier before setting valid flag */
1644 /* Re-write the last 32-bits with the valid flags */
1645 src->flags = last_flags;
1646 src_ints = (uint32_t *)src;
1648 dst_ints = (volatile uint32_t *)dst;
/* This single 32-bit store makes the whole chain valid to the NIC. */
1650 *dst_ints = *src_ints;
/*
 * Make sure the Ethernet + IP + TCP headers of a TSO frame are
 * contiguous in the first mbuf, pulling the chain up if they are not.
 * Header lengths come from the csum_lhlen/csum_iphlen/csum_thlen
 * fields the stack filled in; all three must be non-zero.
 */
1656 mxge_pullup_tso(struct mbuf **mp)
1658 int hoff, iphlen, thoff;
1662 KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1664 iphlen = m->m_pkthdr.csum_iphlen;
1665 thoff = m->m_pkthdr.csum_thlen;
1666 hoff = m->m_pkthdr.csum_lhlen;
1668 KASSERT(iphlen > 0, ("invalid ip hlen"));
1669 KASSERT(thoff > 0, ("invalid tcp hlen"));
1670 KASSERT(hoff > 0, ("invalid ether hlen"));
/* Headers split across mbufs: coalesce them into the first one. */
1672 if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1673 m = m_pullup(m, hoff + iphlen + thoff);
/*
 * Build the firmware send-request chain for a TSO packet.  Each
 * busdma segment is split at MSS boundaries ("cuts"); a negative
 * cum_len means we are still inside the protocol headers.  The
 * per-request rdma_count fields are filled in retroactively (see the
 * long comment below), the dmamaps of the first and last slots are
 * swapped so the mbuf can be unloaded from the completing slot, and
 * the chain is finally pushed to the NIC with mxge_submit_req().
 */
1684 mxge_encap_tso(mxge_tx_ring_t *tx, struct mxge_buffer_state *info_map,
1685 struct mbuf *m, int busdma_seg_cnt)
1687 mcp_kreq_ether_send_t *req;
1688 bus_dma_segment_t *seg;
1689 uint32_t low, high_swapped;
1690 int len, seglen, cum_len, cum_len_next;
1691 int next_is_first, chop, cnt, rdma_count, small;
1692 uint16_t pseudo_hdr_offset, cksum_offset, mss;
1693 uint8_t flags, flags_next;
1694 struct mxge_buffer_state *info_last;
1695 bus_dmamap_t map = info_map->map;
1697 mss = m->m_pkthdr.tso_segsz;
1700 * Negative cum_len signifies to the send loop that we are
1701 * still in the header portion of the TSO packet.
1703 cum_len = -(m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen +
1704 m->m_pkthdr.csum_thlen);
1707 * TSO implies checksum offload on this hardware
1709 cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
1710 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1713 * For TSO, pseudo_hdr_offset holds mss. The firmware figures
1714 * out where to put the checksum by parsing the header.
1716 pseudo_hdr_offset = htobe16(mss);
1724 * "rdma_count" is the number of RDMAs belonging to the current
1725 * packet BEFORE the current send request. For non-TSO packets,
1726 * this is equal to "count".
1728 * For TSO packets, rdma_count needs to be reset to 0 after a
1731 * The rdma_count field of the send request is the number of
1732 * RDMAs of the packet starting at that request. For TSO send
1733 * requests with one ore more cuts in the middle, this is the
1734 * number of RDMAs starting after the last cut in the request.
1735 * All previous segments before the last cut implicitly have 1
1738 * Since the number of RDMAs is not known beforehand, it must be
1739 * filled-in retroactively - after each segmentation cut or at
1740 * the end of the entire packet.
1743 while (busdma_seg_cnt) {
1745 * Break the busdma segment up into pieces
1747 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1748 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1752 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1754 cum_len_next = cum_len + seglen;
/* Back-fill rdma_count into the request after the last cut. */
1755 (req - rdma_count)->rdma_count = rdma_count + 1;
1756 if (__predict_true(cum_len >= 0)) {
/* Payload portion: cut the segment at each MSS boundary. */
1758 chop = (cum_len_next > mss);
1759 cum_len_next = cum_len_next % mss;
1760 next_is_first = (cum_len_next == 0);
1761 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1763 next_is_first * MXGEFW_FLAGS_FIRST;
1764 rdma_count |= -(chop | next_is_first);
1765 rdma_count += chop & !next_is_first;
1766 } else if (cum_len_next >= 0) {
/* Crossing from headers into payload within this segment. */
1771 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1772 flags_next = MXGEFW_FLAGS_TSO_PLD |
1773 MXGEFW_FLAGS_FIRST |
1774 (small * MXGEFW_FLAGS_SMALL);
1777 req->addr_high = high_swapped;
1778 req->addr_low = htobe32(low);
1779 req->pseudo_hdr_offset = pseudo_hdr_offset;
1781 req->rdma_count = 1;
1782 req->length = htobe16(seglen);
1783 req->cksum_offset = cksum_offset;
1785 flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD);
1788 cum_len = cum_len_next;
1793 if (__predict_false(cksum_offset > seglen))
1794 cksum_offset -= seglen;
/* Bail out (to the drop path) if the chain outgrew the ring limit. */
1797 if (__predict_false(cnt > tx->max_desc))
1803 (req - rdma_count)->rdma_count = rdma_count;
/* Walk back and mark the tail requests of the final TSO burst. */
1807 req->flags |= MXGEFW_FLAGS_TSO_LAST;
1808 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1810 info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];
/* Swap maps so the mbuf is unloaded from the slot that completes. */
1812 info_map->map = info_last->map;
1813 info_last->map = map;
1816 mxge_submit_req(tx, tx->req_list, cnt);
1818 if (tx->send_go != NULL && tx->queue_active == 0) {
1819 /* Tell the NIC to start polling this slice */
1821 tx->queue_active = 1;
/* Error path: release the DMA mapping before dropping the packet. */
1828 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
/*
 * Map an outgoing mbuf chain for DMA and turn it into firmware send
 * requests.  TSO frames are pulled up and handed to mxge_encap_tso();
 * otherwise the segments are emitted directly, with optional checksum
 * offload flags, runt padding to 60 bytes via the shared zeropad
 * buffer, and a final mxge_submit_req() to kick the NIC.
 */
1834 mxge_encap(mxge_tx_ring_t *tx, struct mbuf *m, bus_addr_t zeropad)
1836 mcp_kreq_ether_send_t *req;
1837 bus_dma_segment_t *seg;
1839 int cnt, cum_len, err, i, idx, odd_flag;
1840 uint16_t pseudo_hdr_offset;
1841 uint8_t flags, cksum_offset;
1842 struct mxge_buffer_state *info_map, *info_last;
/* TSO frames need their headers contiguous before mapping. */
1844 if (m->m_pkthdr.csum_flags & CSUM_TSO) {
1845 err = mxge_pullup_tso(&m);
1846 if (__predict_false(err))
1851 * Map the frame for DMA
1853 idx = tx->req & tx->mask;
1854 info_map = &tx->info[idx];
1855 map = info_map->map;
/* max_desc - 2 leaves room for the runt-pad descriptor. */
1857 err = bus_dmamap_load_mbuf_defrag(tx->dmat, map, &m,
1858 tx->seg_list, tx->max_desc - 2, &cnt, BUS_DMA_NOWAIT);
1859 if (__predict_false(err != 0))
1861 bus_dmamap_sync(tx->dmat, map, BUS_DMASYNC_PREWRITE);
1864 * TSO is different enough, we handle it in another routine
1866 if (m->m_pkthdr.csum_flags & CSUM_TSO)
1867 return mxge_encap_tso(tx, info_map, m, cnt);
1871 pseudo_hdr_offset = 0;
1872 flags = MXGEFW_FLAGS_NO_TSO;
1875 * Checksum offloading
1877 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1878 cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
1879 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
1880 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
1881 req->cksum_offset = cksum_offset;
1882 flags |= MXGEFW_FLAGS_CKSUM;
1883 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
1887 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
1888 flags |= MXGEFW_FLAGS_SMALL;
1891 * Convert segments into a request list
1895 req->flags = MXGEFW_FLAGS_FIRST;
1896 for (i = 0; i < cnt; i++) {
1897 req->addr_low = htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
1898 req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1899 req->length = htobe16(seg->ds_len);
1900 req->cksum_offset = cksum_offset;
1901 if (cksum_offset > seg->ds_len)
1902 cksum_offset -= seg->ds_len;
1905 req->pseudo_hdr_offset = pseudo_hdr_offset;
1906 req->pad = 0; /* complete solid 16-byte block */
1907 req->rdma_count = 1;
1908 req->flags |= flags | ((cum_len & 1) * odd_flag);
1909 cum_len += seg->ds_len;
1917 * Pad runt to 60 bytes
/* Extra descriptor pointing at the shared zero buffer pads the frame. */
1921 req->addr_low = htobe32(MXGE_LOWPART_TO_U32(zeropad));
1922 req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(zeropad));
1923 req->length = htobe16(60 - cum_len);
1924 req->cksum_offset = 0;
1925 req->pseudo_hdr_offset = pseudo_hdr_offset;
1926 req->pad = 0; /* complete solid 16-byte block */
1927 req->rdma_count = 1;
1928 req->flags |= flags | ((cum_len & 1) * odd_flag);
1932 tx->req_list[0].rdma_count = cnt;
1934 /* print what the firmware will see */
1935 for (i = 0; i < cnt; i++) {
1936 kprintf("%d: addr: 0x%x 0x%x len:%d pso%d,"
1937 "cso:%d, flags:0x%x, rdma:%d\n",
1938 i, (int)ntohl(tx->req_list[i].addr_high),
1939 (int)ntohl(tx->req_list[i].addr_low),
1940 (int)ntohs(tx->req_list[i].length),
1941 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
1942 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
1943 tx->req_list[i].rdma_count);
1945 kprintf("--------------\n");
1947 info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];
/* Swap maps so the mbuf is unloaded from the slot that completes. */
1949 info_map->map = info_last->map;
1950 info_last->map = map;
1953 mxge_submit_req(tx, tx->req_list, cnt);
1955 if (tx->send_go != NULL && tx->queue_active == 0) {
1956 /* Tell the NIC to start polling this slice */
1958 tx->queue_active = 1;
/*
 * if_start handler for one TX subqueue.  Dequeues packets and encaps
 * them while at least max_desc ring slots remain free; on an encap
 * error the output-error counter is bumped, and when slots run out
 * the subqueue is marked OACTIVE until mxge_tx_done() frees space.
 */
1970 mxge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
1972 mxge_softc_t *sc = ifp->if_softc;
1973 mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);
1977 KKASSERT(tx->ifsq == ifsq);
1978 ASSERT_SERIALIZED(&tx->tx_serialize);
1980 if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
1983 zeropad = sc->zeropad_dma.dmem_busaddr;
/* Keep going while a full worst-case request chain still fits. */
1984 while (tx->mask - (tx->req - tx->done) > tx->max_desc) {
1988 m = ifsq_dequeue(ifsq);
1993 error = mxge_encap(tx, m, zeropad);
1997 IFNET_STAT_INC(ifp, oerrors, 1);
2000 /* Ran out of transmit slots */
2001 ifsq_set_oactive(ifsq);
/* Arm the per-queue watchdog; cleared when the ring drains. */
2004 tx->watchdog.wd_timer = 5;
/*
 * TX watchdog for one subqueue.  If the firmware's dropped_pause
 * counter has not advanced since the last fire, the queue is truly
 * stuck and the NIC is reset; otherwise transmits are merely blocked
 * by received pause frames, so only a warning is logged.
 */
2008 mxge_watchdog(struct ifaltq_subque *ifsq)
2010 struct ifnet *ifp = ifsq_get_ifp(ifsq);
2011 struct mxge_softc *sc = ifp->if_softc;
2012 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
2013 mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);
2015 ASSERT_IFNET_SERIALIZED_ALL(ifp);
2017 /* Check for pause blocking before resetting */
2018 if (tx->watchdog_rx_pause == rx_pause) {
2019 mxge_warn_stuck(sc, tx, 0);
2020 mxge_watchdog_reset(sc);
2023 if_printf(ifp, "Flow control blocking xmits, "
2024 "check link partner\n");
2026 tx->watchdog_rx_pause = rx_pause;
2030 * Copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2031 * at most 32 bytes at a time, so as to avoid involving the software
2032 * pio handler in the nic. We re-write the first segment's low
2033 * DMA address to mark it valid only after we write the entire chunk
2036 static __inline void
2037 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2038 mcp_kreq_ether_recv_t *src)
/* Poison the first addr_low so the NIC ignores the chunk until done. */
2042 low = src->addr_low;
2043 src->addr_low = 0xffffffff;
2044 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2046 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2048 src->addr_low = low;
/* Final store of the real address makes all 8 descriptors valid. */
2049 dst->addr_low = low;
/*
 * Allocate and DMA-map a small (MHLEN) receive mbuf for ring slot
 * idx, record it in the shadow descriptor ring, and push descriptors
 * to the NIC in batches of 8 via mxge_submit_8rx().  During init,
 * allocation/mapping failures simply bail out (elided paths) --
 * the caller presumably handles partial fills; confirm in full source.
 */
2054 mxge_get_buf_small(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx,
2057 bus_dma_segment_t seg;
2059 int cnt, err, mflag;
2061 mflag = MB_DONTWAIT;
2062 if (__predict_false(init))
2065 m = m_gethdr(mflag, MT_DATA);
2068 if (__predict_false(init)) {
2070 * During initialization, there
2071 * is nothing to setup; bail out
2077 m->m_len = m->m_pkthdr.len = MHLEN;
2079 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2080 &seg, 1, &cnt, BUS_DMA_NOWAIT);
2083 if (__predict_false(init)) {
2085 * During initialization, there
2086 * is nothing to setup; bail out
2093 rx->info[idx].m = m;
/* Shadow ring holds the big-endian DMA address split in two halves. */
2094 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2095 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
/* Hand the NIC a batch of 8 descriptors ending at this slot. */
2099 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
/*
 * Big-buffer counterpart of mxge_get_buf_small(): allocates a cluster
 * mbuf (MCLBYTES or MJUMPAGESIZE depending on rx->cl_size), maps it,
 * fills the shadow descriptor and submits to the NIC in batches of 8.
 */
2104 mxge_get_buf_big(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx,
2107 bus_dma_segment_t seg;
2109 int cnt, err, mflag;
2111 mflag = MB_DONTWAIT;
2112 if (__predict_false(init))
/* Pick the cluster size the ring was configured with. */
2115 if (rx->cl_size == MCLBYTES)
2116 m = m_getcl(mflag, MT_DATA, M_PKTHDR);
2118 m = m_getjcl(mflag, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
2121 if (__predict_false(init)) {
2123 * During initialization, there
2124 * is nothing to setup; bail out
2130 m->m_len = m->m_pkthdr.len = rx->cl_size;
2132 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2133 &seg, 1, &cnt, BUS_DMA_NOWAIT);
2136 if (__predict_false(init)) {
2138 * During initialization, there
2139 * is nothing to setup; bail out
2146 rx->info[idx].m = m;
/* Shadow ring holds the big-endian DMA address split in two halves. */
2147 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2148 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
/* Hand the NIC a batch of 8 descriptors ending at this slot. */
2152 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2157 * Myri10GE hardware checksums are not valid if the sender
2158 * padded the frame with non-zero padding. This is because
2159 * the firmware just does a simple 16-bit 1s complement
2160 * checksum across the entire frame, excluding the first 14
2161 * bytes. It is best to simply to check the checksum and
2162 * tell the stack about it only if the checksum is good
2164 static __inline uint16_t
2165 mxge_rx_csum(struct mbuf *m, int csum)
2167 const struct ether_header *eh;
2168 const struct ip *ip;
2171 eh = mtod(m, const struct ether_header *);
2173 /* Only deal with IPv4 TCP & UDP for now */
2174 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2177 ip = (const struct ip *)(eh + 1);
2178 if (__predict_false(ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP))
/* Fold the pseudo-header into the firmware's raw frame checksum. */
2182 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2183 htonl(ntohs(csum) + ntohs(ip->ip_len) +
2184 - (ip->ip_hl << 2) + ip->ip_p));
/*
 * Strip an in-band 802.1q header from a received frame: adjust the
 * firmware's partial checksum to exclude the 4 encapsulation bytes,
 * stash the tag in the mbuf header (M_VLANTAG), and slide the
 * Ethernet addresses forward over the removed header.
 */
2193 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2195 struct ether_vlan_header *evl;
2198 evl = mtod(m, struct ether_vlan_header *);
2201 * Fix checksum by subtracting EVL_ENCAPLEN bytes after
2202 * what the firmware thought was the end of the ethernet
2206 /* Put checksum into host byte order */
2207 *csum = ntohs(*csum);
2209 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
/* One's-complement subtract: add borrow, then fold carries twice. */
2211 *csum += ((*csum) < ~partial);
2212 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2213 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2216 * Restore checksum to network byte order;
2217 * later consumers expect this
2219 *csum = htons(*csum);
2222 m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
2223 m->m_flags |= M_VLANTAG;
2226 * Remove the 802.1q header by copying the Ethernet
2227 * addresses over it and adjusting the beginning of
2228 * the data in the mbuf. The encapsulated Ethernet
2229 * type field is already in place.
2231 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN,
2232 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2233 m_adj(m, EVL_ENCAPLEN);
/*
 * Complete one received frame from the big-buffer ring: replace the
 * mbuf in the ring slot (dropping the frame if allocation fails),
 * swap dmamaps with the spare, strip the firmware's 2-byte alignment
 * pad, remove any in-band VLAN tag, validate the checksum, and hand
 * the packet to the stack.
 */
2237 static __inline void
2238 mxge_rx_done_big(struct ifnet *ifp, mxge_rx_ring_t *rx,
2239 uint32_t len, uint32_t csum)
2242 const struct ether_header *eh;
2243 bus_dmamap_t old_map;
2246 idx = rx->cnt & rx->mask;
2249 /* Save a pointer to the received mbuf */
2250 m = rx->info[idx].m;
2252 /* Try to replace the received mbuf */
2253 if (mxge_get_buf_big(rx, rx->extra_map, idx, FALSE)) {
2254 /* Drop the frame -- the old mbuf is re-cycled */
2255 IFNET_STAT_INC(ifp, ierrors, 1);
2259 /* Unmap the received buffer */
2260 old_map = rx->info[idx].map;
2261 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2262 bus_dmamap_unload(rx->dmat, old_map);
2264 /* Swap the bus_dmamap_t's */
2265 rx->info[idx].map = rx->extra_map;
2266 rx->extra_map = old_map;
2269 * mcp implicitly skips 1st 2 bytes so that packet is properly
2272 m->m_data += MXGEFW_PAD;
2274 m->m_pkthdr.rcvif = ifp;
2275 m->m_len = m->m_pkthdr.len = len;
2277 IFNET_STAT_INC(ifp, ipackets, 1);
2279 eh = mtod(m, const struct ether_header *);
2280 if (eh->ether_type == htons(ETHERTYPE_VLAN))
2281 mxge_vlan_tag_remove(m, &csum);
2283 /* If the checksum is valid, mark it in the mbuf header */
2284 if ((ifp->if_capenable & IFCAP_RXCSUM) &&
2285 mxge_rx_csum(m, csum) == 0) {
2286 /* Tell the stack that the checksum is good */
2287 m->m_pkthdr.csum_data = 0xffff;
2288 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2291 ifp->if_input(ifp, m, NULL, -1);
/*
 * Small-buffer twin of mxge_rx_done_big(): identical flow (replace
 * mbuf, swap maps, strip pad, VLAN, checksum, input) but refills the
 * slot via mxge_get_buf_small().
 */
2294 static __inline void
2295 mxge_rx_done_small(struct ifnet *ifp, mxge_rx_ring_t *rx,
2296 uint32_t len, uint32_t csum)
2298 const struct ether_header *eh;
2300 bus_dmamap_t old_map;
2303 idx = rx->cnt & rx->mask;
2306 /* Save a pointer to the received mbuf */
2307 m = rx->info[idx].m;
2309 /* Try to replace the received mbuf */
2310 if (mxge_get_buf_small(rx, rx->extra_map, idx, FALSE)) {
2311 /* Drop the frame -- the old mbuf is re-cycled */
2312 IFNET_STAT_INC(ifp, ierrors, 1);
2316 /* Unmap the received buffer */
2317 old_map = rx->info[idx].map;
2318 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2319 bus_dmamap_unload(rx->dmat, old_map);
2321 /* Swap the bus_dmamap_t's */
2322 rx->info[idx].map = rx->extra_map;
2323 rx->extra_map = old_map;
2326 * mcp implicitly skips 1st 2 bytes so that packet is properly
2329 m->m_data += MXGEFW_PAD;
2331 m->m_pkthdr.rcvif = ifp;
2332 m->m_len = m->m_pkthdr.len = len;
2334 IFNET_STAT_INC(ifp, ipackets, 1);
2336 eh = mtod(m, const struct ether_header *);
2337 if (eh->ether_type == htons(ETHERTYPE_VLAN))
2338 mxge_vlan_tag_remove(m, &csum);
2340 /* If the checksum is valid, mark it in the mbuf header */
2341 if ((ifp->if_capenable & IFCAP_RXCSUM) &&
2342 mxge_rx_csum(m, csum) == 0) {
2343 /* Tell the stack that the checksum is good */
2344 m->m_pkthdr.csum_data = 0xffff;
2345 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2348 ifp->if_input(ifp, m, NULL, -1);
/*
 * Drain the firmware's rx_done completion ring, dispatching each
 * entry to the small- or big-buffer handler based on its length.
 * A zero length marks an unconsumed entry; processed entries are
 * zeroed so the firmware can reuse them.  "cycle" bounds the number
 * of entries processed (callers pass -1 for no limit).
 */
2351 static __inline void
2352 mxge_clean_rx_done(struct ifnet *ifp, struct mxge_rx_data *rx_data, int cycle)
2354 mxge_rx_done_t *rx_done = &rx_data->rx_done;
2356 while (rx_done->entry[rx_done->idx].length != 0 && cycle != 0) {
2357 uint16_t length, checksum;
2359 length = ntohs(rx_done->entry[rx_done->idx].length);
/* Clear the entry: zero length means "free" to the firmware. */
2360 rx_done->entry[rx_done->idx].length = 0;
2362 checksum = rx_done->entry[rx_done->idx].checksum;
2364 if (length <= MXGE_RX_SMALL_BUFLEN) {
2365 mxge_rx_done_small(ifp, &rx_data->rx_small,
2368 mxge_rx_done_big(ifp, &rx_data->rx_big,
2373 rx_done->idx &= rx_done->mask;
/*
 * Reclaim transmit descriptors up to the firmware's completion index
 * (mcp_idx): free the mbuf and unload the dmamap attached to the
 * first slot of each packet, clear OACTIVE once the ring is half
 * empty, restart the subqueue if packets are waiting, and tell the
 * NIC to stop polling an idle queue.
 */
2378 static __inline void
2379 mxge_tx_done(struct ifnet *ifp, mxge_tx_ring_t *tx, uint32_t mcp_idx)
2381 ASSERT_SERIALIZED(&tx->tx_serialize);
2383 while (tx->pkt_done != mcp_idx) {
2387 idx = tx->done & tx->mask;
2390 m = tx->info[idx].m;
2392 * mbuf and DMA map only attached to the first
2397 IFNET_STAT_INC(ifp, opackets, 1);
2398 tx->info[idx].m = NULL;
2399 bus_dmamap_unload(tx->dmat, tx->info[idx].map);
2405 * If we have space, clear OACTIVE to tell the stack that
2406 * its OK to send packets
2408 if (tx->req - tx->done < (tx->mask + 1) / 2) {
2409 ifsq_clr_oactive(tx->ifsq);
2410 if (tx->req == tx->done) {
2411 /* Reset watchdog */
2412 tx->watchdog.wd_timer = 0;
2416 if (!ifsq_is_empty(tx->ifsq))
2417 ifsq_devstart(tx->ifsq);
2419 if (tx->send_stop != NULL && tx->req == tx->done) {
2421 * Let the NIC stop polling this queue, since there
2422 * are no more transmits pending
2425 tx->queue_active = 0;
/*
 * XFP module compliance-byte bit -> ifmedia type map.  Entries with
 * flag 0 are media the stack has no constant for; the mask 0x7f
 * catch-all matches any unidentified module as generic CX4.
 */
2431 static struct mxge_media_type mxge_xfp_media_types[] = {
2432 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2433 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2434 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2435 {0, (1 << 5), "10GBASE-ER"},
2436 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2437 {0, (1 << 3), "10GBASE-SW"},
2438 {0, (1 << 2), "10GBASE-LW"},
2439 {0, (1 << 1), "10GBASE-EW"},
2440 {0, (1 << 0), "Reserved"}
/*
 * SFP+ module compliance-byte bit -> ifmedia type map.  The bitmask-0
 * first entry is the default (Twinax) used when no compliance bit
 * matches in mxge_media_probe().
 */
2443 static struct mxge_media_type mxge_sfp_media_types[] = {
2444 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2445 {0, (1 << 7), "Reserved"},
2446 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2447 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2448 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2449 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
/*
 * Install a single full-duplex media type as both the available and
 * the active media, and remember it in sc->current_media so probe
 * code can avoid redundant updates.
 */
2453 mxge_media_set(mxge_softc_t *sc, int media_type)
2455 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 0, NULL);
2456 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type);
2457 sc->current_media = media_type;
2458 sc->media.ifm_media = sc->media.ifm_cur->ifm_media;
/*
 * Reset the media list to IFM_AUTO and classify the NIC's connector
 * (CX4 / XFP / SFP+ / Quad Ribbon Fiber) from the character that
 * follows the third dash of the EEPROM product-code string.
 */
2462 mxge_media_init(mxge_softc_t *sc)
2467 ifmedia_removeall(&sc->media);
2468 mxge_media_set(sc, IFM_AUTO);
2471 * Parse the product code to deterimine the interface type
2472 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2473 * after the 3rd dash in the driver's cached copy of the
2474 * EEPROM's product code string.
2476 ptr = sc->product_code_string;
2478 if_printf(sc->ifp, "Missing product code\n");
/* Advance past three '-' separators; bail if any is missing. */
2482 for (i = 0; i < 3; i++, ptr++) {
2483 ptr = strchr(ptr, '-');
2485 if_printf(sc->ifp, "only %d dashes in PC?!?\n", i);
/* -C or -2C is CX4. */
2489 if (*ptr == 'C' || *(ptr +1) == 'C') {
2491 sc->connector = MXGE_CX4;
2492 mxge_media_set(sc, IFM_10G_CX4);
2493 } else if (*ptr == 'Q') {
2494 /* -Q is Quad Ribbon Fiber */
2495 sc->connector = MXGE_QRF;
2496 if_printf(sc->ifp, "Quad Ribbon Fiber Media\n");
2497 /* DragonFly has no media type for Quad ribbon fiber */
2498 } else if (*ptr == 'R') {
/* -R is XFP. */
2500 sc->connector = MXGE_XFP;
2501 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2502 /* -S or -2S is SFP+ */
2503 sc->connector = MXGE_SFP;
2505 if_printf(sc->ifp, "Unknown media type: %c\n", *ptr);
2510 * Determine the media type for a NIC. Some XFPs will identify
2511 * themselves only when their link is up, so this is initiated via a
2512 * link up interrupt. However, this can potentially take up to
2513 * several milliseconds, so it is run via the watchdog routine, rather
2514 * than in the interrupt handler itself.
2517 mxge_media_probe(mxge_softc_t *sc)
2520 const char *cage_type;
2521 struct mxge_media_type *mxge_media_types = NULL;
2522 int i, err, ms, mxge_media_type_entries;
2525 sc->need_media_probe = 0;
/* Choose the compliance-byte table for this cage type. */
2527 if (sc->connector == MXGE_XFP) {
2529 mxge_media_types = mxge_xfp_media_types;
2530 mxge_media_type_entries = NELEM(mxge_xfp_media_types);
2531 byte = MXGE_XFP_COMPLIANCE_BYTE;
2533 } else if (sc->connector == MXGE_SFP) {
2534 /* -S or -2S is SFP+ */
2535 mxge_media_types = mxge_sfp_media_types;
2536 mxge_media_type_entries = NELEM(mxge_sfp_media_types);
2540 /* nothing to do; media type cannot change */
2545 * At this point we know the NIC has an XFP cage, so now we
2546 * try to determine what is in the cage by using the
2547 * firmware's XFP I2C commands to read the XFP 10GbE compilance
2548 * register. We read just one byte, which may take over
2552 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2554 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2555 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE)
2556 if_printf(sc->ifp, "failed to read XFP\n");
2557 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT)
2558 if_printf(sc->ifp, "Type R/S with no XFP!?!?\n");
2559 if (err != MXGEFW_CMD_OK)
2562 /* Now we wait for the data to be cached */
2564 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
/* Poll up to ~50ms for the firmware to finish the I2C read. */
2565 for (ms = 0; err == EBUSY && ms < 50; ms++) {
2568 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2570 if (err != MXGEFW_CMD_OK) {
2571 if_printf(sc->ifp, "failed to read %s (%d, %dms)\n",
2572 cage_type, err, ms);
/* Entry 0 is the table's default/catch-all match. */
2576 if (cmd.data0 == mxge_media_types[0].bitmask) {
2578 if_printf(sc->ifp, "%s:%s\n", cage_type,
2579 mxge_media_types[0].name);
2581 if (sc->current_media != mxge_media_types[0].flag) {
2582 mxge_media_init(sc);
2583 mxge_media_set(sc, mxge_media_types[0].flag);
/* Otherwise scan the compliance bits for the first match. */
2587 for (i = 1; i < mxge_media_type_entries; i++) {
2588 if (cmd.data0 & mxge_media_types[i].bitmask) {
2590 if_printf(sc->ifp, "%s:%s\n", cage_type,
2591 mxge_media_types[i].name);
2594 if (sc->current_media != mxge_media_types[i].flag) {
2595 mxge_media_init(sc);
2596 mxge_media_set(sc, mxge_media_types[i].flag);
2602 if_printf(sc->ifp, "%s media 0x%x unknown\n", cage_type,
/*
 * Process the firmware's interrupt status block: propagate link-state
 * transitions to the ifnet layer (scheduling a media re-probe on link
 * up), report RDMA timeouts, and count link-down events.
 */
2608 mxge_intr_status(struct mxge_softc *sc, const mcp_irq_data_t *stats)
2610 if (sc->link_state != stats->link_up) {
2611 sc->link_state = stats->link_up;
2612 if (sc->link_state) {
2613 sc->ifp->if_link_state = LINK_STATE_UP;
2614 if_link_state_change(sc->ifp);
2616 if_printf(sc->ifp, "link up\n");
2618 sc->ifp->if_link_state = LINK_STATE_DOWN;
2619 if_link_state_change(sc->ifp);
2621 if_printf(sc->ifp, "link down\n");
/* XFP modules may only identify once the link is up: re-probe. */
2623 sc->need_media_probe = 1;
2626 if (sc->rdma_tags_available != be32toh(stats->rdma_tags_available)) {
2627 sc->rdma_tags_available = be32toh(stats->rdma_tags_available);
2628 if_printf(sc->ifp, "RDMA timed out! %d tags left\n",
2629 sc->rdma_tags_available);
2632 if (stats->link_down) {
2633 sc->down_cnt += stats->link_down;
2635 sc->ifp->if_link_state = LINK_STATE_DOWN;
2636 if_link_state_change(sc->ifp);
/*
 * Enter every serializer in sc->serializes except the first (the
 * main serializer, which the caller already holds).
 */
2641 mxge_serialize_skipmain(struct mxge_softc *sc)
2643 lwkt_serialize_array_enter(sc->serializes, sc->nserialize, 1);
/*
 * Release the serializers taken by mxge_serialize_skipmain()
 * (everything except the main serializer at index 0).
 */
2647 mxge_deserialize_skipmain(struct mxge_softc *sc)
2649 lwkt_serialize_array_exit(sc->serializes, sc->nserialize, 1);
/*
 * Legacy (INTx) interrupt handler.  Deasserts the IRQ line, then
 * loops draining TX completions and the RX done ring; if the
 * mxge_deassert_wait tunable is set it keeps looping until the
 * firmware clears stats->valid, confirming the line is low.  Finally
 * processes link/error status (slice 0 only) and writes irq_claim to
 * return the interrupt token to the NIC.
 */
2653 mxge_legacy(void *arg)
2655 struct mxge_slice_state *ss = arg;
2656 mxge_softc_t *sc = ss->sc;
2657 mcp_irq_data_t *stats = ss->fw_stats;
2658 mxge_tx_ring_t *tx = &ss->tx;
2659 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2660 uint32_t send_done_count;
2663 ASSERT_SERIALIZED(&sc->main_serialize);
2665 /* Make sure the DMA has finished */
2668 valid = stats->valid;
2670 /* Lower legacy IRQ */
2671 *sc->irq_deassert = 0;
2672 if (!mxge_deassert_wait) {
2673 /* Don't wait for conf. that irq is low */
/* Take the per-ring serializers; main is already held. */
2677 mxge_serialize_skipmain(sc);
2680 * Loop while waiting for legacy irq deassertion
2681 * XXX do we really want to loop?
2684 /* Check for transmit completes and receives */
2685 send_done_count = be32toh(stats->send_done_count);
2686 while ((send_done_count != tx->pkt_done) ||
2687 (rx_done->entry[rx_done->idx].length != 0)) {
2688 if (send_done_count != tx->pkt_done) {
2689 mxge_tx_done(&sc->arpcom.ac_if, tx,
2690 (int)send_done_count);
2692 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
2693 send_done_count = be32toh(stats->send_done_count);
2695 if (mxge_deassert_wait)
/* Spin until the firmware confirms the IRQ line is low. */
2697 } while (*((volatile uint8_t *)&stats->valid));
2699 mxge_deserialize_skipmain(sc);
2701 /* Fw link & error stats meaningful only on the first slice */
2702 if (__predict_false(stats->stats_updated))
2703 mxge_intr_status(sc, stats);
2705 /* Check to see if we have rx token to pass back */
2707 *ss->irq_claim = be32toh(3);
2708 *(ss->irq_claim + 1) = be32toh(3);
/*
 * MSI interrupt handler (function signature line is missing from this
 * sampled listing -- presumably mxge_msi(void *arg); confirm against
 * the original file).  Handles RX under rx_serialize, TX completions
 * under tx_serialize, firmware status, and returns the rx token --
 * all skipped while IFF_NPOLLING is active.
 */
2714 struct mxge_slice_state *ss = arg;
2715 mxge_softc_t *sc = ss->sc;
2716 mcp_irq_data_t *stats = ss->fw_stats;
2717 mxge_tx_ring_t *tx = &ss->tx;
2718 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2719 uint32_t send_done_count;
2721 #ifndef IFPOLL_ENABLE
2722 const boolean_t polling = FALSE;
2724 boolean_t polling = FALSE;
2727 ASSERT_SERIALIZED(&sc->main_serialize);
2729 /* Make sure the DMA has finished */
2730 if (__predict_false(!stats->valid))
2733 valid = stats->valid;
2736 #ifdef IFPOLL_ENABLE
2737 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
2742 /* Check for receives */
2743 lwkt_serialize_enter(&ss->rx_data.rx_serialize);
2744 if (rx_done->entry[rx_done->idx].length != 0)
2745 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
2746 lwkt_serialize_exit(&ss->rx_data.rx_serialize);
2750 * Check for transmit completes
2753 * Since pkt_done is only changed by mxge_tx_done(),
2754 * which is called only in interrupt handler, the
2755 * check w/o holding tx serializer is MPSAFE.
2757 send_done_count = be32toh(stats->send_done_count);
2758 if (send_done_count != tx->pkt_done) {
2759 lwkt_serialize_enter(&tx->tx_serialize);
2760 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count);
2761 lwkt_serialize_exit(&tx->tx_serialize);
2764 if (__predict_false(stats->stats_updated))
2765 mxge_intr_status(sc, stats);
2767 /* Check to see if we have rx token to pass back */
2768 if (!polling && (valid & 0x1))
2769 *ss->irq_claim = be32toh(3);
2770 *(ss->irq_claim + 1) = be32toh(3);
/*
 * MSI-X RX-only interrupt handler for a slice: drain the rx_done ring
 * and hand the rx token back to the NIC.  Skipped entirely while
 * polling(4) owns the interface (IFF_NPOLLING).
 */
2774 mxge_msix_rx(void *arg)
2776 struct mxge_slice_state *ss = arg;
2777 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2779 #ifdef IFPOLL_ENABLE
2780 if (ss->sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
2784 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);
2786 if (rx_done->entry[rx_done->idx].length != 0)
2787 mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, -1);
/* Return the rx token so the firmware will raise further interrupts. */
2789 *ss->irq_claim = be32toh(3);
/*
 * MSI-X combined RX/TX interrupt handler for a slice.  Runs under the
 * slice's rx_serialize; takes tx_serialize only when there are TX
 * completions to reap.  RX work and the irq-claim writeback are
 * suppressed while IFF_NPOLLING is set.
 * NOTE(review): sampled listing -- return/brace lines are missing.
 */
2793 mxge_msix_rxtx(void *arg)
2795 struct mxge_slice_state *ss = arg;
2796 mxge_softc_t *sc = ss->sc;
2797 mcp_irq_data_t *stats = ss->fw_stats;
2798 mxge_tx_ring_t *tx = &ss->tx;
2799 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2800 uint32_t send_done_count;
2802 #ifndef IFPOLL_ENABLE
2803 const boolean_t polling = FALSE;
2805 boolean_t polling = FALSE;
2808 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);
2810 /* Make sure the DMA has finished */
2811 if (__predict_false(!stats->valid))
2814 valid = stats->valid;
2817 #ifdef IFPOLL_ENABLE
2818 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
2822 /* Check for receives */
2823 if (!polling && rx_done->entry[rx_done->idx].length != 0)
2824 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
2827 * Check for transmit completes
2830 * Since pkt_done is only changed by mxge_tx_done(),
2831 * which is called only in interrupt handler, the
2832 * check w/o holding tx serializer is MPSAFE.
2834 send_done_count = be32toh(stats->send_done_count);
2835 if (send_done_count != tx->pkt_done) {
2836 lwkt_serialize_enter(&tx->tx_serialize);
2837 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count);
2838 lwkt_serialize_exit(&tx->tx_serialize);
2841 /* Check to see if we have rx token to pass back */
2842 if (!polling && (valid & 0x1))
2843 *ss->irq_claim = be32toh(3);
2844 *(ss->irq_claim + 1) = be32toh(3);
/*
 * if_init handler: (re)open the interface if it is not already
 * IFF_RUNNING.  Caller must hold all ifnet serializers.
 * NOTE(review): the body's mxge_open() call line is absent from this
 * sampled listing.
 */
2848 mxge_init(void *arg)
2850 struct mxge_softc *sc = arg;
2852 ASSERT_IFNET_SERIALIZED_ALL(sc->ifp);
2853 if ((sc->ifp->if_flags & IFF_RUNNING) == 0)
/*
 * Unload the DMA maps and free every mbuf still held by this slice's
 * big RX ring, small RX ring, and (first slice only) TX ring.
 */
2858 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
2862 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
2863 if (ss->rx_data.rx_big.info[i].m == NULL)
2865 bus_dmamap_unload(ss->rx_data.rx_big.dmat,
2866 ss->rx_data.rx_big.info[i].map);
2867 m_freem(ss->rx_data.rx_big.info[i].m);
2868 ss->rx_data.rx_big.info[i].m = NULL;
2871 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
2872 if (ss->rx_data.rx_small.info[i].m == NULL)
2874 bus_dmamap_unload(ss->rx_data.rx_small.dmat,
2875 ss->rx_data.rx_small.info[i].map);
2876 m_freem(ss->rx_data.rx_small.info[i].m);
2877 ss->rx_data.rx_small.info[i].m = NULL;
2880 /* Transmit ring used only on the first slice */
2881 if (ss->tx.info == NULL)
2884 for (i = 0; i <= ss->tx.mask; i++) {
2885 if (ss->tx.info[i].m == NULL)
2887 bus_dmamap_unload(ss->tx.dmat, ss->tx.info[i].map);
2888 m_freem(ss->tx.info[i].m);
2889 ss->tx.info[i].m = NULL;
/* Free ring mbufs on every slice.  See mxge_free_slice_mbufs(). */
2894 mxge_free_mbufs(mxge_softc_t *sc)
2898 for (slice = 0; slice < sc->num_slices; slice++)
2899 mxge_free_slice_mbufs(&sc->ss[slice]);
/*
 * Tear down everything mxge_alloc_slice_rings() created for one slice:
 * the rx_done DMA block, the TX request/segment lists, the RX shadow
 * rings, and the per-slot host-info arrays with their busdma maps and
 * tags.  Every pointer freed is NULLed so a repeat call is harmless.
 */
2903 mxge_free_slice_rings(struct mxge_slice_state *ss)
2907 if (ss->rx_data.rx_done.entry != NULL) {
2908 mxge_dma_free(&ss->rx_done_dma);
2909 ss->rx_data.rx_done.entry = NULL;
2912 if (ss->tx.req_list != NULL) {
2913 kfree(ss->tx.req_list, M_DEVBUF);
2914 ss->tx.req_list = NULL;
2917 if (ss->tx.seg_list != NULL) {
2918 kfree(ss->tx.seg_list, M_DEVBUF);
2919 ss->tx.seg_list = NULL;
2922 if (ss->rx_data.rx_small.shadow != NULL) {
2923 kfree(ss->rx_data.rx_small.shadow, M_DEVBUF);
2924 ss->rx_data.rx_small.shadow = NULL;
2927 if (ss->rx_data.rx_big.shadow != NULL) {
2928 kfree(ss->rx_data.rx_big.shadow, M_DEVBUF);
2929 ss->rx_data.rx_big.shadow = NULL;
2932 if (ss->tx.info != NULL) {
2933 if (ss->tx.dmat != NULL) {
2934 for (i = 0; i <= ss->tx.mask; i++) {
2935 bus_dmamap_destroy(ss->tx.dmat,
2936 ss->tx.info[i].map);
2938 bus_dma_tag_destroy(ss->tx.dmat);
2940 kfree(ss->tx.info, M_DEVBUF);
2944 if (ss->rx_data.rx_small.info != NULL) {
2945 if (ss->rx_data.rx_small.dmat != NULL) {
2946 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
2947 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
2948 ss->rx_data.rx_small.info[i].map);
2950 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
2951 ss->rx_data.rx_small.extra_map);
2952 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
2954 kfree(ss->rx_data.rx_small.info, M_DEVBUF);
2955 ss->rx_data.rx_small.info = NULL;
2958 if (ss->rx_data.rx_big.info != NULL) {
2959 if (ss->rx_data.rx_big.dmat != NULL) {
2960 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
2961 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
2962 ss->rx_data.rx_big.info[i].map);
2964 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
2965 ss->rx_data.rx_big.extra_map);
2966 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
2968 kfree(ss->rx_data.rx_big.info, M_DEVBUF);
2969 ss->rx_data.rx_big.info = NULL;
/* Free the ring resources of every slice.  See mxge_free_slice_rings(). */
2974 mxge_free_rings(mxge_softc_t *sc)
2981 for (slice = 0; slice < sc->num_slices; slice++)
2982 mxge_free_slice_rings(&sc->ss[slice]);
/*
 * Allocate all per-slice ring resources: the small/big RX shadow and
 * host-info rings with one busdma tag + per-slot map each (plus an
 * "extra" map used when replacing a loaded buffer), and the TX
 * request list, segment list, info ring and busdma maps.
 * On partial failure each sub-step unwinds its own maps/tag before the
 * (not visible here) error return.
 * NOTE(review): sampled listing -- return/brace lines and several tag
 * parameters are missing between the numbered lines.
 */
2986 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
2987 int tx_ring_entries)
2989 mxge_softc_t *sc = ss->sc;
2994 * Allocate per-slice receive resources
/* Ring sizes are powers of two, so mask = entries - 1 works as index mask. */
2997 ss->rx_data.rx_small.mask = ss->rx_data.rx_big.mask =
2998 rx_ring_entries - 1;
2999 ss->rx_data.rx_done.mask = (2 * rx_ring_entries) - 1;
3001 /* Allocate the rx shadow rings */
3002 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.shadow);
3003 ss->rx_data.rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3005 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.shadow);
3006 ss->rx_data.rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3008 /* Allocate the rx host info rings */
3009 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.info);
3010 ss->rx_data.rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3012 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.info);
3013 ss->rx_data.rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3015 /* Allocate the rx busdma resources */
3016 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3018 4096, /* boundary */
3019 BUS_SPACE_MAXADDR, /* low */
3020 BUS_SPACE_MAXADDR, /* high */
3021 NULL, NULL, /* filter */
3022 MHLEN, /* maxsize */
3024 MHLEN, /* maxsegsize */
3025 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
3027 &ss->rx_data.rx_small.dmat); /* tag */
3029 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3034 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, BUS_DMA_WAITOK,
3035 &ss->rx_data.rx_small.extra_map);
3037 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", err);
3038 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
3039 ss->rx_data.rx_small.dmat = NULL;
3042 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
3043 err = bus_dmamap_create(ss->rx_data.rx_small.dmat,
3044 BUS_DMA_WAITOK, &ss->rx_data.rx_small.info[i].map);
3048 device_printf(sc->dev, "Err %d rx_small dmamap\n", err);
/* Unwind the i maps created so far plus the extra map and the tag. */
3050 for (j = 0; j < i; ++j) {
3051 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
3052 ss->rx_data.rx_small.info[j].map);
3054 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
3055 ss->rx_data.rx_small.extra_map);
3056 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
3057 ss->rx_data.rx_small.dmat = NULL;
3062 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3064 4096, /* boundary */
3065 BUS_SPACE_MAXADDR, /* low */
3066 BUS_SPACE_MAXADDR, /* high */
3067 NULL, NULL, /* filter */
3070 4096, /* maxsegsize*/
3071 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
3073 &ss->rx_data.rx_big.dmat); /* tag */
3075 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3080 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK,
3081 &ss->rx_data.rx_big.extra_map);
3083 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", err);
3084 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
3085 ss->rx_data.rx_big.dmat = NULL;
3088 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
3089 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK,
3090 &ss->rx_data.rx_big.info[i].map);
3094 device_printf(sc->dev, "Err %d rx_big dmamap\n", err);
3095 for (j = 0; j < i; ++j) {
3096 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
3097 ss->rx_data.rx_big.info[j].map);
3099 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
3100 ss->rx_data.rx_big.extra_map);
3101 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
3102 ss->rx_data.rx_big.dmat = NULL;
3108 * Now allocate TX resources
3111 ss->tx.mask = tx_ring_entries - 1;
3112 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3115 * Allocate the tx request copy block; MUST be at least 8 bytes
3118 bytes = sizeof(*ss->tx.req_list) * (ss->tx.max_desc + 4);
3119 ss->tx.req_list = kmalloc_cachealign(__VM_CACHELINE_ALIGN(bytes),
3120 M_DEVBUF, M_WAITOK);
3122 /* Allocate the tx busdma segment list */
3123 bytes = sizeof(*ss->tx.seg_list) * ss->tx.max_desc;
3124 ss->tx.seg_list = kmalloc(bytes, M_DEVBUF, M_WAITOK);
3126 /* Allocate the tx host info ring */
3127 bytes = tx_ring_entries * sizeof(*ss->tx.info);
3128 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3130 /* Allocate the tx busdma resources */
3131 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3133 sc->tx_boundary, /* boundary */
3134 BUS_SPACE_MAXADDR, /* low */
3135 BUS_SPACE_MAXADDR, /* high */
3136 NULL, NULL, /* filter */
3138 sizeof(struct ether_vlan_header),
3140 ss->tx.max_desc - 2, /* num segs */
3141 sc->tx_boundary, /* maxsegsz */
3142 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
3143 BUS_DMA_ONEBPAGE, /* flags */
3144 &ss->tx.dmat); /* tag */
3146 device_printf(sc->dev, "Err %d allocating tx dmat\n", err);
3151 * Now use these tags to setup DMA maps for each slot in the ring
3153 for (i = 0; i <= ss->tx.mask; i++) {
3154 err = bus_dmamap_create(ss->tx.dmat,
3155 BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &ss->tx.info[i].map);
3159 device_printf(sc->dev, "Err %d tx dmamap\n", err);
3160 for (j = 0; j < i; ++j) {
3161 bus_dmamap_destroy(ss->tx.dmat,
3162 ss->tx.info[j].map);
3164 bus_dma_tag_destroy(ss->tx.dmat);
/*
 * Query the firmware for ring sizes, configure the ifnet send queue
 * (length, subqueue count/mask for multi-TX), then allocate ring
 * resources for every slice via mxge_alloc_slice_rings().
 */
3173 mxge_alloc_rings(mxge_softc_t *sc)
3177 int tx_ring_entries, rx_ring_entries;
3180 /* Get ring sizes */
3181 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3183 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3186 tx_ring_size = cmd.data0;
3188 tx_ring_entries = tx_ring_size / sizeof(mcp_kreq_ether_send_t);
3189 rx_ring_entries = sc->rx_intr_slots / 2;
3192 device_printf(sc->dev, "tx desc %d, rx desc %d\n",
3193 tx_ring_entries, rx_ring_entries);
3196 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1);
3197 ifq_set_ready(&sc->ifp->if_snd);
3198 ifq_set_subq_cnt(&sc->ifp->if_snd, sc->num_tx_rings);
3200 if (sc->num_tx_rings > 1) {
3201 sc->ifp->if_mapsubq = ifq_mapsubq_mask;
3202 ifq_set_subq_mask(&sc->ifp->if_snd, sc->num_tx_rings - 1);
3205 for (slice = 0; slice < sc->num_slices; slice++) {
3206 err = mxge_alloc_slice_rings(&sc->ss[slice],
3207 rx_ring_entries, tx_ring_entries);
3209 device_printf(sc->dev,
3210 "alloc %d slice rings failed\n", slice);
/*
 * Pick the big-RX cluster size for a given MTU: MCLBYTES when the
 * framed buffer fits, otherwise MJUMPAGESIZE (asserting the MTU is
 * not too large even for that).
 */
3218 mxge_choose_params(int mtu, int *cl_size)
3220 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD;
3222 if (bufsize < MCLBYTES) {
3223 *cl_size = MCLBYTES;
3225 KASSERT(bufsize < MJUMPAGESIZE, ("invalid MTU %d", mtu));
3226 *cl_size = MJUMPAGESIZE;
/*
 * Bring one slice online: fetch the lanai (NIC SRAM) pointers for the
 * send and small/big receive rings from the firmware, then stock both
 * receive rings with mbufs.  With multiple TX rings, also map the
 * per-slice send_go/send_stop doorbells.
 * NOTE(review): sampled listing -- error-return and brace lines are
 * missing between the numbered lines.
 */
3231 mxge_slice_open(struct mxge_slice_state *ss, int cl_size)
/* Slice index derived from position within sc->ss[]. */
3236 slice = ss - ss->sc->ss;
3239 * Get the lanai pointers to the send and receive rings
3243 if (ss->sc->num_tx_rings == 1) {
3246 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET,
3248 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *)
3249 (ss->sc->sram + cmd.data0);
3250 /* Leave send_go and send_stop as NULL */
3254 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3255 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *)
3256 (ss->sc->sram + cmd.data0);
3257 ss->tx.send_go = (volatile uint32_t *)
3258 (ss->sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3259 ss->tx.send_stop = (volatile uint32_t *)
3260 (ss->sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3264 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3265 ss->rx_data.rx_small.lanai =
3266 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0);
3269 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3270 ss->rx_data.rx_big.lanai =
3271 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0);
3274 if_printf(ss->sc->ifp,
3275 "failed to get ring sizes or locations\n");
3280 * Stock small receive ring
3282 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
3283 err = mxge_get_buf_small(&ss->rx_data.rx_small,
3284 ss->rx_data.rx_small.info[i].map, i, TRUE);
3286 if_printf(ss->sc->ifp, "alloced %d/%d smalls\n", i,
3287 ss->rx_data.rx_small.mask + 1);
3293 * Stock big receive ring
/* Poison the big shadow ring addresses before handing buffers to the NIC. */
3295 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
3296 ss->rx_data.rx_big.shadow[i].addr_low = 0xffffffff;
3297 ss->rx_data.rx_big.shadow[i].addr_high = 0xffffffff;
3300 ss->rx_data.rx_big.cl_size = cl_size;
3302 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
3303 err = mxge_get_buf_big(&ss->rx_data.rx_big,
3304 ss->rx_data.rx_big.info[i].map, i, TRUE);
3306 if_printf(ss->sc->ifp, "alloced %d/%d bigs\n", i,
3307 ss->rx_data.rx_big.mask + 1);
/*
 * Bring the interface fully up: reset the NIC, program the RSS
 * indirection table and key (multi-slice only), select the TSO mode,
 * tell the firmware the MTU and buffer sizes, register the per-slice
 * stats DMA blocks, open every slice, and finally start the firmware
 * (ETHERNET_UP) and the TX watchdogs.  Errors fall through to the
 * mbuf-freeing cleanup at the bottom.
 * NOTE(review): sampled listing -- error-goto, brace, and several
 * conditional lines are missing between the numbered lines.
 */
3315 mxge_open(mxge_softc_t *sc)
3317 struct ifnet *ifp = sc->ifp;
3319 int err, slice, cl_size, i;
3321 volatile uint8_t *itable;
3322 struct mxge_slice_state *ss;
3324 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3326 /* Copy the MAC address in case it was overridden */
3327 bcopy(IF_LLADDR(ifp), sc->mac_addr, ETHER_ADDR_LEN);
3329 err = mxge_reset(sc, 1);
3331 if_printf(ifp, "failed to reset\n");
3335 if (sc->num_slices > 1) {
3336 /* Setup the indirection table */
3337 cmd.data0 = sc->num_slices;
3338 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, &cmd);
3340 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd);
3342 if_printf(ifp, "failed to setup rss tables\n");
3346 /* Just enable an identity mapping */
3347 itable = sc->sram + cmd.data0;
3348 for (i = 0; i < sc->num_slices; i++)
3349 itable[i] = (uint8_t)i;
3352 volatile uint8_t *hwkey;
3353 uint8_t swkey[MXGE_HWRSS_KEYLEN];
3355 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_KEY_OFFSET,
3358 if_printf(ifp, "failed to get rsskey\n");
3361 hwkey = sc->sram + cmd.data0;
/* Copy the host Toeplitz key byte-by-byte into NIC SRAM. */
3363 toeplitz_get_key(swkey, MXGE_HWRSS_KEYLEN);
3364 for (i = 0; i < MXGE_HWRSS_KEYLEN; ++i)
3365 hwkey[i] = swkey[i];
3368 err = mxge_send_cmd(sc, MXGEFW_CMD_RSS_KEY_UPDATED,
3371 if_printf(ifp, "failed to update rsskey\n");
3375 if_printf(ifp, "RSS key updated\n");
3381 if_printf(ifp, "input hash: RSS\n");
3382 cmd.data1 = MXGEFW_RSS_HASH_TYPE_IPV4 |
3383 MXGEFW_RSS_HASH_TYPE_TCP_IPV4;
3386 if_printf(ifp, "input hash: SRC_DST_PORT\n");
3387 cmd.data1 = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
3389 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3391 if_printf(ifp, "failed to enable slices\n");
3396 cmd.data0 = MXGEFW_TSO_MODE_NDIS;
3397 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_TSO_MODE, &cmd);
3400 * Can't change TSO mode to NDIS, never allow TSO then
3402 if_printf(ifp, "failed to set TSO mode\n");
3403 ifp->if_capenable &= ~IFCAP_TSO;
3404 ifp->if_capabilities &= ~IFCAP_TSO;
3405 ifp->if_hwassist &= ~CSUM_TSO;
3408 mxge_choose_params(ifp->if_mtu, &cl_size);
3411 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, &cmd);
3413 * Error is only meaningful if we're trying to set
3414 * MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1
3418 * Give the firmware the mtu and the big and small buffer
3419 * sizes. The firmware wants the big buf size to be a power
3420 * of two. Luckily, DragonFly's clusters are powers of two
3422 cmd.data0 = ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3423 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3425 cmd.data0 = MXGE_RX_SMALL_BUFLEN;
3426 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd);
3428 cmd.data0 = cl_size;
3429 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3432 if_printf(ifp, "failed to setup params\n");
3436 /* Now give him the pointer to the stats block */
3437 for (slice = 0; slice < sc->num_slices; slice++) {
3438 ss = &sc->ss[slice];
3439 cmd.data0 = MXGE_LOWPART_TO_U32(ss->fw_stats_dma.dmem_busaddr);
3440 cmd.data1 = MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.dmem_busaddr);
3441 cmd.data2 = sizeof(struct mcp_irq_data);
3442 cmd.data2 |= (slice << 16);
3443 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
/* Fall back to the obsolete stats-DMA command for old firmware. */
3447 bus = sc->ss->fw_stats_dma.dmem_busaddr;
3448 bus += offsetof(struct mcp_irq_data, send_done_count);
3449 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3450 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3451 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3454 /* Firmware cannot support multicast without STATS_DMA_V2 */
3455 sc->fw_multicast_support = 0;
3457 sc->fw_multicast_support = 1;
3461 if_printf(ifp, "failed to setup params\n");
3465 for (slice = 0; slice < sc->num_slices; slice++) {
3466 err = mxge_slice_open(&sc->ss[slice], cl_size);
3468 if_printf(ifp, "couldn't open slice %d\n", slice);
3473 /* Finally, start the firmware running */
3474 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3476 if_printf(ifp, "Couldn't bring up link\n");
3480 ifp->if_flags |= IFF_RUNNING;
3481 for (i = 0; i < sc->num_tx_rings; ++i) {
3482 mxge_tx_ring_t *tx = &sc->ss[i].tx;
3484 ifsq_clr_oactive(tx->ifsq);
3485 ifsq_watchdog_start(&tx->watchdog);
/* Error path: release any mbufs stocked into the rings so far. */
3491 mxge_free_mbufs(sc);
/*
 * Bring the interface down: issue ETHERNET_DOWN, then poll for the
 * firmware's "down" interrupt by watching sc->down_cnt (dropping the
 * ifnet serializers around each DELAY so the interrupt can run),
 * free all ring mbufs, and stop the TX watchdogs.
 * NOTE(review): sampled listing -- the polling-loop control lines are
 * missing between the numbered lines.
 */
3496 mxge_close(mxge_softc_t *sc, int down)
3498 struct ifnet *ifp = sc->ifp;
3500 int err, old_down_cnt, i;
3502 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3505 old_down_cnt = sc->down_cnt;
3508 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3510 if_printf(ifp, "Couldn't bring down link\n");
3512 if (old_down_cnt == sc->down_cnt) {
/* Drop serializers so the down interrupt can be processed while we wait. */
3517 ifnet_deserialize_all(ifp);
3518 DELAY(10 * sc->intr_coal_delay);
3519 ifnet_serialize_all(ifp);
3523 if (old_down_cnt == sc->down_cnt)
3524 if_printf(ifp, "never got down irq\n");
3526 mxge_free_mbufs(sc);
3528 ifp->if_flags &= ~IFF_RUNNING;
3529 for (i = 0; i < sc->num_tx_rings; ++i) {
3530 mxge_tx_ring_t *tx = &sc->ss[i].tx;
3532 ifsq_clr_oactive(tx->ifsq);
3533 ifsq_watchdog_stop(&tx->watchdog);
/*
 * Read the PCIe link width and program the max read request size to
 * 4KB (device control, bits 14:12 = 5); after a watchdog reset the
 * previously saved sc->pectl value is restored instead.  Finally
 * enable bus mastering.
 */
3538 mxge_setup_cfg_space(mxge_softc_t *sc)
3540 device_t dev = sc->dev;
3542 uint16_t lnk, pectl;
3544 /* Find the PCIe link width and set max read request to 4KB */
3545 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3546 lnk = pci_read_config(dev, reg + 0x12, 2);
3547 sc->link_width = (lnk >> 4) & 0x3f;
3549 if (sc->pectl == 0) {
3550 pectl = pci_read_config(dev, reg + 0x8, 2);
3551 pectl = (pectl & ~0x7000) | (5 << 12);
3552 pci_write_config(dev, reg + 0x8, pectl, 2);
3555 /* Restore saved pectl after watchdog reset */
3556 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3560 /* Enable DMA and memory space access */
3561 pci_enable_busmaster(dev);
/*
 * Fetch the NIC's reboot status register via the vendor-specific PCI
 * capability's indirect read32 window.  Returns (uint32_t)-1 if the
 * capability cannot be found.
 */
3565 mxge_read_reboot(mxge_softc_t *sc)
3567 device_t dev = sc->dev;
3570 /* Find the vendor specific offset */
3571 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3572 if_printf(sc->ifp, "could not find vendor specific offset\n");
3573 return (uint32_t)-1;
3575 /* Enable read32 mode */
3576 pci_write_config(dev, vs + 0x10, 0x3, 1);
3577 /* Tell NIC which register to read */
3578 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3579 return pci_read_config(dev, vs + 0x14, 4);
/*
 * Watchdog recovery: detect whether the NIC rebooted (PCI config space
 * wiped, busmaster bit cleared), and if so restore config space,
 * reload firmware, and re-open the interface if it was running.
 * Re-arms the tick callout before returning.
 * NOTE(review): sampled listing -- the wait loop, close/quiesce call
 * and several brace lines are missing between the numbered lines.
 */
3585 mxge_watchdog_reset(mxge_softc_t *sc)
3587 struct pci_devinfo *dinfo;
3592 if_printf(sc->ifp, "Watchdog reset!\n");
3595 * Check to see if the NIC rebooted. If it did, then all of
3596 * PCI config space has been reset, and things like the
3597 * busmaster bit will be zero. If this is the case, then we
3598 * must restore PCI config space before the NIC can be used
3601 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3602 if (cmd == 0xffff) {
3604 * Maybe the watchdog caught the NIC rebooting; wait
3605 * up to 100ms for it to finish. If it does not come
3606 * back, then give up
3609 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3611 if_printf(sc->ifp, "NIC disappeared!\n");
3613 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3614 /* Print the reboot status */
3615 reboot = mxge_read_reboot(sc);
3616 if_printf(sc->ifp, "NIC rebooted, status = 0x%x\n", reboot);
3618 running = sc->ifp->if_flags & IFF_RUNNING;
3621 * Quiesce NIC so that TX routines will not try to
3622 * xmit after restoration of BAR
3625 /* Mark the link as down */
3626 if (sc->link_state) {
3627 sc->ifp->if_link_state = LINK_STATE_DOWN;
3628 if_link_state_change(sc->ifp);
3632 /* Restore PCI configuration space */
3633 dinfo = device_get_ivars(sc->dev);
3634 pci_cfg_restore(sc->dev, dinfo);
3636 /* And redo any changes we made to our config space */
3637 mxge_setup_cfg_space(sc);
3640 err = mxge_load_firmware(sc, 0);
3642 if_printf(sc->ifp, "Unable to re-load f/w\n");
3643 if (running && !err) {
3646 err = mxge_open(sc);
/* Kick the TX subqueues so queued packets start flowing again. */
3648 for (i = 0; i < sc->num_tx_rings; ++i)
3649 ifsq_devstart_sched(sc->ss[i].tx.ifsq);
3651 sc->watchdog_resets++;
3653 if_printf(sc->ifp, "NIC did not reboot, not resetting\n");
3657 if_printf(sc->ifp, "watchdog reset failed\n");
3661 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
/*
 * Diagnostic dump of a TX ring's state when it appears stuck
 * (note: "struck" in the message is the original driver's typo).
 */
3666 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3668 if_printf(sc->ifp, "slice %d struck? ring state:\n", slice);
3669 if_printf(sc->ifp, "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3670 tx->req, tx->done, tx->queue_active);
3671 if_printf(sc->ifp, "tx.activate=%d tx.deactivate=%d\n",
3672 tx->activate, tx->deactivate);
3673 if_printf(sc->ifp, "pkt_done=%d fw=%d\n",
3674 tx->pkt_done, be32toh(sc->ss->fw_stats->send_done_count));
/*
 * Snapshot ifnet packet counters and compute the delta (rx + tx
 * packets) since the previous call; the delta is presumably returned
 * to the caller (return line missing from this sampled listing).
 */
3678 mxge_update_stats(mxge_softc_t *sc)
3680 u_long ipackets, opackets, pkts;
3682 IFNET_STAT_GET(sc->ifp, ipackets, ipackets);
3683 IFNET_STAT_GET(sc->ifp, opackets, opackets);
3685 pkts = ipackets - sc->ipackets;
3686 pkts += opackets - sc->opackets;
3688 sc->ipackets = ipackets;
3689 sc->opackets = opackets;
/*
 * Periodic callout: while running, aggregate stats and re-probe media
 * if requested; while idle, check that the NIC has not lost its
 * busmaster bit (h/w fault) and run the watchdog reset if it has.
 * Always re-arms itself.  Runs under main_serialize only; the
 * watchdog path temporarily takes the remaining serializers.
 */
3695 mxge_tick(void *arg)
3697 mxge_softc_t *sc = arg;
3702 lwkt_serialize_enter(&sc->main_serialize);
3705 if (sc->ifp->if_flags & IFF_RUNNING) {
3706 /* Aggregate stats from different slices */
3707 pkts = mxge_update_stats(sc);
3708 if (sc->need_media_probe)
3709 mxge_media_probe(sc);
3714 /* Ensure NIC did not suffer h/w fault while idle */
3715 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3716 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3718 mxge_serialize_skipmain(sc);
3719 mxge_watchdog_reset(sc);
3720 mxge_deserialize_skipmain(sc);
3724 /* Look less often if NIC is idle */
3729 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
3731 lwkt_serialize_exit(&sc->main_serialize);
/*
 * ifmedia change handler; body not visible in this sampled listing
 * (presumably rejects manual media changes -- confirm in original).
 */
3735 mxge_media_change(struct ifnet *ifp)
/*
 * Validate and apply a new MTU (framed size must fit the NIC's
 * max_mtu and be at least the Ethernet minimum); if the interface is
 * running, restart it, and roll the MTU back if re-open fails.
 */
3741 mxge_change_mtu(mxge_softc_t *sc, int mtu)
3743 struct ifnet *ifp = sc->ifp;
3744 int real_mtu, old_mtu;
3747 real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3748 if (mtu > sc->max_mtu || real_mtu < 60)
3751 old_mtu = ifp->if_mtu;
3753 if (ifp->if_flags & IFF_RUNNING) {
3755 err = mxge_open(sc);
3757 ifp->if_mtu = old_mtu;
/*
 * ifmedia status handler: always full-duplex Ethernet; IFM_ACTIVE
 * reflects sc->link_state, media type reflects sc->current_media.
 */
3766 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3768 mxge_softc_t *sc = ifp->if_softc;
3773 ifmr->ifm_status = IFM_AVALID;
3774 ifmr->ifm_active = IFM_ETHER | IFM_FDX;
3775 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
3776 ifmr->ifm_active |= sc->current_media;
/*
 * ioctl handler: SIOCSIFMTU, SIOCSIFFLAGS (open/close + promisc/
 * allmulti), multicast list updates, capability toggles (TXCSUM/TSO
 * with matching if_hwassist bits, RXCSUM, VLAN tagging), media
 * ioctls, and ether_ioctl() for the rest.  Caller holds all ifnet
 * serializers.
 * NOTE(review): sampled listing -- the switch/case/break skeleton is
 * missing between the numbered lines.
 */
3780 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data,
3781 struct ucred *cr __unused)
3783 mxge_softc_t *sc = ifp->if_softc;
3784 struct ifreq *ifr = (struct ifreq *)data;
3787 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3792 err = mxge_change_mtu(sc, ifr->ifr_mtu);
3799 if (ifp->if_flags & IFF_UP) {
3800 if (!(ifp->if_flags & IFF_RUNNING)) {
3801 err = mxge_open(sc);
3804 * Take care of PROMISC and ALLMULTI
3807 mxge_change_promisc(sc,
3808 ifp->if_flags & IFF_PROMISC);
3809 mxge_set_multicast_list(sc);
3812 if (ifp->if_flags & IFF_RUNNING)
3819 mxge_set_multicast_list(sc);
3823 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3824 if (mask & IFCAP_TXCSUM) {
3825 ifp->if_capenable ^= IFCAP_TXCSUM;
3826 if (ifp->if_capenable & IFCAP_TXCSUM)
3827 ifp->if_hwassist |= CSUM_TCP | CSUM_UDP;
3829 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
3831 if (mask & IFCAP_TSO) {
3832 ifp->if_capenable ^= IFCAP_TSO;
3833 if (ifp->if_capenable & IFCAP_TSO)
3834 ifp->if_hwassist |= CSUM_TSO;
3836 ifp->if_hwassist &= ~CSUM_TSO;
3838 if (mask & IFCAP_RXCSUM)
3839 ifp->if_capenable ^= IFCAP_RXCSUM;
3840 if (mask & IFCAP_VLAN_HWTAGGING)
3841 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3845 mxge_media_probe(sc);
3846 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
3847 &sc->media, command);
3851 err = ether_ioctl(ifp, command, data);
/*
 * Copy module-level tunables into the softc, clamping each to its
 * legal range (intr_coal_delay 0..10000us, throttle between
 * MXGE_MIN_THROTTLE and MXGE_MAX_THROTTLE, mxge_ticks >= hz/2).
 */
3858 mxge_fetch_tunables(mxge_softc_t *sc)
3860 sc->intr_coal_delay = mxge_intr_coal_delay;
3861 if (sc->intr_coal_delay < 0 || sc->intr_coal_delay > (10 * 1000))
3862 sc->intr_coal_delay = MXGE_INTR_COAL_DELAY;
3865 if (mxge_ticks == 0)
3866 mxge_ticks = hz / 2;
3868 sc->pause = mxge_flow_control;
3869 sc->use_rss = mxge_use_rss;
3871 sc->throttle = mxge_throttle;
3872 if (sc->throttle && sc->throttle > MXGE_MAX_THROTTLE)
3873 sc->throttle = MXGE_MAX_THROTTLE;
3874 if (sc->throttle && sc->throttle < MXGE_MIN_THROTTLE)
3875 sc->throttle = MXGE_MIN_THROTTLE;
/*
 * Free the per-slice firmware-stats and rx_done DMA blocks allocated
 * by mxge_alloc_slices(), then free the slice array itself.
 */
3879 mxge_free_slices(mxge_softc_t *sc)
3881 struct mxge_slice_state *ss;
3887 for (i = 0; i < sc->num_slices; i++) {
3889 if (ss->fw_stats != NULL) {
3890 mxge_dma_free(&ss->fw_stats_dma);
3891 ss->fw_stats = NULL;
3893 if (ss->rx_data.rx_done.entry != NULL) {
3894 mxge_dma_free(&ss->rx_done_dma);
3895 ss->rx_data.rx_done.entry = NULL;
3898 kfree(sc->ss, M_DEVBUF);
/*
 * Allocate the slice array plus each slice's DMA-backed rx interrupt
 * queue (4KB-aligned) and firmware stats block (64-byte aligned), and
 * initialize the per-slice serializers.  rx_intr_slots is sized as
 * twice the firmware rx ring's descriptor count.
 */
3903 mxge_alloc_slices(mxge_softc_t *sc)
3906 struct mxge_slice_state *ss;
3908 int err, i, rx_ring_size;
3910 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
3912 device_printf(sc->dev, "Cannot determine rx ring size\n");
3915 rx_ring_size = cmd.data0;
3916 sc->rx_intr_slots = 2 * (rx_ring_size / sizeof (mcp_dma_addr_t));
3918 bytes = sizeof(*sc->ss) * sc->num_slices;
3919 sc->ss = kmalloc_cachealign(bytes, M_DEVBUF, M_WAITOK | M_ZERO);
3921 for (i = 0; i < sc->num_slices; i++) {
3926 lwkt_serialize_init(&ss->rx_data.rx_serialize);
3927 lwkt_serialize_init(&ss->tx.tx_serialize);
3931 * Allocate per-slice rx interrupt queue
3932 * XXX assume 4bytes mcp_slot
3934 bytes = sc->rx_intr_slots * sizeof(mcp_slot_t);
3935 err = mxge_dma_alloc(sc, &ss->rx_done_dma, bytes, 4096);
3937 device_printf(sc->dev,
3938 "alloc %d slice rx_done failed\n", i);
3941 ss->rx_data.rx_done.entry = ss->rx_done_dma.dmem_addr;
3944 * Allocate the per-slice firmware stats
3946 bytes = sizeof(*ss->fw_stats);
3947 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
3948 sizeof(*ss->fw_stats), 64);
3950 device_printf(sc->dev,
3951 "alloc %d fw_stats failed\n", i);
3954 ss->fw_stats = ss->fw_stats_dma.dmem_addr;
/*
 * Decide how many RSS slices (and TX rings) to use: requires MSI-X;
 * loads the slice-aware (RSS) firmware, resets the NIC, sizes the rx
 * interrupt queue, asks the firmware for its max RSS queues, rounds
 * both the MSI-X vector count and max slices down to powers of two,
 * and clamps via if_ring_count2().  Falls back to single-slice (and
 * restores the original firmware) on any failure.
 * NOTE(review): sampled listing -- early returns, the "abort" unwind
 * path, and brace lines are missing between the numbered lines.
 */
3960 mxge_slice_probe(mxge_softc_t *sc)
3962 int status, max_intr_slots, max_slices, num_slices;
3963 int msix_cnt, msix_enable, i, multi_tx;
3968 sc->num_tx_rings = 1;
3970 num_slices = device_getenv_int(sc->dev, "num_slices", mxge_num_slices);
3971 if (num_slices == 1)
3977 msix_enable = device_getenv_int(sc->dev, "msix.enable",
3982 msix_cnt = pci_msix_count(sc->dev);
3987 * Round down MSI-X vector count to the nearest power of 2
3990 while ((1 << (i + 1)) <= msix_cnt)
3995 * Now load the slice aware firmware see what it supports
3997 old_fw = sc->fw_name;
3998 if (old_fw == mxge_fw_aligned)
3999 sc->fw_name = mxge_fw_rss_aligned;
4001 sc->fw_name = mxge_fw_rss_unaligned;
4002 status = mxge_load_firmware(sc, 0);
4004 device_printf(sc->dev, "Falling back to a single slice\n");
4009 * Try to send a reset command to the card to see if it is alive
4011 memset(&cmd, 0, sizeof(cmd));
4012 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4014 device_printf(sc->dev, "failed reset\n");
4019 * Get rx ring size to calculate rx interrupt queue size
4021 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4023 device_printf(sc->dev, "Cannot determine rx ring size\n");
4026 max_intr_slots = 2 * (cmd.data0 / sizeof(mcp_dma_addr_t));
4029 * Tell it the size of the rx interrupt queue
4031 cmd.data0 = max_intr_slots * sizeof(struct mcp_slot);
4032 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4034 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4039 * Ask the maximum number of slices it supports
4041 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4043 device_printf(sc->dev,
4044 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4047 max_slices = cmd.data0;
4050 * Round down max slices count to the nearest power of 2
4053 while ((1 << (i + 1)) <= max_slices)
4055 max_slices = 1 << i;
/* Cannot use more slices than MSI-X vectors available. */
4057 if (max_slices > msix_cnt)
4058 max_slices = msix_cnt;
4060 sc->num_slices = num_slices;
4061 sc->num_slices = if_ring_count2(sc->num_slices, max_slices);
4063 multi_tx = device_getenv_int(sc->dev, "multi_tx", mxge_multi_tx);
4065 sc->num_tx_rings = sc->num_slices;
4068 device_printf(sc->dev, "using %d slices, max %d\n",
4069 sc->num_slices, max_slices);
4072 if (sc->num_slices == 1)
/* Fallback: restore the non-RSS firmware that was loaded before. */
4077 sc->fw_name = old_fw;
4078 mxge_load_firmware(sc, 0);
/*
 * Build the serializer array used by the ifnet serialize methods.
 * Order is critical (and relied on by the skipmain helpers):
 * index 0 is the main serializer, then all per-slice RX serializers,
 * then all per-slice TX serializers.
 */
4082 mxge_setup_serialize(struct mxge_softc *sc)
4086 /* Main + rx + tx */
4087 sc->nserialize = (2 * sc->num_slices) + 1;
4089 kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
4090 M_DEVBUF, M_WAITOK | M_ZERO);
4095 * NOTE: Order is critical
4098 KKASSERT(i < sc->nserialize);
4099 sc->serializes[i++] = &sc->main_serialize;
4101 for (slice = 0; slice < sc->num_slices; ++slice) {
4102 KKASSERT(i < sc->nserialize);
4103 sc->serializes[i++] = &sc->ss[slice].rx_data.rx_serialize;
4106 for (slice = 0; slice < sc->num_slices; ++slice) {
4107 KKASSERT(i < sc->nserialize);
4108 sc->serializes[i++] = &sc->ss[slice].tx.tx_serialize;
4111 KKASSERT(i == sc->nserialize);
/* ifnet serialize method: enter the serializer array per slz scope. */
4115 mxge_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
4117 struct mxge_softc *sc = ifp->if_softc;
4119 ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
/* ifnet deserialize method: exit the serializer array per slz scope. */
4123 mxge_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4125 struct mxge_softc *sc = ifp->if_softc;
4127 ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
/*
 * ifnet if_tryserialize method: non-blocking attempt to acquire the
 * serializer subset selected by 'slz'; returns the try-result from
 * ifnet_serialize_array_try() (nonzero on success).
 */
4131 mxge_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4133 struct mxge_softc *sc = ifp->if_softc;
4135 return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
/*
 * ifnet if_serialize_assert method (compiled only under INVARIANTS):
 * assert that the serializer subset selected by 'slz' is held (or not
 * held, per 'serialized') by the current thread.
 */
4141 mxge_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
4142 boolean_t serialized)
4144 struct mxge_softc *sc = ifp->if_softc;
4146 ifnet_serialize_array_assert(sc->serializes, sc->nserialize,
4150 #endif /* INVARIANTS */
4152 #ifdef IFPOLL_ENABLE
/*
 * if_npoll RX handler for a single slice: if completed RX descriptors
 * are pending, clean up to 'cycle' of them, then hand the interrupt
 * claim token back to the NIC.  Must be called with the slice's RX
 * serializer held.
 */
4155 mxge_npoll_rx(struct ifnet *ifp, void *xss, int cycle)
4157 struct mxge_slice_state *ss = xss;
4158 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
4160 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);
/* A nonzero length at the current index means work is pending. */
4162 if (rx_done->entry[rx_done->idx].length != 0) {
4163 mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, cycle);
4167 * This register write obviously has a cost;
4168 * however, if we don't hand back the rx token,
4169 * the upcoming packets may suffer a ridiculously
4170 * large delay, as observed on 8AL-C using ping(8).
4172 *ss->irq_claim = be32toh(3);
/*
 * if_npoll method: register mxge_npoll_rx for every slice, keyed by
 * the slice's interrupt CPU.  Only RX is registered; per the original
 * comment, polling TX and status does not work on this hardware.
 * NOTE(review): the deregistration path (info == NULL handling, if
 * any) is elided in this view -- confirm against full source.
 */
4177 mxge_npoll(struct ifnet *ifp, struct ifpoll_info *info)
4179 struct mxge_softc *sc = ifp->if_softc;
4186 * Only poll rx; polling tx and status don't seem to work
4188 for (i = 0; i < sc->num_slices; ++i) {
4189 struct mxge_slice_state *ss = &sc->ss[i];
4190 int idx = ss->intr_cpuid;
/* Poll handler runs on the same CPU that services the slice's interrupt. */
4192 KKASSERT(idx < ncpus2);
4193 info->ifpi_rx[idx].poll_func = mxge_npoll_rx;
4194 info->ifpi_rx[idx].arg = ss;
4195 info->ifpi_rx[idx].serializer = &ss->rx_data.rx_serialize;
4199 #endif /* IFPOLL_ENABLE */
/*
 * Device attach method.  Sets up DMA tags, maps the board's SRAM,
 * parses the EEPROM strings (MAC address etc.), allocates command /
 * zeropad / dmabench DMA buffers, selects and loads firmware, probes
 * and allocates slices and interrupts, configures the ifnet and its
 * TX subqueues, and finally arms the periodic tick callout.  Error
 * unwind paths (gotos to cleanup labels) are elided in this view.
 */
4202 mxge_attach(device_t dev)
4204 mxge_softc_t *sc = device_get_softc(dev);
4205 struct ifnet *ifp = &sc->arpcom.ac_if;
4209 * Avoid rewriting half the lines in this file to use
4210 * &sc->arpcom.ac_if instead
4214 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4215 ifmedia_init(&sc->media, 0, mxge_media_change, mxge_media_status);
4217 lwkt_serialize_init(&sc->main_serialize);
4219 mxge_fetch_tunables(sc);
/* Parent DMA tag: 32-bit-sized, full address range, no filtering. */
4221 err = bus_dma_tag_create(NULL, /* parent */
4224 BUS_SPACE_MAXADDR, /* low */
4225 BUS_SPACE_MAXADDR, /* high */
4226 NULL, NULL, /* filter */
4227 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
4229 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
4231 &sc->parent_dmat); /* tag */
4233 device_printf(dev, "Err %d allocating parent dmat\n", err);
4237 callout_init_mp(&sc->co_hdl)
4239 mxge_setup_cfg_space(sc);
4242 * Map the board into the kernel
4245 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
4247 if (sc->mem_res == NULL) {
4248 device_printf(dev, "could not map memory\n");
4253 sc->sram = rman_get_virtual(sc->mem_res);
/* Usable SRAM: 2MB minus reserved regions (2x48KB + 32KB) minus 0x100. */
4254 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4255 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4256 device_printf(dev, "impossible memory region size %ld\n",
4257 rman_get_size(sc->mem_res));
4263 * Make NULL terminated copy of the EEPROM strings section of
/* Copy 2 bytes fewer than the buffer so the tail stays NUL-terminated. */
4266 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4267 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4268 rman_get_bushandle(sc->mem_res),
4269 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4270 sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE - 2);
4271 err = mxge_parse_strings(sc);
4273 device_printf(dev, "parse EEPROM string failed\n");
4278 * Enable write combining for efficient use of PCIe bus
4283 * Allocate the out of band DMA memory
4285 err = mxge_dma_alloc(sc, &sc->cmd_dma, sizeof(mxge_cmd_t), 64);
4287 device_printf(dev, "alloc cmd DMA buf failed\n");
4290 sc->cmd = sc->cmd_dma.dmem_addr;
/* 64-byte zero pad buffer used to pad short TX frames. */
4292 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4294 device_printf(dev, "alloc zeropad DMA buf failed\n");
/* Page-sized, page-aligned buffer for the firmware DMA benchmark. */
4298 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4300 device_printf(dev, "alloc dmabench DMA buf failed\n");
4304 /* Select & load the firmware */
4305 err = mxge_select_firmware(sc);
4307 device_printf(dev, "select firmware failed\n");
/* Decide slice count, then allocate slices and their interrupts. */
4311 mxge_slice_probe(sc);
4312 err = mxge_alloc_slices(sc);
4314 device_printf(dev, "alloc slices failed\n");
4318 err = mxge_alloc_intr(sc);
4320 device_printf(dev, "alloc intr failed\n");
4324 /* Setup serializes */
4325 mxge_setup_serialize(sc);
4327 err = mxge_reset(sc, 0);
4329 device_printf(dev, "reset failed\n");
4333 err = mxge_alloc_rings(sc);
4335 device_printf(dev, "failed to allocate rings\n");
/* Advertise hardware checksum offload and TSO capabilities. */
4339 ifp->if_baudrate = IF_Gbps(10UL);
4340 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO;
4341 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4343 ifp->if_capabilities |= IFCAP_VLAN_MTU;
4345 /* Well, it's software VLAN tagging, sigh */
4346 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
4348 ifp->if_capenable = ifp->if_capabilities;
4351 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4352 ifp->if_init = mxge_init;
4353 ifp->if_ioctl = mxge_ioctl;
4354 ifp->if_start = mxge_start;
4355 #ifdef IFPOLL_ENABLE
/* Polling is only wired up for MSI/MSI-X interrupt configurations. */
4356 if (sc->intr_type != PCI_INTR_TYPE_LEGACY)
4357 ifp->if_npoll = mxge_npoll;
4359 ifp->if_serialize = mxge_serialize;
4360 ifp->if_deserialize = mxge_deserialize;
4361 ifp->if_tryserialize = mxge_tryserialize;
4363 ifp->if_serialize_assert = mxge_serialize_assert;
4366 /* Increase TSO burst length */
4367 ifp->if_tsolen = (32 * ETHERMTU);
4369 /* Initialise the ifmedia structure */
4370 mxge_media_init(sc);
4371 mxge_media_probe(sc);
4373 ether_ifattach(ifp, sc->mac_addr, NULL);
4375 /* Setup TX rings and subqueues */
4376 for (i = 0; i < sc->num_tx_rings; ++i) {
4377 struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
4378 struct mxge_slice_state *ss = &sc->ss[i];
/* Pin each subqueue to its slice's interrupt CPU and TX serializer. */
4380 ifsq_set_cpuid(ifsq, ss->intr_cpuid);
4381 ifsq_set_hw_serialize(ifsq, &ss->tx.tx_serialize);
4382 ifsq_set_priv(ifsq, &ss->tx);
4385 ifsq_watchdog_init(&ss->tx.watchdog, ifsq, mxge_watchdog);
4390 * We are not ready to do "gather" jumbo frame, so
4391 * limit MTU to MJUMPAGESIZE
4393 sc->max_mtu = MJUMPAGESIZE -
4394 ETHER_HDR_LEN - EVL_ENCAPLEN - MXGEFW_PAD - 1;
/* Hook up interrupt handlers; must happen after ether_ifattach(). */
4397 err = mxge_setup_intr(sc);
4399 device_printf(dev, "alloc and setup intr failed\n");
4400 ether_ifdetach(ifp);
4404 mxge_add_sysctls(sc);
/* Periodic tick runs on slice 0's interrupt CPU. */
4406 callout_reset_bycpu(&sc->co_hdl, mxge_ticks, mxge_tick, sc,
4407 sc->ss[0].intr_cpuid);
/*
 * Device detach method: stop the interface and the tick callout,
 * tear down interrupts, detach the ifnet, then release DMA buffers,
 * bus resources and the parent DMA tag in reverse order of attach.
 */
4416 mxge_detach(device_t dev)
4418 mxge_softc_t *sc = device_get_softc(dev);
4420 if (device_is_attached(dev)) {
4421 struct ifnet *ifp = sc->ifp;
/* Quiesce the interface under full serialization before teardown. */
4423 ifnet_serialize_all(ifp);
4426 if (ifp->if_flags & IFF_RUNNING)
4428 callout_stop(&sc->co_hdl);
4430 mxge_teardown_intr(sc, sc->num_slices);
4432 ifnet_deserialize_all(ifp);
/* callout_terminate() waits out any in-flight tick; must be unserialized. */
4434 callout_terminate(&sc->co_hdl);
4436 ether_ifdetach(ifp);
4438 ifmedia_removeall(&sc->media);
/* Tell the firmware to stop its dummy RDMA before freeing its buffers. */
4440 if (sc->cmd != NULL && sc->zeropad_dma.dmem_addr != NULL &&
4442 mxge_dummy_rdma(sc, 0);
4445 mxge_rem_sysctls(sc);
4446 mxge_free_rings(sc);
4448 /* MUST after sysctls, intr and rings are freed */
4449 mxge_free_slices(sc);
4451 if (sc->dmabench_dma.dmem_addr != NULL)
4452 mxge_dma_free(&sc->dmabench_dma);
4453 if (sc->zeropad_dma.dmem_addr != NULL)
4454 mxge_dma_free(&sc->zeropad_dma);
4455 if (sc->cmd_dma.dmem_addr != NULL)
4456 mxge_dma_free(&sc->cmd_dma);
/* BAR(2) holds the MSI-X table mapped in mxge_alloc_msix(). */
4458 if (sc->msix_table_res != NULL) {
4459 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(2),
4460 sc->msix_table_res);
4462 if (sc->mem_res != NULL) {
4463 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS,
4467 if (sc->parent_dmat != NULL)
4468 bus_dma_tag_destroy(sc->parent_dmat);
/*
 * Device shutdown method.
 * NOTE(review): the body is elided in this view -- presumably a
 * trivial return; confirm against the full source.
 */
4474 mxge_shutdown(device_t dev)
/*
 * Release per-slice MSI-X IRQ resources and vectors.  Only valid in
 * multi-slice mode.  'setup' indicates whether pci_setup_msix() had
 * completed, in which case the MSI-X infrastructure is also torn down.
 */
4480 mxge_free_msix(struct mxge_softc *sc, boolean_t setup)
4484 KKASSERT(sc->num_slices > 1);
4486 for (i = 0; i < sc->num_slices; ++i) {
4487 struct mxge_slice_state *ss = &sc->ss[i];
4489 if (ss->intr_res != NULL) {
4490 bus_release_resource(sc->dev, SYS_RES_IRQ,
4491 ss->intr_rid, ss->intr_res);
/* A non-negative rid means the vector was successfully allocated. */
4493 if (ss->intr_rid >= 0)
4494 pci_release_msix_vector(sc->dev, ss->intr_rid);
/* NOTE(review): guarded by 'setup' in the full source (guard elided here). */
4497 pci_teardown_msix(sc->dev);
/*
 * Allocate one MSI-X vector per slice, spreading them across CPUs
 * starting at a (tunable) offset.  Slice 0's vector handles the
 * combined main interrupt; the remaining slices get RX (or RX+TX,
 * when multiple TX rings are enabled) handlers.  On any failure,
 * everything allocated so far is unwound via mxge_free_msix().
 */
4501 mxge_alloc_msix(struct mxge_softc *sc)
4503 struct mxge_slice_state *ss;
4504 int offset, rid, error, i;
4505 boolean_t setup = FALSE;
4507 KKASSERT(sc->num_slices > 1);
/* Distribute different units across CPUs when slices < ncpus2. */
4509 if (sc->num_slices == ncpus2) {
4514 offset_def = (sc->num_slices * device_get_unit(sc->dev)) %
/* Allow msix.offset override, but only if it keeps slices CPU-aligned. */
4517 offset = device_getenv_int(sc->dev, "msix.offset", offset_def);
4518 if (offset >= ncpus2 ||
4519 offset % sc->num_slices != 0) {
4520 device_printf(sc->dev, "invalid msix.offset %d, "
4521 "use %d\n", offset, offset_def);
4522 offset = offset_def;
/* Slice 0: combined interrupt, serialized by the main serializer. */
4528 ss->intr_serialize = &sc->main_serialize;
4529 ss->intr_func = mxge_msi;
4530 ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
4531 "%s comb", device_get_nameunit(sc->dev));
4532 ss->intr_desc = ss->intr_desc0;
4533 ss->intr_cpuid = offset;
/* Remaining slices: RX-only or RX+TX handlers on successive CPUs. */
4535 for (i = 1; i < sc->num_slices; ++i) {
4538 ss->intr_serialize = &ss->rx_data.rx_serialize;
4539 if (sc->num_tx_rings == 1) {
4540 ss->intr_func = mxge_msix_rx;
4541 ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
4542 "%s rx", device_get_nameunit(sc->dev));
4544 ss->intr_func = mxge_msix_rxtx;
4545 ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
4546 "%s rxtx", device_get_nameunit(sc->dev));
4548 ss->intr_desc = ss->intr_desc0;
4549 ss->intr_cpuid = offset + i;
/* Map the BAR holding the MSI-X table before touching vectors. */
4553 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4555 if (sc->msix_table_res == NULL) {
4556 device_printf(sc->dev, "couldn't alloc MSI-X table res\n");
4560 error = pci_setup_msix(sc->dev);
4562 device_printf(sc->dev, "could not setup MSI-X\n");
/* Allocate and activate one vector per slice on its chosen CPU. */
4567 for (i = 0; i < sc->num_slices; ++i) {
4570 error = pci_alloc_msix_vector(sc->dev, i, &ss->intr_rid,
4573 device_printf(sc->dev, "could not alloc "
4574 "MSI-X %d on cpu%d\n", i, ss->intr_cpuid);
4578 ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4579 &ss->intr_rid, RF_ACTIVE);
4580 if (ss->intr_res == NULL) {
4581 device_printf(sc->dev, "could not alloc "
4582 "MSI-X %d resource\n", i);
4588 pci_enable_msix(sc->dev);
4589 sc->intr_type = PCI_INTR_TYPE_MSIX;
/* Failure path: unwind whatever was allocated above. */
4592 mxge_free_msix(sc, setup);
/*
 * Allocate interrupt resources: MSI-X when running multi-slice,
 * otherwise a single MSI or legacy interrupt on slice 0 via
 * pci_alloc_1intr().  Records handler, serializer and CPU per slice.
 */
4597 mxge_alloc_intr(struct mxge_softc *sc)
4599 struct mxge_slice_state *ss;
4602 if (sc->num_slices > 1) {
/* Multi-slice requires MSI-X; mxge_alloc_msix sets intr_type on success. */
4605 error = mxge_alloc_msix(sc);
4608 KKASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX);
/* Single-slice: one MSI or legacy interrupt, tunable via mxge_msi_enable. */
4614 sc->intr_type = pci_alloc_1intr(sc->dev, mxge_msi_enable,
4615 &ss->intr_rid, &irq_flags);
4617 ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4618 &ss->intr_rid, irq_flags);
4619 if (ss->intr_res == NULL) {
4620 device_printf(sc->dev, "could not alloc interrupt\n");
/* Legacy interrupts need the deassert-aware handler. */
4624 if (sc->intr_type == PCI_INTR_TYPE_LEGACY)
4625 ss->intr_func = mxge_legacy;
4627 ss->intr_func = mxge_msi;
4628 ss->intr_serialize = &sc->main_serialize;
4629 ss->intr_cpuid = rman_get_cpuid(ss->intr_res);
/*
 * Install the interrupt handler for every slice using the function,
 * serializer and description recorded by mxge_alloc_intr()/
 * mxge_alloc_msix().  On failure, tears down the handlers already
 * installed (slices [0, i)) and returns the error.
 */
4635 mxge_setup_intr(struct mxge_softc *sc)
4639 for (i = 0; i < sc->num_slices; ++i) {
4640 struct mxge_slice_state *ss = &sc->ss[i];
4643 error = bus_setup_intr_descr(sc->dev, ss->intr_res,
4644 INTR_MPSAFE, ss->intr_func, ss, &ss->intr_hand,
4645 ss->intr_serialize, ss->intr_desc);
4647 device_printf(sc->dev, "can't setup %dth intr\n", i);
/* Undo only the handlers that were successfully installed so far. */
4648 mxge_teardown_intr(sc, i);
/*
 * Remove the installed interrupt handlers for the first 'cnt' slices.
 * Passing a partial count lets mxge_setup_intr() unwind after a
 * mid-loop failure.
 */
4656 mxge_teardown_intr(struct mxge_softc *sc, int cnt)
4663 for (i = 0; i < cnt; ++i) {
4664 struct mxge_slice_state *ss = &sc->ss[i];
4666 bus_teardown_intr(sc->dev, ss->intr_res, ss->intr_hand);
4671 mxge_free_intr(struct mxge_softc *sc)
4676 if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
4677 struct mxge_slice_state *ss = &sc->ss[0];
4679 if (ss->intr_res != NULL) {
4680 bus_release_resource(sc->dev, SYS_RES_IRQ,
4681 ss->intr_rid, ss->intr_res);
4683 if (sc->intr_type == PCI_INTR_TYPE_MSI)
4684 pci_release_msi(sc->dev);
4686 mxge_free_msix(sc, TRUE);