1 /******************************************************************************
3 Copyright (c) 2006-2013, Myricom Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 $FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $
30 ***************************************************************************/
32 #include "opt_ifpoll.h"
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/linker.h>
38 #include <sys/firmware.h>
39 #include <sys/endian.h>
40 #include <sys/in_cksum.h>
41 #include <sys/sockio.h>
43 #include <sys/malloc.h>
44 #include <sys/kernel.h>
45 #include <sys/module.h>
46 #include <sys/serialize.h>
47 #include <sys/socket.h>
48 #include <sys/sysctl.h>
51 #include <net/if_arp.h>
52 #include <net/ifq_var.h>
53 #include <net/if_ringmap.h>
54 #include <net/ethernet.h>
55 #include <net/if_dl.h>
56 #include <net/if_media.h>
57 #include <net/if_poll.h>
61 #include <net/if_types.h>
62 #include <net/vlan/if_vlan_var.h>
64 #include <net/toeplitz.h>
66 #include <netinet/in_systm.h>
67 #include <netinet/in.h>
68 #include <netinet/ip.h>
69 #include <netinet/tcp.h>
74 #include <bus/pci/pcireg.h>
75 #include <bus/pci/pcivar.h>
76 #include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */
78 #include <vm/vm.h> /* for pmap_mapdev() */
81 #if defined(__x86_64__)
82 #include <machine/specialreg.h>
85 #include <dev/netif/mxge/mxge_mcp.h>
86 #include <dev/netif/mxge/mcp_gen_header.h>
87 #include <dev/netif/mxge/if_mxge_var.h>
/*
 * Driver-wide compile-time constants, load-time tunable defaults, and the
 * hw.mxge.* loader tunables that map onto them.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (embedded original line numbers are non-contiguous); code kept verbatim.
 */
89 #define MXGE_IFM (IFM_ETHER | IFM_FDX | IFM_ETH_FORCEPAUSE)
91 #define MXGE_RX_SMALL_BUFLEN (MHLEN - MXGEFW_PAD)
92 #define MXGE_HWRSS_KEYLEN 16
/* Tunable defaults; each is exported below via TUNABLE_INT/TUNABLE_STR. */
95 static int mxge_nvidia_ecrc_enable = 1;
96 static int mxge_force_firmware = 0;
97 static int mxge_intr_coal_delay = MXGE_INTR_COAL_DELAY;
98 static int mxge_deassert_wait = 1;
99 static int mxge_ticks;
100 static int mxge_num_slices = 0;
101 static int mxge_always_promisc = 0;
102 static int mxge_throttle = 0;
103 static int mxge_msi_enable = 1;
104 static int mxge_msix_enable = 1;
105 static int mxge_multi_tx = 1;
107 * Don't use RSS by default, it's just too slow
109 static int mxge_use_rss = 0;
111 static char mxge_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_FORCE_NONE;
/* Firmware image names: aligned vs. unaligned PCIe-completion variants. */
113 static const char *mxge_fw_unaligned = "mxge_ethp_z8e";
114 static const char *mxge_fw_aligned = "mxge_eth_z8e";
115 static const char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
116 static const char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
/* Loader tunables (settable from loader.conf before attach). */
118 TUNABLE_INT("hw.mxge.num_slices", &mxge_num_slices);
119 TUNABLE_INT("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay);
120 TUNABLE_INT("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable);
121 TUNABLE_INT("hw.mxge.force_firmware", &mxge_force_firmware);
122 TUNABLE_INT("hw.mxge.deassert_wait", &mxge_deassert_wait);
123 TUNABLE_INT("hw.mxge.ticks", &mxge_ticks);
124 TUNABLE_INT("hw.mxge.always_promisc", &mxge_always_promisc);
125 TUNABLE_INT("hw.mxge.throttle", &mxge_throttle);
126 TUNABLE_INT("hw.mxge.multi_tx", &mxge_multi_tx);
127 TUNABLE_INT("hw.mxge.use_rss", &mxge_use_rss);
128 TUNABLE_INT("hw.mxge.msi.enable", &mxge_msi_enable);
129 TUNABLE_INT("hw.mxge.msix.enable", &mxge_msix_enable);
130 TUNABLE_STR("hw.mxge.flow_ctrl", mxge_flowctrl, sizeof(mxge_flowctrl));
132 static int mxge_probe(device_t dev);
133 static int mxge_attach(device_t dev);
134 static int mxge_detach(device_t dev);
135 static int mxge_shutdown(device_t dev);
137 static int mxge_alloc_intr(struct mxge_softc *sc);
138 static void mxge_free_intr(struct mxge_softc *sc);
139 static int mxge_setup_intr(struct mxge_softc *sc);
140 static void mxge_teardown_intr(struct mxge_softc *sc, int cnt);
/*
 * newbus glue: device method table, driver descriptor, and module
 * registration/dependencies (needs firmware(9) and zlib for f/w loading).
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (DEVMETHOD_END, driver name field, etc. are missing from this view).
 */
142 static device_method_t mxge_methods[] = {
143 /* Device interface */
144 DEVMETHOD(device_probe, mxge_probe),
145 DEVMETHOD(device_attach, mxge_attach),
146 DEVMETHOD(device_detach, mxge_detach),
147 DEVMETHOD(device_shutdown, mxge_shutdown),
151 static driver_t mxge_driver = {
154 sizeof(mxge_softc_t),
157 static devclass_t mxge_devclass;
159 /* Declare ourselves to be a child of the PCI bus. */
160 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, NULL, NULL);
161 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
162 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
164 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
165 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
166 static void mxge_close(mxge_softc_t *sc, int down);
167 static int mxge_open(mxge_softc_t *sc);
168 static void mxge_tick(void *arg);
169 static void mxge_watchdog_reset(mxge_softc_t *sc);
170 static void mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice);
/*
 * PCI probe: match the Myricom vendor ID plus the Z8E/Z8E_9 device IDs and
 * set a human-readable description based on the board revision.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (switch header, break/return statements, braces); code kept verbatim.
 */
173 mxge_probe(device_t dev)
175 if (pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM &&
176 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E ||
177 pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9)) {
178 int rev = pci_get_revid(dev);
181 case MXGE_PCI_REV_Z8E:
182 device_set_desc(dev, "Myri10G-PCIE-8A");
184 case MXGE_PCI_REV_Z8ES:
185 device_set_desc(dev, "Myri10G-PCIE-8B");
/* default case: unknown revision, still claim the device but warn */
188 device_set_desc(dev, "Myri10G-PCIE-8??")
189 device_printf(dev, "Unrecognized rev %d NIC\n", rev);
/*
 * Enable write-combining PIO to the NIC SRAM window (x86_64 only) by
 * switching the mapping's page attribute to PAT_WRITE_COMBINING.
 * NOTE(review): fragment -- interior lines were dropped by extraction.
 */
198 mxge_enable_wc(mxge_softc_t *sc)
200 #if defined(__x86_64__)
204 len = rman_get_size(sc->mem_res);
205 pmap_change_attr((vm_offset_t) sc->sram, len / PAGE_SIZE,
206 PAT_WRITE_COMBINING);
/*
 * Allocate a zeroed, coherent DMA area of `bytes` with the given alignment
 * via bus_dmamem_coherent(); result is returned through `dma`.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (the `boundary` computation for >4KB/4KB-aligned buffers is cut off).
 */
211 mxge_dma_alloc(mxge_softc_t *sc, bus_dmamem_t *dma, size_t bytes,
212 bus_size_t alignment)
217 if (bytes > 4096 && alignment == 4096)
222 err = bus_dmamem_coherent(sc->parent_dmat, alignment, boundary,
223 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, bytes,
224 BUS_DMA_WAITOK | BUS_DMA_ZERO, dma);
226 device_printf(sc->dev, "bus_dmamem_coherent failed: %d\n", err);
233 mxge_dma_free(bus_dmamem_t *dma)
235 bus_dmamap_unload(dma->dmem_tag, dma->dmem_map);
236 bus_dmamem_free(dma->dmem_tag, dma->dmem_addr, dma->dmem_map);
237 bus_dma_tag_destroy(dma->dmem_tag);
/*
 * Parse the EEPROM info strings (MAC=, PC=, SN=, SN2=) into the softc.
 * SN2 takes precedence over SN; returns an error path that prints
 * "failed to parse eeprom_strings" when the required fields are absent.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (loop bodies, the MAC byte loop, found_* bookkeeping); kept verbatim.
 */
241 * The eeprom strings on the lanaiX have the format
247 mxge_parse_strings(mxge_softc_t *sc)
250 int i, found_mac, found_sn2;
253 ptr = sc->eeprom_strings;
256 while (*ptr != '\0') {
257 if (strncmp(ptr, "MAC=", 4) == 0) {
260 sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
/* each MAC octet must be exactly two hex digits */
261 if (endptr - ptr != 2)
270 } else if (strncmp(ptr, "PC=", 3) == 0) {
272 strlcpy(sc->product_code_string, ptr,
273 sizeof(sc->product_code_string));
274 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
276 strlcpy(sc->serial_number_string, ptr,
277 sizeof(sc->serial_number_string));
278 } else if (strncmp(ptr, "SN2=", 4) == 0) {
279 /* SN2 takes precedence over SN */
282 strlcpy(sc->serial_number_string, ptr,
283 sizeof(sc->serial_number_string));
/* advance past the current NUL-terminated string */
285 while (*ptr++ != '\0') {}
292 device_printf(sc->dev, "failed to parse eeprom_strings\n");
/*
 * Enable ECRC generation on an upstream Nvidia (ck804/mcp55) PCIe bridge so
 * PCIe completions arrive 8-byte aligned.  Extended config space (offset
 * 0x178) is reached by pmap_mapdev()'ing the chipset's config window because
 * DragonFly lacks extended PCIe config access (see comment at orig. 343-347).
 * Non-x86_64 stub just prints a diagnostic.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (returns, ECRC-bit set, several braces); code kept verbatim.
 */
296 #if defined(__x86_64__)
299 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
302 unsigned long base, off;
304 device_t pdev, mcp55;
305 uint16_t vendor_id, device_id, word;
306 uintptr_t bus, slot, func, ivend, idev;
/* honor the hw.mxge.nvidia_ecrc_enable tunable */
309 if (!mxge_nvidia_ecrc_enable)
312 pdev = device_get_parent(device_get_parent(sc->dev));
314 device_printf(sc->dev, "could not find parent?\n");
317 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
318 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
/* only Nvidia bridges (vendor 0x10de) are handled */
320 if (vendor_id != 0x10de)
325 if (device_id == 0x005d) {
326 /* ck804, base address is magic */
328 } else if (device_id >= 0x0374 && device_id <= 0x378) {
329 /* mcp55, base address stored in chipset */
330 mcp55 = pci_find_bsf(0, 0, 0);
332 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
333 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
334 word = pci_read_config(mcp55, 0x90, 2);
335 base = ((unsigned long)word & 0x7ffeU) << 25;
343 * Test below is commented because it is believed that doing
344 * config read/write beyond 0xff will access the config space
345 * for the next larger function. Uncomment this and remove
346 * the hacky pmap_mapdev() way of accessing config space when
347 * DragonFly grows support for extended pcie config space access.
351 * See if we can, by some miracle, access the extended
354 val = pci_read_config(pdev, 0x178, 4);
355 if (val != 0xffffffff) {
357 pci_write_config(pdev, 0x178, val, 4);
362 * Rather than using normal pci config space writes, we must
363 * map the Nvidia config space ourselves. This is because on
364 * opteron/nvidia class machine the 0xe000000 mapping is
365 * handled by the nvidia chipset, that means the internal PCI
366 * device (the on-chip northbridge), or the amd-8131 bridge
367 * and things behind them are not visible by this method.
/* fetch bus/slot/function and vendor/device to cross-check the mapping */
370 BUS_READ_IVAR(device_get_parent(pdev), pdev,
372 BUS_READ_IVAR(device_get_parent(pdev), pdev,
373 PCI_IVAR_SLOT, &slot);
374 BUS_READ_IVAR(device_get_parent(pdev), pdev,
375 PCI_IVAR_FUNCTION, &func);
376 BUS_READ_IVAR(device_get_parent(pdev), pdev,
377 PCI_IVAR_VENDOR, &ivend);
378 BUS_READ_IVAR(device_get_parent(pdev), pdev,
379 PCI_IVAR_DEVICE, &idev);
381 off = base + 0x00100000UL * (unsigned long)bus +
382 0x00001000UL * (unsigned long)(func + 8 * slot);
384 /* map it into the kernel */
385 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
387 device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
390 /* get a pointer to the config space mapped into the kernel */
391 cfgptr = va + (off & PAGE_MASK);
393 /* make sure that we can really access it */
394 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
395 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
396 if (!(vendor_id == ivend && device_id == idev)) {
397 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
398 vendor_id, device_id);
399 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
403 ptr32 = (uint32_t*)(cfgptr + 0x178);
406 if (val == 0xffffffff) {
407 device_printf(sc->dev, "extended mapping failed\n");
408 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
412 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
414 device_printf(sc->dev, "Enabled ECRC on upstream "
415 "Nvidia bridge at %d:%d:%d\n",
416 (int)bus, (int)slot, (int)func);
420 #else /* __x86_64__ */
423 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
425 device_printf(sc->dev, "Nforce 4 chipset on non-x86/x86_64!?!?!\n");
/*
 * Run the firmware DMA benchmark (or unaligned-completion test) three ways:
 * read (len*0x10000), write (len*0x1), read+write (len*0x10001), storing
 * MB/s results in sc->read_dma / write_dma / read_write_dma.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (abort labels, `test` reassignments between stages); code kept verbatim.
 */
431 mxge_dma_test(mxge_softc_t *sc, int test_type)
434 bus_addr_t dmatest_bus = sc->dmabench_dma.dmem_busaddr;
437 const char *test = " ";
440 * Run a small DMA test.
441 * The magic multipliers to the length tell the firmware
442 * to do DMA read, write, or read+write tests. The
443 * results are returned in cmd.data0. The upper 16
444 * bits of the return is the number of transfers completed.
445 * The lower 16 bits is the time in 0.5us ticks that the
446 * transfers took to complete.
449 len = sc->tx_boundary;
451 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
452 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
453 cmd.data2 = len * 0x10000;
454 status = mxge_send_cmd(sc, test_type, &cmd);
459 sc->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);
461 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
462 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
463 cmd.data2 = len * 0x1;
464 status = mxge_send_cmd(sc, test_type, &cmd);
469 sc->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);
471 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
472 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
473 cmd.data2 = len * 0x10001;
474 status = mxge_send_cmd(sc, test_type, &cmd);
479 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
480 (cmd.data0 & 0xffff);
/* unaligned test is allowed to fail silently; benchmark failure is noted */
483 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) {
484 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
491 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
492 * when the PCI-E Completion packets are aligned on an 8-byte
493 * boundary. Some PCI-E chip sets always align Completion packets; on
494 * the ones that do not, the alignment can be enforced by enabling
495 * ECRC generation (if supported).
497 * When PCI-E Completion packets are not aligned, it is actually more
498 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
500 * If the driver can neither enable ECRC nor verify that it has
501 * already been enabled, then it must use a firmware image which works
502 * around unaligned completion packets (ethp_z8e.dat), and it should
503 * also ensure that it never gives the device a Read-DMA which is
504 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
505 * enabled, then the driver should use the aligned (eth_z8e.dat)
506 * firmware image, and set tx_boundary to 4KB.
/*
 * Probe whether the aligned firmware works on this host; returns 0 to keep
 * it, non-zero to fall back to the unaligned (ethp) image.
 * NOTE(review): fragment -- interior lines were dropped by extraction;
 * the '®' on the pci_find_extcap() line is an HTML-entity mis-encoding
 * of '&reg' introduced by the extraction -- restore before compiling.
 */
509 mxge_firmware_probe(mxge_softc_t *sc)
511 device_t dev = sc->dev;
515 sc->tx_boundary = 4096;
518 * Verify the max read request size was set to 4KB
519 * before trying the test with 4KB.
521 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) {
522 pectl = pci_read_config(dev, reg + 0x8, 2);
523 if ((pectl & (5 << 12)) != (5 << 12)) {
524 device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
526 sc->tx_boundary = 2048;
531 * Load the optimized firmware (which assumes aligned PCIe
532 * completions) in order to see if it works on this host.
534 sc->fw_name = mxge_fw_aligned;
535 status = mxge_load_firmware(sc, 1);
540 * Enable ECRC if possible
542 mxge_enable_nvidia_ecrc(sc);
545 * Run a DMA test which watches for unaligned completions and
546 * aborts on the first one seen. Not required on Z8ES or newer.
548 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
551 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
553 return 0; /* keep the aligned firmware */
556 device_printf(dev, "DMA test failed: %d\n", status);
557 if (status == ENOSYS) {
558 device_printf(dev, "Falling back to ethp! "
559 "Please install up to date fw\n");
/*
 * Pick aligned vs. unaligned firmware: honor the force tunable (throttling
 * forces the choice too), use aligned on narrow (<=x4) links, otherwise run
 * mxge_firmware_probe(); set fw_name/tx_boundary and load the image.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (the `aligned` flag assignments, gotos, labels); code kept verbatim.
 */
565 mxge_select_firmware(mxge_softc_t *sc)
568 int force_firmware = mxge_force_firmware;
/* throttling requires a specific firmware; it overrides the tunable */
571 force_firmware = sc->throttle;
573 if (force_firmware != 0) {
574 if (force_firmware == 1)
579 device_printf(sc->dev,
580 "Assuming %s completions (forced)\n",
581 aligned ? "aligned" : "unaligned");
587 * If the PCIe link width is 4 or less, we can use the aligned
588 * firmware and skip any checks
590 if (sc->link_width != 0 && sc->link_width <= 4) {
591 device_printf(sc->dev, "PCIe x%d Link, "
592 "expect reduced performance\n", sc->link_width);
597 if (mxge_firmware_probe(sc) == 0)
/* aligned path: 4KB read-DMA boundary */
602 sc->fw_name = mxge_fw_aligned;
603 sc->tx_boundary = 4096;
/* unaligned path: limit read-DMA to 2KB */
605 sc->fw_name = mxge_fw_unaligned;
606 sc->tx_boundary = 2048;
608 return mxge_load_firmware(sc, 0);
/*
 * Sanity-check a firmware header: must be MCP_TYPE_ETH and its parsed
 * major.minor must match MXGEFW_VERSION_MAJOR/MINOR.  Also records the
 * version string in sc->fw_version for sysctl export.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (return statements, closing braces); code kept verbatim.
 */
612 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
614 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
615 if_printf(sc->ifp, "Bad firmware type: 0x%x\n",
616 be32toh(hdr->mcp_type));
620 /* Save firmware version for sysctl */
621 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
623 if_printf(sc->ifp, "firmware id: %s\n", hdr->version);
625 ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
626 &sc->fw_ver_minor, &sc->fw_ver_tiny);
628 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR &&
629 sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
630 if_printf(sc->ifp, "Found firmware version %s\n",
632 if_printf(sc->ifp, "Driver needs %d.%d\n",
633 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
640 z_alloc(void *nil, u_int items, u_int size)
642 return kmalloc(items * size, M_TEMP, M_WAITOK);
646 z_free(void *nil, void *ptr)
/*
 * Fetch the firmware image via firmware(9), zlib-inflate it (the image's
 * `version` field carries the uncompressed size), validate its header, and
 * PIO-copy it into NIC SRAM at MXGE_FW_OFFSET in 256-byte chunks.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (inflateEnd, *limit update, error returns, labels); code kept verbatim.
 */
652 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
655 char *inflate_buffer;
656 const struct firmware *fw;
657 const mcp_gen_header_t *hdr;
664 fw = firmware_get(sc->fw_name);
666 if_printf(sc->ifp, "Could not find firmware image %s\n",
671 /* Setup zlib and decompress f/w */
672 bzero(&zs, sizeof(zs));
675 status = inflateInit(&zs);
676 if (status != Z_OK) {
682 * The uncompressed size is stored as the firmware version,
683 * which would otherwise go unused
685 fw_len = (size_t)fw->version;
686 inflate_buffer = kmalloc(fw_len, M_TEMP, M_WAITOK);
687 zs.avail_in = fw->datasize;
688 zs.next_in = __DECONST(char *, fw->data);
689 zs.avail_out = fw_len;
690 zs.next_out = inflate_buffer;
691 status = inflate(&zs, Z_FINISH);
692 if (status != Z_STREAM_END) {
693 if_printf(sc->ifp, "zlib %d\n", status);
695 goto abort_with_buffer;
/* locate the embedded MCP header via the pointer at MCP_HEADER_PTR_OFFSET */
700 htobe32(*(const uint32_t *)(inflate_buffer + MCP_HEADER_PTR_OFFSET));
701 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
702 if_printf(sc->ifp, "Bad firmware file");
704 goto abort_with_buffer;
706 hdr = (const void*)(inflate_buffer + hdr_offset);
708 status = mxge_validate_firmware(sc, hdr);
710 goto abort_with_buffer;
712 /* Copy the inflated firmware to NIC SRAM. */
713 for (i = 0; i < fw_len; i += 256) {
714 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, inflate_buffer + i,
715 min(256U, (unsigned)(fw_len - i)));
/* cleanup paths (labels dropped by extraction) */
724 kfree(inflate_buffer, M_TEMP);
727 firmware_put(fw, FIRMWARE_UNLOAD);
732 * Enable or disable periodic RDMAs from the host to make certain
733 * chipsets resend dropped PCIe messages
/*
 * Build a 6-word big-endian request (confirm addr, confirm data 0xffffffff,
 * dummy addr, enable flag) in an 8-byte-aligned stack buffer, PIO it to
 * MXGEFW_BOOT_DUMMY_RDMA, and poll up to 20 times for the firmware to write
 * -1 into the confirmation word.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (buf_bytes declaration, *confirm clear, DELAY loop body); kept verbatim.
 */
736 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
739 volatile uint32_t *confirm;
740 volatile char *submit;
741 uint32_t *buf, dma_low, dma_high;
/* round the stack buffer up to an 8-byte boundary */
744 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
746 /* Clear confirmation addr */
747 confirm = (volatile uint32_t *)sc->cmd;
752 * Send an rdma command to the PCIe engine, and wait for the
753 * response in the confirmation address. The firmware should
754 * write a -1 there to indicate it is alive and well
756 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
757 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);
758 buf[0] = htobe32(dma_high); /* confirm addr MSW */
759 buf[1] = htobe32(dma_low); /* confirm addr LSW */
760 buf[2] = htobe32(0xffffffff); /* confirm data */
761 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.dmem_busaddr);
762 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.dmem_busaddr);
763 buf[3] = htobe32(dma_high); /* dummy addr MSW */
764 buf[4] = htobe32(dma_low); /* dummy addr LSW */
765 buf[5] = htobe32(enable); /* enable? */
767 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
769 mxge_pio_copy(submit, buf, 64);
/* poll for the firmware's -1 acknowledgement */
774 while (*confirm != 0xffffffff && i < 20) {
778 if (*confirm != 0xffffffff) {
779 if_printf(sc->ifp, "dummy rdma %s failed (%p = 0x%x)",
780 (enable ? "enable" : "disable"), confirm, *confirm);
/*
 * Send one command to the firmware mailbox (MXGEFW_ETH_CMD) and poll the
 * DMA'd response block for up to 20 iterations; translates firmware result
 * codes to errno values and returns the command's data0 on success.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (case bodies, err assignments, DELAY, returns); code kept verbatim.
 */
785 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
788 char buf_bytes[sizeof(*buf) + 8];
789 volatile mcp_cmd_response_t *response = sc->cmd;
790 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
791 uint32_t dma_low, dma_high;
792 int err, sleep_total = 0;
794 /* Ensure buf is aligned to 8 bytes */
795 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
797 buf->data0 = htobe32(data->data0);
798 buf->data1 = htobe32(data->data1);
799 buf->data2 = htobe32(data->data2);
800 buf->cmd = htobe32(cmd);
801 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
802 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);
804 buf->response_addr.low = htobe32(dma_low);
805 buf->response_addr.high = htobe32(dma_high);
/* 0xffffffff is the "no response yet" sentinel */
807 response->result = 0xffffffff;
809 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
815 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
817 switch (be32toh(response->result)) {
/* success case: return the firmware's data word to the caller */
819 data->data0 = be32toh(response->data);
825 case MXGEFW_CMD_UNKNOWN:
828 case MXGEFW_CMD_ERROR_UNALIGNED:
831 case MXGEFW_CMD_ERROR_BUSY:
834 case MXGEFW_CMD_ERROR_I2C_ABSENT:
838 if_printf(sc->ifp, "command %d failed, result = %d\n",
839 cmd, be32toh(response->result));
/* fell out of the polling loop without a recognized result */
847 if_printf(sc->ifp, "command %d timed out result = %d\n",
848 cmd, be32toh(response->result));
/*
 * Validate the firmware already running on the NIC (no reload): locate its
 * header via the pointer at MCP_HEADER_PTR_OFFSET in SRAM, copy it to host
 * memory, run mxge_validate_firmware(), and flag the 1.4.4-1.4.11 rx-filter
 * bug that requires ALLMULTI.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (hdr_offset declaration/assignment, returns); code kept verbatim.
 */
854 mxge_adopt_running_firmware(mxge_softc_t *sc)
856 struct mcp_gen_header *hdr;
857 const size_t bytes = sizeof(struct mcp_gen_header);
862 * Find running firmware header
865 htobe32(*(volatile uint32_t *)(sc->sram + MCP_HEADER_PTR_OFFSET));
867 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
868 if_printf(sc->ifp, "Running firmware has bad header offset "
869 "(%zu)\n", hdr_offset);
874 * Copy header of running firmware from SRAM to host memory to
877 hdr = kmalloc(bytes, M_DEVBUF, M_WAITOK);
878 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
879 rman_get_bushandle(sc->mem_res), hdr_offset, (char *)hdr, bytes);
880 status = mxge_validate_firmware(sc, hdr);
881 kfree(hdr, M_DEVBUF);
884 * Check to see if adopted firmware has bug where adopting
885 * it will cause broadcasts to be filtered unless the NIC
886 * is kept in ALLMULTI mode
888 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
889 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
890 sc->adopted_rx_filter_bug = 1;
891 if_printf(sc->ifp, "Adopting fw %d.%d.%d: "
892 "working around rx filter bug\n",
893 sc->fw_ver_major, sc->fw_ver_minor, sc->fw_ver_tiny);
/*
 * Load firmware into the NIC.  If the helper fails and `adopt` is set, fall
 * back to adopting the firmware already running (forcing the unaligned/2KB
 * settings unless tx_boundary is 4096).  Otherwise hand off to the bootstrap
 * MCP at MXGEFW_BOOT_HANDOFF, skipping the first 8 protected SRAM bytes, and
 * poll for the -1 confirmation.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (buf_bytes declaration, returns, DELAY loop body); code kept verbatim.
 */
900 mxge_load_firmware(mxge_softc_t *sc, int adopt)
902 volatile uint32_t *confirm;
903 volatile char *submit;
905 uint32_t *buf, size, dma_low, dma_high;
/* round the stack buffer up to an 8-byte boundary */
908 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
910 size = sc->sram_size;
911 status = mxge_load_firmware_helper(sc, &size);
917 * Try to use the currently running firmware, if
920 status = mxge_adopt_running_firmware(sc);
923 "failed to adopt running firmware\n");
926 if_printf(sc->ifp, "Successfully adopted running firmware\n");
928 if (sc->tx_boundary == 4096) {
930 "Using firmware currently running on NIC. "
932 if_printf(sc->ifp, "performance consider loading "
933 "optimized firmware\n");
/* adopted firmware implies the unaligned variant and 2KB boundary */
935 sc->fw_name = mxge_fw_unaligned;
936 sc->tx_boundary = 2048;
940 /* Clear confirmation addr */
941 confirm = (volatile uint32_t *)sc->cmd;
946 * Send a reload command to the bootstrap MCP, and wait for the
947 * response in the confirmation address. The firmware should
948 * write a -1 there to indicate it is alive and well
951 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
952 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);
954 buf[0] = htobe32(dma_high); /* confirm addr MSW */
955 buf[1] = htobe32(dma_low); /* confirm addr LSW */
956 buf[2] = htobe32(0xffffffff); /* confirm data */
959 * FIX: All newest firmware should un-protect the bottom of
960 * the sram before handoff. However, the very first interfaces
961 * do not. Therefore the handoff copy must skip the first 8 bytes
963 /* where the code starts*/
964 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
965 buf[4] = htobe32(size - 8); /* length of code */
966 buf[5] = htobe32(8); /* where to copy to */
967 buf[6] = htobe32(0); /* where to jump to */
969 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
970 mxge_pio_copy(submit, buf, 64);
/* poll for the bootstrap's -1 acknowledgement */
975 while (*confirm != 0xffffffff && i < 20) {
979 if (*confirm != 0xffffffff) {
980 if_printf(sc->ifp,"handoff failed (%p = 0x%x)",
988 mxge_update_mac_address(mxge_softc_t *sc)
991 uint8_t *addr = sc->mac_addr;
993 cmd.data0 = (addr[0] << 24) | (addr[1] << 16) |
994 (addr[2] << 8) | addr[3];
995 cmd.data1 = (addr[4] << 8) | (addr[5]);
996 return mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
/*
 * Enable or disable link flow control in firmware per `pause`.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (if/else structure, sc->pause update, return); code kept verbatim.
 */
1000 mxge_change_pause(mxge_softc_t *sc, int pause)
1005 bzero(&cmd, sizeof(cmd)); /* silence gcc warning */
1007 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, &cmd);
1009 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, &cmd);
1011 if_printf(sc->ifp, "Failed to set flow control mode\n");
/*
 * Enable or disable promiscuous mode in firmware; the hw.mxge.always_promisc
 * tunable forces it on regardless of the request.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (if/else structure); code kept verbatim.
 */
1019 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1024 bzero(&cmd, sizeof(cmd)); /* avoid gcc warning */
1025 if (mxge_always_promisc)
1029 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, &cmd);
1031 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, &cmd);
1033 if_printf(sc->ifp, "Failed to set promisc mode\n");
/*
 * Reprogram the firmware multicast filter: temporarily go ALLMULTI, flush
 * all groups, re-join each AF_LINK address from if_multiaddrs, then turn
 * filtering back on -- unless IFF_ALLMULTI or the adopted-firmware rx-filter
 * bug requires staying in ALLMULTI.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (returns, continue, bcopy destinations); code kept verbatim.
 */
1037 mxge_set_multicast_list(mxge_softc_t *sc)
1040 struct ifmultiaddr *ifma;
1041 struct ifnet *ifp = sc->ifp;
1044 /* This firmware is known to not support multicast */
1045 if (!sc->fw_multicast_support)
1048 /* Disable multicast filtering while we play with the lists*/
1049 bzero(&cmd, sizeof(cmd)); /* silence gcc warning */
1050 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1052 if_printf(ifp, "Failed MXGEFW_ENABLE_ALLMULTI, "
1053 "error status: %d\n", err);
/* rx-filter bug workaround: must stay in ALLMULTI */
1057 if (sc->adopted_rx_filter_bug)
1060 if (ifp->if_flags & IFF_ALLMULTI) {
1061 /* Request to disable multicast filtering, so quit here */
1065 /* Flush all the filters */
1066 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1068 if_printf(ifp, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, "
1069 "error status: %d\n", err);
1074 * Walk the multicast list, and add each address
1076 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1077 if (ifma->ifma_addr->sa_family != AF_LINK)
1080 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1082 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
1084 cmd.data0 = htonl(cmd.data0);
1085 cmd.data1 = htonl(cmd.data1);
1086 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1088 if_printf(ifp, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
1089 "error status: %d\n", err);
1090 /* Abort, leaving multicast filtering off */
1095 /* Enable multicast filtering */
1096 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1098 if_printf(ifp, "Failed MXGEFW_DISABLE_ALLMULTI, "
1099 "error status: %d\n", err);
/*
 * Return the largest MTU the hardware/firmware combination supports:
 * MXGEFW_MAX_MTU when page-sized jumbos suffice or the firmware accepts
 * multiple big buffers per frame, otherwise MJUMPAGESIZE minus padding.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (cmd declaration/setup, status check); code kept verbatim.
 */
1105 mxge_max_mtu(mxge_softc_t *sc)
1110 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1111 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1113 /* try to set nbufs to see if it we can
1114 use virtually contiguous jumbos */
1116 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1119 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1121 /* otherwise, we're limited to MJUMPAGESIZE */
1122 return MJUMPAGESIZE - MXGEFW_PAD;
/*
 * Full device reset: issue MXGEFW_CMD_RESET, re-enable dummy RDMAs, program
 * the intrq size, (re)configure RSS slices, optionally re-exchange interrupt
 * queue DMA addresses per slice, fetch the coal-delay/irq-ack/irq-deassert
 * SRAM offsets, run the DMA benchmark, zero all per-slice shared state, and
 * reapply MAC/promisc/pause/multicast/throttle settings.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (returns, several cmd setups, loop/if bodies); code kept verbatim.
 */
1127 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1129 struct mxge_slice_state *ss;
1130 mxge_rx_done_t *rx_done;
1131 volatile uint32_t *irq_claim;
1133 int slice, status, rx_intr_size;
1136 * Try to send a reset command to the card to see if it
1139 memset(&cmd, 0, sizeof (cmd));
1140 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1142 if_printf(sc->ifp, "failed reset\n");
1146 mxge_dummy_rdma(sc, 1);
1149 * Set the intrq size
1150 * XXX assume 4byte mcp_slot
1152 rx_intr_size = sc->rx_intr_slots * sizeof(mcp_slot_t);
1153 cmd.data0 = rx_intr_size;
1154 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1157 * Even though we already know how many slices are supported
1158 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1159 * has magic side effects, and must be called after a reset.
1160 * It must be called prior to calling any RSS related cmds,
1161 * including assigning an interrupt queue for anything but
1162 * slice 0. It must also be called *after*
1163 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1164 * the firmware to compute offsets.
1166 if (sc->num_slices > 1) {
1167 /* Ask the maximum number of slices it supports */
1168 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
1170 if_printf(sc->ifp, "failed to get number of slices\n");
1175 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1176 * to setting up the interrupt queue DMA
1178 cmd.data0 = sc->num_slices;
1179 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1180 if (sc->num_tx_rings > 1)
1181 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1182 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, &cmd);
1184 if_printf(sc->ifp, "failed to set number of slices\n");
1189 if (interrupts_setup) {
1190 /* Now exchange information about interrupts */
1191 for (slice = 0; slice < sc->num_slices; slice++) {
1192 ss = &sc->ss[slice];
1194 rx_done = &ss->rx_data.rx_done;
1195 memset(rx_done->entry, 0, rx_intr_size);
/* hand the per-slice intrq DMA address (low/high) to firmware */
1198 MXGE_LOWPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
1200 MXGE_HIGHPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
1202 status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA,
1207 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET,
1209 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1211 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1212 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1214 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
1215 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1218 if_printf(sc->ifp, "failed set interrupt parameters\n");
1222 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1224 /* Run a DMA benchmark */
1225 mxge_dma_test(sc, MXGEFW_DMA_TEST);
1227 for (slice = 0; slice < sc->num_slices; slice++) {
1228 ss = &sc->ss[slice];
1230 ss->irq_claim = irq_claim + (2 * slice);
1232 /* Reset mcp/driver shared state back to 0 */
1233 ss->rx_data.rx_done.idx = 0;
1236 ss->tx.pkt_done = 0;
1237 ss->tx.queue_active = 0;
1238 ss->tx.activate = 0;
1239 ss->tx.deactivate = 0;
1240 ss->rx_data.rx_big.cnt = 0;
1241 ss->rx_data.rx_small.cnt = 0;
1242 if (ss->fw_stats != NULL)
1243 bzero(ss->fw_stats, sizeof(*ss->fw_stats));
1245 sc->rdma_tags_available = 15;
/* reapply host-side configuration after the reset */
1247 status = mxge_update_mac_address(sc);
1248 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1249 mxge_change_pause(sc, sc->pause);
1250 mxge_set_multicast_list(sc);
1253 cmd.data0 = sc->throttle;
1254 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd))
1255 if_printf(sc->ifp, "can't enable throttle\n");
/*
 * sysctl handler for hw throttle: validate the new value against
 * MXGE_MIN/MAX_THROTTLE, push it to firmware under the ifnet serializer,
 * and commit to sc->throttle only on success.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (sc/cmd declarations, early returns); code kept verbatim.
 */
1261 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1266 unsigned int throttle;
1269 throttle = sc->throttle;
1270 err = sysctl_handle_int(oidp, &throttle, arg2, req);
1274 if (throttle == sc->throttle)
1277 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1280 ifnet_serialize_all(sc->ifp);
1282 cmd.data0 = throttle;
1283 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1285 sc->throttle = throttle;
1287 ifnet_deserialize_all(sc->ifp);
/*
 * sysctl handler toggling RSS use; takes the ifnet serializer and, when the
 * interface is running, applies the change (restart logic dropped by
 * extraction).
 * NOTE(review): fragment -- interior lines were dropped by extraction.
 */
1292 mxge_change_use_rss(SYSCTL_HANDLER_ARGS)
1298 use_rss = sc->use_rss;
1299 err = sysctl_handle_int(oidp, &use_rss, arg2, req);
1303 if (use_rss == sc->use_rss)
1306 ifnet_serialize_all(sc->ifp);
1308 sc->use_rss = use_rss;
1309 if (sc->ifp->if_flags & IFF_RUNNING) {
1314 ifnet_deserialize_all(sc->ifp);
/*
 * sysctl handler for the interrupt coalescing delay (usecs): reject 0 and
 * values above 1,000,000, then write the big-endian value directly to the
 * firmware's SRAM location under the ifnet serializer.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (sc declaration, early returns); code kept verbatim.
 */
1319 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1322 unsigned int intr_coal_delay;
1326 intr_coal_delay = sc->intr_coal_delay;
1327 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1331 if (intr_coal_delay == sc->intr_coal_delay)
1334 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1337 ifnet_serialize_all(sc->ifp);
1339 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1340 sc->intr_coal_delay = intr_coal_delay;
1342 ifnet_deserialize_all(sc->ifp);
/*
 * sysctl helper exporting a big-endian 32-bit firmware counter as a host-
 * order int (byteswap, then hand off to sysctl_handle_int).
 * NOTE(review): fragment -- interior lines were dropped by extraction.
 */
1347 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1353 arg2 = be32toh(*(int *)arg1);
1355 err = sysctl_handle_int(oidp, arg1, arg2, req);
/*
 * Tear down the per-slice sysctl contexts and the slice parent context,
 * NULLing the tree pointers so teardown is idempotent.
 * NOTE(review): fragment -- interior lines were dropped by extraction
 * (slice loop variable declaration, closing braces); code kept verbatim.
 */
1361 mxge_rem_sysctls(mxge_softc_t *sc)
1363 if (sc->ss != NULL) {
1364 struct mxge_slice_state *ss;
1367 for (slice = 0; slice < sc->num_slices; slice++) {
1368 ss = &sc->ss[slice];
1369 if (ss->sysctl_tree != NULL) {
1370 sysctl_ctx_free(&ss->sysctl_ctx);
1371 ss->sysctl_tree = NULL;
1376 if (sc->slice_sysctl_tree != NULL) {
1377 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1378 sc->slice_sysctl_tree = NULL;
/*
 * Register all of the driver's sysctl nodes: static device
 * information, tunables (intr coalescing, throttle, RSS), big-endian
 * firmware statistics (via mxge_handle_be32), and a per-slice subtree
 * of debug counters.
 *
 * Fixes two copy-paste defects in the original: the "rx_big_cnt"
 * node was described as "rx_small_cnt" and "tx_pkt_done" was
 * described as "tx_done".
 */
1383 mxge_add_sysctls(mxge_softc_t *sc)
1385 struct sysctl_ctx_list *ctx;
1386 struct sysctl_oid_list *children;
1388 struct mxge_slice_state *ss;
1392 ctx = device_get_sysctl_ctx(sc->dev);
1393 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
1394 fw = sc->ss[0].fw_stats;
1397 * Random information
1399 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
1400 CTLFLAG_RD, &sc->fw_version, 0, "firmware version");
1402 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial_number",
1403 CTLFLAG_RD, &sc->serial_number_string, 0, "serial number");
1405 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "product_code",
1406 CTLFLAG_RD, &sc->product_code_string, 0, "product code");
1408 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcie_link_width",
1409 CTLFLAG_RD, &sc->link_width, 0, "link width");
1411 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary",
1412 CTLFLAG_RD, &sc->tx_boundary, 0, "tx boundary");
1414 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_combine",
1415 CTLFLAG_RD, &sc->wc, 0, "write combining PIO");
1417 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_dma_MBs",
1418 CTLFLAG_RD, &sc->read_dma, 0, "DMA Read speed in MB/s");
1420 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_dma_MBs",
1421 CTLFLAG_RD, &sc->write_dma, 0, "DMA Write speed in MB/s");
1423 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_write_dma_MBs",
1424 CTLFLAG_RD, &sc->read_write_dma, 0,
1425 "DMA concurrent Read/Write speed in MB/s");
1427 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "watchdog_resets",
1428 CTLFLAG_RD, &sc->watchdog_resets, 0,
1429 "Number of times NIC was reset");
/* Expose the slice->CPU ring map only when multiple slices exist. */
1431 if (sc->num_slices > 1) {
1432 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "slice_cpumap",
1433 CTLTYPE_OPAQUE | CTLFLAG_RD, sc->ring_map, 0,
1434 if_ringmap_cpumap_sysctl, "I", "slice CPU map");
1438 * Performance related tunables
1440 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal_delay",
1441 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_intr_coal, "I",
1442 "Interrupt coalescing delay in usecs");
1444 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "throttle",
1445 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_throttle, "I",
1446 "Transmit throttling");
1448 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "use_rss",
1449 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_use_rss, "I",
1452 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "deassert_wait",
1453 CTLFLAG_RW, &mxge_deassert_wait, 0,
1454 "Wait for IRQ line to go low in ihandler");
1457 * Stats block from firmware is in network byte order.
1460 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "link_up",
1461 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 0,
1462 mxge_handle_be32, "I", "link up");
1464 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_tags_available",
1465 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 0,
1466 mxge_handle_be32, "I", "rdma_tags_available");
1468 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_crc32",
1469 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_crc32, 0,
1470 mxge_handle_be32, "I", "dropped_bad_crc32");
1472 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_phy",
1473 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_phy, 0,
1474 mxge_handle_be32, "I", "dropped_bad_phy");
1476 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_error_or_filtered",
1477 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_error_or_filtered, 0,
1478 mxge_handle_be32, "I", "dropped_link_error_or_filtered");
1480 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_overflow",
1481 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 0,
1482 mxge_handle_be32, "I", "dropped_link_overflow");
1484 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_multicast_filtered",
1485 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_multicast_filtered, 0,
1486 mxge_handle_be32, "I", "dropped_multicast_filtered");
1488 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_big_buffer",
1489 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 0,
1490 mxge_handle_be32, "I", "dropped_no_big_buffer");
1492 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_small_buffer",
1493 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_small_buffer, 0,
1494 mxge_handle_be32, "I", "dropped_no_small_buffer");
1496 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_overrun",
1497 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 0,
1498 mxge_handle_be32, "I", "dropped_overrun");
1500 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_pause",
1501 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_pause, 0,
1502 mxge_handle_be32, "I", "dropped_pause");
1504 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_runt",
1505 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 0,
1506 mxge_handle_be32, "I", "dropped_runt");
1508 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_unicast_filtered",
1509 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 0,
1510 mxge_handle_be32, "I", "dropped_unicast_filtered");
1512 /* add counters exported for debugging from all slices */
1513 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1514 sc->slice_sysctl_tree = SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx,
1515 children, OID_AUTO, "slice", CTLFLAG_RD, 0, "");
1516 if (sc->slice_sysctl_tree == NULL) {
1517 device_printf(sc->dev, "can't add slice sysctl node\n");
1521 for (slice = 0; slice < sc->num_slices; slice++) {
1522 ss = &sc->ss[slice];
1523 sysctl_ctx_init(&ss->sysctl_ctx);
1524 ctx = &ss->sysctl_ctx;
1525 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1526 ksprintf(slice_num, "%d", slice);
1527 ss->sysctl_tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
1528 slice_num, CTLFLAG_RD, 0, "");
1529 if (ss->sysctl_tree == NULL) {
1530 device_printf(sc->dev,
1531 "can't add %d slice sysctl node\n", slice);
1532 return; /* XXX continue? */
1534 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1537 * XXX change to ULONG
1540 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_small_cnt",
1541 CTLFLAG_RD, &ss->rx_data.rx_small.cnt, 0, "rx_small_cnt");
1543 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_big_cnt",
1544 CTLFLAG_RD, &ss->rx_data.rx_big.cnt, 0, "rx_big_cnt");
1546 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_req",
1547 CTLFLAG_RD, &ss->tx.req, 0, "tx_req");
1549 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_done",
1550 CTLFLAG_RD, &ss->tx.done, 0, "tx_done");
1552 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_pkt_done",
1553 CTLFLAG_RD, &ss->tx.pkt_done, 0, "tx_pkt_done");
1555 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_queue_active",
1556 CTLFLAG_RD, &ss->tx.queue_active, 0, "tx_queue_active");
1558 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_activate",
1559 CTLFLAG_RD, &ss->tx.activate, 0, "tx_activate");
1561 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_deactivate",
1562 CTLFLAG_RD, &ss->tx.deactivate, 0, "tx_deactivate");
1567 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1568 * backwards one at a time and handle ring wraps
/*
 * Copy a chain of send requests into the NIC's lanai window one
 * request at a time, walking backwards so the first (valid-flagged)
 * slot is written last; the index is masked to handle ring wrap.
 */
1570 static __inline void
1571 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1572 mcp_kreq_ether_send_t *src, int cnt)
1574 int idx, starting_slot;
1576 starting_slot = tx->req;
/* Mask keeps the slot index within the ring on wrap. */
1579 idx = (starting_slot + cnt) & tx->mask;
1580 mxge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src));
1586 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1587 * at most 32 bytes at a time, so as to avoid involving the software
1588 * pio handler in the nic. We re-write the first segment's flags
1589 * to mark them valid only after writing the entire chain
/*
 * Submit a chain of send requests to the NIC via PIO.  Requests are
 * copied two at a time (32 bytes) when no ring wrap occurs; on wrap,
 * fall back to the backwards copier.  The first request's flags are
 * withheld and rewritten last, so the NIC only sees a valid chain
 * once it is fully in place.
 */
1591 static __inline void
1592 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt)
1596 volatile uint32_t *dst_ints;
1597 mcp_kreq_ether_send_t *srcp;
1598 volatile mcp_kreq_ether_send_t *dstp, *dst;
1601 idx = tx->req & tx->mask;
/* Remember the real flags; they are restored after the body copy. */
1603 last_flags = src->flags;
1606 dst = dstp = &tx->lanai[idx];
/* Fast path: no ring wrap, copy pairs of requests. */
1609 if ((idx + cnt) < tx->mask) {
1610 for (i = 0; i < cnt - 1; i += 2) {
1611 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1612 wmb(); /* force write every 32 bytes */
1618 * Submit all but the first request, and ensure
1619 * that it is submitted below
1621 mxge_submit_req_backwards(tx, src, cnt);
1625 /* Submit the first request */
1626 mxge_pio_copy(dstp, srcp, sizeof(*src));
1627 wmb(); /* barrier before setting valid flag */
1630 /* Re-write the last 32-bits with the valid flags */
1631 src->flags = last_flags;
1632 src_ints = (uint32_t *)src;
1634 dst_ints = (volatile uint32_t *)dst;
1636 *dst_ints = *src_ints;
/*
 * Ensure the entire protocol header (ethernet + IP + TCP) of a TSO
 * mbuf is contiguous in the first mbuf, pulling it up if necessary.
 * The header lengths come from the csum_* fields set by the stack.
 */
1642 mxge_pullup_tso(struct mbuf **mp)
1644 int hoff, iphlen, thoff;
1648 KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1650 iphlen = m->m_pkthdr.csum_iphlen;
1651 thoff = m->m_pkthdr.csum_thlen;
1652 hoff = m->m_pkthdr.csum_lhlen;
1654 KASSERT(iphlen > 0, ("invalid ip hlen"));
1655 KASSERT(thoff > 0, ("invalid tcp hlen"));
1656 KASSERT(hoff > 0, ("invalid ether hlen"));
/* Pull up only when the headers span multiple mbufs. */
1658 if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1659 m = m_pullup(m, hoff + iphlen + thoff);
/*
 * Build the firmware send-request chain for a TSO packet.  Each
 * busdma segment is split at MSS boundaries ("cuts"); the rdma_count
 * bookkeeping (explained in the original comment below) is filled in
 * retroactively.  On success the chain is submitted to the NIC; on
 * overflow of tx->max_desc the DMA map is unloaded and the packet is
 * dropped.
 * NOTE(review): extraction gaps — parts of the loop body and error
 * path are missing from this view.
 */
1670 mxge_encap_tso(mxge_tx_ring_t *tx, struct mxge_buffer_state *info_map,
1671 struct mbuf *m, int busdma_seg_cnt)
1673 mcp_kreq_ether_send_t *req;
1674 bus_dma_segment_t *seg;
1675 uint32_t low, high_swapped;
1676 int len, seglen, cum_len, cum_len_next;
1677 int next_is_first, chop, cnt, rdma_count, small;
1678 uint16_t pseudo_hdr_offset, cksum_offset, mss;
1679 uint8_t flags, flags_next;
1680 struct mxge_buffer_state *info_last;
1681 bus_dmamap_t map = info_map->map;
1683 mss = m->m_pkthdr.tso_segsz;
1686 * Negative cum_len signifies to the send loop that we are
1687 * still in the header portion of the TSO packet.
1689 cum_len = -(m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen +
1690 m->m_pkthdr.csum_thlen);
1693 * TSO implies checksum offload on this hardware
1695 cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
1696 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1699 * For TSO, pseudo_hdr_offset holds mss. The firmware figures
1700 * out where to put the checksum by parsing the header.
1702 pseudo_hdr_offset = htobe16(mss);
1710 * "rdma_count" is the number of RDMAs belonging to the current
1711 * packet BEFORE the current send request. For non-TSO packets,
1712 * this is equal to "count".
1714 * For TSO packets, rdma_count needs to be reset to 0 after a
1717 * The rdma_count field of the send request is the number of
1718 * RDMAs of the packet starting at that request. For TSO send
1719 * requests with one or more cuts in the middle, this is the
1720 * number of RDMAs starting after the last cut in the request.
1721 * All previous segments before the last cut implicitly have 1
1724 * Since the number of RDMAs is not known beforehand, it must be
1725 * filled-in retroactively - after each segmentation cut or at
1726 * the end of the entire packet.
1729 while (busdma_seg_cnt) {
1731 * Break the busdma segment up into pieces
1733 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1734 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1738 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1740 cum_len_next = cum_len + seglen;
/* Back-patch the rdma_count of the request before this run. */
1741 (req - rdma_count)->rdma_count = rdma_count + 1;
1742 if (__predict_true(cum_len >= 0)) {
/* Payload: chop at MSS boundaries. */
1744 chop = (cum_len_next > mss);
1745 cum_len_next = cum_len_next % mss;
1746 next_is_first = (cum_len_next == 0);
1747 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1749 next_is_first * MXGEFW_FLAGS_FIRST;
1750 rdma_count |= -(chop | next_is_first);
1751 rdma_count += chop & !next_is_first;
1752 } else if (cum_len_next >= 0) {
/* Transition from header to payload within this segment. */
1757 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1758 flags_next = MXGEFW_FLAGS_TSO_PLD |
1759 MXGEFW_FLAGS_FIRST |
1760 (small * MXGEFW_FLAGS_SMALL);
1763 req->addr_high = high_swapped;
1764 req->addr_low = htobe32(low);
1765 req->pseudo_hdr_offset = pseudo_hdr_offset;
1767 req->rdma_count = 1;
1768 req->length = htobe16(seglen);
1769 req->cksum_offset = cksum_offset;
1771 flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD);
1774 cum_len = cum_len_next;
1779 if (__predict_false(cksum_offset > seglen))
1780 cksum_offset -= seglen;
/* Too many descriptors for the ring: drop (error path below). */
1783 if (__predict_false(cnt > tx->max_desc))
1789 (req - rdma_count)->rdma_count = rdma_count;
/* Mark trailing CHOP/FIRST requests as TSO_LAST. */
1793 req->flags |= MXGEFW_FLAGS_TSO_LAST;
1794 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1796 info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];
/* Swap DMA maps so the map follows the mbuf's final slot. */
1798 info_map->map = info_last->map;
1799 info_last->map = map;
1802 mxge_submit_req(tx, tx->req_list, cnt);
1804 if (tx->send_go != NULL && tx->queue_active == 0) {
1805 /* Tell the NIC to start polling this slice */
1807 tx->queue_active = 1;
/* Error path: unload the DMA map of the dropped packet. */
1814 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
/*
 * Encapsulate one mbuf chain into firmware send requests and submit
 * it.  Handles checksum offload, the "small packet" fast path, runt
 * padding to 60 bytes via the shared zeropad DMA buffer, and defers
 * TSO frames to mxge_encap_tso().
 * NOTE(review): extraction gaps — some error handling and the
 * IFQ_MAXLEN-style bookkeeping lines are missing from this view.
 */
1820 mxge_encap(mxge_tx_ring_t *tx, struct mbuf *m, bus_addr_t zeropad)
1822 mcp_kreq_ether_send_t *req;
1823 bus_dma_segment_t *seg;
1825 int cnt, cum_len, err, i, idx, odd_flag;
1826 uint16_t pseudo_hdr_offset;
1827 uint8_t flags, cksum_offset;
1828 struct mxge_buffer_state *info_map, *info_last;
/* TSO frames need contiguous headers before DMA mapping. */
1830 if (m->m_pkthdr.csum_flags & CSUM_TSO) {
1831 err = mxge_pullup_tso(&m);
1832 if (__predict_false(err))
1837 * Map the frame for DMA
1839 idx = tx->req & tx->mask;
1840 info_map = &tx->info[idx];
1841 map = info_map->map;
/* Reserve 2 descriptors of headroom (e.g. for the pad request). */
1843 err = bus_dmamap_load_mbuf_defrag(tx->dmat, map, &m,
1844 tx->seg_list, tx->max_desc - 2, &cnt, BUS_DMA_NOWAIT);
1845 if (__predict_false(err != 0))
1847 bus_dmamap_sync(tx->dmat, map, BUS_DMASYNC_PREWRITE);
1850 * TSO is different enough, we handle it in another routine
1852 if (m->m_pkthdr.csum_flags & CSUM_TSO)
1853 return mxge_encap_tso(tx, info_map, m, cnt);
1857 pseudo_hdr_offset = 0;
1858 flags = MXGEFW_FLAGS_NO_TSO;
1861 * Checksum offloading
1863 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1864 cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
1865 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
1866 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
1867 req->cksum_offset = cksum_offset;
1868 flags |= MXGEFW_FLAGS_CKSUM;
1869 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
/* Firmware fast path for short frames. */
1873 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
1874 flags |= MXGEFW_FLAGS_SMALL;
1877 * Convert segments into a request list
1881 req->flags = MXGEFW_FLAGS_FIRST;
1882 for (i = 0; i < cnt; i++) {
1883 req->addr_low = htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
1884 req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1885 req->length = htobe16(seg->ds_len);
1886 req->cksum_offset = cksum_offset;
1887 if (cksum_offset > seg->ds_len)
1888 cksum_offset -= seg->ds_len;
1891 req->pseudo_hdr_offset = pseudo_hdr_offset;
1892 req->pad = 0; /* complete solid 16-byte block */
1893 req->rdma_count = 1;
1894 req->flags |= flags | ((cum_len & 1) * odd_flag);
1895 cum_len += seg->ds_len;
1903 * Pad runt to 60 bytes
/* Extra request pointing at the shared zero buffer. */
1907 req->addr_low = htobe32(MXGE_LOWPART_TO_U32(zeropad));
1908 req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(zeropad));
1909 req->length = htobe16(60 - cum_len);
1910 req->cksum_offset = 0;
1911 req->pseudo_hdr_offset = pseudo_hdr_offset;
1912 req->pad = 0; /* complete solid 16-byte block */
1913 req->rdma_count = 1;
1914 req->flags |= flags | ((cum_len & 1) * odd_flag);
1918 tx->req_list[0].rdma_count = cnt;
1920 /* print what the firmware will see */
1921 for (i = 0; i < cnt; i++) {
1922 kprintf("%d: addr: 0x%x 0x%x len:%d pso%d,"
1923 "cso:%d, flags:0x%x, rdma:%d\n",
1924 i, (int)ntohl(tx->req_list[i].addr_high),
1925 (int)ntohl(tx->req_list[i].addr_low),
1926 (int)ntohs(tx->req_list[i].length),
1927 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
1928 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
1929 tx->req_list[i].rdma_count);
1931 kprintf("--------------\n");
1933 info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];
/* Swap DMA maps so the map follows the mbuf's final slot. */
1935 info_map->map = info_last->map;
1936 info_last->map = map;
1939 mxge_submit_req(tx, tx->req_list, cnt);
1941 if (tx->send_go != NULL && tx->queue_active == 0) {
1942 /* Tell the NIC to start polling this slice */
1944 tx->queue_active = 1;
/*
 * ifq transmit start routine for one TX ring (subqueue).  Dequeues
 * packets while descriptor space remains, encapsulates each via
 * mxge_encap(), and arms the per-queue watchdog.
 */
1956 mxge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
1958 mxge_softc_t *sc = ifp->if_softc;
1959 mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);
1963 KKASSERT(tx->ifsq == ifsq);
1964 ASSERT_SERIALIZED(&tx->tx_serialize);
1966 if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
1969 zeropad = sc->zeropad_dma.dmem_busaddr;
/* Keep going while a full worst-case packet still fits. */
1970 while (tx->mask - (tx->req - tx->done) > tx->max_desc) {
1974 m = ifsq_dequeue(ifsq);
1979 error = mxge_encap(tx, m, zeropad);
1983 IFNET_STAT_INC(ifp, oerrors, 1);
1986 /* Ran out of transmit slots */
1987 ifsq_set_oactive(ifsq);
/* Arm the 5-second TX watchdog while work is outstanding. */
1990 tx->watchdog.wd_timer = 5;
/*
 * Per-subqueue TX watchdog.  If the pause-frame counter has not
 * moved since the last check, the NIC is considered stuck and is
 * reset; otherwise the stall is attributed to flow control from the
 * link partner and only a warning is printed.
 */
1994 mxge_watchdog(struct ifaltq_subque *ifsq)
1996 struct ifnet *ifp = ifsq_get_ifp(ifsq);
1997 struct mxge_softc *sc = ifp->if_softc;
1998 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
1999 mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);
2001 ASSERT_IFNET_SERIALIZED_ALL(ifp);
2003 /* Check for pause blocking before resetting */
2004 if (tx->watchdog_rx_pause == rx_pause) {
2005 mxge_warn_stuck(sc, tx, 0);
2006 mxge_watchdog_reset(sc);
2009 if_printf(ifp, "Flow control blocking xmits, "
2010 "check link partner\n");
/* Remember the counter for the next watchdog pass. */
2012 tx->watchdog_rx_pause = rx_pause;
2016 * Copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2017 * at most 32 bytes at a time, so as to avoid involving the software
2018 * pio handler in the nic. We re-write the first segment's low
2019 * DMA address to mark it valid only after we write the entire chunk
/*
 * PIO-copy 8 receive descriptors to the NIC in two 32-byte bursts.
 * The first descriptor's low address is temporarily poisoned with
 * 0xffffffff and rewritten last, so the NIC only treats the batch as
 * valid once it is completely written.
 */
2022 static __inline void
2023 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2024 mcp_kreq_ether_recv_t *src)
2028 low = src->addr_low;
2029 src->addr_low = 0xffffffff;
2030 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2032 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2034 src->addr_low = low;
/* Final write marks the whole batch valid. */
2035 dst->addr_low = low;
/*
 * Allocate and DMA-map a small (MHLEN) receive mbuf for ring slot
 * idx, recording its address in the shadow ring.  Every 8th slot the
 * batch is pushed to the NIC via mxge_submit_8rx().  During init,
 * allocation failures simply bail out (nothing to tear down).
 */
2040 mxge_get_buf_small(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx,
2043 bus_dma_segment_t seg;
2045 int cnt, err, mflag;
/* Init-time allocations may sleep; runtime refills must not. */
2048 if (__predict_false(init))
2051 m = m_gethdr(mflag, MT_DATA);
2054 if (__predict_false(init)) {
2056 * During initialization, there
2057 * is nothing to setup; bail out
2063 m->m_len = m->m_pkthdr.len = MHLEN;
2065 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2066 &seg, 1, &cnt, BUS_DMA_NOWAIT);
2069 if (__predict_false(init)) {
2071 * During initialization, there
2072 * is nothing to setup; bail out
2079 rx->info[idx].m = m;
2080 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2081 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
/* Push a completed batch of 8 descriptors to the NIC. */
2085 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
/*
 * Allocate and DMA-map a big (cluster or jumbo-page) receive mbuf
 * for ring slot idx; otherwise identical in structure to
 * mxge_get_buf_small(), including the 8-descriptor batched submit.
 */
2090 mxge_get_buf_big(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx,
2093 bus_dma_segment_t seg;
2095 int cnt, err, mflag;
2098 if (__predict_false(init))
/* Pick the cluster size configured for this ring. */
2101 if (rx->cl_size == MCLBYTES)
2102 m = m_getcl(mflag, MT_DATA, M_PKTHDR);
2104 m = m_getjcl(mflag, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
2107 if (__predict_false(init)) {
2109 * During initialization, there
2110 * is nothing to setup; bail out
2116 m->m_len = m->m_pkthdr.len = rx->cl_size;
2118 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2119 &seg, 1, &cnt, BUS_DMA_NOWAIT);
2122 if (__predict_false(init)) {
2124 * During initialization, there
2125 * is nothing to setup; bail out
2132 rx->info[idx].m = m;
2133 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2134 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
/* Push a completed batch of 8 descriptors to the NIC. */
2138 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2143 * Myri10GE hardware checksums are not valid if the sender
2144 * padded the frame with non-zero padding. This is because
2145 * the firmware just does a simple 16-bit 1s complement
2146 * checksum across the entire frame, excluding the first 14
2147 * bytes. It is best to simply to check the checksum and
2148 * tell the stack about it only if the checksum is good
/*
 * Validate the firmware's whole-frame 16-bit checksum for IPv4
 * TCP/UDP frames by folding in the pseudo-header and subtracting the
 * IP header contribution.  Returns 0 when the checksum verifies
 * (non-IPv4 / non-TCP-UDP frames are rejected up front).
 */
2150 static __inline uint16_t
2151 mxge_rx_csum(struct mbuf *m, int csum)
2153 const struct ether_header *eh;
2154 const struct ip *ip;
2157 eh = mtod(m, const struct ether_header *);
2159 /* Only deal with IPv4 TCP & UDP for now */
2160 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2163 ip = (const struct ip *)(eh + 1);
2164 if (__predict_false(ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP))
/* Fold in the pseudo-header; firmware summed the whole frame. */
2168 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2169 htonl(ntohs(csum) + ntohs(ip->ip_len) +
2170 - (ip->ip_hl << 2) + ip->ip_p));
/*
 * Strip an 802.1Q header from a received frame: adjust the partial
 * checksum to exclude the 4 encapsulation bytes, stash the tag in
 * the mbuf header (M_VLANTAG), and slide the ethernet addresses over
 * the removed header.
 */
2179 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2181 struct ether_vlan_header *evl;
2184 evl = mtod(m, struct ether_vlan_header *);
2187 * Fix checksum by subtracting EVL_ENCAPLEN bytes after
2188 * what the firmware thought was the end of the ethernet
2192 /* Put checksum into host byte order */
2193 *csum = ntohs(*csum);
2195 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
/* One's-complement subtract with end-around carry folding. */
2197 *csum += ((*csum) < ~partial);
2198 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2199 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2202 * Restore checksum to network byte order;
2203 * later consumers expect this
2205 *csum = htons(*csum);
2208 m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
2209 m->m_flags |= M_VLANTAG;
2212 * Remove the 802.1q header by copying the Ethernet
2213 * addresses over it and adjusting the beginning of
2214 * the data in the mbuf. The encapsulated Ethernet
2215 * type field is already in place.
2217 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN,
2218 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2219 m_adj(m, EVL_ENCAPLEN);
/*
 * Process one completed receive on the big-buffer ring: replace the
 * mbuf (dropping the frame if replacement fails), swap DMA maps,
 * strip firmware pad and VLAN tag, verify checksum, and hand the
 * frame to the stack.
 */
2223 static __inline void
2224 mxge_rx_done_big(struct ifnet *ifp, mxge_rx_ring_t *rx,
2225 uint32_t len, uint32_t csum)
2228 const struct ether_header *eh;
2229 bus_dmamap_t old_map;
2232 idx = rx->cnt & rx->mask;
2235 /* Save a pointer to the received mbuf */
2236 m = rx->info[idx].m;
2238 /* Try to replace the received mbuf */
2239 if (mxge_get_buf_big(rx, rx->extra_map, idx, FALSE)) {
2240 /* Drop the frame -- the old mbuf is re-cycled */
2241 IFNET_STAT_INC(ifp, ierrors, 1);
2245 /* Unmap the received buffer */
2246 old_map = rx->info[idx].map;
2247 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2248 bus_dmamap_unload(rx->dmat, old_map);
2250 /* Swap the bus_dmamap_t's */
2251 rx->info[idx].map = rx->extra_map;
2252 rx->extra_map = old_map;
2255 * mcp implicitly skips 1st 2 bytes so that packet is properly
2258 m->m_data += MXGEFW_PAD;
2260 m->m_pkthdr.rcvif = ifp;
2261 m->m_len = m->m_pkthdr.len = len;
2263 IFNET_STAT_INC(ifp, ipackets, 1);
2265 eh = mtod(m, const struct ether_header *);
2266 if (eh->ether_type == htons(ETHERTYPE_VLAN))
2267 mxge_vlan_tag_remove(m, &csum);
2269 /* If the checksum is valid, mark it in the mbuf header */
2270 if ((ifp->if_capenable & IFCAP_RXCSUM) &&
2271 mxge_rx_csum(m, csum) == 0) {
2272 /* Tell the stack that the checksum is good */
2273 m->m_pkthdr.csum_data = 0xffff;
2274 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2277 ifp->if_input(ifp, m, NULL, -1);
/*
 * Process one completed receive on the small-buffer ring; mirrors
 * mxge_rx_done_big() but refills via mxge_get_buf_small().
 */
2280 static __inline void
2281 mxge_rx_done_small(struct ifnet *ifp, mxge_rx_ring_t *rx,
2282 uint32_t len, uint32_t csum)
2284 const struct ether_header *eh;
2286 bus_dmamap_t old_map;
2289 idx = rx->cnt & rx->mask;
2292 /* Save a pointer to the received mbuf */
2293 m = rx->info[idx].m;
2295 /* Try to replace the received mbuf */
2296 if (mxge_get_buf_small(rx, rx->extra_map, idx, FALSE)) {
2297 /* Drop the frame -- the old mbuf is re-cycled */
2298 IFNET_STAT_INC(ifp, ierrors, 1);
2302 /* Unmap the received buffer */
2303 old_map = rx->info[idx].map;
2304 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2305 bus_dmamap_unload(rx->dmat, old_map);
2307 /* Swap the bus_dmamap_t's */
2308 rx->info[idx].map = rx->extra_map;
2309 rx->extra_map = old_map;
2312 * mcp implicitly skips 1st 2 bytes so that packet is properly
2315 m->m_data += MXGEFW_PAD;
2317 m->m_pkthdr.rcvif = ifp;
2318 m->m_len = m->m_pkthdr.len = len;
2320 IFNET_STAT_INC(ifp, ipackets, 1);
2322 eh = mtod(m, const struct ether_header *);
2323 if (eh->ether_type == htons(ETHERTYPE_VLAN))
2324 mxge_vlan_tag_remove(m, &csum);
2326 /* If the checksum is valid, mark it in the mbuf header */
2327 if ((ifp->if_capenable & IFCAP_RXCSUM) &&
2328 mxge_rx_csum(m, csum) == 0) {
2329 /* Tell the stack that the checksum is good */
2330 m->m_pkthdr.csum_data = 0xffff;
2331 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2334 ifp->if_input(ifp, m, NULL, -1);
/*
 * Drain completed receive events from the rx_done ring, dispatching
 * each to the small- or big-buffer handler by length.  A negative
 * 'cycle' means "drain everything"; a positive value bounds the work
 * per call (used by polling).
 */
2337 static __inline void
2338 mxge_clean_rx_done(struct ifnet *ifp, struct mxge_rx_data *rx_data, int cycle)
2340 mxge_rx_done_t *rx_done = &rx_data->rx_done;
2342 while (rx_done->entry[rx_done->idx].length != 0 && cycle != 0) {
2343 uint16_t length, checksum;
2345 length = ntohs(rx_done->entry[rx_done->idx].length);
/* Zero the length to mark the slot consumed. */
2346 rx_done->entry[rx_done->idx].length = 0;
2348 checksum = rx_done->entry[rx_done->idx].checksum;
2350 if (length <= MXGE_RX_SMALL_BUFLEN) {
2351 mxge_rx_done_small(ifp, &rx_data->rx_small,
2354 mxge_rx_done_big(ifp, &rx_data->rx_big,
2359 rx_done->idx &= rx_done->mask;
/*
 * Reclaim transmit descriptors up to the firmware's completion index
 * (mcp_idx): free mbufs and unload DMA maps, clear OACTIVE when at
 * least half the ring is free, restart the queue, and let the NIC
 * stop polling an empty queue.
 */
2364 static __inline void
2365 mxge_tx_done(struct ifnet *ifp, mxge_tx_ring_t *tx, uint32_t mcp_idx)
2367 ASSERT_SERIALIZED(&tx->tx_serialize);
2369 while (tx->pkt_done != mcp_idx) {
2373 idx = tx->done & tx->mask;
2376 m = tx->info[idx].m;
2378 * mbuf and DMA map only attached to the first
/* ... segment of a multi-descriptor packet. */
2383 IFNET_STAT_INC(ifp, opackets, 1);
2384 tx->info[idx].m = NULL;
2385 bus_dmamap_unload(tx->dmat, tx->info[idx].map);
2391 * If we have space, clear OACTIVE to tell the stack that
2392 * its OK to send packets
2394 if (tx->req - tx->done < (tx->mask + 1) / 2) {
2395 ifsq_clr_oactive(tx->ifsq);
2396 if (tx->req == tx->done) {
2397 /* Reset watchdog */
2398 tx->watchdog.wd_timer = 0;
2402 if (!ifsq_is_empty(tx->ifsq))
2403 ifsq_devstart(tx->ifsq);
2405 if (tx->send_stop != NULL && tx->req == tx->done) {
2407 * Let the NIC stop polling this queue, since there
2408 * are no more transmits pending
2411 tx->queue_active = 0;
/*
 * XFP module compliance-byte bitmask -> ifmedia type mapping
 * (IFM_NONE entries have no DragonFly media type).
 */
2417 static struct mxge_media_type mxge_xfp_media_types[] = {
2418 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2419 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2420 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2421 {IFM_NONE, (1 << 5), "10GBASE-ER"},
2422 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2423 {IFM_NONE, (1 << 3), "10GBASE-SW"},
2424 {IFM_NONE, (1 << 2), "10GBASE-LW"},
2425 {IFM_NONE, (1 << 1), "10GBASE-EW"},
2426 {IFM_NONE, (1 << 0), "Reserved"}
/* SFP+ module compliance-byte bitmask -> ifmedia type mapping. */
2429 static struct mxge_media_type mxge_sfp_media_types[] = {
2430 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2431 {IFM_NONE, (1 << 7), "Reserved"},
2432 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2433 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2434 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2435 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
/*
 * Install a single ifmedia entry for the given media type (with
 * RX/TX pause flow-control options) and cache it as current.
 * IFM_NONE is a no-op.
 */
2439 mxge_media_set(mxge_softc_t *sc, int media_type)
2443 if (media_type == IFM_NONE)
2447 fc_opt = IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
2449 ifmedia_add(&sc->media, MXGE_IFM | media_type, 0, NULL);
2450 ifmedia_set(&sc->media, MXGE_IFM | media_type | fc_opt);
2452 sc->current_media = media_type;
/* Remove all installed ifmedia entries and forget the current one. */
2456 mxge_media_unset(mxge_softc_t *sc)
2458 ifmedia_removeall(&sc->media);
2459 sc->current_media = IFM_NONE;
/*
 * Classify the NIC's connector (CX4 / QRF / XFP / SFP+) from the
 * EEPROM product-code string; for pluggable cages the actual media
 * type is probed later by mxge_media_probe().
 */
2463 mxge_media_init(mxge_softc_t *sc)
2468 mxge_media_unset(sc);
2471 * Parse the product code to determine the interface type
2472 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2473 * after the 3rd dash in the driver's cached copy of the
2474 * EEPROM's product code string.
2476 ptr = sc->product_code_string;
2478 if_printf(sc->ifp, "Missing product code\n");
/* Skip past the third '-' in the product code. */
2482 for (i = 0; i < 3; i++, ptr++) {
2483 ptr = strchr(ptr, '-');
2485 if_printf(sc->ifp, "only %d dashes in PC?!?\n", i);
2489 if (*ptr == 'C' || *(ptr +1) == 'C') {
2491 sc->connector = MXGE_CX4;
2492 mxge_media_set(sc, IFM_10G_CX4);
2493 } else if (*ptr == 'Q') {
2494 /* -Q is Quad Ribbon Fiber */
2495 sc->connector = MXGE_QRF;
2496 if_printf(sc->ifp, "Quad Ribbon Fiber Media\n");
2497 /* DragonFly has no media type for Quad ribbon fiber */
2498 } else if (*ptr == 'R') {
2500 sc->connector = MXGE_XFP;
2501 /* NOTE: ifmedia will be installed later */
2502 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2503 /* -S or -2S is SFP+ */
2504 sc->connector = MXGE_SFP;
2505 /* NOTE: ifmedia will be installed later */
2507 sc->connector = MXGE_UNK;
2508 if_printf(sc->ifp, "Unknown media type: %c\n", *ptr);
2513 * Determine the media type for a NIC. Some XFPs will identify
2514 * themselves only when their link is up, so this is initiated via a
2515 * link up interrupt. However, this can potentially take up to
2516 * several milliseconds, so it is run via the watchdog routine, rather
2517 * than in the interrupt handler itself.
/*
 * Determine the installed module's media type by reading its
 * compliance byte over the firmware's I2C interface, then map it to
 * an ifmedia type via the XFP/SFP tables.  The I2C read is cached by
 * firmware and polled for up to ~50ms.
 */
2520 mxge_media_probe(mxge_softc_t *sc)
2523 const char *cage_type;
2524 struct mxge_media_type *mxge_media_types = NULL;
2525 int i, err, ms, mxge_media_type_entries;
2528 sc->need_media_probe = 0;
2530 if (sc->connector == MXGE_XFP) {
2532 mxge_media_types = mxge_xfp_media_types;
2533 mxge_media_type_entries = NELEM(mxge_xfp_media_types);
2534 byte = MXGE_XFP_COMPLIANCE_BYTE;
2536 } else if (sc->connector == MXGE_SFP) {
2537 /* -S or -2S is SFP+ */
2538 mxge_media_types = mxge_sfp_media_types;
2539 mxge_media_type_entries = NELEM(mxge_sfp_media_types);
2543 /* nothing to do; media type cannot change */
2548 * At this point we know the NIC has an XFP cage, so now we
2549 * try to determine what is in the cage by using the
2550 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2551 * register. We read just one byte, which may take over
2555 bzero(&cmd, sizeof(cmd)); /* silence gcc warning */
2556 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2558 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2559 if (err != MXGEFW_CMD_OK) {
2560 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE)
2561 if_printf(sc->ifp, "failed to read XFP\n");
2562 else if (err == MXGEFW_CMD_ERROR_I2C_ABSENT)
2563 if_printf(sc->ifp, "Type R/S with no XFP!?!?\n");
2565 if_printf(sc->ifp, "I2C read failed, err: %d", err);
2566 mxge_media_unset(sc);
2570 /* Now we wait for the data to be cached */
2572 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
/* Poll until the firmware has cached the byte (max ~50ms). */
2573 for (ms = 0; err == EBUSY && ms < 50; ms++) {
2576 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2578 if (err != MXGEFW_CMD_OK) {
2579 if_printf(sc->ifp, "failed to read %s (%d, %dms)\n",
2580 cage_type, err, ms);
2581 mxge_media_unset(sc);
/* Entry 0 is special: exact match rather than bitmask test. */
2585 if (cmd.data0 == mxge_media_types[0].bitmask) {
2587 if_printf(sc->ifp, "%s:%s\n", cage_type,
2588 mxge_media_types[0].name);
2590 if (sc->current_media != mxge_media_types[0].flag) {
2591 mxge_media_unset(sc);
2592 mxge_media_set(sc, mxge_media_types[0].flag);
2596 for (i = 1; i < mxge_media_type_entries; i++) {
2597 if (cmd.data0 & mxge_media_types[i].bitmask) {
2599 if_printf(sc->ifp, "%s:%s\n", cage_type,
2600 mxge_media_types[i].name);
2603 if (sc->current_media != mxge_media_types[i].flag) {
2604 mxge_media_unset(sc);
2605 mxge_media_set(sc, mxge_media_types[i].flag);
/* No bit matched: unknown module. */
2610 mxge_media_unset(sc);
2612 if_printf(sc->ifp, "%s media 0x%x unknown\n", cage_type,
/*
 * Handle firmware status updates from the interrupt stats block:
 * propagate link state changes to the stack, warn about RDMA
 * timeouts, and count link-down events.
 */
2618 mxge_intr_status(struct mxge_softc *sc, const mcp_irq_data_t *stats)
2620 if (sc->link_state != stats->link_up) {
2621 sc->link_state = stats->link_up;
2622 if (sc->link_state) {
2623 sc->ifp->if_link_state = LINK_STATE_UP;
2624 if_link_state_change(sc->ifp);
2626 if_printf(sc->ifp, "link up\n");
2628 sc->ifp->if_link_state = LINK_STATE_DOWN;
2629 if_link_state_change(sc->ifp);
2631 if_printf(sc->ifp, "link down\n");
/* Re-probe media from the watchdog, not in interrupt context. */
2633 sc->need_media_probe = 1;
2636 if (sc->rdma_tags_available != be32toh(stats->rdma_tags_available)) {
2637 sc->rdma_tags_available = be32toh(stats->rdma_tags_available);
2638 if_printf(sc->ifp, "RDMA timed out! %d tags left\n",
2639 sc->rdma_tags_available);
2642 if (stats->link_down) {
2643 sc->down_cnt += stats->link_down;
2645 sc->ifp->if_link_state = LINK_STATE_DOWN;
2646 if_link_state_change(sc->ifp);
/*
 * Enter all driver serializers except the first (main) one, which
 * the caller already holds.
 */
2651 mxge_serialize_skipmain(struct mxge_softc *sc)
2653 lwkt_serialize_array_enter(sc->serializes, sc->nserialize, 1);
/* Exit the serializers taken by mxge_serialize_skipmain(). */
2657 mxge_deserialize_skipmain(struct mxge_softc *sc)
2659 lwkt_serialize_array_exit(sc->serializes, sc->nserialize, 1);
/*
 * Legacy (INTx) interrupt handler for slice 0.  Deasserts the IRQ,
 * then loops draining TX completions and RX events — optionally
 * until the NIC confirms the line is low (mxge_deassert_wait) —
 * before returning the IRQ claim tokens to firmware.
 */
2663 mxge_legacy(void *arg)
2665 struct mxge_slice_state *ss = arg;
2666 mxge_softc_t *sc = ss->sc;
2667 mcp_irq_data_t *stats = ss->fw_stats;
2668 mxge_tx_ring_t *tx = &ss->tx;
2669 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2670 uint32_t send_done_count;
2673 ASSERT_SERIALIZED(&sc->main_serialize);
2675 /* Make sure the DMA has finished */
2678 valid = stats->valid;
2680 /* Lower legacy IRQ */
2681 *sc->irq_deassert = 0;
2682 if (!mxge_deassert_wait) {
2683 /* Don't wait for conf. that irq is low */
/* Take the non-main serializers while processing rings. */
2687 mxge_serialize_skipmain(sc);
2690 * Loop while waiting for legacy irq deassertion
2691 * XXX do we really want to loop?
2694 /* Check for transmit completes and receives */
2695 send_done_count = be32toh(stats->send_done_count);
2696 while ((send_done_count != tx->pkt_done) ||
2697 (rx_done->entry[rx_done->idx].length != 0)) {
2698 if (send_done_count != tx->pkt_done) {
2699 mxge_tx_done(&sc->arpcom.ac_if, tx,
2700 (int)send_done_count);
2702 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
2703 send_done_count = be32toh(stats->send_done_count);
2705 if (mxge_deassert_wait)
2707 } while (*((volatile uint8_t *)&stats->valid));
2709 mxge_deserialize_skipmain(sc);
2711 /* Fw link & error stats meaningful only on the first slice */
2712 if (__predict_false(stats->stats_updated))
2713 mxge_intr_status(sc, stats);
2715 /* Check to see if we have rx token to pass back */
2717 *ss->irq_claim = be32toh(3);
2718 *(ss->irq_claim + 1) = be32toh(3);
/*
 * NOTE(review): the function signature line was lost in extraction;
 * from the body this is presumably the MSI interrupt handler
 * (mxge_msi) — confirm against the full source.  It drains RX under the
 * rx serializer, reaps TX completions under the tx serializer, forwards
 * firmware status updates, and returns the rx irq token — unless npolling
 * is active, in which case rx work is left to the poll handler.
 */
2724 struct mxge_slice_state *ss = arg;
2725 mxge_softc_t *sc = ss->sc;
2726 mcp_irq_data_t *stats = ss->fw_stats;
2727 mxge_tx_ring_t *tx = &ss->tx;
2728 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2729 uint32_t send_done_count;
/* Without IFPOLL support "polling" is a compile-time constant FALSE. */
2731 #ifndef IFPOLL_ENABLE
2732 const boolean_t polling = FALSE;
2734 boolean_t polling = FALSE;
2737 ASSERT_SERIALIZED(&sc->main_serialize);
2739 /* Make sure the DMA has finished */
2740 if (__predict_false(!stats->valid))
2743 valid = stats->valid;
2746 #ifdef IFPOLL_ENABLE
2747 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
2752 /* Check for receives */
2753 lwkt_serialize_enter(&ss->rx_data.rx_serialize);
2754 if (rx_done->entry[rx_done->idx].length != 0)
2755 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
2756 lwkt_serialize_exit(&ss->rx_data.rx_serialize);
2760 * Check for transmit completes
2763 * Since pkt_done is only changed by mxge_tx_done(),
2764 * which is called only in interrupt handler, the
2765 * check w/o holding tx serializer is MPSAFE.
2767 send_done_count = be32toh(stats->send_done_count);
2768 if (send_done_count != tx->pkt_done) {
2769 lwkt_serialize_enter(&tx->tx_serialize);
2770 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count);
2771 lwkt_serialize_exit(&tx->tx_serialize);
/* Link/error stats are only meaningful when firmware flags an update. */
2774 if (__predict_false(stats->stats_updated))
2775 mxge_intr_status(sc, stats);
2777 /* Check to see if we have rx token to pass back */
2778 if (!polling && (valid & 0x1))
2779 *ss->irq_claim = be32toh(3);
2780 *(ss->irq_claim + 1) = be32toh(3);
/*
 * MSI-X per-slice RX-only interrupt handler: drain the rx done ring and
 * return the irq token.  Skipped entirely while npolling owns the slice.
 */
2784 mxge_msix_rx(void *arg)
2786 struct mxge_slice_state *ss = arg;
2787 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2789 #ifdef IFPOLL_ENABLE
2790 if (ss->sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
2794 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);
/* Non-zero length in the current slot means a received frame is pending. */
2796 if (rx_done->entry[rx_done->idx].length != 0)
2797 mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, -1);
2799 *ss->irq_claim = be32toh(3);
/*
 * MSI-X combined RX+TX interrupt handler for a slice: like mxge_msix_rx
 * plus TX-completion reaping and firmware status forwarding.  Runs under
 * the slice's rx serializer; grabs the tx serializer only when there is
 * actual tx work.
 */
2803 mxge_msix_rxtx(void *arg)
2805 struct mxge_slice_state *ss = arg;
2806 mxge_softc_t *sc = ss->sc;
2807 mcp_irq_data_t *stats = ss->fw_stats;
2808 mxge_tx_ring_t *tx = &ss->tx;
2809 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2810 uint32_t send_done_count;
2812 #ifndef IFPOLL_ENABLE
2813 const boolean_t polling = FALSE;
2815 boolean_t polling = FALSE;
2818 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);
2820 /* Make sure the DMA has finished */
2821 if (__predict_false(!stats->valid))
2824 valid = stats->valid;
2827 #ifdef IFPOLL_ENABLE
2828 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
/* When npolling owns rx, leave pending receives to the poll handler. */
2832 /* Check for receives */
2833 if (!polling && rx_done->entry[rx_done->idx].length != 0)
2834 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
2837 * Check for transmit completes
2840 * Since pkt_done is only changed by mxge_tx_done(),
2841 * which is called only in interrupt handler, the
2842 * check w/o holding tx serializer is MPSAFE.
2844 send_done_count = be32toh(stats->send_done_count);
2845 if (send_done_count != tx->pkt_done) {
2846 lwkt_serialize_enter(&tx->tx_serialize);
2847 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count);
2848 lwkt_serialize_exit(&tx->tx_serialize);
2851 /* Check to see if we have rx token to pass back */
2852 if (!polling && (valid & 0x1))
2853 *ss->irq_claim = be32toh(3);
2854 *(ss->irq_claim + 1) = be32toh(3);
/*
 * if_init handler: bring the interface up unless it is already running.
 * NOTE(review): the call into mxge_open() was dropped by extraction;
 * only the guard is visible here.
 */
2858 mxge_init(void *arg)
2860 struct mxge_softc *sc = arg;
2862 ASSERT_IFNET_SERIALIZED_ALL(sc->ifp);
2863 if ((sc->ifp->if_flags & IFF_RUNNING) == 0)
/*
 * Release every mbuf still attached to a slice's rings: unload its DMA
 * map, free the mbuf, and NULL the slot.  Covers the big rx ring, the
 * small rx ring, and (when present) the tx ring.
 */
2868 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
2872 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
2873 if (ss->rx_data.rx_big.info[i].m == NULL)
2875 bus_dmamap_unload(ss->rx_data.rx_big.dmat,
2876 ss->rx_data.rx_big.info[i].map);
2877 m_freem(ss->rx_data.rx_big.info[i].m);
2878 ss->rx_data.rx_big.info[i].m = NULL;
2881 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
2882 if (ss->rx_data.rx_small.info[i].m == NULL)
2884 bus_dmamap_unload(ss->rx_data.rx_small.dmat,
2885 ss->rx_data.rx_small.info[i].map);
2886 m_freem(ss->rx_data.rx_small.info[i].m);
2887 ss->rx_data.rx_small.info[i].m = NULL;
2890 /* Transmit ring used only on the first slice */
2891 if (ss->tx.info == NULL)
2894 for (i = 0; i <= ss->tx.mask; i++) {
2895 if (ss->tx.info[i].m == NULL)
2897 bus_dmamap_unload(ss->tx.dmat, ss->tx.info[i].map);
2898 m_freem(ss->tx.info[i].m);
2899 ss->tx.info[i].m = NULL;
/* Free the mbufs of every slice (see mxge_free_slice_mbufs). */
2904 mxge_free_mbufs(mxge_softc_t *sc)
2908 for (slice = 0; slice < sc->num_slices; slice++)
2909 mxge_free_slice_mbufs(&sc->ss[slice]);
/*
 * Tear down all per-slice ring resources in the reverse order of
 * mxge_alloc_slice_rings(): the rx_done DMA block, tx request/segment
 * scratch buffers, rx shadow rings, and finally each info array with its
 * per-slot DMA maps, the extra map, and the DMA tag.  Each pointer is
 * NULLed after freeing so the function is safe against partial setups.
 */
2913 mxge_free_slice_rings(struct mxge_slice_state *ss)
2917 if (ss->rx_data.rx_done.entry != NULL) {
2918 mxge_dma_free(&ss->rx_done_dma);
2919 ss->rx_data.rx_done.entry = NULL;
2922 if (ss->tx.req_list != NULL) {
2923 kfree(ss->tx.req_list, M_DEVBUF);
2924 ss->tx.req_list = NULL;
2927 if (ss->tx.seg_list != NULL) {
2928 kfree(ss->tx.seg_list, M_DEVBUF);
2929 ss->tx.seg_list = NULL;
2932 if (ss->rx_data.rx_small.shadow != NULL) {
2933 kfree(ss->rx_data.rx_small.shadow, M_DEVBUF);
2934 ss->rx_data.rx_small.shadow = NULL;
2937 if (ss->rx_data.rx_big.shadow != NULL) {
2938 kfree(ss->rx_data.rx_big.shadow, M_DEVBUF);
2939 ss->rx_data.rx_big.shadow = NULL;
/* Maps must be destroyed before their tag; info array freed last. */
2942 if (ss->tx.info != NULL) {
2943 if (ss->tx.dmat != NULL) {
2944 for (i = 0; i <= ss->tx.mask; i++) {
2945 bus_dmamap_destroy(ss->tx.dmat,
2946 ss->tx.info[i].map);
2948 bus_dma_tag_destroy(ss->tx.dmat);
2950 kfree(ss->tx.info, M_DEVBUF);
2954 if (ss->rx_data.rx_small.info != NULL) {
2955 if (ss->rx_data.rx_small.dmat != NULL) {
2956 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
2957 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
2958 ss->rx_data.rx_small.info[i].map);
2960 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
2961 ss->rx_data.rx_small.extra_map);
2962 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
2964 kfree(ss->rx_data.rx_small.info, M_DEVBUF);
2965 ss->rx_data.rx_small.info = NULL;
2968 if (ss->rx_data.rx_big.info != NULL) {
2969 if (ss->rx_data.rx_big.dmat != NULL) {
2970 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
2971 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
2972 ss->rx_data.rx_big.info[i].map);
2974 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
2975 ss->rx_data.rx_big.extra_map);
2976 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
2978 kfree(ss->rx_data.rx_big.info, M_DEVBUF);
2979 ss->rx_data.rx_big.info = NULL;
/* Free the ring resources of every slice (see mxge_free_slice_rings). */
2984 mxge_free_rings(mxge_softc_t *sc)
2991 for (slice = 0; slice < sc->num_slices; slice++)
2992 mxge_free_slice_rings(&sc->ss[slice]);
/*
 * Allocate all per-slice ring state: shadow and host-info arrays for the
 * small and big rx rings, their DMA tags and per-slot maps (plus one
 * "extra" map used during buffer replacement), and the tx request list,
 * segment list, info array, tag and maps.  On any bus_dmamap_create
 * failure the maps created so far are unwound and the tag destroyed.
 * NOTE(review): extraction dropped error-return lines and some tag
 * parameters; comments track only the visible calls.
 */
2996 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
2997 int tx_ring_entries)
2999 mxge_softc_t *sc = ss->sc;
3004 * Allocate per-slice receive resources
/* Ring sizes are powers of two; masks are size-1 for cheap wrapping. */
3007 ss->rx_data.rx_small.mask = ss->rx_data.rx_big.mask =
3008 rx_ring_entries - 1;
3009 ss->rx_data.rx_done.mask = (2 * rx_ring_entries) - 1;
3011 /* Allocate the rx shadow rings */
3012 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.shadow);
3013 ss->rx_data.rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3015 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.shadow);
3016 ss->rx_data.rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3018 /* Allocate the rx host info rings */
3019 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.info);
3020 ss->rx_data.rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3022 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.info);
3023 ss->rx_data.rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3025 /* Allocate the rx busdma resources */
3026 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3028 4096, /* boundary */
3029 BUS_SPACE_MAXADDR, /* low */
3030 BUS_SPACE_MAXADDR, /* high */
3031 NULL, NULL, /* filter */
3032 MHLEN, /* maxsize */
3034 MHLEN, /* maxsegsize */
3035 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
3037 &ss->rx_data.rx_small.dmat); /* tag */
3039 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3044 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, BUS_DMA_WAITOK,
3045 &ss->rx_data.rx_small.extra_map);
3047 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", err);
3048 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
3049 ss->rx_data.rx_small.dmat = NULL;
3052 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
3053 err = bus_dmamap_create(ss->rx_data.rx_small.dmat,
3054 BUS_DMA_WAITOK, &ss->rx_data.rx_small.info[i].map);
3058 device_printf(sc->dev, "Err %d rx_small dmamap\n", err);
/* Unwind: destroy the j < i maps already created, then the tag. */
3060 for (j = 0; j < i; ++j) {
3061 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
3062 ss->rx_data.rx_small.info[j].map);
3064 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
3065 ss->rx_data.rx_small.extra_map);
3066 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
3067 ss->rx_data.rx_small.dmat = NULL;
3072 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3074 4096, /* boundary */
3075 BUS_SPACE_MAXADDR, /* low */
3076 BUS_SPACE_MAXADDR, /* high */
3077 NULL, NULL, /* filter */
3080 4096, /* maxsegsize*/
3081 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
3083 &ss->rx_data.rx_big.dmat); /* tag */
3085 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3090 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK,
3091 &ss->rx_data.rx_big.extra_map);
3093 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", err);
3094 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
3095 ss->rx_data.rx_big.dmat = NULL;
3098 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
3099 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK,
3100 &ss->rx_data.rx_big.info[i].map);
3104 device_printf(sc->dev, "Err %d rx_big dmamap\n", err);
3105 for (j = 0; j < i; ++j) {
3106 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
3107 ss->rx_data.rx_big.info[j].map);
3109 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
3110 ss->rx_data.rx_big.extra_map);
3111 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
3112 ss->rx_data.rx_big.dmat = NULL;
3118 * Now allocate TX resources
3121 ss->tx.mask = tx_ring_entries - 1;
3122 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3125 * Allocate the tx request copy block; MUST be at least 8 bytes
3128 bytes = sizeof(*ss->tx.req_list) * (ss->tx.max_desc + 4);
3129 ss->tx.req_list = kmalloc_cachealign(__VM_CACHELINE_ALIGN(bytes),
3130 M_DEVBUF, M_WAITOK);
3132 /* Allocate the tx busdma segment list */
3133 bytes = sizeof(*ss->tx.seg_list) * ss->tx.max_desc;
3134 ss->tx.seg_list = kmalloc(bytes, M_DEVBUF, M_WAITOK);
3136 /* Allocate the tx host info ring */
3137 bytes = tx_ring_entries * sizeof(*ss->tx.info);
3138 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3140 /* Allocate the tx busdma resources */
3141 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3143 sc->tx_boundary, /* boundary */
3144 BUS_SPACE_MAXADDR, /* low */
3145 BUS_SPACE_MAXADDR, /* high */
3146 NULL, NULL, /* filter */
3148 sizeof(struct ether_vlan_header),
/* Reserve 2 descriptors (headers/req slots), hence max_desc - 2 segs. */
3150 ss->tx.max_desc - 2, /* num segs */
3151 sc->tx_boundary, /* maxsegsz */
3152 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
3153 BUS_DMA_ONEBPAGE, /* flags */
3154 &ss->tx.dmat); /* tag */
3156 device_printf(sc->dev, "Err %d allocating tx dmat\n", err);
3161 * Now use these tags to setup DMA maps for each slot in the ring
3163 for (i = 0; i <= ss->tx.mask; i++) {
3164 err = bus_dmamap_create(ss->tx.dmat,
3165 BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &ss->tx.info[i].map);
3169 device_printf(sc->dev, "Err %d tx dmamap\n", err);
3170 for (j = 0; j < i; ++j) {
3171 bus_dmamap_destroy(ss->tx.dmat,
3172 ss->tx.info[j].map);
3174 bus_dma_tag_destroy(ss->tx.dmat);
/*
 * Query the firmware for tx ring size, derive ring entry counts, size
 * the ifnet send queue(s), and allocate rings for every slice.
 */
3183 mxge_alloc_rings(mxge_softc_t *sc)
3187 int tx_ring_entries, rx_ring_entries;
3190 /* Get ring sizes */
3191 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3193 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3196 tx_ring_size = cmd.data0;
3198 tx_ring_entries = tx_ring_size / sizeof(mcp_kreq_ether_send_t);
/* rx_intr_slots is 2 * ring entries (see mxge_alloc_slices). */
3199 rx_ring_entries = sc->rx_intr_slots / 2;
3202 device_printf(sc->dev, "tx desc %d, rx desc %d\n",
3203 tx_ring_entries, rx_ring_entries);
/* Advise the mbuf system how many clusters this NIC may pin. */
3206 sc->ifp->if_nmbclusters = rx_ring_entries * sc->num_slices;
3207 sc->ifp->if_nmbjclusters = sc->ifp->if_nmbclusters;
3209 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1);
3210 ifq_set_ready(&sc->ifp->if_snd);
3211 ifq_set_subq_cnt(&sc->ifp->if_snd, sc->num_tx_rings);
3213 if (sc->num_tx_rings > 1) {
3214 sc->ifp->if_mapsubq = ifq_mapsubq_modulo;
3215 ifq_set_subq_divisor(&sc->ifp->if_snd, sc->num_tx_rings);
3218 for (slice = 0; slice < sc->num_slices; slice++) {
3219 err = mxge_alloc_slice_rings(&sc->ss[slice],
3220 rx_ring_entries, tx_ring_entries);
3222 device_printf(sc->dev,
3223 "alloc %d slice rings failed\n", slice);
/*
 * Pick the receive cluster size for a given MTU: a standard cluster
 * (MCLBYTES) when the framed size fits, otherwise a jumbo page cluster.
 * Frame sizes beyond MJUMPAGESIZE are rejected by the KASSERT.
 */
3231 mxge_choose_params(int mtu, int *cl_size)
/* Full on-wire buffer: MTU + ethernet header + VLAN tag + firmware pad. */
3233 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD;
3235 if (bufsize < MCLBYTES) {
3236 *cl_size = MCLBYTES;
3238 KASSERT(bufsize < MJUMPAGESIZE, ("invalid MTU %d", mtu));
3239 *cl_size = MJUMPAGESIZE;
/*
 * Bring one slice online: fetch the lanai (NIC SRAM) pointers for the
 * send and both receive rings from firmware, then pre-fill ("stock") the
 * small and big rx rings with mbufs.  With a single tx ring only the
 * slice-0 send offset is used; with multiple tx rings each slice also
 * gets its send_go/send_stop doorbell addresses.
 */
3244 mxge_slice_open(struct mxge_slice_state *ss, int cl_size)
/* Slice index recovered from pointer arithmetic against the base array. */
3249 slice = ss - ss->sc->ss;
3252 * Get the lanai pointers to the send and receive rings
3256 bzero(&cmd, sizeof(cmd)); /* silence gcc warning */
3257 if (ss->sc->num_tx_rings == 1) {
3260 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET,
3262 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *)
3263 (ss->sc->sram + cmd.data0);
3264 /* Leave send_go and send_stop as NULL */
3268 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3269 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *)
3270 (ss->sc->sram + cmd.data0);
/* Per-slice doorbells live at fixed 64-byte strides in NIC SRAM. */
3271 ss->tx.send_go = (volatile uint32_t *)
3272 (ss->sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3273 ss->tx.send_stop = (volatile uint32_t *)
3274 (ss->sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3278 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3279 ss->rx_data.rx_small.lanai =
3280 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0);
3283 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3284 ss->rx_data.rx_big.lanai =
3285 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0);
3288 if_printf(ss->sc->ifp,
3289 "failed to get ring sizes or locations\n");
3294 * Stock small receive ring
3296 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
3297 err = mxge_get_buf_small(&ss->rx_data.rx_small,
3298 ss->rx_data.rx_small.info[i].map, i, TRUE);
3300 if_printf(ss->sc->ifp, "alloced %d/%d smalls\n", i,
3301 ss->rx_data.rx_small.mask + 1);
3307 * Stock big receive ring
/* Poison the shadow addresses first so unfilled slots are detectable. */
3309 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
3310 ss->rx_data.rx_big.shadow[i].addr_low = 0xffffffff;
3311 ss->rx_data.rx_big.shadow[i].addr_high = 0xffffffff;
3314 ss->rx_data.rx_big.cl_size = cl_size;
3316 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
3317 err = mxge_get_buf_big(&ss->rx_data.rx_big,
3318 ss->rx_data.rx_big.info[i].map, i, TRUE);
3320 if_printf(ss->sc->ifp, "alloced %d/%d bigs\n", i,
3321 ss->rx_data.rx_big.mask + 1);
/*
 * Bring the interface fully up: reset the NIC, program the RSS indirect
 * table and Toeplitz key (multi-slice only), select TSO mode, push MTU
 * and buffer sizes to firmware, register the per-slice stats DMA blocks,
 * open every slice, and finally issue ETHERNET_UP and start the tx
 * queues/watchdogs.  On failure it falls through to freeing the mbufs.
 * NOTE(review): extraction dropped error-return lines and some branch
 * structure; comments follow only the visible statements.
 */
3329 mxge_open(mxge_softc_t *sc)
3331 struct ifnet *ifp = sc->ifp;
3333 int err, slice, cl_size, i;
3335 volatile uint8_t *itable;
3336 struct mxge_slice_state *ss;
3338 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3340 /* Copy the MAC address in case it was overridden */
3341 bcopy(IF_LLADDR(ifp), sc->mac_addr, ETHER_ADDR_LEN);
3343 err = mxge_reset(sc, 1);
3345 if_printf(ifp, "failed to reset\n");
3349 if (sc->num_slices > 1) {
3351 * Setup the indirect table.
3353 if_ringmap_rdrtable(sc->ring_map, sc->rdr_table, NETISR_CPUMAX);
3355 cmd.data0 = NETISR_CPUMAX;
3356 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, &cmd);
3358 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd);
3360 if_printf(ifp, "failed to setup rss tables\n");
/* Copy the redirect table byte-by-byte into NIC SRAM. */
3364 itable = sc->sram + cmd.data0;
3365 for (i = 0; i < NETISR_CPUMAX; i++)
3366 itable[i] = sc->rdr_table[i];
3369 volatile uint8_t *hwkey;
3370 uint8_t swkey[MXGE_HWRSS_KEYLEN];
3373 * Setup Toeplitz key.
3375 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_KEY_OFFSET,
3378 if_printf(ifp, "failed to get rsskey\n");
3381 hwkey = sc->sram + cmd.data0;
3383 toeplitz_get_key(swkey, MXGE_HWRSS_KEYLEN);
3384 for (i = 0; i < MXGE_HWRSS_KEYLEN; ++i)
3385 hwkey[i] = swkey[i];
/* Tell firmware the key bytes in SRAM are now valid. */
3388 err = mxge_send_cmd(sc, MXGEFW_CMD_RSS_KEY_UPDATED,
3391 if_printf(ifp, "failed to update rsskey\n");
3395 if_printf(ifp, "RSS key updated\n");
3401 if_printf(ifp, "input hash: RSS\n");
3402 cmd.data1 = MXGEFW_RSS_HASH_TYPE_IPV4 |
3403 MXGEFW_RSS_HASH_TYPE_TCP_IPV4;
3406 if_printf(ifp, "input hash: SRC_DST_PORT\n");
3407 cmd.data1 = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
3409 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3411 if_printf(ifp, "failed to enable slices\n");
3416 cmd.data0 = MXGEFW_TSO_MODE_NDIS;
3417 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_TSO_MODE, &cmd);
3420 * Can't change TSO mode to NDIS, never allow TSO then
3422 if_printf(ifp, "failed to set TSO mode\n");
/* Strip TSO from capabilities so the stack never hands us TSO frames. */
3423 ifp->if_capenable &= ~IFCAP_TSO;
3424 ifp->if_capabilities &= ~IFCAP_TSO;
3425 ifp->if_hwassist &= ~CSUM_TSO;
3428 mxge_choose_params(ifp->if_mtu, &cl_size);
3431 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, &cmd);
3433 * Error is only meaningful if we're trying to set
3434 * MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1
3438 * Give the firmware the mtu and the big and small buffer
3439 * sizes. The firmware wants the big buf size to be a power
3440 * of two. Luckily, DragonFly's clusters are powers of two
3442 cmd.data0 = ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3443 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3445 cmd.data0 = MXGE_RX_SMALL_BUFLEN;
3446 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd);
3448 cmd.data0 = cl_size;
3449 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3452 if_printf(ifp, "failed to setup params\n");
3456 /* Now give him the pointer to the stats block */
3457 for (slice = 0; slice < sc->num_slices; slice++) {
3458 ss = &sc->ss[slice];
3459 cmd.data0 = MXGE_LOWPART_TO_U32(ss->fw_stats_dma.dmem_busaddr);
3460 cmd.data1 = MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.dmem_busaddr);
/* data2 packs the stats block size (low 16) and slice id (high 16). */
3461 cmd.data2 = sizeof(struct mcp_irq_data);
3462 cmd.data2 |= (slice << 16);
3463 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
/* Fallback for firmware without STATS_DMA_V2: obsolete single-count DMA. */
3467 bus = sc->ss->fw_stats_dma.dmem_busaddr;
3468 bus += offsetof(struct mcp_irq_data, send_done_count);
3469 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3470 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3471 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3474 /* Firmware cannot support multicast without STATS_DMA_V2 */
3475 sc->fw_multicast_support = 0;
3477 sc->fw_multicast_support = 1;
3481 if_printf(ifp, "failed to setup params\n");
3485 for (slice = 0; slice < sc->num_slices; slice++) {
3486 err = mxge_slice_open(&sc->ss[slice], cl_size);
3488 if_printf(ifp, "couldn't open slice %d\n", slice);
3493 /* Finally, start the firmware running */
3494 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3496 if_printf(ifp, "Couldn't bring up link\n");
3500 ifp->if_flags |= IFF_RUNNING;
3501 for (i = 0; i < sc->num_tx_rings; ++i) {
3502 mxge_tx_ring_t *tx = &sc->ss[i].tx;
3504 ifsq_clr_oactive(tx->ifsq);
3505 ifsq_watchdog_start(&tx->watchdog);
/* Error path: release any mbufs stocked into the rings above. */
3511 mxge_free_mbufs(sc);
/*
 * Bring the interface down: issue ETHERNET_DOWN, then (dropping the
 * ifnet serializers around a delay) poll down_cnt — which the interrupt
 * path increments — to confirm the firmware's "down" irq arrived before
 * freeing ring mbufs and stopping the tx queues and watchdogs.
 */
3516 mxge_close(mxge_softc_t *sc, int down)
3518 struct ifnet *ifp = sc->ifp;
3520 int err, old_down_cnt, i;
3522 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3525 old_down_cnt = sc->down_cnt;
3528 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3530 if_printf(ifp, "Couldn't bring down link\n");
3532 if (old_down_cnt == sc->down_cnt) {
/* Release serializers so the irq handler can run and bump down_cnt. */
3537 ifnet_deserialize_all(ifp);
3538 DELAY(10 * sc->intr_coal_delay);
3539 ifnet_serialize_all(ifp);
3543 if (old_down_cnt == sc->down_cnt)
3544 if_printf(ifp, "never got down irq\n");
3546 mxge_free_mbufs(sc);
3548 ifp->if_flags &= ~IFF_RUNNING;
3549 for (i = 0; i < sc->num_tx_rings; ++i) {
3550 mxge_tx_ring_t *tx = &sc->ss[i].tx;
3552 ifsq_clr_oactive(tx->ifsq);
3553 ifsq_watchdog_stop(&tx->watchdog);
/*
 * Program PCI config space: read the PCIe link width, set the max read
 * request size to 4KB (value 5 in device-control bits 14:12) on first
 * call, restore the saved value after a watchdog reset, and enable bus
 * mastering.  Offsets 0x8/0x12 are relative to the PCIe capability.
 */
3558 mxge_setup_cfg_space(mxge_softc_t *sc)
3560 device_t dev = sc->dev;
3562 uint16_t lnk, pectl;
3564 /* Find the PCIe link width and set max read request to 4KB */
3565 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3566 lnk = pci_read_config(dev, reg + 0x12, 2);
3567 sc->link_width = (lnk >> 4) & 0x3f;
3569 if (sc->pectl == 0) {
3570 pectl = pci_read_config(dev, reg + 0x8, 2);
3571 pectl = (pectl & ~0x7000) | (5 << 12);
3572 pci_write_config(dev, reg + 0x8, pectl, 2);
3575 /* Restore saved pectl after watchdog reset */
3576 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3580 /* Enable DMA and memory space access */
3581 pci_enable_busmaster(dev);
/*
 * Read the NIC's reboot status register through the vendor-specific PCI
 * capability: enable the cap's read32 window mode, point it at the
 * reboot-status register, and read the value back.  Returns (uint32_t)-1
 * if the vendor capability cannot be located.
 */
3585 mxge_read_reboot(mxge_softc_t *sc)
3587 device_t dev = sc->dev;
3590 /* Find the vendor specific offset */
3591 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3592 if_printf(sc->ifp, "could not find vendor specific offset\n");
3593 return (uint32_t)-1;
3595 /* Enable read32 mode */
3596 pci_write_config(dev, vs + 0x10, 0x3, 1);
3597 /* Tell NIC which register to read */
3598 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3599 return pci_read_config(dev, vs + 0x14, 4);
/*
 * Recover from a watchdog-detected hang.  If PCI config space reads as
 * 0xffff the device vanished; wait for it.  If bus mastering is cleared
 * the NIC rebooted and wiped its config space: report the reboot status,
 * quiesce the interface, restore config space, re-apply our settings,
 * reload firmware, and reopen if it was running.  Otherwise just log
 * that the NIC never rebooted.  The tick callout is re-armed at the end.
 * NOTE(review): extraction dropped branch-closing lines; comments track
 * only the visible statements.
 */
3603 mxge_watchdog_reset(mxge_softc_t *sc)
3605 struct pci_devinfo *dinfo;
3612 if_printf(sc->ifp, "Watchdog reset!\n");
3615 * Check to see if the NIC rebooted. If it did, then all of
3616 * PCI config space has been reset, and things like the
3617 * busmaster bit will be zero. If this is the case, then we
3618 * must restore PCI config space before the NIC can be used
3621 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3622 if (cmd == 0xffff) {
3624 * Maybe the watchdog caught the NIC rebooting; wait
3625 * up to 100ms for it to finish. If it does not come
3626 * back, then give up
3629 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3631 if_printf(sc->ifp, "NIC disappeared!\n");
3633 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3634 /* Print the reboot status */
3635 reboot = mxge_read_reboot(sc);
3636 if_printf(sc->ifp, "NIC rebooted, status = 0x%x\n", reboot);
3638 running = sc->ifp->if_flags & IFF_RUNNING;
3641 * Quiesce NIC so that TX routines will not try to
3642 * xmit after restoration of BAR
3645 /* Mark the link as down */
3646 if (sc->link_state) {
3647 sc->ifp->if_link_state = LINK_STATE_DOWN;
3648 if_link_state_change(sc->ifp);
3652 /* Restore PCI configuration space */
3653 dinfo = device_get_ivars(sc->dev);
3654 pci_cfg_restore(sc->dev, dinfo);
3656 /* And redo any changes we made to our config space */
3657 mxge_setup_cfg_space(sc);
3660 err = mxge_load_firmware(sc, 0);
3662 if_printf(sc->ifp, "Unable to re-load f/w\n");
3663 if (running && !err) {
3666 err = mxge_open(sc);
/* Kick every tx subqueue so queued packets drain after the reset. */
3668 for (i = 0; i < sc->num_tx_rings; ++i)
3669 ifsq_devstart_sched(sc->ss[i].tx.ifsq);
3671 sc->watchdog_resets++;
3673 if_printf(sc->ifp, "NIC did not reboot, not resetting\n");
3677 if_printf(sc->ifp, "watchdog reset failed\n");
3681 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
/*
 * Dump a tx ring's state when a slice appears wedged (request/done
 * counters, queue_active, activate/deactivate counts, and the firmware's
 * send_done_count).  NOTE(review): "struck" in the message is a typo for
 * "stuck" inherited from upstream; left as-is since it is runtime output.
 */
3686 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3688 if_printf(sc->ifp, "slice %d struck? ring state:\n", slice);
3689 if_printf(sc->ifp, "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3690 tx->req, tx->done, tx->queue_active);
3691 if_printf(sc->ifp, "tx.activate=%d tx.deactivate=%d\n",
3692 tx->activate, tx->deactivate);
3693 if_printf(sc->ifp, "pkt_done=%d fw=%d\n",
3694 tx->pkt_done, be32toh(sc->ss->fw_stats->send_done_count));
/*
 * Snapshot ifnet packet counters, compute the in+out packet delta since
 * the previous call, and cache the new totals in the softc.  The delta
 * (pkts) lets mxge_tick() decide whether the NIC has been idle.
 */
3698 mxge_update_stats(mxge_softc_t *sc)
3700 u_long ipackets, opackets, pkts;
3702 IFNET_STAT_GET(sc->ifp, ipackets, ipackets);
3703 IFNET_STAT_GET(sc->ifp, opackets, opackets);
3705 pkts = ipackets - sc->ipackets;
3706 pkts += opackets - sc->opackets;
3708 sc->ipackets = ipackets;
3709 sc->opackets = opackets;
/*
 * Periodic callout: under main_serialize, aggregate stats, re-probe
 * media after a link flap, and — when idle — read PCI config to detect a
 * silent hardware fault, escalating to mxge_watchdog_reset() (with the
 * remaining serializers held) if bus mastering has been lost.  The
 * callout is re-armed with a possibly longer period when idle.
 */
3715 mxge_tick(void *arg)
3717 mxge_softc_t *sc = arg;
3722 lwkt_serialize_enter(&sc->main_serialize);
3725 if (sc->ifp->if_flags & IFF_RUNNING) {
3726 /* Aggregate stats from different slices */
3727 pkts = mxge_update_stats(sc);
3728 if (sc->need_media_probe)
3729 mxge_media_probe(sc);
3734 /* Ensure NIC did not suffer h/w fault while idle */
3735 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3736 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
/* Watchdog path needs all serializers; main is already held. */
3738 mxge_serialize_skipmain(sc);
3739 mxge_watchdog_reset(sc);
3740 mxge_deserialize_skipmain(sc);
3744 /* Look less often if NIC is idle */
3749 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
3751 lwkt_serialize_exit(&sc->main_serialize);
/*
 * ifmedia change handler: only flow-control (rx/tx pause) can actually
 * be changed; the pause setting is forwarded via mxge_change_pause().
 */
3755 mxge_media_change(struct ifnet *ifp)
3757 mxge_softc_t *sc = ifp->if_softc;
3758 const struct ifmedia *ifm = &sc->media;
3761 if (IFM_OPTIONS(ifm->ifm_media) & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
3770 return mxge_change_pause(sc, pause);
/*
 * Change the MTU: reject values whose framed size exceeds the NIC max
 * or falls below the 60-byte ethernet minimum; if the interface is
 * running, reopen it with the new MTU and roll back on failure.
 * NOTE(review): extraction dropped the lines assigning the new MTU and
 * closing the interface before mxge_open(); only the skeleton remains.
 */
3774 mxge_change_mtu(mxge_softc_t *sc, int mtu)
3776 struct ifnet *ifp = sc->ifp;
3777 int real_mtu, old_mtu;
3780 real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3781 if (mtu > sc->max_mtu || real_mtu < 60)
3784 old_mtu = ifp->if_mtu;
3786 if (ifp->if_flags & IFF_RUNNING) {
3788 err = mxge_open(sc);
/* Reopen failed: restore the previous MTU. */
3790 ifp->if_mtu = old_mtu;
/*
 * ifmedia status handler: report link validity/activity and the current
 * (fixed — autoselect is unsupported) media, adding the MXGE media type
 * and pause flags when a concrete media is set.
 */
3799 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3801 mxge_softc_t *sc = ifp->if_softc;
3803 ifmr->ifm_status = IFM_AVALID;
3804 ifmr->ifm_active = IFM_ETHER;
3807 ifmr->ifm_status |= IFM_ACTIVE;
3810 * Autoselect is not supported, so the current media
3811 * should be delivered.
3813 ifmr->ifm_active |= sc->current_media;
3814 if (sc->current_media != IFM_NONE) {
3815 ifmr->ifm_active |= MXGE_IFM;
3817 ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
/*
 * ioctl handler: MTU changes, IFF_UP/DOWN transitions (with promisc and
 * multicast resync when already running), multicast list updates,
 * capability toggles (TX/RX csum, TSO, VLAN tag stripping — keeping
 * if_hwassist in sync with if_capenable), media ioctls, and the
 * ether_ioctl() fallthrough for everything else.
 * NOTE(review): the switch/case and break lines were dropped by
 * extraction; the case grouping must be read from the full source.
 */
3822 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data,
3823 struct ucred *cr __unused)
3825 mxge_softc_t *sc = ifp->if_softc;
3826 struct ifreq *ifr = (struct ifreq *)data;
3829 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3834 err = mxge_change_mtu(sc, ifr->ifr_mtu);
3841 if (ifp->if_flags & IFF_UP) {
3842 if (!(ifp->if_flags & IFF_RUNNING)) {
3843 err = mxge_open(sc);
3846 * Take care of PROMISC and ALLMULTI
3849 mxge_change_promisc(sc,
3850 ifp->if_flags & IFF_PROMISC);
3851 mxge_set_multicast_list(sc);
3854 if (ifp->if_flags & IFF_RUNNING)
3861 mxge_set_multicast_list(sc);
/* SIOCSIFCAP: toggle only the bits that changed. */
3865 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3866 if (mask & IFCAP_TXCSUM) {
3867 ifp->if_capenable ^= IFCAP_TXCSUM;
3868 if (ifp->if_capenable & IFCAP_TXCSUM)
3869 ifp->if_hwassist |= CSUM_TCP | CSUM_UDP;
3871 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
3873 if (mask & IFCAP_TSO) {
3874 ifp->if_capenable ^= IFCAP_TSO;
3875 if (ifp->if_capenable & IFCAP_TSO)
3876 ifp->if_hwassist |= CSUM_TSO;
3878 ifp->if_hwassist &= ~CSUM_TSO;
3880 if (mask & IFCAP_RXCSUM)
3881 ifp->if_capenable ^= IFCAP_RXCSUM;
3882 if (mask & IFCAP_VLAN_HWTAGGING)
3883 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3888 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
3889 &sc->media, command);
3893 err = ether_ioctl(ifp, command, data);
/*
 * Copy module tunables into the softc, clamping them to sane ranges:
 * interrupt coalescing delay (0..10ms else default), tick period
 * (default hz/2), flow control string parsed into pause flags, RSS
 * enable, and throttle clamped to [MXGE_MIN, MXGE_MAX].
 */
3900 mxge_fetch_tunables(mxge_softc_t *sc)
3904 sc->intr_coal_delay = mxge_intr_coal_delay;
3905 if (sc->intr_coal_delay < 0 || sc->intr_coal_delay > (10 * 1000))
3906 sc->intr_coal_delay = MXGE_INTR_COAL_DELAY;
3909 if (mxge_ticks == 0)
3910 mxge_ticks = hz / 2;
/* Parse e.g. "rxpause,txpause" into ifmedia flow-control bits. */
3912 ifm = ifmedia_str2ethfc(mxge_flowctrl);
3913 if (ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE))
3916 sc->use_rss = mxge_use_rss;
3918 sc->throttle = mxge_throttle;
3919 if (sc->throttle && sc->throttle > MXGE_MAX_THROTTLE)
3920 sc->throttle = MXGE_MAX_THROTTLE;
3921 if (sc->throttle && sc->throttle < MXGE_MIN_THROTTLE)
3922 sc->throttle = MXGE_MIN_THROTTLE;
/*
 * Free per-slice DMA blocks (firmware stats and rx_done ring) for every
 * slice, then release the slice array itself.  Safe against partially
 * initialized slices thanks to the NULL checks.
 */
3926 mxge_free_slices(mxge_softc_t *sc)
3928 struct mxge_slice_state *ss;
3934 for (i = 0; i < sc->num_slices; i++) {
3936 if (ss->fw_stats != NULL) {
3937 mxge_dma_free(&ss->fw_stats_dma);
3938 ss->fw_stats = NULL;
3940 if (ss->rx_data.rx_done.entry != NULL) {
3941 mxge_dma_free(&ss->rx_done_dma);
3942 ss->rx_data.rx_done.entry = NULL;
3945 kfree(sc->ss, M_DEVBUF);
/*
 * Allocate the slice array and per-slice DMA state: query firmware for
 * the rx ring size (to derive rx_intr_slots), then for each slice
 * initialize its serializers and allocate the rx interrupt ("rx done")
 * queue (4KB-aligned) and the 64-byte-aligned firmware stats block.
 */
3950 mxge_alloc_slices(mxge_softc_t *sc)
3953 struct mxge_slice_state *ss;
3955 int err, i, rx_ring_size;
3957 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
3959 device_printf(sc->dev, "Cannot determine rx ring size\n");
3962 rx_ring_size = cmd.data0;
/* Two interrupt-queue slots per rx descriptor. */
3963 sc->rx_intr_slots = 2 * (rx_ring_size / sizeof (mcp_dma_addr_t));
3965 bytes = sizeof(*sc->ss) * sc->num_slices;
3966 sc->ss = kmalloc_cachealign(bytes, M_DEVBUF, M_WAITOK | M_ZERO);
3968 for (i = 0; i < sc->num_slices; i++) {
3973 lwkt_serialize_init(&ss->rx_data.rx_serialize);
3974 lwkt_serialize_init(&ss->tx.tx_serialize);
3978 * Allocate per-slice rx interrupt queue
3979 * XXX assume 4bytes mcp_slot
3981 bytes = sc->rx_intr_slots * sizeof(mcp_slot_t);
3982 err = mxge_dma_alloc(sc, &ss->rx_done_dma, bytes, 4096);
3984 device_printf(sc->dev,
3985 "alloc %d slice rx_done failed\n", i);
3988 ss->rx_data.rx_done.entry = ss->rx_done_dma.dmem_addr;
3991 * Allocate the per-slice firmware stats
3993 bytes = sizeof(*ss->fw_stats);
3994 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
3995 sizeof(*ss->fw_stats), 64);
3997 device_printf(sc->dev,
3998 "alloc %d fw_stats failed\n", i);
4001 ss->fw_stats = ss->fw_stats_dma.dmem_addr;
/*
 * Decide how many RSS slices (and tx rings) to use.  Defaults to one
 * slice; multi-slice requires the tunables to allow it, more than one
 * netisr CPU, MSI-X support, and a successful load of the RSS-capable
 * firmware.  The RSS firmware is probed with a RESET, the interrupt
 * queue size is programmed, and the firmware's max queue count is
 * clamped by the MSI-X vector count before building the ring map.  On
 * any failure the original (non-RSS) firmware is reloaded.
 * NOTE(review): extraction dropped error-return/goto lines; comments
 * track only the visible statements.
 */
4007 mxge_slice_probe(mxge_softc_t *sc)
4009 int status, max_intr_slots, max_slices, num_slices;
4010 int msix_cnt, msix_enable, multi_tx;
4015 sc->num_tx_rings = 1;
4017 num_slices = device_getenv_int(sc->dev, "num_slices", mxge_num_slices);
4018 if (num_slices == 1)
4021 if (netisr_ncpus == 1)
4024 msix_enable = device_getenv_int(sc->dev, "msix.enable",
4029 msix_cnt = pci_msix_count(sc->dev);
4033 device_printf(sc->dev, "MSI-X count %d\n", msix_cnt);
4036 * Now load the slice aware firmware see what it supports
/* Pick the RSS variant matching the current aligned/unaligned choice. */
4038 old_fw = sc->fw_name;
4039 if (old_fw == mxge_fw_aligned)
4040 sc->fw_name = mxge_fw_rss_aligned;
4042 sc->fw_name = mxge_fw_rss_unaligned;
4043 status = mxge_load_firmware(sc, 0);
4045 device_printf(sc->dev, "Falling back to a single slice\n");
4050 * Try to send a reset command to the card to see if it is alive
4052 memset(&cmd, 0, sizeof(cmd));
4053 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4055 device_printf(sc->dev, "failed reset\n");
4060 * Get rx ring size to calculate rx interrupt queue size
4062 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4064 device_printf(sc->dev, "Cannot determine rx ring size\n");
4067 max_intr_slots = 2 * (cmd.data0 / sizeof(mcp_dma_addr_t));
4070 * Tell it the size of the rx interrupt queue
4072 cmd.data0 = max_intr_slots * sizeof(struct mcp_slot);
4073 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4075 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4080 * Ask the maximum number of slices it supports
4082 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4084 device_printf(sc->dev,
4085 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4088 max_slices = cmd.data0;
4090 device_printf(sc->dev, "max slices %d\n", max_slices);
/* Cannot use more slices than available MSI-X vectors. */
4092 if (max_slices > msix_cnt)
4093 max_slices = msix_cnt;
4095 sc->ring_map = if_ringmap_alloc(sc->dev, num_slices, max_slices);
4096 sc->num_slices = if_ringmap_count(sc->ring_map);
4098 multi_tx = device_getenv_int(sc->dev, "multi_tx", mxge_multi_tx);
4100 sc->num_tx_rings = sc->num_slices;
4103 device_printf(sc->dev, "using %d slices, max %d\n",
4104 sc->num_slices, max_slices);
4107 if (sc->num_slices == 1)
/* Failure path: revert to the original non-RSS firmware. */
4112 sc->fw_name = old_fw;
4113 mxge_load_firmware(sc, 0);
/*
 * Build the serializer array used by the ifnet serialize hooks:
 * main_serialize first, then every slice's rx serializer, then every
 * slice's tx serializer — this fixed order is what lets the skipmain
 * helpers skip exactly slot 0, and prevents lock-order reversals.
 */
4117 mxge_setup_serialize(struct mxge_softc *sc)
4121 /* Main + rx + tx */
4122 sc->nserialize = (2 * sc->num_slices) + 1;
4124 kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
4125 M_DEVBUF, M_WAITOK | M_ZERO);
4130 * NOTE: Order is critical
4133 KKASSERT(i < sc->nserialize);
4134 sc->serializes[i++] = &sc->main_serialize;
4136 for (slice = 0; slice < sc->num_slices; ++slice) {
4137 KKASSERT(i < sc->nserialize);
4138 sc->serializes[i++] = &sc->ss[slice].rx_data.rx_serialize;
4141 for (slice = 0; slice < sc->num_slices; ++slice) {
4142 KKASSERT(i < sc->nserialize);
4143 sc->serializes[i++] = &sc->ss[slice].tx.tx_serialize;
4146 KKASSERT(i == sc->nserialize);
/*
 * ifnet if_serialize method: enter the serializer set selected by
 * 'slz' from the array built in mxge_setup_serialize().
 */
4150 mxge_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
4152 	struct mxge_softc *sc = ifp->if_softc;
4154 	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
/*
 * ifnet if_deserialize method: release the serializer set selected
 * by 'slz'; the inverse of mxge_serialize().
 */
4158 mxge_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4160 	struct mxge_softc *sc = ifp->if_softc;
4162 	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
/*
 * ifnet if_tryserialize method: non-blocking attempt to enter the
 * serializer set; returns ifnet_serialize_array_try()'s result
 * (non-zero on success — per the DragonFly serializer convention).
 */
4166 mxge_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4168 	struct mxge_softc *sc = ifp->if_softc;
4170 	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
/*
 * ifnet if_serialize_assert method (INVARIANTS kernels only):
 * assert that the selected serializer set is (or is not) held,
 * according to 'serialized'.
 */
4176 mxge_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
4177     boolean_t serialized)
4179 	struct mxge_softc *sc = ifp->if_softc;
4181 	ifnet_serialize_array_assert(sc->serializes, sc->nserialize,
4185 #endif /* INVARIANTS */
4187 #ifdef IFPOLL_ENABLE
/*
 * ifpoll RX handler for one slice.  Called with the slice's RX
 * serializer held.  Drains up to 'cycle' completed RX descriptors,
 * then hands the RX token back to the NIC by writing irq_claim.
 */
4190 mxge_npoll_rx(struct ifnet *ifp, void *xss, int cycle)
4192 	struct mxge_slice_state *ss = xss;
4193 	mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
4195 	ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);
/* A non-zero length in the current completion slot means work is pending */
4197 	if (rx_done->entry[rx_done->idx].length != 0) {
4198 		mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, cycle);
4202 		 * This register writting obviously has cost,
4203 		 * however, if we don't hand back the rx token,
4204 		 * the upcoming packets may suffer rediculously
4205 		 * large delay, as observed on 8AL-C using ping(8).
4207 		*ss->irq_claim = be32toh(3);
/*
 * ifnet if_npoll method: register (or, presumably when info is NULL
 * in the elided path, deregister) per-slice RX polling handlers,
 * each bound to the CPU that slice's interrupt was assigned to.
 */
4212 mxge_npoll(struct ifnet *ifp, struct ifpoll_info *info)
4214 	struct mxge_softc *sc = ifp->if_softc;
4221 	 * Only poll rx; polling tx and status don't seem to work
4223 	for (i = 0; i < sc->num_slices; ++i) {
4224 		struct mxge_slice_state *ss = &sc->ss[i];
4225 		int cpu = ss->intr_cpuid;
4227 		KKASSERT(cpu < netisr_ncpus);
4228 		info->ifpi_rx[cpu].poll_func = mxge_npoll_rx;
4229 		info->ifpi_rx[cpu].arg = ss;
4230 		info->ifpi_rx[cpu].serializer = &ss->rx_data.rx_serialize;
4234 #endif /* IFPOLL_ENABLE */
/*
 * Newbus attach method.  Maps the board, reads the EEPROM strings,
 * allocates DMA scratch buffers, loads firmware, probes/allocates
 * slices and interrupts, configures the ifnet and its TX subqueues,
 * attaches the ethernet layer, and starts the periodic tick callout.
 * Error paths are elided here; each failing step presumably unwinds
 * the earlier allocations — verify against the full source.
 */
4237 mxge_attach(device_t dev)
4239 	mxge_softc_t *sc = device_get_softc(dev);
4240 	struct ifnet *ifp = &sc->arpcom.ac_if;
4244 	 * Avoid rewriting half the lines in this file to use
4245 	 * &sc->arpcom.ac_if instead
4249 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4251 	/* IFM_ETH_FORCEPAUSE can't be changed */
4252 	ifmedia_init(&sc->media, IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE,
4253 	    mxge_media_change, mxge_media_status);
4255 	lwkt_serialize_init(&sc->main_serialize);
4257 	mxge_fetch_tunables(sc);
/* Parent DMA tag; all per-ring tags are derived from this one */
4259 	err = bus_dma_tag_create(NULL,			/* parent */
4262 				 BUS_SPACE_MAXADDR,	/* low */
4263 				 BUS_SPACE_MAXADDR,	/* high */
4264 				 NULL, NULL,		/* filter */
4265 				 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
4267 				 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
4269 				 &sc->parent_dmat);	/* tag */
4271 		device_printf(dev, "Err %d allocating parent dmat\n", err);
4275 	callout_init_mp(&sc->co_hdl);
4277 	mxge_setup_cfg_space(sc);
4280 	 * Map the board into the kernel
4283 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
4285 	if (sc->mem_res == NULL) {
4286 		device_printf(dev, "could not map memory\n");
4291 	sc->sram = rman_get_virtual(sc->mem_res);
/* Usable SRAM = 2MB minus firmware/scratch regions minus 0x100 guard */
4292 	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4293 	if (sc->sram_size > rman_get_size(sc->mem_res)) {
4294 		device_printf(dev, "impossible memory region size %ld\n",
4295 		    rman_get_size(sc->mem_res));
4301 	 * Make NULL terminated copy of the EEPROM strings section of
/* Read 2 bytes short so the bzero above guarantees NUL termination */
4304 	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4305 	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4306 	    rman_get_bushandle(sc->mem_res),
4307 	    sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4308 	    sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE - 2);
4309 	err = mxge_parse_strings(sc);
4311 		device_printf(dev, "parse EEPROM string failed\n");
4316 	 * Enable write combining for efficient use of PCIe bus
4321 	 * Allocate the out of band DMA memory
4323 	err = mxge_dma_alloc(sc, &sc->cmd_dma, sizeof(mxge_cmd_t), 64);
4325 		device_printf(dev, "alloc cmd DMA buf failed\n");
4328 	sc->cmd = sc->cmd_dma.dmem_addr;
/* 64-byte zero pad used by the NIC for runt-frame padding */
4330 	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4332 		device_printf(dev, "alloc zeropad DMA buf failed\n");
/* Scratch page for the firmware DMA benchmark */
4336 	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4338 		device_printf(dev, "alloc dmabench DMA buf failed\n");
4342 	/* Select & load the firmware */
4343 	err = mxge_select_firmware(sc);
4345 		device_printf(dev, "select firmware failed\n");
4349 	mxge_slice_probe(sc);
4350 	err = mxge_alloc_slices(sc);
4352 		device_printf(dev, "alloc slices failed\n");
4356 	err = mxge_alloc_intr(sc);
4358 		device_printf(dev, "alloc intr failed\n");
4362 	/* Setup serializes */
4363 	mxge_setup_serialize(sc);
4365 	err = mxge_reset(sc, 0);
4367 		device_printf(dev, "reset failed\n");
4371 	err = mxge_alloc_rings(sc);
4373 		device_printf(dev, "failed to allocate rings\n");
/* Capabilities: checksum offload, TSO, jumbo MTU, software VLAN tagging */
4377 	ifp->if_baudrate = IF_Gbps(10UL);
4378 	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO;
4379 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4381 	ifp->if_capabilities |= IFCAP_VLAN_MTU;
4383 	/* Well, its software, sigh */
4384 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
4386 	ifp->if_capenable = ifp->if_capabilities;
4389 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4390 	ifp->if_init = mxge_init;
4391 	ifp->if_ioctl = mxge_ioctl;
4392 	ifp->if_start = mxge_start;
4393 #ifdef IFPOLL_ENABLE
/* Polling is unusable with a legacy INTx vector here */
4394 	if (sc->intr_type != PCI_INTR_TYPE_LEGACY)
4395 		ifp->if_npoll = mxge_npoll;
4397 	ifp->if_serialize = mxge_serialize;
4398 	ifp->if_deserialize = mxge_deserialize;
4399 	ifp->if_tryserialize = mxge_tryserialize;
4401 	ifp->if_serialize_assert = mxge_serialize_assert;
4404 	/* Increase TSO burst length */
4405 	ifp->if_tsolen = (32 * ETHERMTU);
4407 	/* Initialise the ifmedia structure */
4408 	mxge_media_init(sc);
4409 	mxge_media_probe(sc);
4411 	ether_ifattach(ifp, sc->mac_addr, NULL);
4413 	/* Setup TX rings and subqueues */
4414 	for (i = 0; i < sc->num_tx_rings; ++i) {
4415 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
4416 		struct mxge_slice_state *ss = &sc->ss[i];
/* Bind each subqueue to its slice's interrupt CPU and TX serializer */
4418 		ifsq_set_cpuid(ifsq, ss->intr_cpuid);
4419 		ifsq_set_hw_serialize(ifsq, &ss->tx.tx_serialize);
4420 		ifsq_set_priv(ifsq, &ss->tx);
4423 		ifsq_watchdog_init(&ss->tx.watchdog, ifsq, mxge_watchdog);
4428 	 * We are not ready to do "gather" jumbo frame, so
4429 	 * limit MTU to MJUMPAGESIZE
4431 	sc->max_mtu = MJUMPAGESIZE -
4432 	    ETHER_HDR_LEN - EVL_ENCAPLEN - MXGEFW_PAD - 1;
4435 	err = mxge_setup_intr(sc);
4437 		device_printf(dev, "alloc and setup intr failed\n");
4438 		ether_ifdetach(ifp);
4442 	mxge_add_sysctls(sc);
4444 	/* Increase non-cluster mbuf limit; used by small RX rings */
4445 	mb_inclimit(ifp->if_nmbclusters);
/* Periodic housekeeping tick, pinned to slice 0's interrupt CPU */
4447 	callout_reset_bycpu(&sc->co_hdl, mxge_ticks, mxge_tick, sc,
4448 	    sc->ss[0].intr_cpuid);
/*
 * Newbus detach method.  Unwinds mxge_attach() in reverse:
 * stop the interface and callout, tear down interrupts, detach the
 * ethernet layer, restore the mbuf limit, then release DMA buffers,
 * bus resources, the DMA tag and the ring map.  Ordering matters —
 * see the NOTE about freeing slices after sysctls/intr/rings.
 */
4457 mxge_detach(device_t dev)
4459 	mxge_softc_t *sc = device_get_softc(dev);
4461 	if (device_is_attached(dev)) {
4462 		struct ifnet *ifp = sc->ifp;
4463 		int mblimit = ifp->if_nmbclusters;
/* Quiesce the interface under full serialization before teardown */
4465 		ifnet_serialize_all(ifp);
4468 		if (ifp->if_flags & IFF_RUNNING)
4470 		callout_stop(&sc->co_hdl);
4472 		mxge_teardown_intr(sc, sc->num_slices);
4474 		ifnet_deserialize_all(ifp);
/* Wait for any in-flight tick callout to finish */
4476 		callout_terminate(&sc->co_hdl);
4478 		ether_ifdetach(ifp);
4480 		/* Decrease non-cluster mbuf limit increased by us */
4481 		mb_inclimit(-mblimit);
4483 	ifmedia_removeall(&sc->media);
/* Quiesce firmware DMA only if the needed DMA buffers still exist */
4485 	if (sc->cmd != NULL && sc->zeropad_dma.dmem_addr != NULL &&
4487 		mxge_dummy_rdma(sc, 0);
4490 	mxge_rem_sysctls(sc);
4491 	mxge_free_rings(sc);
4493 	/* MUST after sysctls, intr and rings are freed */
4494 	mxge_free_slices(sc);
4496 	if (sc->dmabench_dma.dmem_addr != NULL)
4497 		mxge_dma_free(&sc->dmabench_dma);
4498 	if (sc->zeropad_dma.dmem_addr != NULL)
4499 		mxge_dma_free(&sc->zeropad_dma);
4500 	if (sc->cmd_dma.dmem_addr != NULL)
4501 		mxge_dma_free(&sc->cmd_dma);
4503 	if (sc->msix_table_res != NULL) {
4504 		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(2),
4505 		    sc->msix_table_res);
4507 	if (sc->mem_res != NULL) {
4508 		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS,
4512 	if (sc->parent_dmat != NULL)
4513 		bus_dma_tag_destroy(sc->parent_dmat);
4515 	if (sc->ring_map != NULL)
4516 		if_ringmap_free(sc->ring_map);
/* Newbus shutdown method — body elided in this view; verify against full source */
4522 mxge_shutdown(device_t dev)
/*
 * Release all per-slice MSI-X resources: IRQ resource and vector for
 * each slice, then (when 'setup' — presumably set once pci_setup_msix
 * succeeded) the MSI-X infrastructure itself.  Only valid in
 * multi-slice (MSI-X) mode.
 */
4528 mxge_free_msix(struct mxge_softc *sc, boolean_t setup)
4532 	KKASSERT(sc->num_slices > 1);
4534 	for (i = 0; i < sc->num_slices; ++i) {
4535 		struct mxge_slice_state *ss = &sc->ss[i];
4537 		if (ss->intr_res != NULL) {
4538 			bus_release_resource(sc->dev, SYS_RES_IRQ,
4539 			    ss->intr_rid, ss->intr_res);
/* rid >= 0 means the vector was successfully allocated */
4541 		if (ss->intr_rid >= 0)
4542 			pci_release_msix_vector(sc->dev, ss->intr_rid);
4545 		pci_teardown_msix(sc->dev);
/*
 * Allocate one MSI-X vector per slice.  Slice 0's vector handles
 * TX-complete + its own RX ("comb") on the main serializer; slices
 * 1..N-1 handle RX only (or RX+TX when multiple TX rings are
 * enabled), each on its slice's RX serializer and mapped CPU.
 * On any failure, unwinds via mxge_free_msix().
 */
4549 mxge_alloc_msix(struct mxge_softc *sc)
4551 	struct mxge_slice_state *ss;
4553 	boolean_t setup = FALSE;
4555 	KKASSERT(sc->num_slices > 1);
/* Slice 0: combined handler under the main serializer */
4559 	ss->intr_serialize = &sc->main_serialize;
4560 	ss->intr_func = mxge_msi;
4561 	ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
4562 	    "%s comb", device_get_nameunit(sc->dev));
4563 	ss->intr_desc = ss->intr_desc0;
4564 	ss->intr_cpuid = if_ringmap_cpumap(sc->ring_map, 0);
4566 	for (i = 1; i < sc->num_slices; ++i) {
4569 		ss->intr_serialize = &ss->rx_data.rx_serialize;
4570 		if (sc->num_tx_rings == 1) {
/* Single TX ring: these slices service RX only */
4571 			ss->intr_func = mxge_msix_rx;
4572 			ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
4573 			    "%s rx%d", device_get_nameunit(sc->dev), i);
4575 			ss->intr_func = mxge_msix_rxtx;
4576 			ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
4577 			    "%s rxtx%d", device_get_nameunit(sc->dev), i);
4579 		ss->intr_desc = ss->intr_desc0;
4580 		ss->intr_cpuid = if_ringmap_cpumap(sc->ring_map, i);
/* The MSI-X table lives in BAR(2) on this hardware */
4584 	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4586 	if (sc->msix_table_res == NULL) {
4587 		device_printf(sc->dev, "couldn't alloc MSI-X table res\n");
4591 	error = pci_setup_msix(sc->dev);
4593 		device_printf(sc->dev, "could not setup MSI-X\n");
4598 	for (i = 0; i < sc->num_slices; ++i) {
4601 		error = pci_alloc_msix_vector(sc->dev, i, &ss->intr_rid,
4604 			device_printf(sc->dev, "could not alloc "
4605 			    "MSI-X %d on cpu%d\n", i, ss->intr_cpuid);
4609 		ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4610 		    &ss->intr_rid, RF_ACTIVE);
4611 		if (ss->intr_res == NULL) {
4612 			device_printf(sc->dev, "could not alloc "
4613 			    "MSI-X %d resource\n", i);
4619 	pci_enable_msix(sc->dev);
4620 	sc->intr_type = PCI_INTR_TYPE_MSIX;
/* Error path: release whatever was allocated above */
4623 	mxge_free_msix(sc, setup);
/*
 * Allocate the device interrupt(s).  Multi-slice configurations use
 * MSI-X (one vector per slice, via mxge_alloc_msix()); single-slice
 * falls back to a lone MSI or legacy INTx vector on slice 0,
 * dispatched under the main serializer.
 */
4628 mxge_alloc_intr(struct mxge_softc *sc)
4630 	struct mxge_slice_state *ss;
4633 	if (sc->num_slices > 1) {
4636 		error = mxge_alloc_msix(sc);
4639 		KKASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX);
/* Single-slice: one MSI or legacy interrupt on slice 0 */
4645 	sc->intr_type = pci_alloc_1intr(sc->dev, mxge_msi_enable,
4646 	    &ss->intr_rid, &irq_flags);
4648 	ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4649 	    &ss->intr_rid, irq_flags);
4650 	if (ss->intr_res == NULL) {
4651 		device_printf(sc->dev, "could not alloc interrupt\n");
/* Legacy INTx needs the shared-interrupt handler variant */
4655 	if (sc->intr_type == PCI_INTR_TYPE_LEGACY)
4656 		ss->intr_func = mxge_legacy;
4658 		ss->intr_func = mxge_msi;
4659 	ss->intr_serialize = &sc->main_serialize;
4660 	ss->intr_cpuid = rman_get_cpuid(ss->intr_res);
/*
 * Hook up the interrupt handler for every slice using the function,
 * serializer and description chosen in mxge_alloc_intr()/
 * mxge_alloc_msix().  On failure, tears down the handlers already
 * established (slices [0, i)) before returning the error.
 */
4666 mxge_setup_intr(struct mxge_softc *sc)
4670 	for (i = 0; i < sc->num_slices; ++i) {
4671 		struct mxge_slice_state *ss = &sc->ss[i];
4674 		error = bus_setup_intr_descr(sc->dev, ss->intr_res,
4675 		    INTR_MPSAFE, ss->intr_func, ss, &ss->intr_hand,
4676 		    ss->intr_serialize, ss->intr_desc);
4678 			device_printf(sc->dev, "can't setup %dth intr\n", i);
/* Undo the handlers installed so far */
4679 			mxge_teardown_intr(sc, i);
/*
 * Disconnect the interrupt handlers of the first 'cnt' slices.
 * Called with cnt == sc->num_slices for a full teardown, or with a
 * smaller count from mxge_setup_intr()'s partial-failure path.
 */
4687 mxge_teardown_intr(struct mxge_softc *sc, int cnt)
4694 	for (i = 0; i < cnt; ++i) {
4695 		struct mxge_slice_state *ss = &sc->ss[i];
4697 		bus_teardown_intr(sc->dev, ss->intr_res, ss->intr_hand);
/*
 * Release interrupt resources: for MSI/legacy, free slice 0's lone
 * IRQ resource (and the MSI message if one was allocated); for
 * MSI-X, delegate the full per-slice release to mxge_free_msix().
 */
4702 mxge_free_intr(struct mxge_softc *sc)
4707 	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
4708 		struct mxge_slice_state *ss = &sc->ss[0];
4710 		if (ss->intr_res != NULL) {
4711 			bus_release_resource(sc->dev, SYS_RES_IRQ,
4712 			    ss->intr_rid, ss->intr_res);
4714 		if (sc->intr_type == PCI_INTR_TYPE_MSI)
4715 			pci_release_msi(sc->dev);
4717 		mxge_free_msix(sc, TRUE);