1 /******************************************************************************
3 Copyright (c) 2006-2013, Myricom Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 $FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $
30 ***************************************************************************/
32 #include "opt_ifpoll.h"
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/linker.h>
38 #include <sys/firmware.h>
39 #include <sys/endian.h>
40 #include <sys/in_cksum.h>
41 #include <sys/sockio.h>
43 #include <sys/malloc.h>
44 #include <sys/kernel.h>
45 #include <sys/module.h>
46 #include <sys/serialize.h>
47 #include <sys/socket.h>
48 #include <sys/sysctl.h>
51 #include <net/if_arp.h>
52 #include <net/ifq_var.h>
53 #include <net/ethernet.h>
54 #include <net/if_dl.h>
55 #include <net/if_media.h>
56 #include <net/if_poll.h>
60 #include <net/if_types.h>
61 #include <net/vlan/if_vlan_var.h>
63 #include <net/toeplitz.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 #include <netinet/tcp.h>
73 #include <bus/pci/pcireg.h>
74 #include <bus/pci/pcivar.h>
75 #include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */
77 #include <vm/vm.h> /* for pmap_mapdev() */
80 #if defined(__i386__) || defined(__x86_64__)
81 #include <machine/specialreg.h>
84 #include <dev/netif/mxge/mxge_mcp.h>
85 #include <dev/netif/mxge/mcp_gen_header.h>
86 #include <dev/netif/mxge/if_mxge_var.h>
/*
 * Module-scope constants and loader tunables.
 * NOTE(review): the embedded original line numbering skips inside this
 * span — some declarations/comment delimiters were lost in extraction;
 * verify against upstream if_mxge.c.
 */
88 #define MXGE_IFM (IFM_ETHER | IFM_FDX | IFM_ETH_FORCEPAUSE)
90 #define MXGE_RX_SMALL_BUFLEN (MHLEN - MXGEFW_PAD)
91 #define MXGE_HWRSS_KEYLEN 16
/* Driver defaults, overridable via the hw.mxge.* tunables below. */
94 static int mxge_nvidia_ecrc_enable = 1;
95 static int mxge_force_firmware = 0;
96 static int mxge_intr_coal_delay = MXGE_INTR_COAL_DELAY;
97 static int mxge_deassert_wait = 1;
98 static int mxge_ticks;
99 static int mxge_num_slices = 0;
100 static int mxge_always_promisc = 0;
101 static int mxge_throttle = 0;
102 static int mxge_msi_enable = 1;
103 static int mxge_msix_enable = 1;
104 static int mxge_multi_tx = 1;
106 * Don't use RSS by default, it's just too slow
108 static int mxge_use_rss = 0;
110 static char mxge_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_FORCE_FULL;
/* Firmware image names: aligned vs. unaligned PCIe-completion variants. */
112 static const char *mxge_fw_unaligned = "mxge_ethp_z8e";
113 static const char *mxge_fw_aligned = "mxge_eth_z8e";
114 static const char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
115 static const char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
/* Loader tunables mapping to the defaults above. */
117 TUNABLE_INT("hw.mxge.num_slices", &mxge_num_slices);
118 TUNABLE_INT("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay);
119 TUNABLE_INT("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable);
120 TUNABLE_INT("hw.mxge.force_firmware", &mxge_force_firmware);
121 TUNABLE_INT("hw.mxge.deassert_wait", &mxge_deassert_wait);
122 TUNABLE_INT("hw.mxge.ticks", &mxge_ticks);
123 TUNABLE_INT("hw.mxge.always_promisc", &mxge_always_promisc);
124 TUNABLE_INT("hw.mxge.throttle", &mxge_throttle);
125 TUNABLE_INT("hw.mxge.multi_tx", &mxge_multi_tx);
126 TUNABLE_INT("hw.mxge.use_rss", &mxge_use_rss);
127 TUNABLE_INT("hw.mxge.msi.enable", &mxge_msi_enable);
128 TUNABLE_INT("hw.mxge.msix.enable", &mxge_msix_enable);
129 TUNABLE_STR("hw.mxge.flow_ctrl", mxge_flowctrl, sizeof(mxge_flowctrl));
/*
 * Forward declarations and newbus driver glue.
 * NOTE(review): extraction gaps here — e.g. the DEVMETHOD_END sentinel
 * and the driver name/probe fields of mxge_driver are not visible;
 * verify against upstream if_mxge.c.
 */
131 static int mxge_probe(device_t dev);
132 static int mxge_attach(device_t dev);
133 static int mxge_detach(device_t dev);
134 static int mxge_shutdown(device_t dev);
/* Interrupt allocation/setup helpers (MSI-X / MSI / legacy). */
136 static int mxge_alloc_intr(struct mxge_softc *sc);
137 static void mxge_free_intr(struct mxge_softc *sc);
138 static int mxge_setup_intr(struct mxge_softc *sc);
139 static void mxge_teardown_intr(struct mxge_softc *sc, int cnt);
141 static device_method_t mxge_methods[] = {
142 /* Device interface */
143 DEVMETHOD(device_probe, mxge_probe),
144 DEVMETHOD(device_attach, mxge_attach),
145 DEVMETHOD(device_detach, mxge_detach),
146 DEVMETHOD(device_shutdown, mxge_shutdown),
150 static driver_t mxge_driver = {
153 sizeof(mxge_softc_t),
156 static devclass_t mxge_devclass;
158 /* Declare ourselves to be a child of the PCI bus.*/
159 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, NULL, NULL);
160 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
161 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
/* Internal helpers used throughout the driver. */
163 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
164 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
165 static void mxge_close(mxge_softc_t *sc, int down);
166 static int mxge_open(mxge_softc_t *sc);
167 static void mxge_tick(void *arg);
168 static void mxge_watchdog_reset(mxge_softc_t *sc);
169 static void mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice);
172 mxge_probe(device_t dev)
174 if (pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM &&
175 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E ||
176 pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9)) {
177 int rev = pci_get_revid(dev);
180 case MXGE_PCI_REV_Z8E:
181 device_set_desc(dev, "Myri10G-PCIE-8A");
183 case MXGE_PCI_REV_Z8ES:
184 device_set_desc(dev, "Myri10G-PCIE-8B");
187 device_set_desc(dev, "Myri10G-PCIE-8??");
188 device_printf(dev, "Unrecognized rev %d NIC\n", rev);
197 mxge_enable_wc(mxge_softc_t *sc)
199 #if defined(__i386__) || defined(__x86_64__)
203 len = rman_get_size(sc->mem_res);
204 pmap_change_attr((vm_offset_t) sc->sram, len / PAGE_SIZE,
205 PAT_WRITE_COMBINING);
210 mxge_dma_alloc(mxge_softc_t *sc, bus_dmamem_t *dma, size_t bytes,
211 bus_size_t alignment)
216 if (bytes > 4096 && alignment == 4096)
221 err = bus_dmamem_coherent(sc->parent_dmat, alignment, boundary,
222 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, bytes,
223 BUS_DMA_WAITOK | BUS_DMA_ZERO, dma);
225 device_printf(sc->dev, "bus_dmamem_coherent failed: %d\n", err);
232 mxge_dma_free(bus_dmamem_t *dma)
234 bus_dmamap_unload(dma->dmem_tag, dma->dmem_map);
235 bus_dmamem_free(dma->dmem_tag, dma->dmem_addr, dma->dmem_map);
236 bus_dma_tag_destroy(dma->dmem_tag);
240 * The eeprom strings on the lanaiX have the format
246 mxge_parse_strings(mxge_softc_t *sc)
249 int i, found_mac, found_sn2;
252 ptr = sc->eeprom_strings;
255 while (*ptr != '\0') {
256 if (strncmp(ptr, "MAC=", 4) == 0) {
259 sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
260 if (endptr - ptr != 2)
269 } else if (strncmp(ptr, "PC=", 3) == 0) {
271 strlcpy(sc->product_code_string, ptr,
272 sizeof(sc->product_code_string));
273 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
275 strlcpy(sc->serial_number_string, ptr,
276 sizeof(sc->serial_number_string));
277 } else if (strncmp(ptr, "SN2=", 4) == 0) {
278 /* SN2 takes precedence over SN */
281 strlcpy(sc->serial_number_string, ptr,
282 sizeof(sc->serial_number_string));
284 while (*ptr++ != '\0') {}
291 device_printf(sc->dev, "failed to parse eeprom_strings\n");
295 #if defined(__i386__) || defined(__x86_64__)
/*
 * Enable ECRC generation on an upstream Nvidia (nForce CK804/MCP55)
 * PCIe bridge so the NIC sees 8-byte-aligned completions.  Because
 * standard config accesses cannot reach the extended (>0xff) config
 * space here, the bridge's extended config space is located by its
 * magic/chipset-stored base and accessed via pmap_mapdev().
 * NOTE(review): extraction gaps in this block (locals, returns, the
 * ECRC-bit set at 0x178) — code left byte-identical; verify against
 * upstream if_mxge.c before relying on details.
 */
298 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
301 unsigned long base, off;
303 device_t pdev, mcp55;
304 uint16_t vendor_id, device_id, word;
305 uintptr_t bus, slot, func, ivend, idev;
/* Honor the hw.mxge.nvidia_ecrc_enable tunable. */
308 if (!mxge_nvidia_ecrc_enable)
311 pdev = device_get_parent(device_get_parent(sc->dev));
313 device_printf(sc->dev, "could not find parent?\n");
316 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
317 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
/* Only Nvidia bridges (vendor 0x10de) are handled. */
319 if (vendor_id != 0x10de)
324 if (device_id == 0x005d) {
325 /* ck804, base address is magic */
327 } else if (device_id >= 0x0374 && device_id <= 0x378) {
328 /* mcp55, base address stored in chipset */
329 mcp55 = pci_find_bsf(0, 0, 0);
331 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
332 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
333 word = pci_read_config(mcp55, 0x90, 2);
334 base = ((unsigned long)word & 0x7ffeU) << 25;
342 * Test below is commented because it is believed that doing
343 * config read/write beyond 0xff will access the config space
344 * for the next larger function. Uncomment this and remove
345 * the hacky pmap_mapdev() way of accessing config space when
346 * DragonFly grows support for extended pcie config space access.
350 * See if we can, by some miracle, access the extended
353 val = pci_read_config(pdev, 0x178, 4);
354 if (val != 0xffffffff) {
356 pci_write_config(pdev, 0x178, val, 4);
361 * Rather than using normal pci config space writes, we must
362 * map the Nvidia config space ourselves. This is because on
363 * opteron/nvidia class machine the 0xe000000 mapping is
364 * handled by the nvidia chipset, that means the internal PCI
365 * device (the on-chip northbridge), or the amd-8131 bridge
366 * and things behind them are not visible by this method.
/* Collect the bridge's BDF and IDs to locate/verify its config page. */
369 BUS_READ_IVAR(device_get_parent(pdev), pdev,
371 BUS_READ_IVAR(device_get_parent(pdev), pdev,
372 PCI_IVAR_SLOT, &slot);
373 BUS_READ_IVAR(device_get_parent(pdev), pdev,
374 PCI_IVAR_FUNCTION, &func);
375 BUS_READ_IVAR(device_get_parent(pdev), pdev,
376 PCI_IVAR_VENDOR, &ivend);
377 BUS_READ_IVAR(device_get_parent(pdev), pdev,
378 PCI_IVAR_DEVICE, &idev);
/* Extended config space: 1MB per bus, 4KB per function. */
380 off = base + 0x00100000UL * (unsigned long)bus +
381 0x00001000UL * (unsigned long)(func + 8 * slot);
383 /* map it into the kernel */
384 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
386 device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
389 /* get a pointer to the config space mapped into the kernel */
390 cfgptr = va + (off & PAGE_MASK);
392 /* make sure that we can really access it */
393 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
394 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
395 if (!(vendor_id == ivend && device_id == idev)) {
396 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
397 vendor_id, device_id);
398 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
/* 0x178 holds the ECRC control register in extended config space. */
402 ptr32 = (uint32_t*)(cfgptr + 0x178);
405 if (val == 0xffffffff) {
406 device_printf(sc->dev, "extended mapping failed\n");
407 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
411 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
413 device_printf(sc->dev, "Enabled ECRC on upstream "
414 "Nvidia bridge at %d:%d:%d\n",
415 (int)bus, (int)slot, (int)func);
419 #else /* __i386__ || __x86_64__ */
422 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
424 device_printf(sc->dev, "Nforce 4 chipset on non-x86/x86_64!?!?!\n");
430 mxge_dma_test(mxge_softc_t *sc, int test_type)
433 bus_addr_t dmatest_bus = sc->dmabench_dma.dmem_busaddr;
436 const char *test = " ";
439 * Run a small DMA test.
440 * The magic multipliers to the length tell the firmware
441 * to do DMA read, write, or read+write tests. The
442 * results are returned in cmd.data0. The upper 16
443 * bits of the return is the number of transfers completed.
444 * The lower 16 bits is the time in 0.5us ticks that the
445 * transfers took to complete.
448 len = sc->tx_boundary;
450 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
451 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
452 cmd.data2 = len * 0x10000;
453 status = mxge_send_cmd(sc, test_type, &cmd);
458 sc->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);
460 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
461 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
462 cmd.data2 = len * 0x1;
463 status = mxge_send_cmd(sc, test_type, &cmd);
468 sc->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);
470 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
471 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
472 cmd.data2 = len * 0x10001;
473 status = mxge_send_cmd(sc, test_type, &cmd);
478 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
479 (cmd.data0 & 0xffff);
482 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) {
483 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
490 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
491 * when the PCI-E Completion packets are aligned on an 8-byte
492 * boundary. Some PCI-E chip sets always align Completion packets; on
493 * the ones that do not, the alignment can be enforced by enabling
494 * ECRC generation (if supported).
496 * When PCI-E Completion packets are not aligned, it is actually more
497 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
499 * If the driver can neither enable ECRC nor verify that it has
500 * already been enabled, then it must use a firmware image which works
501 * around unaligned completion packets (ethp_z8e.dat), and it should
502 * also ensure that it never gives the device a Read-DMA which is
503 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
504 * enabled, then the driver should use the aligned (eth_z8e.dat)
505 * firmware image, and set tx_boundary to 4KB.
508 mxge_firmware_probe(mxge_softc_t *sc)
510 device_t dev = sc->dev;
514 sc->tx_boundary = 4096;
517 * Verify the max read request size was set to 4KB
518 * before trying the test with 4KB.
520 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) {
521 pectl = pci_read_config(dev, reg + 0x8, 2);
522 if ((pectl & (5 << 12)) != (5 << 12)) {
523 device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
525 sc->tx_boundary = 2048;
530 * Load the optimized firmware (which assumes aligned PCIe
531 * completions) in order to see if it works on this host.
533 sc->fw_name = mxge_fw_aligned;
534 status = mxge_load_firmware(sc, 1);
539 * Enable ECRC if possible
541 mxge_enable_nvidia_ecrc(sc);
544 * Run a DMA test which watches for unaligned completions and
545 * aborts on the first one seen. Not required on Z8ES or newer.
547 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
550 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
552 return 0; /* keep the aligned firmware */
555 device_printf(dev, "DMA test failed: %d\n", status);
556 if (status == ENOSYS) {
557 device_printf(dev, "Falling back to ethp! "
558 "Please install up to date fw\n");
564 mxge_select_firmware(mxge_softc_t *sc)
567 int force_firmware = mxge_force_firmware;
570 force_firmware = sc->throttle;
572 if (force_firmware != 0) {
573 if (force_firmware == 1)
578 device_printf(sc->dev,
579 "Assuming %s completions (forced)\n",
580 aligned ? "aligned" : "unaligned");
586 * If the PCIe link width is 4 or less, we can use the aligned
587 * firmware and skip any checks
589 if (sc->link_width != 0 && sc->link_width <= 4) {
590 device_printf(sc->dev, "PCIe x%d Link, "
591 "expect reduced performance\n", sc->link_width);
596 if (mxge_firmware_probe(sc) == 0)
601 sc->fw_name = mxge_fw_aligned;
602 sc->tx_boundary = 4096;
604 sc->fw_name = mxge_fw_unaligned;
605 sc->tx_boundary = 2048;
607 return mxge_load_firmware(sc, 0);
611 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
613 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
614 if_printf(sc->ifp, "Bad firmware type: 0x%x\n",
615 be32toh(hdr->mcp_type));
619 /* Save firmware version for sysctl */
620 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
622 if_printf(sc->ifp, "firmware id: %s\n", hdr->version);
624 ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
625 &sc->fw_ver_minor, &sc->fw_ver_tiny);
627 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR &&
628 sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
629 if_printf(sc->ifp, "Found firmware version %s\n",
631 if_printf(sc->ifp, "Driver needs %d.%d\n",
632 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
639 z_alloc(void *nil, u_int items, u_int size)
641 return kmalloc(items * size, M_TEMP, M_WAITOK);
645 z_free(void *nil, void *ptr)
/*
 * Fetch the firmware(9) image named in sc->fw_name, inflate it with
 * zlib (the uncompressed size is smuggled in fw->version), validate
 * the embedded MCP header, then PIO-copy the image into NIC SRAM in
 * 256-byte chunks.  Cleanup paths free the buffer and release the
 * firmware reference.
 * NOTE(review): extraction gaps (locals, zalloc/zfree hookup, abort
 * labels, *limit update) — code left byte-identical; verify against
 * upstream if_mxge.c.
 */
651 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
654 char *inflate_buffer;
655 const struct firmware *fw;
656 const mcp_gen_header_t *hdr;
663 fw = firmware_get(sc->fw_name);
665 if_printf(sc->ifp, "Could not find firmware image %s\n",
670 /* Setup zlib and decompress f/w */
671 bzero(&zs, sizeof(zs));
674 status = inflateInit(&zs);
675 if (status != Z_OK) {
681 * The uncompressed size is stored as the firmware version,
682 * which would otherwise go unused
684 fw_len = (size_t)fw->version;
685 inflate_buffer = kmalloc(fw_len, M_TEMP, M_WAITOK);
686 zs.avail_in = fw->datasize;
687 zs.next_in = __DECONST(char *, fw->data);
688 zs.avail_out = fw_len;
689 zs.next_out = inflate_buffer;
690 status = inflate(&zs, Z_FINISH);
691 if (status != Z_STREAM_END) {
692 if_printf(sc->ifp, "zlib %d\n", status);
694 goto abort_with_buffer;
/* Locate and sanity-check the MCP header inside the inflated image. */
699 htobe32(*(const uint32_t *)(inflate_buffer + MCP_HEADER_PTR_OFFSET));
700 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
701 if_printf(sc->ifp, "Bad firmware file");
703 goto abort_with_buffer;
705 hdr = (const void*)(inflate_buffer + hdr_offset);
707 status = mxge_validate_firmware(sc, hdr);
709 goto abort_with_buffer;
711 /* Copy the inflated firmware to NIC SRAM. */
712 for (i = 0; i < fw_len; i += 256) {
713 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, inflate_buffer + i,
714 min(256U, (unsigned)(fw_len - i)));
/* Cleanup: free decompression buffer and drop the firmware ref. */
723 kfree(inflate_buffer, M_TEMP);
726 firmware_put(fw, FIRMWARE_UNLOAD);
731 * Enable or disable periodic RDMAs from the host to make certain
732 * chipsets resend dropped PCIe messages
/*
 * Enable or disable the firmware's periodic dummy RDMA reads, used to
 * make certain chipsets resend dropped PCIe messages.  Builds an
 * 8-byte-aligned command block, PIO-copies it to the BOOT_DUMMY_RDMA
 * mailbox and polls the confirmation word for the firmware's ~0
 * acknowledgement.
 * NOTE(review): extraction gaps (buf_bytes declaration, confirm-clear,
 * poll-loop body/DELAY) — code left byte-identical; verify upstream.
 */
735 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
738 volatile uint32_t *confirm;
739 volatile char *submit;
740 uint32_t *buf, dma_low, dma_high;
/* Align the on-stack command block to 8 bytes. */
743 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
745 /* Clear confirmation addr */
746 confirm = (volatile uint32_t *)sc->cmd;
751 * Send an rdma command to the PCIe engine, and wait for the
752 * response in the confirmation address. The firmware should
753 * write a -1 there to indicate it is alive and well
755 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
756 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);
757 buf[0] = htobe32(dma_high); /* confirm addr MSW */
758 buf[1] = htobe32(dma_low); /* confirm addr LSW */
759 buf[2] = htobe32(0xffffffff); /* confirm data */
760 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.dmem_busaddr);
761 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.dmem_busaddr);
762 buf[3] = htobe32(dma_high); /* dummy addr MSW */
763 buf[4] = htobe32(dma_low); /* dummy addr LSW */
764 buf[5] = htobe32(enable); /* enable? */
766 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
768 mxge_pio_copy(submit, buf, 64);
/* Poll for the firmware's all-ones acknowledgement. */
773 while (*confirm != 0xffffffff && i < 20) {
777 if (*confirm != 0xffffffff) {
778 if_printf(sc->ifp, "dummy rdma %s failed (%p = 0x%x)",
779 (enable ? "enable" : "disable"), confirm, *confirm);
/*
 * Issue a single command to the firmware via the MXGEFW_ETH_CMD
 * mailbox and poll the DMA'd response block for completion, mapping
 * firmware status codes to errnos.  Callers must serialize access
 * (the shared sc->cmd response block is reused for every command).
 * NOTE(review): extraction gaps (poll DELAY, OK/errno case bodies,
 * returns) — code left byte-identical; verify upstream.
 */
784 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
787 char buf_bytes[sizeof(*buf) + 8];
788 volatile mcp_cmd_response_t *response = sc->cmd;
789 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
790 uint32_t dma_low, dma_high;
791 int err, sleep_total = 0;
793 /* Ensure buf is aligned to 8 bytes */
794 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
/* All fields are sent big-endian, as the firmware expects. */
796 buf->data0 = htobe32(data->data0);
797 buf->data1 = htobe32(data->data1);
798 buf->data2 = htobe32(data->data2);
799 buf->cmd = htobe32(cmd);
800 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
801 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);
803 buf->response_addr.low = htobe32(dma_low);
804 buf->response_addr.high = htobe32(dma_high);
/* Sentinel: firmware overwrites this when the command completes. */
806 response->result = 0xffffffff;
808 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
/* Poll up to 20 iterations for a response. */
814 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
816 switch (be32toh(response->result)) {
818 data->data0 = be32toh(response->data);
824 case MXGEFW_CMD_UNKNOWN:
827 case MXGEFW_CMD_ERROR_UNALIGNED:
830 case MXGEFW_CMD_ERROR_BUSY:
833 case MXGEFW_CMD_ERROR_I2C_ABSENT:
837 if_printf(sc->ifp, "command %d failed, result = %d\n",
838 cmd, be32toh(response->result));
846 if_printf(sc->ifp, "command %d timed out result = %d\n",
847 cmd, be32toh(response->result));
/*
 * Adopt the firmware already running on the NIC: locate its header in
 * SRAM via MCP_HEADER_PTR_OFFSET, copy the header to host memory with
 * byte-wide bus_space reads (SRAM cannot tolerate 4-byte reads of the
 * last dword), and validate it.  Also flags firmware 1.4.4–1.4.11,
 * whose rx filter drops broadcasts unless ALLMULTI is kept on.
 * NOTE(review): extraction gaps (locals, returns) — code left
 * byte-identical; verify upstream.
 */
853 mxge_adopt_running_firmware(mxge_softc_t *sc)
855 struct mcp_gen_header *hdr;
856 const size_t bytes = sizeof(struct mcp_gen_header);
861 * Find running firmware header
864 htobe32(*(volatile uint32_t *)(sc->sram + MCP_HEADER_PTR_OFFSET));
866 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
867 if_printf(sc->ifp, "Running firmware has bad header offset "
868 "(%zu)\n", hdr_offset);
873 * Copy header of running firmware from SRAM to host memory to
876 hdr = kmalloc(bytes, M_DEVBUF, M_WAITOK);
877 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
878 rman_get_bushandle(sc->mem_res), hdr_offset, (char *)hdr, bytes);
879 status = mxge_validate_firmware(sc, hdr);
880 kfree(hdr, M_DEVBUF);
883 * Check to see if adopted firmware has bug where adopting
884 * it will cause broadcasts to be filtered unless the NIC
885 * is kept in ALLMULTI mode
887 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
888 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
889 sc->adopted_rx_filter_bug = 1;
890 if_printf(sc->ifp, "Adopting fw %d.%d.%d: "
891 "working around rx filter bug\n",
892 sc->fw_ver_major, sc->fw_ver_minor, sc->fw_ver_tiny);
/*
 * Load firmware into NIC SRAM (via mxge_load_firmware_helper) and hand
 * off to it through the BOOT_HANDOFF mailbox, polling the confirmation
 * word for the firmware's ~0 acknowledgement.  If loading fails and
 * 'adopt' is set, fall back to validating/adopting whatever firmware
 * is already running (forcing the 2KB unaligned tx_boundary unless it
 * is known-safe).
 * NOTE(review): extraction gaps (locals, returns, poll-loop body) —
 * code left byte-identical; verify upstream.
 */
899 mxge_load_firmware(mxge_softc_t *sc, int adopt)
901 volatile uint32_t *confirm;
902 volatile char *submit;
904 uint32_t *buf, size, dma_low, dma_high;
/* Align the on-stack handoff block to 8 bytes. */
907 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
909 size = sc->sram_size;
910 status = mxge_load_firmware_helper(sc, &size);
916 * Try to use the currently running firmware, if
919 status = mxge_adopt_running_firmware(sc);
922 "failed to adopt running firmware\n");
925 if_printf(sc->ifp, "Successfully adopted running firmware\n");
927 if (sc->tx_boundary == 4096) {
929 "Using firmware currently running on NIC. "
931 if_printf(sc->ifp, "performance consider loading "
932 "optimized firmware\n");
/* Adopted firmware: treat as the unaligned (ethp) variant. */
934 sc->fw_name = mxge_fw_unaligned;
935 sc->tx_boundary = 2048;
939 /* Clear confirmation addr */
940 confirm = (volatile uint32_t *)sc->cmd;
945 * Send a reload command to the bootstrap MCP, and wait for the
946 * response in the confirmation address. The firmware should
947 * write a -1 there to indicate it is alive and well
950 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
951 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);
953 buf[0] = htobe32(dma_high); /* confirm addr MSW */
954 buf[1] = htobe32(dma_low); /* confirm addr LSW */
955 buf[2] = htobe32(0xffffffff); /* confirm data */
958 * FIX: All newest firmware should un-protect the bottom of
959 * the sram before handoff. However, the very first interfaces
960 * do not. Therefore the handoff copy must skip the first 8 bytes
962 /* where the code starts*/
963 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
964 buf[4] = htobe32(size - 8); /* length of code */
965 buf[5] = htobe32(8); /* where to copy to */
966 buf[6] = htobe32(0); /* where to jump to */
968 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
969 mxge_pio_copy(submit, buf, 64);
/* Poll for the firmware's all-ones acknowledgement. */
974 while (*confirm != 0xffffffff && i < 20) {
978 if (*confirm != 0xffffffff) {
979 if_printf(sc->ifp,"handoff failed (%p = 0x%x)",
987 mxge_update_mac_address(mxge_softc_t *sc)
990 uint8_t *addr = sc->mac_addr;
992 cmd.data0 = (addr[0] << 24) | (addr[1] << 16) |
993 (addr[2] << 8) | addr[3];
994 cmd.data1 = (addr[4] << 8) | (addr[5]);
995 return mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
999 mxge_change_pause(mxge_softc_t *sc, int pause)
1005 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, &cmd);
1007 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, &cmd);
1009 if_printf(sc->ifp, "Failed to set flow control mode\n");
1017 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1022 if (mxge_always_promisc)
1026 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, &cmd);
1028 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, &cmd);
1030 if_printf(sc->ifp, "Failed to set promisc mode\n");
/*
 * Reprogram the firmware multicast filter from the interface's
 * multicast list: temporarily go ALLMULTI, flush the old filters,
 * join each AF_LINK group address, then re-enable filtering.  Stays
 * in ALLMULTI for IFF_ALLMULTI, for firmware lacking multicast
 * support, and for the adopted-firmware rx filter bug.
 * NOTE(review): extraction gaps (returns, bcopy destinations) — code
 * left byte-identical; verify upstream.
 */
1034 mxge_set_multicast_list(mxge_softc_t *sc)
1037 struct ifmultiaddr *ifma;
1038 struct ifnet *ifp = sc->ifp;
1041 /* This firmware is known to not support multicast */
1042 if (!sc->fw_multicast_support)
1045 /* Disable multicast filtering while we play with the lists*/
1046 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1048 if_printf(ifp, "Failed MXGEFW_ENABLE_ALLMULTI, "
1049 "error status: %d\n", err);
/* Adopted 1.4.4-1.4.11 firmware must stay in ALLMULTI. */
1053 if (sc->adopted_rx_filter_bug)
1056 if (ifp->if_flags & IFF_ALLMULTI) {
1057 /* Request to disable multicast filtering, so quit here */
1061 /* Flush all the filters */
1062 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1064 if_printf(ifp, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, "
1065 "error status: %d\n", err);
1070 * Walk the multicast list, and add each address
1072 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1073 if (ifma->ifma_addr->sa_family != AF_LINK)
1076 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1078 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
/* Firmware expects the group address in network byte order. */
1080 cmd.data0 = htonl(cmd.data0);
1081 cmd.data1 = htonl(cmd.data1);
1082 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1084 if_printf(ifp, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
1085 "error status: %d\n", err);
1086 /* Abort, leaving multicast filtering off */
1091 /* Enable multicast filtering */
1092 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1094 if_printf(ifp, "Failed MXGEFW_DISABLE_ALLMULTI, "
1095 "error status: %d\n", err);
1101 mxge_max_mtu(mxge_softc_t *sc)
1106 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1107 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1109 /* try to set nbufs to see if it we can
1110 use virtually contiguous jumbos */
1112 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1115 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1117 /* otherwise, we're limited to MJUMPAGESIZE */
1118 return MJUMPAGESIZE - MXGEFW_PAD;
/*
 * Reset the NIC and re-establish firmware/driver shared state: issue
 * MXGEFW_CMD_RESET, restart dummy RDMAs, size the interrupt queue,
 * (re)enable RSS slices in the required order, exchange interrupt
 * queue DMA addresses and claim/deassert offsets, re-run the DMA
 * benchmark, zero all per-slice counters, and reapply MAC address,
 * promisc, pause, multicast and throttle settings.
 * NOTE(review): extraction gaps (returns, some assignments) — code
 * left byte-identical; verify upstream.
 */
1123 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1125 struct mxge_slice_state *ss;
1126 mxge_rx_done_t *rx_done;
1127 volatile uint32_t *irq_claim;
1129 int slice, status, rx_intr_size;
1132 * Try to send a reset command to the card to see if it
1135 memset(&cmd, 0, sizeof (cmd));
1136 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1138 if_printf(sc->ifp, "failed reset\n");
1142 mxge_dummy_rdma(sc, 1);
1145 * Set the intrq size
1146 * XXX assume 4byte mcp_slot
1148 rx_intr_size = sc->rx_intr_slots * sizeof(mcp_slot_t);
1149 cmd.data0 = rx_intr_size;
1150 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1153 * Even though we already know how many slices are supported
1154 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1155 * has magic side effects, and must be called after a reset.
1156 * It must be called prior to calling any RSS related cmds,
1157 * including assigning an interrupt queue for anything but
1158 * slice 0. It must also be called *after*
1159 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1160 * the firmware to compute offsets.
1162 if (sc->num_slices > 1) {
1163 /* Ask the maximum number of slices it supports */
1164 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
1166 if_printf(sc->ifp, "failed to get number of slices\n");
1171 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1172 * to setting up the interrupt queue DMA
1174 cmd.data0 = sc->num_slices;
1175 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1176 if (sc->num_tx_rings > 1)
1177 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1178 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, &cmd);
1180 if_printf(sc->ifp, "failed to set number of slices\n");
1185 if (interrupts_setup) {
1186 /* Now exchange information about interrupts */
1187 for (slice = 0; slice < sc->num_slices; slice++) {
1188 ss = &sc->ss[slice];
1190 rx_done = &ss->rx_data.rx_done;
1191 memset(rx_done->entry, 0, rx_intr_size);
1194 MXGE_LOWPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
1196 MXGE_HIGHPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
1198 status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA,
/* Fetch SRAM offsets of the coalescing/claim/deassert registers. */
1203 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET,
1205 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1207 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1208 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1210 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
1211 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1214 if_printf(sc->ifp, "failed set interrupt parameters\n");
1218 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1220 /* Run a DMA benchmark */
1221 mxge_dma_test(sc, MXGEFW_DMA_TEST);
1223 for (slice = 0; slice < sc->num_slices; slice++) {
1224 ss = &sc->ss[slice];
/* Each slice owns a pair of claim registers. */
1226 ss->irq_claim = irq_claim + (2 * slice);
1228 /* Reset mcp/driver shared state back to 0 */
1229 ss->rx_data.rx_done.idx = 0;
1232 ss->tx.pkt_done = 0;
1233 ss->tx.queue_active = 0;
1234 ss->tx.activate = 0;
1235 ss->tx.deactivate = 0;
1236 ss->rx_data.rx_big.cnt = 0;
1237 ss->rx_data.rx_small.cnt = 0;
1238 if (ss->fw_stats != NULL)
1239 bzero(ss->fw_stats, sizeof(*ss->fw_stats));
1241 sc->rdma_tags_available = 15;
/* Reapply interface-level settings after the reset. */
1243 status = mxge_update_mac_address(sc);
1244 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1245 mxge_change_pause(sc, sc->pause);
1246 mxge_set_multicast_list(sc);
1249 cmd.data0 = sc->throttle;
1250 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd))
1251 if_printf(sc->ifp, "can't enable throttle\n");
1257 mxge_change_throttle(SYSCTL_HANDLER_ARGS)
1262 unsigned int throttle;
1265 throttle = sc->throttle;
1266 err = sysctl_handle_int(oidp, &throttle, arg2, req);
1270 if (throttle == sc->throttle)
1273 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
1276 ifnet_serialize_all(sc->ifp);
1278 cmd.data0 = throttle;
1279 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
1281 sc->throttle = throttle;
1283 ifnet_deserialize_all(sc->ifp);
1288 mxge_change_use_rss(SYSCTL_HANDLER_ARGS)
1294 use_rss = sc->use_rss;
1295 err = sysctl_handle_int(oidp, &use_rss, arg2, req);
1299 if (use_rss == sc->use_rss)
1302 ifnet_serialize_all(sc->ifp);
1304 sc->use_rss = use_rss;
1305 if (sc->ifp->if_flags & IFF_RUNNING) {
1310 ifnet_deserialize_all(sc->ifp);
1315 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1318 unsigned int intr_coal_delay;
1322 intr_coal_delay = sc->intr_coal_delay;
1323 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1327 if (intr_coal_delay == sc->intr_coal_delay)
1330 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1333 ifnet_serialize_all(sc->ifp);
1335 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1336 sc->intr_coal_delay = intr_coal_delay;
1338 ifnet_deserialize_all(sc->ifp);
1343 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1349 arg2 = be32toh(*(int *)arg1);
1351 err = sysctl_handle_int(oidp, arg1, arg2, req);
1357 mxge_rem_sysctls(mxge_softc_t *sc)
1359 if (sc->ss != NULL) {
1360 struct mxge_slice_state *ss;
1363 for (slice = 0; slice < sc->num_slices; slice++) {
1364 ss = &sc->ss[slice];
1365 if (ss->sysctl_tree != NULL) {
1366 sysctl_ctx_free(&ss->sysctl_ctx);
1367 ss->sysctl_tree = NULL;
1372 if (sc->slice_sysctl_tree != NULL) {
1373 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1374 sc->slice_sysctl_tree = NULL;
/*
 * Register the driver's sysctl tree under the device node:
 *  - static device information (firmware version, serial number, ...);
 *  - performance tunables wired to the mxge_change_* proc handlers;
 *  - the firmware statistics block, which the NIC keeps in network
 *    byte order — hence every stat goes through mxge_handle_be32;
 *  - one numbered sub-node per slice with debug counters.
 */
1379 mxge_add_sysctls(mxge_softc_t *sc)
1381 struct sysctl_ctx_list *ctx;
1382 struct sysctl_oid_list *children;
1384 struct mxge_slice_state *ss;
1388 ctx = device_get_sysctl_ctx(sc->dev);
1389 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
/* Firmware stats live in slice 0's DMA block */
1390 fw = sc->ss[0].fw_stats;
1393 * Random information
1395 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
1396 CTLFLAG_RD, &sc->fw_version, 0, "firmware version");
1398 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial_number",
1399 CTLFLAG_RD, &sc->serial_number_string, 0, "serial number");
1401 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "product_code",
1402 CTLFLAG_RD, &sc->product_code_string, 0, "product code");
1404 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcie_link_width",
1405 CTLFLAG_RD, &sc->link_width, 0, "link width");
1407 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary",
1408 CTLFLAG_RD, &sc->tx_boundary, 0, "tx boundary");
1410 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_combine",
1411 CTLFLAG_RD, &sc->wc, 0, "write combining PIO");
1413 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_dma_MBs",
1414 CTLFLAG_RD, &sc->read_dma, 0, "DMA Read speed in MB/s");
1416 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_dma_MBs",
1417 CTLFLAG_RD, &sc->write_dma, 0, "DMA Write speed in MB/s");
1419 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_write_dma_MBs",
1420 CTLFLAG_RD, &sc->read_write_dma, 0,
1421 "DMA concurrent Read/Write speed in MB/s");
1423 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "watchdog_resets",
1424 CTLFLAG_RD, &sc->watchdog_resets, 0,
1425 "Number of times NIC was reset");
1428 * Performance related tunables
1430 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal_delay",
1431 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_intr_coal, "I",
1432 "Interrupt coalescing delay in usecs");
1434 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "throttle",
1435 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_throttle, "I",
1436 "Transmit throttling");
1438 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "use_rss",
1439 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_use_rss, "I",
1442 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "deassert_wait",
1443 CTLFLAG_RW, &mxge_deassert_wait, 0,
1444 "Wait for IRQ line to go low in ihandler");
1447 * Stats block from firmware is in network byte order.
1450 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "link_up",
1451 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 0,
1452 mxge_handle_be32, "I", "link up");
1454 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_tags_available",
1455 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 0,
1456 mxge_handle_be32, "I", "rdma_tags_available");
1458 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_crc32",
1459 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_crc32, 0,
1460 mxge_handle_be32, "I", "dropped_bad_crc32");
1462 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_phy",
1463 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_phy, 0,
1464 mxge_handle_be32, "I", "dropped_bad_phy");
1466 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_error_or_filtered",
1467 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_error_or_filtered, 0,
1468 mxge_handle_be32, "I", "dropped_link_error_or_filtered");
1470 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_overflow",
1471 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 0,
1472 mxge_handle_be32, "I", "dropped_link_overflow");
1474 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_multicast_filtered",
1475 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_multicast_filtered, 0,
1476 mxge_handle_be32, "I", "dropped_multicast_filtered");
1478 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_big_buffer",
1479 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 0,
1480 mxge_handle_be32, "I", "dropped_no_big_buffer");
1482 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_small_buffer",
1483 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_small_buffer, 0,
1484 mxge_handle_be32, "I", "dropped_no_small_buffer");
1486 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_overrun",
1487 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 0,
1488 mxge_handle_be32, "I", "dropped_overrun");
1490 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_pause",
1491 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_pause, 0,
1492 mxge_handle_be32, "I", "dropped_pause");
1494 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_runt",
1495 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 0,
1496 mxge_handle_be32, "I", "dropped_runt");
1498 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_unicast_filtered",
1499 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 0,
1500 mxge_handle_be32, "I", "dropped_unicast_filtered");
1502 /* add counters exported for debugging from all slices */
1503 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1504 sc->slice_sysctl_tree = SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx,
1505 children, OID_AUTO, "slice", CTLFLAG_RD, 0, "");
1506 if (sc->slice_sysctl_tree == NULL) {
1507 device_printf(sc->dev, "can't add slice sysctl node\n");
1511 for (slice = 0; slice < sc->num_slices; slice++) {
1512 ss = &sc->ss[slice];
1513 sysctl_ctx_init(&ss->sysctl_ctx);
/* Each slice gets its own context under a node named after its index */
1514 ctx = &ss->sysctl_ctx;
1515 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1516 ksprintf(slice_num, "%d", slice);
1517 ss->sysctl_tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
1518 slice_num, CTLFLAG_RD, 0, "");
1519 if (ss->sysctl_tree == NULL) {
1520 device_printf(sc->dev,
1521 "can't add %d slice sysctl node\n", slice);
1522 return; /* XXX continue? */
1524 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1527 * XXX change to ULONG
1530 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_small_cnt",
1531 CTLFLAG_RD, &ss->rx_data.rx_small.cnt, 0, "rx_small_cnt");
1533 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_big_cnt",
/* NOTE(review): description string says "rx_small_cnt" but this is the
 * big-ring counter — looks like a copy/paste slip; the fix changes a
 * runtime string, so confirm before editing. */
1534 CTLFLAG_RD, &ss->rx_data.rx_big.cnt, 0, "rx_small_cnt");
1536 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_req",
1537 CTLFLAG_RD, &ss->tx.req, 0, "tx_req");
1539 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_done",
1540 CTLFLAG_RD, &ss->tx.done, 0, "tx_done");
1542 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_pkt_done",
/* NOTE(review): description string says "tx_done" for the tx_pkt_done
 * node — same copy/paste pattern as above. */
1543 CTLFLAG_RD, &ss->tx.pkt_done, 0, "tx_done");
1545 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_queue_active",
1546 CTLFLAG_RD, &ss->tx.queue_active, 0, "tx_queue_active");
1548 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_activate",
1549 CTLFLAG_RD, &ss->tx.activate, 0, "tx_activate");
1551 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_deactivate",
1552 CTLFLAG_RD, &ss->tx.deactivate, 0, "tx_deactivate");
1557 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1558 * backwards one at a time and handle ring wraps
/*
 * Used when the request list would wrap the ring: write the
 * descriptors one at a time from last to first, so the first
 * (validating) slot is the final one made visible to the NIC.
 */
1560 static __inline void
1561 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1562 mcp_kreq_ether_send_t *src, int cnt)
1564 int idx, starting_slot;
1566 starting_slot = tx->req;
/* Mask keeps the slot index inside the power-of-2 ring */
1569 idx = (starting_slot + cnt) & tx->mask;
1570 mxge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src));
1576 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1577 * at most 32 bytes at a time, so as to avoid involving the software
1578 * pio handler in the nic. We re-write the first segment's flags
1579 * to mark them valid only after writing the entire chain
1581 static __inline void
1582 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt)
1586 volatile uint32_t *dst_ints;
1587 mcp_kreq_ether_send_t *srcp;
1588 volatile mcp_kreq_ether_send_t *dstp, *dst;
1591 idx = tx->req & tx->mask;
/* Remember the real flags of the first segment; they are restored
 * (and made visible to the NIC) only after the whole chain is copied */
1593 last_flags = src->flags;
1596 dst = dstp = &tx->lanai[idx];
/* Fast path: the chain fits without wrapping the ring */
1599 if ((idx + cnt) < tx->mask) {
1600 for (i = 0; i < cnt - 1; i += 2) {
1601 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1602 wmb(); /* force write every 32 bytes */
1608 * Submit all but the first request, and ensure
1609 * that it is submitted below
/* Wrap case: fall back to one-at-a-time backwards submission */
1611 mxge_submit_req_backwards(tx, src, cnt);
1615 /* Submit the first request */
1616 mxge_pio_copy(dstp, srcp, sizeof(*src));
1617 wmb(); /* barrier before setting valid flag */
1620 /* Re-write the last 32-bits with the valid flags */
1621 src->flags = last_flags;
1622 src_ints = (uint32_t *)src;
1624 dst_ints = (volatile uint32_t *)dst;
1626 *dst_ints = *src_ints;
/*
 * Ensure a TSO packet's headers (link + IP + TCP, per the csum_*
 * lengths stashed in the packet header) are contiguous in the first
 * mbuf, pulling them up via m_pullup() if necessary.  *mp may be
 * replaced; presumably returns an error when the pullup fails —
 * return path not visible here.
 */
1632 mxge_pullup_tso(struct mbuf **mp)
1634 int hoff, iphlen, thoff;
1638 KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1640 iphlen = m->m_pkthdr.csum_iphlen;
1641 thoff = m->m_pkthdr.csum_thlen;
1642 hoff = m->m_pkthdr.csum_lhlen;
1644 KASSERT(iphlen > 0, ("invalid ip hlen"));
1645 KASSERT(thoff > 0, ("invalid tcp hlen"));
1646 KASSERT(hoff > 0, ("invalid ether hlen"));
/* Only pull up when the headers actually span mbufs */
1648 if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1649 m = m_pullup(m, hoff + iphlen + thoff);
/*
 * Encapsulate a TSO packet into firmware send requests.
 *
 * Walks the busdma segment list, splitting segments at MSS boundaries
 * ("cuts") and tracking how many RDMAs make up each firmware request
 * (see the long rdma_count commentary below).  A negative cum_len
 * means we are still inside the TSO header.  On success the request
 * list is submitted to the NIC; on overflow of tx->max_desc the DMA
 * map is unloaded and the packet is dropped.
 */
1660 mxge_encap_tso(mxge_tx_ring_t *tx, struct mxge_buffer_state *info_map,
1661 struct mbuf *m, int busdma_seg_cnt)
1663 mcp_kreq_ether_send_t *req;
1664 bus_dma_segment_t *seg;
1665 uint32_t low, high_swapped;
1666 int len, seglen, cum_len, cum_len_next;
1667 int next_is_first, chop, cnt, rdma_count, small;
1668 uint16_t pseudo_hdr_offset, cksum_offset, mss;
1669 uint8_t flags, flags_next;
1670 struct mxge_buffer_state *info_last;
1671 bus_dmamap_t map = info_map->map;
1673 mss = m->m_pkthdr.tso_segsz;
1676 * Negative cum_len signifies to the send loop that we are
1677 * still in the header portion of the TSO packet.
1679 cum_len = -(m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen +
1680 m->m_pkthdr.csum_thlen);
1683 * TSO implies checksum offload on this hardware
1685 cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
1686 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1689 * For TSO, pseudo_hdr_offset holds mss. The firmware figures
1690 * out where to put the checksum by parsing the header.
1692 pseudo_hdr_offset = htobe16(mss);
1700 * "rdma_count" is the number of RDMAs belonging to the current
1701 * packet BEFORE the current send request. For non-TSO packets,
1702 * this is equal to "count".
1704 * For TSO packets, rdma_count needs to be reset to 0 after a
1707 * The rdma_count field of the send request is the number of
1708 * RDMAs of the packet starting at that request. For TSO send
1709 * requests with one or more cuts in the middle, this is the
1710 * number of RDMAs starting after the last cut in the request.
1711 * All previous segments before the last cut implicitly have 1
1714 * Since the number of RDMAs is not known beforehand, it must be
1715 * filled-in retroactively - after each segmentation cut or at
1716 * the end of the entire packet.
1719 while (busdma_seg_cnt) {
1721 * Break the busdma segment up into pieces
1723 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1724 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1728 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1730 cum_len_next = cum_len + seglen;
/* Retroactively fill in the RDMA count of the previous request */
1731 (req - rdma_count)->rdma_count = rdma_count + 1;
1732 if (__predict_true(cum_len >= 0)) {
/* Payload region: chop at MSS boundaries */
1734 chop = (cum_len_next > mss);
1735 cum_len_next = cum_len_next % mss;
1736 next_is_first = (cum_len_next == 0);
1737 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1739 next_is_first * MXGEFW_FLAGS_FIRST;
/* Branch-free update: -1 resets the count on a cut, the
 * chop & !next_is_first term counts the carried-over RDMA */
1740 rdma_count |= -(chop | next_is_first);
1741 rdma_count += chop & !next_is_first;
1742 } else if (cum_len_next >= 0) {
/* Transition from header to payload within this segment */
1747 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1748 flags_next = MXGEFW_FLAGS_TSO_PLD |
1749 MXGEFW_FLAGS_FIRST |
1750 (small * MXGEFW_FLAGS_SMALL);
1753 req->addr_high = high_swapped;
1754 req->addr_low = htobe32(low);
1755 req->pseudo_hdr_offset = pseudo_hdr_offset;
1757 req->rdma_count = 1;
1758 req->length = htobe16(seglen);
1759 req->cksum_offset = cksum_offset;
1761 flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD);
1764 cum_len = cum_len_next;
1769 if (__predict_false(cksum_offset > seglen))
1770 cksum_offset -= seglen;
/* Bail if the expanded chain no longer fits in the ring slot budget */
1773 if (__predict_false(cnt > tx->max_desc))
1779 (req - rdma_count)->rdma_count = rdma_count;
/* Mark the trailing requests of the final segment */
1783 req->flags |= MXGEFW_FLAGS_TSO_LAST;
1784 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1786 info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];
/* Park the DMA map on the last descriptor so it is unloaded when the
 * whole chain completes */
1788 info_map->map = info_last->map;
1789 info_last->map = map;
1792 mxge_submit_req(tx, tx->req_list, cnt);
1794 if (tx->send_go != NULL && tx->queue_active == 0) {
1795 /* Tell the NIC to start polling this slice */
1797 tx->queue_active = 1;
/* Error path: drop the packet and release its DMA resources */
1804 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
/*
 * Encapsulate one outbound mbuf chain into firmware send requests on
 * the given TX ring.  Handles DMA mapping (with defragmentation),
 * checksum-offload flags, the small-packet hint, runt padding to the
 * 60-byte ethernet minimum (using the shared zero page at `zeropad`),
 * and hands TSO packets off to mxge_encap_tso().
 */
1810 mxge_encap(mxge_tx_ring_t *tx, struct mbuf *m, bus_addr_t zeropad)
1812 mcp_kreq_ether_send_t *req;
1813 bus_dma_segment_t *seg;
1815 int cnt, cum_len, err, i, idx, odd_flag;
1816 uint16_t pseudo_hdr_offset;
1817 uint8_t flags, cksum_offset;
1818 struct mxge_buffer_state *info_map, *info_last;
/* TSO headers must be contiguous before mapping */
1820 if (m->m_pkthdr.csum_flags & CSUM_TSO) {
1821 err = mxge_pullup_tso(&m);
1822 if (__predict_false(err))
1827 * Map the frame for DMA
1829 idx = tx->req & tx->mask;
1830 info_map = &tx->info[idx];
1831 map = info_map->map;
/* max_desc - 2 leaves room for the runt-pad descriptor */
1833 err = bus_dmamap_load_mbuf_defrag(tx->dmat, map, &m,
1834 tx->seg_list, tx->max_desc - 2, &cnt, BUS_DMA_NOWAIT);
1835 if (__predict_false(err != 0))
1837 bus_dmamap_sync(tx->dmat, map, BUS_DMASYNC_PREWRITE);
1840 * TSO is different enough, we handle it in another routine
1842 if (m->m_pkthdr.csum_flags & CSUM_TSO)
1843 return mxge_encap_tso(tx, info_map, m, cnt);
1847 pseudo_hdr_offset = 0;
1848 flags = MXGEFW_FLAGS_NO_TSO;
1851 * Checksum offloading
1853 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1854 cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
1855 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
1856 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
1857 req->cksum_offset = cksum_offset;
1858 flags |= MXGEFW_FLAGS_CKSUM;
1859 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
/* Hint the firmware about short frames */
1863 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
1864 flags |= MXGEFW_FLAGS_SMALL;
1867 * Convert segments into a request list
1871 req->flags = MXGEFW_FLAGS_FIRST;
1872 for (i = 0; i < cnt; i++) {
1873 req->addr_low = htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
1874 req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1875 req->length = htobe16(seg->ds_len);
1876 req->cksum_offset = cksum_offset;
1877 if (cksum_offset > seg->ds_len)
1878 cksum_offset -= seg->ds_len;
1881 req->pseudo_hdr_offset = pseudo_hdr_offset;
1882 req->pad = 0; /* complete solid 16-byte block */
1883 req->rdma_count = 1;
1884 req->flags |= flags | ((cum_len & 1) * odd_flag);
1885 cum_len += seg->ds_len;
1893 * Pad runt to 60 bytes
/* Extra descriptor points at the shared zero page for the padding */
1897 req->addr_low = htobe32(MXGE_LOWPART_TO_U32(zeropad));
1898 req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(zeropad));
1899 req->length = htobe16(60 - cum_len);
1900 req->cksum_offset = 0;
1901 req->pseudo_hdr_offset = pseudo_hdr_offset;
1902 req->pad = 0; /* complete solid 16-byte block */
1903 req->rdma_count = 1;
1904 req->flags |= flags | ((cum_len & 1) * odd_flag);
1908 tx->req_list[0].rdma_count = cnt;
1910 /* print what the firmware will see */
1911 for (i = 0; i < cnt; i++) {
1912 kprintf("%d: addr: 0x%x 0x%x len:%d pso%d,"
1913 "cso:%d, flags:0x%x, rdma:%d\n",
1914 i, (int)ntohl(tx->req_list[i].addr_high),
1915 (int)ntohl(tx->req_list[i].addr_low),
1916 (int)ntohs(tx->req_list[i].length),
1917 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
1918 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
1919 tx->req_list[i].rdma_count);
1921 kprintf("--------------\n");
1923 info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];
/* Park the DMA map on the last descriptor; unloaded at completion */
1925 info_map->map = info_last->map;
1926 info_last->map = map;
1929 mxge_submit_req(tx, tx->req_list, cnt);
1931 if (tx->send_go != NULL && tx->queue_active == 0) {
1932 /* Tell the NIC to start polling this slice */
1934 tx->queue_active = 1;
/*
 * ifnet if_start handler for one TX subqueue: dequeue packets and
 * encapsulate them while ring slots remain.  Sets the subqueue
 * OACTIVE when the ring fills and arms a 5-second watchdog while
 * transmits are pending.  Caller holds the TX serializer.
 */
1946 mxge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
1948 mxge_softc_t *sc = ifp->if_softc;
1949 mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);
1953 KKASSERT(tx->ifsq == ifsq);
1954 ASSERT_SERIALIZED(&tx->tx_serialize);
1956 if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
1959 zeropad = sc->zeropad_dma.dmem_busaddr;
/* Keep going while a full worst-case descriptor chain still fits */
1960 while (tx->mask - (tx->req - tx->done) > tx->max_desc) {
1964 m = ifsq_dequeue(ifsq);
1969 error = mxge_encap(tx, m, zeropad);
1973 IFNET_STAT_INC(ifp, oerrors, 1);
1976 /* Ran out of transmit slots */
1977 ifsq_set_oactive(ifsq);
1980 tx->watchdog.wd_timer = 5;
/*
 * Per-subqueue TX watchdog.  Before resetting the NIC, check whether
 * the stall is actually the link partner flow-controlling us: if the
 * firmware's dropped_pause counter has NOT advanced since the last
 * check, the queue is genuinely stuck and the NIC is reset; otherwise
 * just warn about pause-frame blocking.
 */
1984 mxge_watchdog(struct ifaltq_subque *ifsq)
1986 struct ifnet *ifp = ifsq_get_ifp(ifsq);
1987 struct mxge_softc *sc = ifp->if_softc;
1988 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
1989 mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);
1991 ASSERT_IFNET_SERIALIZED_ALL(ifp);
1993 /* Check for pause blocking before resetting */
1994 if (tx->watchdog_rx_pause == rx_pause) {
1995 mxge_warn_stuck(sc, tx, 0);
1996 mxge_watchdog_reset(sc);
1999 if_printf(ifp, "Flow control blocking xmits, "
2000 "check link partner\n");
/* Remember the pause count for the next comparison */
2002 tx->watchdog_rx_pause = rx_pause;
2006 * Copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2007 * at most 32 bytes at a time, so as to avoid involving the software
2008 * pio handler in the nic. We re-write the first segment's low
2009 * DMA address to mark it valid only after we write the entire chunk
2012 static __inline void
2013 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2014 mcp_kreq_ether_recv_t *src)
/* Poison the first entry's low address so the NIC ignores the batch
 * until the final single-word store below validates it */
2018 low = src->addr_low;
2019 src->addr_low = 0xffffffff;
2020 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2022 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
/* Restore the real address in both the shadow and the NIC copy;
 * this last store makes all 8 entries visible */
2024 src->addr_low = low;
2025 dst->addr_low = low;
/*
 * Allocate and DMA-map a small (MHLEN) receive mbuf for ring slot
 * `idx`, recording its bus address in the shadow ring.  Every 8th
 * slot, the accumulated batch is pushed to the NIC via
 * mxge_submit_8rx().  `init` selects blocking allocation during
 * initialization and skips the recycle path on failure.
 */
2030 mxge_get_buf_small(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx,
2033 bus_dma_segment_t seg;
2035 int cnt, err, mflag;
2038 if (__predict_false(init))
2041 m = m_gethdr(mflag, MT_DATA);
2044 if (__predict_false(init)) {
2046 * During initialization, there
2047 * is nothing to setup; bail out
2053 m->m_len = m->m_pkthdr.len = MHLEN;
2055 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2056 &seg, 1, &cnt, BUS_DMA_NOWAIT);
2059 if (__predict_false(init)) {
2061 * During initialization, there
2062 * is nothing to setup; bail out
2069 rx->info[idx].m = m;
/* Shadow ring keeps the big-endian split DMA address the NIC expects */
2070 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2071 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
/* Batch of 8 complete: hand the group to the NIC */
2075 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
/*
 * Allocate and DMA-map a big receive buffer (cluster or jumbo page,
 * per rx->cl_size) for ring slot `idx`.  Same structure and batching
 * as mxge_get_buf_small().
 */
2080 mxge_get_buf_big(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx,
2083 bus_dma_segment_t seg;
2085 int cnt, err, mflag;
2088 if (__predict_false(init))
/* Cluster size decides between a normal cluster and a jumbo page */
2091 if (rx->cl_size == MCLBYTES)
2092 m = m_getcl(mflag, MT_DATA, M_PKTHDR);
2094 m = m_getjcl(mflag, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
2097 if (__predict_false(init)) {
2099 * During initialization, there
2100 * is nothing to setup; bail out
2106 m->m_len = m->m_pkthdr.len = rx->cl_size;
2108 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2109 &seg, 1, &cnt, BUS_DMA_NOWAIT);
2112 if (__predict_false(init)) {
2114 * During initialization, there
2115 * is nothing to setup; bail out
2122 rx->info[idx].m = m;
/* Shadow ring keeps the big-endian split DMA address the NIC expects */
2123 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2124 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
/* Batch of 8 complete: hand the group to the NIC */
2128 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2133 * Myri10GE hardware checksums are not valid if the sender
2134 * padded the frame with non-zero padding. This is because
2135 * the firmware just does a simple 16-bit 1s complement
2136 * checksum across the entire frame, excluding the first 14
2137 * bytes. It is best to simply check the checksum and
2138 * tell the stack about it only if the checksum is good
2140 static __inline uint16_t
2141 mxge_rx_csum(struct mbuf *m, int csum)
2143 const struct ether_header *eh;
2144 const struct ip *ip;
2147 eh = mtod(m, const struct ether_header *);
2149 /* Only deal with IPv4 TCP & UDP for now */
2150 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2153 ip = (const struct ip *)(eh + 1);
2154 if (__predict_false(ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP))
/* Fold the pseudo-header into the firmware's whole-frame sum; a good
 * packet yields 0 */
2158 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2159 htonl(ntohs(csum) + ntohs(ip->ip_len) +
2160 - (ip->ip_hl << 2) + ip->ip_p));
/*
 * Strip an in-band 802.1q header from a received frame: adjust the
 * firmware's partial checksum to exclude the 4 encapsulation bytes,
 * stash the tag in the mbuf packet header (M_VLANTAG), and slide the
 * ethernet addresses over the removed header.
 */
2169 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2171 struct ether_vlan_header *evl;
2174 evl = mtod(m, struct ether_vlan_header *);
2177 * Fix checksum by subtracting EVL_ENCAPLEN bytes after
2178 * what the firmware thought was the end of the ethernet
2182 /* Put checksum into host byte order */
2183 *csum = ntohs(*csum);
2185 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
/* Ones-complement subtraction with end-around carry folding */
2187 *csum += ((*csum) < ~partial);
2188 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2189 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2192 * Restore checksum to network byte order;
2193 * later consumers expect this
2195 *csum = htons(*csum);
2198 m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
2199 m->m_flags |= M_VLANTAG;
2202 * Remove the 802.1q header by copying the Ethernet
2203 * addresses over it and adjusting the beginning of
2204 * the data in the mbuf. The encapsulated Ethernet
2205 * type field is already in place.
2207 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN,
2208 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2209 m_adj(m, EVL_ENCAPLEN);
/*
 * Deliver one completed big-ring receive: replace the ring mbuf
 * (dropping the frame and recycling the old mbuf if allocation
 * fails), swap DMA maps with the spare, strip any VLAN tag, validate
 * the hardware checksum, and pass the packet to the stack.
 */
2213 static __inline void
2214 mxge_rx_done_big(struct ifnet *ifp, mxge_rx_ring_t *rx,
2215 uint32_t len, uint32_t csum)
2218 const struct ether_header *eh;
2219 bus_dmamap_t old_map;
2222 idx = rx->cnt & rx->mask;
2225 /* Save a pointer to the received mbuf */
2226 m = rx->info[idx].m;
2228 /* Try to replace the received mbuf */
2229 if (mxge_get_buf_big(rx, rx->extra_map, idx, FALSE)) {
2230 /* Drop the frame -- the old mbuf is re-cycled */
2231 IFNET_STAT_INC(ifp, ierrors, 1);
2235 /* Unmap the received buffer */
2236 old_map = rx->info[idx].map;
2237 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2238 bus_dmamap_unload(rx->dmat, old_map);
2240 /* Swap the bus_dmamap_t's */
2241 rx->info[idx].map = rx->extra_map;
2242 rx->extra_map = old_map;
2245 * mcp implicitly skips 1st 2 bytes so that packet is properly
2248 m->m_data += MXGEFW_PAD;
2250 m->m_pkthdr.rcvif = ifp;
2251 m->m_len = m->m_pkthdr.len = len;
2253 IFNET_STAT_INC(ifp, ipackets, 1);
2255 eh = mtod(m, const struct ether_header *);
2256 if (eh->ether_type == htons(ETHERTYPE_VLAN))
2257 mxge_vlan_tag_remove(m, &csum);
2259 /* If the checksum is valid, mark it in the mbuf header */
2260 if ((ifp->if_capenable & IFCAP_RXCSUM) &&
2261 mxge_rx_csum(m, csum) == 0) {
2262 /* Tell the stack that the checksum is good */
2263 m->m_pkthdr.csum_data = 0xffff;
2264 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2267 ifp->if_input(ifp, m, NULL, -1);
/*
 * Deliver one completed small-ring receive.  Identical flow to
 * mxge_rx_done_big() but refills via mxge_get_buf_small().
 */
2270 static __inline void
2271 mxge_rx_done_small(struct ifnet *ifp, mxge_rx_ring_t *rx,
2272 uint32_t len, uint32_t csum)
2274 const struct ether_header *eh;
2276 bus_dmamap_t old_map;
2279 idx = rx->cnt & rx->mask;
2282 /* Save a pointer to the received mbuf */
2283 m = rx->info[idx].m;
2285 /* Try to replace the received mbuf */
2286 if (mxge_get_buf_small(rx, rx->extra_map, idx, FALSE)) {
2287 /* Drop the frame -- the old mbuf is re-cycled */
2288 IFNET_STAT_INC(ifp, ierrors, 1);
2292 /* Unmap the received buffer */
2293 old_map = rx->info[idx].map;
2294 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2295 bus_dmamap_unload(rx->dmat, old_map);
2297 /* Swap the bus_dmamap_t's */
2298 rx->info[idx].map = rx->extra_map;
2299 rx->extra_map = old_map;
2302 * mcp implicitly skips 1st 2 bytes so that packet is properly
2305 m->m_data += MXGEFW_PAD;
2307 m->m_pkthdr.rcvif = ifp;
2308 m->m_len = m->m_pkthdr.len = len;
2310 IFNET_STAT_INC(ifp, ipackets, 1);
2312 eh = mtod(m, const struct ether_header *);
2313 if (eh->ether_type == htons(ETHERTYPE_VLAN))
2314 mxge_vlan_tag_remove(m, &csum);
2316 /* If the checksum is valid, mark it in the mbuf header */
2317 if ((ifp->if_capenable & IFCAP_RXCSUM) &&
2318 mxge_rx_csum(m, csum) == 0) {
2319 /* Tell the stack that the checksum is good */
2320 m->m_pkthdr.csum_data = 0xffff;
2321 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2324 ifp->if_input(ifp, m, NULL, -1);
/*
 * Drain the receive completion ring: dispatch each nonzero-length
 * entry to the small- or big-buffer handler based on its size, up to
 * `cycle` entries (a negative cycle drains everything pending).
 * A zero length marks an entry as consumed.
 */
2327 static __inline void
2328 mxge_clean_rx_done(struct ifnet *ifp, struct mxge_rx_data *rx_data, int cycle)
2330 mxge_rx_done_t *rx_done = &rx_data->rx_done;
2332 while (rx_done->entry[rx_done->idx].length != 0 && cycle != 0) {
2333 uint16_t length, checksum;
2335 length = ntohs(rx_done->entry[rx_done->idx].length);
/* Clear the entry so the NIC can reuse it and the loop terminates */
2336 rx_done->entry[rx_done->idx].length = 0;
2338 checksum = rx_done->entry[rx_done->idx].checksum;
2340 if (length <= MXGE_RX_SMALL_BUFLEN) {
2341 mxge_rx_done_small(ifp, &rx_data->rx_small,
2344 mxge_rx_done_big(ifp, &rx_data->rx_big,
2349 rx_done->idx &= rx_done->mask;
/*
 * Reap transmit completions up to the firmware's reported packet
 * count (mcp_idx): free completed mbufs, unload their DMA maps, clear
 * OACTIVE once half the ring is free, reset the watchdog when the
 * ring fully drains, and let the NIC stop polling an empty queue.
 * Caller holds the TX serializer.
 */
2354 static __inline void
2355 mxge_tx_done(struct ifnet *ifp, mxge_tx_ring_t *tx, uint32_t mcp_idx)
2357 ASSERT_SERIALIZED(&tx->tx_serialize);
2359 while (tx->pkt_done != mcp_idx) {
2363 idx = tx->done & tx->mask;
2366 m = tx->info[idx].m;
2368 * mbuf and DMA map only attached to the first
2373 IFNET_STAT_INC(ifp, opackets, 1);
2374 tx->info[idx].m = NULL;
2375 bus_dmamap_unload(tx->dmat, tx->info[idx].map);
2381 * If we have space, clear OACTIVE to tell the stack that
2382 * its OK to send packets
2384 if (tx->req - tx->done < (tx->mask + 1) / 2) {
2385 ifsq_clr_oactive(tx->ifsq);
2386 if (tx->req == tx->done) {
2387 /* Reset watchdog */
2388 tx->watchdog.wd_timer = 0;
/* Kick the subqueue if packets queued up while we were full */
2392 if (!ifsq_is_empty(tx->ifsq))
2393 ifsq_devstart(tx->ifsq);
2395 if (tx->send_stop != NULL && tx->req == tx->done) {
2397 * Let the NIC stop polling this queue, since there
2398 * are no more transmits pending
2401 tx->queue_active = 0;
/*
 * XFP module compliance-byte decode table: maps bits of the XFP 10GbE
 * compliance register to ifmedia types.  Entries the stack has no
 * media word for are IFM_NONE.  Scanned in order by mxge_media_probe().
 */
2407 static struct mxge_media_type mxge_xfp_media_types[] = {
2408 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2409 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2410 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2411 {IFM_NONE, (1 << 5), "10GBASE-ER"},
2412 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2413 {IFM_NONE, (1 << 3), "10GBASE-SW"},
2414 {IFM_NONE, (1 << 2), "10GBASE-LW"},
2415 {IFM_NONE, (1 << 1), "10GBASE-EW"},
2416 {IFM_NONE, (1 << 0), "Reserved"}
/*
 * SFP+ module compliance-byte decode table.  The first entry (bitmask
 * 0) is the special-case match for passive Twinax cables that report
 * no compliance bits; see mxge_media_probe().
 */
2419 static struct mxge_media_type mxge_sfp_media_types[] = {
2420 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2421 {IFM_NONE, (1 << 7), "Reserved"},
2422 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2423 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2424 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2425 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
/*
 * Install `media_type` as the single available/current ifmedia entry
 * (tagged with rx/tx pause flow-control options) and cache it in
 * sc->current_media.  IFM_NONE entries are ignored.
 */
2429 mxge_media_set(mxge_softc_t *sc, int media_type)
2433 if (media_type == IFM_NONE)
2437 fc_opt = IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
2439 ifmedia_add(&sc->media, MXGE_IFM | media_type, 0, NULL);
2440 ifmedia_set(&sc->media, MXGE_IFM | media_type | fc_opt);
2442 sc->current_media = media_type;
/* Remove all installed ifmedia entries and forget the current media. */
2446 mxge_media_unset(mxge_softc_t *sc)
2448 ifmedia_removeall(&sc->media);
2449 sc->current_media = IFM_NONE;
/*
 * Classify the NIC's connector type (CX4, XFP, SFP+, Quad Ribbon
 * Fiber, or unknown) from the EEPROM product-code string, and install
 * ifmedia where the type is fixed.  For XFP/SFP+ cages the actual
 * module is identified later by mxge_media_probe().
 */
2453 mxge_media_init(mxge_softc_t *sc)
2458 mxge_media_unset(sc);
2461 * Parse the product code to determine the interface type
2462 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2463 * after the 3rd dash in the driver's cached copy of the
2464 * EEPROM's product code string.
2466 ptr = sc->product_code_string;
2468 if_printf(sc->ifp, "Missing product code\n");
/* Advance past the third '-'; bail if the string is malformed */
2472 for (i = 0; i < 3; i++, ptr++) {
2473 ptr = strchr(ptr, '-');
2475 if_printf(sc->ifp, "only %d dashes in PC?!?\n", i);
2479 if (*ptr == 'C' || *(ptr +1) == 'C') {
2481 sc->connector = MXGE_CX4;
2482 mxge_media_set(sc, IFM_10G_CX4);
2483 } else if (*ptr == 'Q') {
2484 /* -Q is Quad Ribbon Fiber */
2485 sc->connector = MXGE_QRF;
2486 if_printf(sc->ifp, "Quad Ribbon Fiber Media\n");
2487 /* DragonFly has no media type for Quad ribbon fiber */
2488 } else if (*ptr == 'R') {
2490 sc->connector = MXGE_XFP;
2491 /* NOTE: ifmedia will be installed later */
2492 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2493 /* -S or -2S is SFP+ */
2494 sc->connector = MXGE_SFP;
2495 /* NOTE: ifmedia will be installed later */
2497 sc->connector = MXGE_UNK;
2498 if_printf(sc->ifp, "Unknown media type: %c\n", *ptr);
2503 * Determine the media type for a NIC. Some XFPs will identify
2504 * themselves only when their link is up, so this is initiated via a
2505 * link up interrupt. However, this can potentially take up to
2506 * several milliseconds, so it is run via the watchdog routine, rather
2507 * than in the interrupt handler itself.
2510 mxge_media_probe(mxge_softc_t *sc)
2513 const char *cage_type;
2514 struct mxge_media_type *mxge_media_types = NULL;
2515 int i, err, ms, mxge_media_type_entries;
2518 sc->need_media_probe = 0;
/* Pick the decode table and compliance-byte offset for the cage type */
2520 if (sc->connector == MXGE_XFP) {
2522 mxge_media_types = mxge_xfp_media_types;
2523 mxge_media_type_entries = NELEM(mxge_xfp_media_types);
2524 byte = MXGE_XFP_COMPLIANCE_BYTE;
2526 } else if (sc->connector == MXGE_SFP) {
2527 /* -S or -2S is SFP+ */
2528 mxge_media_types = mxge_sfp_media_types;
2529 mxge_media_type_entries = NELEM(mxge_sfp_media_types);
2533 /* nothing to do; media type cannot change */
2538 * At this point we know the NIC has an XFP cage, so now we
2539 * try to determine what is in the cage by using the
2540 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2541 * register. We read just one byte, which may take over
2545 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2547 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2548 if (err != MXGEFW_CMD_OK) {
2549 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE)
2550 if_printf(sc->ifp, "failed to read XFP\n");
2551 else if (err == MXGEFW_CMD_ERROR_I2C_ABSENT)
2552 if_printf(sc->ifp, "Type R/S with no XFP!?!?\n");
2554 if_printf(sc->ifp, "I2C read failed, err: %d", err);
2555 mxge_media_unset(sc);
2559 /* Now we wait for the data to be cached */
/* Poll up to ~50ms for the firmware to finish the I2C transaction */
2561 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2562 for (ms = 0; err == EBUSY && ms < 50; ms++) {
2565 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2567 if (err != MXGEFW_CMD_OK) {
2568 if_printf(sc->ifp, "failed to read %s (%d, %dms)\n",
2569 cage_type, err, ms);
2570 mxge_media_unset(sc);
/* Entry 0 is a special-case exact match (e.g. Twinax's zero mask) */
2574 if (cmd.data0 == mxge_media_types[0].bitmask) {
2576 if_printf(sc->ifp, "%s:%s\n", cage_type,
2577 mxge_media_types[0].name);
2579 if (sc->current_media != mxge_media_types[0].flag) {
2580 mxge_media_unset(sc);
2581 mxge_media_set(sc, mxge_media_types[0].flag);
/* Otherwise scan for the first asserted compliance bit */
2585 for (i = 1; i < mxge_media_type_entries; i++) {
2586 if (cmd.data0 & mxge_media_types[i].bitmask) {
2588 if_printf(sc->ifp, "%s:%s\n", cage_type,
2589 mxge_media_types[i].name);
2592 if (sc->current_media != mxge_media_types[i].flag) {
2593 mxge_media_unset(sc);
2594 mxge_media_set(sc, mxge_media_types[i].flag);
/* No bit matched: unknown module */
2599 mxge_media_unset(sc);
2601 if_printf(sc->ifp, "%s media 0x%x unknown\n", cage_type,
/*
 * Process the firmware's asynchronous status block: propagate link
 * up/down transitions to the ifnet layer (scheduling a media probe on
 * link up), report RDMA tag exhaustion, and count link-down events.
 */
2607 mxge_intr_status(struct mxge_softc *sc, const mcp_irq_data_t *stats)
2609 if (sc->link_state != stats->link_up) {
2610 sc->link_state = stats->link_up;
2611 if (sc->link_state) {
2612 sc->ifp->if_link_state = LINK_STATE_UP;
2613 if_link_state_change(sc->ifp);
2615 if_printf(sc->ifp, "link up\n");
2617 sc->ifp->if_link_state = LINK_STATE_DOWN;
2618 if_link_state_change(sc->ifp);
2620 if_printf(sc->ifp, "link down\n");
/* Defer the (slow) module identification to the watchdog */
2622 sc->need_media_probe = 1;
2625 if (sc->rdma_tags_available != be32toh(stats->rdma_tags_available)) {
2626 sc->rdma_tags_available = be32toh(stats->rdma_tags_available);
2627 if_printf(sc->ifp, "RDMA timed out! %d tags left\n",
2628 sc->rdma_tags_available);
2631 if (stats->link_down) {
2632 sc->down_cnt += stats->link_down;
2634 sc->ifp->if_link_state = LINK_STATE_DOWN;
2635 if_link_state_change(sc->ifp);
/* Acquire all per-slice serializers except the main one (offset 1). */
2640 mxge_serialize_skipmain(struct mxge_softc *sc)
2642 lwkt_serialize_array_enter(sc->serializes, sc->nserialize, 1);
/* Release the serializers taken by mxge_serialize_skipmain(). */
2646 mxge_deserialize_skipmain(struct mxge_softc *sc)
2648 lwkt_serialize_array_exit(sc->serializes, sc->nserialize, 1);
/*
 * Legacy (INTx) interrupt handler for slice 0.  Latches stats->valid,
 * deasserts the shared IRQ line, then — holding every serializer
 * except the main one — loops reaping TX completions and RX packets
 * until the firmware clears `valid` (or immediately, when
 * mxge_deassert_wait is off).  Finally returns the interrupt tokens
 * to the NIC via irq_claim.
 */
2652 mxge_legacy(void *arg)
2654 struct mxge_slice_state *ss = arg;
2655 mxge_softc_t *sc = ss->sc;
2656 mcp_irq_data_t *stats = ss->fw_stats;
2657 mxge_tx_ring_t *tx = &ss->tx;
2658 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2659 uint32_t send_done_count;
2662 ASSERT_SERIALIZED(&sc->main_serialize);
2664 /* Make sure the DMA has finished */
2667 valid = stats->valid;
2669 /* Lower legacy IRQ */
2670 *sc->irq_deassert = 0;
2671 if (!mxge_deassert_wait) {
2672 /* Don't wait for conf. that irq is low */
2676 mxge_serialize_skipmain(sc);
2679 * Loop while waiting for legacy irq deassertion
2680 * XXX do we really want to loop?
2683 /* Check for transmit completes and receives */
2684 send_done_count = be32toh(stats->send_done_count);
2685 while ((send_done_count != tx->pkt_done) ||
2686 (rx_done->entry[rx_done->idx].length != 0)) {
2687 if (send_done_count != tx->pkt_done) {
2688 mxge_tx_done(&sc->arpcom.ac_if, tx,
2689 (int)send_done_count);
2691 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
2692 send_done_count = be32toh(stats->send_done_count);
2694 if (mxge_deassert_wait)
/* Re-check until the firmware confirms the IRQ is deasserted */
2696 } while (*((volatile uint8_t *)&stats->valid));
2698 mxge_deserialize_skipmain(sc);
2700 /* Fw link & error stats meaningful only on the first slice */
2701 if (__predict_false(stats->stats_updated))
2702 mxge_intr_status(sc, stats);
2704 /* Check to see if we have rx token to pass back */
2706 *ss->irq_claim = be32toh(3);
2707 *(ss->irq_claim + 1) = be32toh(3);
/*
 * NOTE(review): the function signature line is missing from this listing;
 * by position and content this body corresponds to the MSI interrupt
 * handler (presumably mxge_msi) — confirm against the full source.
 * Handles tx completions and rx under their own serializers, skipping rx
 * work when polling(4) owns the rings, then returns the irq tokens.
 */
2713 struct mxge_slice_state *ss = arg;
2714 mxge_softc_t *sc = ss->sc;
2715 mcp_irq_data_t *stats = ss->fw_stats;
2716 mxge_tx_ring_t *tx = &ss->tx;
2717 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2718 uint32_t send_done_count;
2720 #ifndef IFPOLL_ENABLE
2721 const boolean_t polling = FALSE;
2723 boolean_t polling = FALSE;
2726 ASSERT_SERIALIZED(&sc->main_serialize);
2728 /* Make sure the DMA has finished */
2729 if (__predict_false(!stats->valid))
2732 valid = stats->valid;
2735 #ifdef IFPOLL_ENABLE
/* If npoll owns the interface, leave rx to the poll handler */
2736 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
2741 /* Check for receives */
2742 lwkt_serialize_enter(&ss->rx_data.rx_serialize);
2743 if (rx_done->entry[rx_done->idx].length != 0)
2744 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
2745 lwkt_serialize_exit(&ss->rx_data.rx_serialize);
2749 * Check for transmit completes
2752 * Since pkt_done is only changed by mxge_tx_done(),
2753 * which is called only in interrupt handler, the
2754 * check w/o holding tx serializer is MPSAFE.
2756 send_done_count = be32toh(stats->send_done_count);
2757 if (send_done_count != tx->pkt_done) {
2758 lwkt_serialize_enter(&tx->tx_serialize);
2759 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count);
2760 lwkt_serialize_exit(&tx->tx_serialize);
2763 if (__predict_false(stats->stats_updated))
2764 mxge_intr_status(sc, stats);
2766 /* Check to see if we have rx token to pass back */
2767 if (!polling && (valid & 0x1))
2768 *ss->irq_claim = be32toh(3);
2769 *(ss->irq_claim + 1) = be32toh(3);
/*
 * mxge_msix_rx - MSI-X rx-only interrupt handler for one slice.
 * Skips work when npoll owns the interface, drains the rx done ring,
 * and returns the rx token to the NIC.
 */
2773 mxge_msix_rx(void *arg)
2775 struct mxge_slice_state *ss = arg;
2776 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2778 #ifdef IFPOLL_ENABLE
2779 if (ss->sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
2783 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);
2785 if (rx_done->entry[rx_done->idx].length != 0)
2786 mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, -1);
/* Hand the rx token back so the NIC keeps delivering */
2788 *ss->irq_claim = be32toh(3);
/*
 * mxge_msix_rxtx - MSI-X combined rx/tx interrupt handler for one slice.
 * Runs under the slice's rx serializer; rx work is skipped while npoll
 * owns the interface, tx completions are reaped under the tx serializer,
 * and the irq/rx tokens are returned to the NIC.
 * NOTE(review): this numbered listing is missing intermediate lines;
 * comments describe only the visible statements.
 */
2792 mxge_msix_rxtx(void *arg)
2794 struct mxge_slice_state *ss = arg;
2795 mxge_softc_t *sc = ss->sc;
2796 mcp_irq_data_t *stats = ss->fw_stats;
2797 mxge_tx_ring_t *tx = &ss->tx;
2798 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2799 uint32_t send_done_count;
2801 #ifndef IFPOLL_ENABLE
2802 const boolean_t polling = FALSE;
2804 boolean_t polling = FALSE;
2807 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);
2809 /* Make sure the DMA has finished */
2810 if (__predict_false(!stats->valid))
2813 valid = stats->valid;
2816 #ifdef IFPOLL_ENABLE
2817 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
2821 /* Check for receives */
2822 if (!polling && rx_done->entry[rx_done->idx].length != 0)
2823 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
2826 * Check for transmit completes
2829 * Since pkt_done is only changed by mxge_tx_done(),
2830 * which is called only in interrupt handler, the
2831 * check w/o holding tx serializer is MPSAFE.
2833 send_done_count = be32toh(stats->send_done_count);
2834 if (send_done_count != tx->pkt_done) {
2835 lwkt_serialize_enter(&tx->tx_serialize);
2836 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count);
2837 lwkt_serialize_exit(&tx->tx_serialize);
2840 /* Check to see if we have rx token to pass back */
2841 if (!polling && (valid & 0x1))
2842 *ss->irq_claim = be32toh(3);
2843 *(ss->irq_claim + 1) = be32toh(3);
/*
 * mxge_init - ifnet if_init entry point; brings the interface up (via
 * code on lines missing from this listing) if it is not already running.
 */
2847 mxge_init(void *arg)
2849 struct mxge_softc *sc = arg;
2851 ASSERT_IFNET_SERIALIZED_ALL(sc->ifp);
2852 if ((sc->ifp->if_flags & IFF_RUNNING) == 0)
/*
 * mxge_free_slice_mbufs - unload DMA maps and free every mbuf still held
 * by this slice's big rx, small rx and (if present) tx rings.
 */
2857 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
2861 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
2862 if (ss->rx_data.rx_big.info[i].m == NULL)
2864 bus_dmamap_unload(ss->rx_data.rx_big.dmat,
2865 ss->rx_data.rx_big.info[i].map);
2866 m_freem(ss->rx_data.rx_big.info[i].m);
2867 ss->rx_data.rx_big.info[i].m = NULL;
2870 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
2871 if (ss->rx_data.rx_small.info[i].m == NULL)
2873 bus_dmamap_unload(ss->rx_data.rx_small.dmat,
2874 ss->rx_data.rx_small.info[i].map);
2875 m_freem(ss->rx_data.rx_small.info[i].m);
2876 ss->rx_data.rx_small.info[i].m = NULL;
2879 /* Transmit ring used only on the first slice */
2880 if (ss->tx.info == NULL)
2883 for (i = 0; i <= ss->tx.mask; i++) {
2884 if (ss->tx.info[i].m == NULL)
2886 bus_dmamap_unload(ss->tx.dmat, ss->tx.info[i].map);
2887 m_freem(ss->tx.info[i].m);
2888 ss->tx.info[i].m = NULL;
/* mxge_free_mbufs - free all ring mbufs for every slice. */
2893 mxge_free_mbufs(mxge_softc_t *sc)
2897 for (slice = 0; slice < sc->num_slices; slice++)
2898 mxge_free_slice_mbufs(&sc->ss[slice]);
/*
 * mxge_free_slice_rings - release everything mxge_alloc_slice_rings set
 * up for one slice: rx done DMA memory, tx request/segment lists, rx
 * shadow rings, and the per-slot DMA maps plus their tags.  Each pointer
 * is NULLed after freeing so the function is safe to call on a partially
 * initialized slice.
 */
2902 mxge_free_slice_rings(struct mxge_slice_state *ss)
2906 if (ss->rx_data.rx_done.entry != NULL) {
2907 mxge_dma_free(&ss->rx_done_dma);
2908 ss->rx_data.rx_done.entry = NULL;
2911 if (ss->tx.req_list != NULL) {
2912 kfree(ss->tx.req_list, M_DEVBUF);
2913 ss->tx.req_list = NULL;
2916 if (ss->tx.seg_list != NULL) {
2917 kfree(ss->tx.seg_list, M_DEVBUF);
2918 ss->tx.seg_list = NULL;
2921 if (ss->rx_data.rx_small.shadow != NULL) {
2922 kfree(ss->rx_data.rx_small.shadow, M_DEVBUF);
2923 ss->rx_data.rx_small.shadow = NULL;
2926 if (ss->rx_data.rx_big.shadow != NULL) {
2927 kfree(ss->rx_data.rx_big.shadow, M_DEVBUF);
2928 ss->rx_data.rx_big.shadow = NULL;
2931 if (ss->tx.info != NULL) {
2932 if (ss->tx.dmat != NULL) {
2933 for (i = 0; i <= ss->tx.mask; i++) {
2934 bus_dmamap_destroy(ss->tx.dmat,
2935 ss->tx.info[i].map);
2937 bus_dma_tag_destroy(ss->tx.dmat);
2939 kfree(ss->tx.info, M_DEVBUF);
2943 if (ss->rx_data.rx_small.info != NULL) {
2944 if (ss->rx_data.rx_small.dmat != NULL) {
2945 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
2946 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
2947 ss->rx_data.rx_small.info[i].map);
/* Also destroy the spare map used for buffer replacement */
2949 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
2950 ss->rx_data.rx_small.extra_map);
2951 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
2953 kfree(ss->rx_data.rx_small.info, M_DEVBUF);
2954 ss->rx_data.rx_small.info = NULL;
2957 if (ss->rx_data.rx_big.info != NULL) {
2958 if (ss->rx_data.rx_big.dmat != NULL) {
2959 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
2960 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
2961 ss->rx_data.rx_big.info[i].map);
2963 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
2964 ss->rx_data.rx_big.extra_map);
2965 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
2967 kfree(ss->rx_data.rx_big.info, M_DEVBUF);
2968 ss->rx_data.rx_big.info = NULL;
/* mxge_free_rings - free ring resources for every slice. */
2973 mxge_free_rings(mxge_softc_t *sc)
2980 for (slice = 0; slice < sc->num_slices; slice++)
2981 mxge_free_slice_rings(&sc->ss[slice]);
/*
 * mxge_alloc_slice_rings - allocate one slice's ring state: rx shadow and
 * host-info rings, rx small/big busdma tags plus one map per slot (and a
 * spare "extra" map each), then the tx request/segment/info arrays and
 * the tx busdma tag and per-slot maps.  Each map-creation loop unwinds
 * its own partial work on failure (error paths partially visible in this
 * listing).  Masks are set to entries-1, so ring sizes are powers of two.
 * NOTE(review): this numbered listing is missing intermediate lines;
 * comments describe only the visible statements.
 */
2985 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
2986 int tx_ring_entries)
2988 mxge_softc_t *sc = ss->sc;
2993 * Allocate per-slice receive resources
2996 ss->rx_data.rx_small.mask = ss->rx_data.rx_big.mask =
2997 rx_ring_entries - 1;
2998 ss->rx_data.rx_done.mask = (2 * rx_ring_entries) - 1;
3000 /* Allocate the rx shadow rings */
3001 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.shadow);
3002 ss->rx_data.rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3004 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.shadow);
3005 ss->rx_data.rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3007 /* Allocate the rx host info rings */
3008 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.info);
3009 ss->rx_data.rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3011 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.info);
3012 ss->rx_data.rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3014 /* Allocate the rx busdma resources */
3015 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3017 4096, /* boundary */
3018 BUS_SPACE_MAXADDR, /* low */
3019 BUS_SPACE_MAXADDR, /* high */
3020 NULL, NULL, /* filter */
3021 MHLEN, /* maxsize */
3023 MHLEN, /* maxsegsize */
3024 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
3026 &ss->rx_data.rx_small.dmat); /* tag */
3028 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3033 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, BUS_DMA_WAITOK,
3034 &ss->rx_data.rx_small.extra_map);
3036 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", err);
3037 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
3038 ss->rx_data.rx_small.dmat = NULL;
/* One DMA map per small-rx slot */
3041 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
3042 err = bus_dmamap_create(ss->rx_data.rx_small.dmat,
3043 BUS_DMA_WAITOK, &ss->rx_data.rx_small.info[i].map);
3047 device_printf(sc->dev, "Err %d rx_small dmamap\n", err);
/* Unwind the maps created so far, then the tag */
3049 for (j = 0; j < i; ++j) {
3050 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
3051 ss->rx_data.rx_small.info[j].map);
3053 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
3054 ss->rx_data.rx_small.extra_map);
3055 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
3056 ss->rx_data.rx_small.dmat = NULL;
/* Big-rx tag: 4KB boundary/segments for page-sized cluster buffers */
3061 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3063 4096, /* boundary */
3064 BUS_SPACE_MAXADDR, /* low */
3065 BUS_SPACE_MAXADDR, /* high */
3066 NULL, NULL, /* filter */
3069 4096, /* maxsegsize*/
3070 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
3072 &ss->rx_data.rx_big.dmat); /* tag */
3074 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3079 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK,
3080 &ss->rx_data.rx_big.extra_map);
3082 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", err);
3083 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
3084 ss->rx_data.rx_big.dmat = NULL;
3087 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
3088 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK,
3089 &ss->rx_data.rx_big.info[i].map);
3093 device_printf(sc->dev, "Err %d rx_big dmamap\n", err);
3094 for (j = 0; j < i; ++j) {
3095 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
3096 ss->rx_data.rx_big.info[j].map);
3098 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
3099 ss->rx_data.rx_big.extra_map);
3100 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
3101 ss->rx_data.rx_big.dmat = NULL;
3107 * Now allocate TX resources
3110 ss->tx.mask = tx_ring_entries - 1;
3111 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3114 * Allocate the tx request copy block; MUST be at least 8 bytes
3117 bytes = sizeof(*ss->tx.req_list) * (ss->tx.max_desc + 4);
3118 ss->tx.req_list = kmalloc_cachealign(__VM_CACHELINE_ALIGN(bytes),
3119 M_DEVBUF, M_WAITOK);
3121 /* Allocate the tx busdma segment list */
3122 bytes = sizeof(*ss->tx.seg_list) * ss->tx.max_desc;
3123 ss->tx.seg_list = kmalloc(bytes, M_DEVBUF, M_WAITOK);
3125 /* Allocate the tx host info ring */
3126 bytes = tx_ring_entries * sizeof(*ss->tx.info);
3127 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3129 /* Allocate the tx busdma resources */
3130 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3132 sc->tx_boundary, /* boundary */
3133 BUS_SPACE_MAXADDR, /* low */
3134 BUS_SPACE_MAXADDR, /* high */
3135 NULL, NULL, /* filter */
3137 sizeof(struct ether_vlan_header),
3139 ss->tx.max_desc - 2, /* num segs */
3140 sc->tx_boundary, /* maxsegsz */
3141 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
3142 BUS_DMA_ONEBPAGE, /* flags */
3143 &ss->tx.dmat); /* tag */
3145 device_printf(sc->dev, "Err %d allocating tx dmat\n", err);
3150 * Now use these tags to setup DMA maps for each slot in the ring
3152 for (i = 0; i <= ss->tx.mask; i++) {
3153 err = bus_dmamap_create(ss->tx.dmat,
3154 BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &ss->tx.info[i].map);
3158 device_printf(sc->dev, "Err %d tx dmamap\n", err);
3159 for (j = 0; j < i; ++j) {
3160 bus_dmamap_destroy(ss->tx.dmat,
3161 ss->tx.info[j].map);
3163 bus_dma_tag_destroy(ss->tx.dmat);
/*
 * mxge_alloc_rings - query the firmware for tx ring size, derive tx/rx
 * entry counts, configure the ifnet send queue (subqueue count/mask for
 * multi-tx), and allocate per-slice rings.
 */
3172 mxge_alloc_rings(mxge_softc_t *sc)
3176 int tx_ring_entries, rx_ring_entries;
3179 /* Get ring sizes */
3180 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3182 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3185 tx_ring_size = cmd.data0;
3187 tx_ring_entries = tx_ring_size / sizeof(mcp_kreq_ether_send_t);
/* rx_intr_slots is 2 * rx ring entries (see mxge_alloc_slices) */
3188 rx_ring_entries = sc->rx_intr_slots / 2;
3191 device_printf(sc->dev, "tx desc %d, rx desc %d\n",
3192 tx_ring_entries, rx_ring_entries);
/* Advise the mbuf system how many clusters we may pin */
3195 sc->ifp->if_nmbclusters = rx_ring_entries * sc->num_slices;
3196 sc->ifp->if_nmbjclusters = sc->ifp->if_nmbclusters;
3198 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1);
3199 ifq_set_ready(&sc->ifp->if_snd);
3200 ifq_set_subq_cnt(&sc->ifp->if_snd, sc->num_tx_rings);
3202 if (sc->num_tx_rings > 1) {
3203 sc->ifp->if_mapsubq = ifq_mapsubq_mask;
3204 ifq_set_subq_mask(&sc->ifp->if_snd, sc->num_tx_rings - 1);
3207 for (slice = 0; slice < sc->num_slices; slice++) {
3208 err = mxge_alloc_slice_rings(&sc->ss[slice],
3209 rx_ring_entries, tx_ring_entries);
3211 device_printf(sc->dev,
3212 "alloc %d slice rings failed\n", slice);
/*
 * mxge_choose_params - pick the rx cluster size for a given MTU:
 * a regular MCLBYTES cluster if the framed buffer fits, else a
 * page-sized jumbo cluster (asserting the MTU is not larger still).
 */
3220 mxge_choose_params(int mtu, int *cl_size)
3222 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD;
3224 if (bufsize < MCLBYTES) {
3225 *cl_size = MCLBYTES;
3227 KASSERT(bufsize < MJUMPAGESIZE, ("invalid MTU %d", mtu));
3228 *cl_size = MJUMPAGESIZE;
/*
 * mxge_slice_open - per-slice bring-up: fetch the lanai (NIC SRAM)
 * pointers for the send and small/big receive rings from the firmware,
 * then pre-fill (stock) both receive rings with buffers.
 * NOTE(review): this numbered listing is missing intermediate lines;
 * comments describe only the visible statements.
 */
3233 mxge_slice_open(struct mxge_slice_state *ss, int cl_size)
3238 slice = ss - ss->sc->ss;
3241 * Get the lanai pointers to the send and receive rings
3245 if (ss->sc->num_tx_rings == 1) {
3248 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET,
3250 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *)
3251 (ss->sc->sram + cmd.data0);
3252 /* Leave send_go and send_stop as NULL */
/* Multi-tx: each slice also gets send_go/send_stop doorbells */
3256 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3257 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *)
3258 (ss->sc->sram + cmd.data0);
3259 ss->tx.send_go = (volatile uint32_t *)
3260 (ss->sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3261 ss->tx.send_stop = (volatile uint32_t *)
3262 (ss->sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3266 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3267 ss->rx_data.rx_small.lanai =
3268 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0);
3271 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3272 ss->rx_data.rx_big.lanai =
3273 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0);
3276 if_printf(ss->sc->ifp,
3277 "failed to get ring sizes or locations\n");
3282 * Stock small receive ring
3284 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
3285 err = mxge_get_buf_small(&ss->rx_data.rx_small,
3286 ss->rx_data.rx_small.info[i].map, i, TRUE);
3288 if_printf(ss->sc->ifp, "alloced %d/%d smalls\n", i,
3289 ss->rx_data.rx_small.mask + 1);
3295 * Stock big receive ring
/* Poison shadow addresses first so unfilled slots are detectable */
3297 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
3298 ss->rx_data.rx_big.shadow[i].addr_low = 0xffffffff;
3299 ss->rx_data.rx_big.shadow[i].addr_high = 0xffffffff;
3302 ss->rx_data.rx_big.cl_size = cl_size;
3304 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
3305 err = mxge_get_buf_big(&ss->rx_data.rx_big,
3306 ss->rx_data.rx_big.info[i].map, i, TRUE);
3308 if_printf(ss->sc->ifp, "alloced %d/%d bigs\n", i,
3309 ss->rx_data.rx_big.mask + 1);
/*
 * mxge_open - full interface bring-up: reset the NIC, program RSS
 * (indirection table, hash key, hash type) when multiple slices are in
 * use, set TSO mode, program MTU and buffer sizes, register each slice's
 * stats DMA block, open every slice, then start the firmware ethernet
 * engine and mark the ifnet running.
 * NOTE(review): this numbered listing is missing intermediate lines
 * (error branches/returns); comments describe only the visible statements.
 */
3317 mxge_open(mxge_softc_t *sc)
3319 struct ifnet *ifp = sc->ifp;
3321 int err, slice, cl_size, i;
3323 volatile uint8_t *itable;
3324 struct mxge_slice_state *ss;
3326 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3328 /* Copy the MAC address in case it was overridden */
3329 bcopy(IF_LLADDR(ifp), sc->mac_addr, ETHER_ADDR_LEN);
3331 err = mxge_reset(sc, 1);
3333 if_printf(ifp, "failed to reset\n");
3337 if (sc->num_slices > 1) {
3338 /* Setup the indirection table */
3339 cmd.data0 = sc->num_slices;
3340 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, &cmd);
3342 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd);
3344 if_printf(ifp, "failed to setup rss tables\n");
3348 /* Just enable an identity mapping */
3349 itable = sc->sram + cmd.data0;
3350 for (i = 0; i < sc->num_slices; i++)
3351 itable[i] = (uint8_t)i;
3354 volatile uint8_t *hwkey;
3355 uint8_t swkey[MXGE_HWRSS_KEYLEN];
3357 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_KEY_OFFSET,
3360 if_printf(ifp, "failed to get rsskey\n");
3363 hwkey = sc->sram + cmd.data0;
/* Copy the host Toeplitz key into NIC SRAM byte by byte */
3365 toeplitz_get_key(swkey, MXGE_HWRSS_KEYLEN);
3366 for (i = 0; i < MXGE_HWRSS_KEYLEN; ++i)
3367 hwkey[i] = swkey[i];
3370 err = mxge_send_cmd(sc, MXGEFW_CMD_RSS_KEY_UPDATED,
3373 if_printf(ifp, "failed to update rsskey\n");
3377 if_printf(ifp, "RSS key updated\n");
3383 if_printf(ifp, "input hash: RSS\n");
3384 cmd.data1 = MXGEFW_RSS_HASH_TYPE_IPV4 |
3385 MXGEFW_RSS_HASH_TYPE_TCP_IPV4;
3388 if_printf(ifp, "input hash: SRC_DST_PORT\n");
3389 cmd.data1 = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
3391 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3393 if_printf(ifp, "failed to enable slices\n");
3398 cmd.data0 = MXGEFW_TSO_MODE_NDIS;
3399 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_TSO_MODE, &cmd);
3402 * Can't change TSO mode to NDIS, never allow TSO then
3404 if_printf(ifp, "failed to set TSO mode\n");
3405 ifp->if_capenable &= ~IFCAP_TSO;
3406 ifp->if_capabilities &= ~IFCAP_TSO;
3407 ifp->if_hwassist &= ~CSUM_TSO;
3410 mxge_choose_params(ifp->if_mtu, &cl_size);
3413 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, &cmd);
3415 * Error is only meaningful if we're trying to set
3416 * MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1
3420 * Give the firmware the mtu and the big and small buffer
3421 * sizes. The firmware wants the big buf size to be a power
3422 * of two. Luckily, DragonFly's clusters are powers of two
3424 cmd.data0 = ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3425 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3427 cmd.data0 = MXGE_RX_SMALL_BUFLEN;
3428 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd);
3430 cmd.data0 = cl_size;
3431 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3434 if_printf(ifp, "failed to setup params\n");
3438 /* Now give him the pointer to the stats block */
3439 for (slice = 0; slice < sc->num_slices; slice++) {
3440 ss = &sc->ss[slice];
3441 cmd.data0 = MXGE_LOWPART_TO_U32(ss->fw_stats_dma.dmem_busaddr);
3442 cmd.data1 = MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.dmem_busaddr);
3443 cmd.data2 = sizeof(struct mcp_irq_data);
/* Slice index is encoded in the upper 16 bits of data2 */
3444 cmd.data2 |= (slice << 16);
3445 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
/* Fallback for firmware without STATS_DMA_V2: obsolete interface */
3449 bus = sc->ss->fw_stats_dma.dmem_busaddr;
3450 bus += offsetof(struct mcp_irq_data, send_done_count);
3451 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3452 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3453 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3456 /* Firmware cannot support multicast without STATS_DMA_V2 */
3457 sc->fw_multicast_support = 0;
3459 sc->fw_multicast_support = 1;
3463 if_printf(ifp, "failed to setup params\n");
3467 for (slice = 0; slice < sc->num_slices; slice++) {
3468 err = mxge_slice_open(&sc->ss[slice], cl_size);
3470 if_printf(ifp, "couldn't open slice %d\n", slice);
3475 /* Finally, start the firmware running */
3476 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3478 if_printf(ifp, "Couldn't bring up link\n");
3482 ifp->if_flags |= IFF_RUNNING;
3483 for (i = 0; i < sc->num_tx_rings; ++i) {
3484 mxge_tx_ring_t *tx = &sc->ss[i].tx;
3486 ifsq_clr_oactive(tx->ifsq);
3487 ifsq_watchdog_start(&tx->watchdog);
/* Abort path: release any mbufs stocked before the failure */
3493 mxge_free_mbufs(sc);
/*
 * mxge_close - bring the interface down: issue ETHERNET_DOWN to the
 * firmware, wait (dropping the ifnet serializers while sleeping) for the
 * down interrupt to bump sc->down_cnt, free all ring mbufs, and clear
 * IFF_RUNNING plus the per-ring oactive/watchdog state.
 */
3498 mxge_close(mxge_softc_t *sc, int down)
3500 struct ifnet *ifp = sc->ifp;
3502 int err, old_down_cnt, i;
3504 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3507 old_down_cnt = sc->down_cnt;
3510 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3512 if_printf(ifp, "Couldn't bring down link\n");
3514 if (old_down_cnt == sc->down_cnt) {
/* Sleep without the serializers so the down irq can be handled */
3519 ifnet_deserialize_all(ifp);
3520 DELAY(10 * sc->intr_coal_delay);
3521 ifnet_serialize_all(ifp);
3525 if (old_down_cnt == sc->down_cnt)
3526 if_printf(ifp, "never got down irq\n");
3528 mxge_free_mbufs(sc);
3530 ifp->if_flags &= ~IFF_RUNNING;
3531 for (i = 0; i < sc->num_tx_rings; ++i) {
3532 mxge_tx_ring_t *tx = &sc->ss[i].tx;
3534 ifsq_clr_oactive(tx->ifsq);
3535 ifsq_watchdog_stop(&tx->watchdog);
/*
 * mxge_setup_cfg_space - program PCI config space for the NIC: record the
 * PCIe negotiated link width, set the PCIe Device Control max read request
 * size to 4KB (value 5 in bits 14:12) on first call — or restore the pectl
 * value saved before a watchdog reset — and enable bus mastering.
 *
 * Fix: the '&reg' argument to pci_find_extcap() had been corrupted into
 * the mis-encoded character '®' (an HTML '&reg;' entity); restored the
 * address-of expression.  No other token changed.
 * NOTE(review): this numbered listing is missing intermediate lines
 * (e.g. the declaration of 'reg'); comments describe visible statements.
 */
3540 mxge_setup_cfg_space(mxge_softc_t *sc)
3542 device_t dev = sc->dev;
3544 uint16_t lnk, pectl;
3546 /* Find the PCIe link width and set max read request to 4KB */
3547 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3548 lnk = pci_read_config(dev, reg + 0x12, 2);
3549 sc->link_width = (lnk >> 4) & 0x3f;
3551 if (sc->pectl == 0) {
3552 pectl = pci_read_config(dev, reg + 0x8, 2);
3553 pectl = (pectl & ~0x7000) | (5 << 12);
3554 pci_write_config(dev, reg + 0x8, pectl, 2);
3557 /* Restore saved pectl after watchdog reset */
3558 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3562 /* Enable DMA and memory space access */
3563 pci_enable_busmaster(dev);
/*
 * mxge_read_reboot - read the NIC's reboot-status register through the
 * vendor-specific PCI capability: enable read32 mode, point the window
 * at register 0xfffffff0, and read back the data register.  Returns
 * (uint32_t)-1 if the vendor capability cannot be found.
 */
3567 mxge_read_reboot(mxge_softc_t *sc)
3569 device_t dev = sc->dev;
3572 /* Find the vendor specific offset */
3573 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3574 if_printf(sc->ifp, "could not find vendor specific offset\n");
3575 return (uint32_t)-1;
3577 /* Enable read32 mode */
3578 pci_write_config(dev, vs + 0x10, 0x3, 1);
3579 /* Tell NIC which register to read */
3580 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3581 return pci_read_config(dev, vs + 0x14, 4);
/*
 * mxge_watchdog_reset - recover a hung/rebooted NIC.  Detects a NIC
 * reboot via a cleared busmaster bit in PCI config space (waiting up to
 * ~100ms if the device has vanished entirely), then restores config
 * space, reloads firmware, and re-opens the interface if it was running.
 * Reschedules the tick callout at the end.
 * NOTE(review): this numbered listing is missing intermediate lines;
 * comments describe only the visible statements.
 */
3585 mxge_watchdog_reset(mxge_softc_t *sc)
3587 struct pci_devinfo *dinfo;
3594 if_printf(sc->ifp, "Watchdog reset!\n");
3597 * Check to see if the NIC rebooted. If it did, then all of
3598 * PCI config space has been reset, and things like the
3599 * busmaster bit will be zero. If this is the case, then we
3600 * must restore PCI config space before the NIC can be used
3603 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3604 if (cmd == 0xffff) {
3606 * Maybe the watchdog caught the NIC rebooting; wait
3607 * up to 100ms for it to finish. If it does not come
3608 * back, then give up
3611 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3613 if_printf(sc->ifp, "NIC disappeared!\n");
3615 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3616 /* Print the reboot status */
3617 reboot = mxge_read_reboot(sc);
3618 if_printf(sc->ifp, "NIC rebooted, status = 0x%x\n", reboot);
3620 running = sc->ifp->if_flags & IFF_RUNNING;
3623 * Quiesce NIC so that TX routines will not try to
3624 * xmit after restoration of BAR
3627 /* Mark the link as down */
3628 if (sc->link_state) {
3629 sc->ifp->if_link_state = LINK_STATE_DOWN;
3630 if_link_state_change(sc->ifp);
3634 /* Restore PCI configuration space */
3635 dinfo = device_get_ivars(sc->dev);
3636 pci_cfg_restore(sc->dev, dinfo);
3638 /* And redo any changes we made to our config space */
3639 mxge_setup_cfg_space(sc);
3642 err = mxge_load_firmware(sc, 0);
3644 if_printf(sc->ifp, "Unable to re-load f/w\n");
3645 if (running && !err) {
3648 err = mxge_open(sc);
/* Kick every tx subqueue back into motion */
3650 for (i = 0; i < sc->num_tx_rings; ++i)
3651 ifsq_devstart_sched(sc->ss[i].tx.ifsq);
3653 sc->watchdog_resets++;
3655 if_printf(sc->ifp, "NIC did not reboot, not resetting\n");
3659 if_printf(sc->ifp, "watchdog reset failed\n");
3663 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
/*
 * mxge_warn_stuck - dump a stuck tx ring's state (host request/done
 * counters, queue_active doorbell bookkeeping, and the firmware's
 * send_done_count) to aid watchdog debugging.
 *
 * Fix: log message said "struck?" — corrected to "stuck?" to match the
 * function's intent and name.
 */
3668 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3670 if_printf(sc->ifp, "slice %d stuck? ring state:\n", slice);
3671 if_printf(sc->ifp, "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3672 tx->req, tx->done, tx->queue_active);
3673 if_printf(sc->ifp, "tx.activate=%d tx.deactivate=%d\n",
3674 tx->activate, tx->deactivate);
3675 if_printf(sc->ifp, "pkt_done=%d fw=%d\n",
3676 tx->pkt_done, be32toh(sc->ss->fw_stats->send_done_count));
/*
 * mxge_update_stats - snapshot the ifnet packet counters, compute the
 * combined in+out packet delta since the last call, and cache the new
 * totals in the softc.  The delta is used by mxge_tick to decide how
 * busy the NIC is (return statement is on a line missing from this
 * listing — presumably returns 'pkts'; confirm against full source).
 */
3680 mxge_update_stats(mxge_softc_t *sc)
3682 u_long ipackets, opackets, pkts;
3684 IFNET_STAT_GET(sc->ifp, ipackets, ipackets);
3685 IFNET_STAT_GET(sc->ifp, opackets, opackets);
3687 pkts = ipackets - sc->ipackets;
3688 pkts += opackets - sc->opackets;
3690 sc->ipackets = ipackets;
3691 sc->opackets = opackets;
/*
 * mxge_tick - periodic callout under the main serializer: aggregate
 * stats and re-probe media while running; when idle, verify the NIC has
 * not dropped its busmaster bit (h/w fault) and trigger a watchdog
 * reset if so.  Always reschedules itself.
 */
3697 mxge_tick(void *arg)
3699 mxge_softc_t *sc = arg;
3704 lwkt_serialize_enter(&sc->main_serialize);
3707 if (sc->ifp->if_flags & IFF_RUNNING) {
3708 /* Aggregate stats from different slices */
3709 pkts = mxge_update_stats(sc);
3710 if (sc->need_media_probe)
3711 mxge_media_probe(sc);
3716 /* Ensure NIC did not suffer h/w fault while idle */
3717 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3718 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
/* Reset needs all serializers; main is already held */
3720 mxge_serialize_skipmain(sc);
3721 mxge_watchdog_reset(sc);
3722 mxge_deserialize_skipmain(sc);
3726 /* Look less often if NIC is idle */
3731 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
3733 lwkt_serialize_exit(&sc->main_serialize);
/*
 * mxge_media_change - ifmedia change callback; the only configurable
 * aspect is flow control (rx/tx pause), which is pushed to the firmware
 * via mxge_change_pause().
 */
3737 mxge_media_change(struct ifnet *ifp)
3739 mxge_softc_t *sc = ifp->if_softc;
3740 const struct ifmedia *ifm = &sc->media;
3743 if (IFM_OPTIONS(ifm->ifm_media) & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
3752 return mxge_change_pause(sc, pause);
/*
 * mxge_change_mtu - validate and apply a new MTU.  Rejects MTUs above
 * the firmware maximum or whose framed size is below 60 bytes; if the
 * interface is running it is re-opened, restoring the old MTU on failure.
 */
3756 mxge_change_mtu(mxge_softc_t *sc, int mtu)
3758 struct ifnet *ifp = sc->ifp;
3759 int real_mtu, old_mtu;
3762 real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3763 if (mtu > sc->max_mtu || real_mtu < 60)
3766 old_mtu = ifp->if_mtu;
3768 if (ifp->if_flags & IFF_RUNNING) {
3770 err = mxge_open(sc);
/* Roll back to the previous MTU if re-open failed */
3772 ifp->if_mtu = old_mtu;
/*
 * mxge_media_status - ifmedia status callback: report link validity /
 * activity and the currently detected media subtype plus pause options.
 */
3781 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3783 mxge_softc_t *sc = ifp->if_softc;
3785 ifmr->ifm_status = IFM_AVALID;
3786 ifmr->ifm_active = IFM_ETHER;
3789 ifmr->ifm_status |= IFM_ACTIVE;
3792 * Autoselect is not supported, so the current media
3793 * should be delivered.
3795 ifmr->ifm_active |= sc->current_media;
3796 if (sc->current_media != IFM_NONE) {
3797 ifmr->ifm_active |= MXGE_IFM;
3799 ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
/*
 * mxge_ioctl - ifnet ioctl handler: SIOCSIFMTU, IFF_UP/RUNNING state
 * changes (with PROMISC/ALLMULTI handling), multicast list updates,
 * capability toggles (TXCSUM/TSO/RXCSUM/VLAN tagging), media ioctls,
 * and fallthrough to ether_ioctl().  Switch cases and break statements
 * fall on lines missing from this listing.
 */
3804 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data,
3805 struct ucred *cr __unused)
3807 mxge_softc_t *sc = ifp->if_softc;
3808 struct ifreq *ifr = (struct ifreq *)data;
3811 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3816 err = mxge_change_mtu(sc, ifr->ifr_mtu);
3823 if (ifp->if_flags & IFF_UP) {
3824 if (!(ifp->if_flags & IFF_RUNNING)) {
3825 err = mxge_open(sc);
3828 * Take care of PROMISC and ALLMULTI
3831 mxge_change_promisc(sc,
3832 ifp->if_flags & IFF_PROMISC);
3833 mxge_set_multicast_list(sc);
3836 if (ifp->if_flags & IFF_RUNNING)
3843 mxge_set_multicast_list(sc);
/* SIOCSIFCAP: toggle only the bits that changed */
3847 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3848 if (mask & IFCAP_TXCSUM) {
3849 ifp->if_capenable ^= IFCAP_TXCSUM;
3850 if (ifp->if_capenable & IFCAP_TXCSUM)
3851 ifp->if_hwassist |= CSUM_TCP | CSUM_UDP;
3853 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
3855 if (mask & IFCAP_TSO) {
3856 ifp->if_capenable ^= IFCAP_TSO;
3857 if (ifp->if_capenable & IFCAP_TSO)
3858 ifp->if_hwassist |= CSUM_TSO;
3860 ifp->if_hwassist &= ~CSUM_TSO;
3862 if (mask & IFCAP_RXCSUM)
3863 ifp->if_capenable ^= IFCAP_RXCSUM;
3864 if (mask & IFCAP_VLAN_HWTAGGING)
3865 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3870 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
3871 &sc->media, command);
3875 err = ether_ioctl(ifp, command, data);
/*
 * mxge_fetch_tunables - copy module tunables into the softc, clamping
 * them to sane ranges: interrupt coalescing delay (0..10ms), tick
 * period (default hz/2), flow-control media flags, RSS usage, and
 * throttle (clamped to [MXGE_MIN_THROTTLE, MXGE_MAX_THROTTLE] when set).
 */
3882 mxge_fetch_tunables(mxge_softc_t *sc)
3886 sc->intr_coal_delay = mxge_intr_coal_delay;
3887 if (sc->intr_coal_delay < 0 || sc->intr_coal_delay > (10 * 1000))
3888 sc->intr_coal_delay = MXGE_INTR_COAL_DELAY;
3891 if (mxge_ticks == 0)
3892 mxge_ticks = hz / 2;
3894 ifm = ifmedia_str2ethfc(mxge_flowctrl);
3895 if (ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE))
3898 sc->use_rss = mxge_use_rss;
3900 sc->throttle = mxge_throttle;
3901 if (sc->throttle && sc->throttle > MXGE_MAX_THROTTLE)
3902 sc->throttle = MXGE_MAX_THROTTLE;
3903 if (sc->throttle && sc->throttle < MXGE_MIN_THROTTLE)
3904 sc->throttle = MXGE_MIN_THROTTLE;
/*
 * mxge_free_slices - release per-slice DMA state allocated by
 * mxge_alloc_slices (fw stats block, rx done queue) and free the slice
 * array itself.
 */
3908 mxge_free_slices(mxge_softc_t *sc)
3910 struct mxge_slice_state *ss;
3916 for (i = 0; i < sc->num_slices; i++) {
3918 if (ss->fw_stats != NULL) {
3919 mxge_dma_free(&ss->fw_stats_dma);
3920 ss->fw_stats = NULL;
3922 if (ss->rx_data.rx_done.entry != NULL) {
3923 mxge_dma_free(&ss->rx_done_dma);
3924 ss->rx_data.rx_done.entry = NULL;
3927 kfree(sc->ss, M_DEVBUF);
/*
 * mxge_alloc_slices - allocate the slice array and per-slice DMA memory:
 * determine rx_intr_slots from the firmware rx ring size, then for each
 * slice allocate the rx interrupt (done) queue and the firmware stats
 * block, and initialize the per-slice serializers.
 */
3932 mxge_alloc_slices(mxge_softc_t *sc)
3935 struct mxge_slice_state *ss;
3937 int err, i, rx_ring_size;
3939 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
3941 device_printf(sc->dev, "Cannot determine rx ring size\n");
3944 rx_ring_size = cmd.data0;
3945 sc->rx_intr_slots = 2 * (rx_ring_size / sizeof (mcp_dma_addr_t));
3947 bytes = sizeof(*sc->ss) * sc->num_slices;
3948 sc->ss = kmalloc_cachealign(bytes, M_DEVBUF, M_WAITOK | M_ZERO);
3950 for (i = 0; i < sc->num_slices; i++) {
3955 lwkt_serialize_init(&ss->rx_data.rx_serialize);
3956 lwkt_serialize_init(&ss->tx.tx_serialize);
3960 * Allocate per-slice rx interrupt queue
3961 * XXX assume 4bytes mcp_slot
3963 bytes = sc->rx_intr_slots * sizeof(mcp_slot_t);
3964 err = mxge_dma_alloc(sc, &ss->rx_done_dma, bytes, 4096);
3966 device_printf(sc->dev,
3967 "alloc %d slice rx_done failed\n", i);
3970 ss->rx_data.rx_done.entry = ss->rx_done_dma.dmem_addr;
3973 * Allocate the per-slice firmware stats
3975 bytes = sizeof(*ss->fw_stats);
3976 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
3977 sizeof(*ss->fw_stats), 64);
3979 device_printf(sc->dev,
3980 "alloc %d fw_stats failed\n", i);
3983 ss->fw_stats = ss->fw_stats_dma.dmem_addr;
/*
 * mxge_slice_probe - decide how many slices (rx rings) to use.  Requires
 * MSI-X; loads the RSS-capable firmware, resets the NIC, sizes the rx
 * interrupt queue, queries the firmware's maximum RSS queue count, then
 * clamps the slice count to powers of two and the MSI-X vector count.
 * Falls back to a single slice (restoring the non-RSS firmware) on any
 * failure.
 * NOTE(review): this numbered listing is missing intermediate lines
 * (returns/gotos of the failure paths); comments describe only the
 * visible statements.
 */
3989 mxge_slice_probe(mxge_softc_t *sc)
3991 int status, max_intr_slots, max_slices, num_slices;
3992 int msix_cnt, msix_enable, i, multi_tx;
3997 sc->num_tx_rings = 1;
3999 num_slices = device_getenv_int(sc->dev, "num_slices", mxge_num_slices);
4000 if (num_slices == 1)
4006 msix_enable = device_getenv_int(sc->dev, "msix.enable",
4011 msix_cnt = pci_msix_count(sc->dev);
4016 * Round down MSI-X vector count to the nearest power of 2
4019 while ((1 << (i + 1)) <= msix_cnt)
4024 * Now load the slice aware firmware see what it supports
4026 old_fw = sc->fw_name;
4027 if (old_fw == mxge_fw_aligned)
4028 sc->fw_name = mxge_fw_rss_aligned;
4030 sc->fw_name = mxge_fw_rss_unaligned;
4031 status = mxge_load_firmware(sc, 0);
4033 device_printf(sc->dev, "Falling back to a single slice\n");
4038 * Try to send a reset command to the card to see if it is alive
4040 memset(&cmd, 0, sizeof(cmd));
4041 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4043 device_printf(sc->dev, "failed reset\n");
4048 * Get rx ring size to calculate rx interrupt queue size
4050 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4052 device_printf(sc->dev, "Cannot determine rx ring size\n");
4055 max_intr_slots = 2 * (cmd.data0 / sizeof(mcp_dma_addr_t));
4058 * Tell it the size of the rx interrupt queue
4060 cmd.data0 = max_intr_slots * sizeof(struct mcp_slot);
4061 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4063 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4068 * Ask the maximum number of slices it supports
4070 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4072 device_printf(sc->dev,
4073 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4076 max_slices = cmd.data0;
4079 * Round down max slices count to the nearest power of 2
4082 while ((1 << (i + 1)) <= max_slices)
4084 max_slices = 1 << i;
/* Cannot use more slices than MSI-X vectors */
4086 if (max_slices > msix_cnt)
4087 max_slices = msix_cnt;
4089 sc->num_slices = num_slices;
4090 sc->num_slices = if_ring_count2(sc->num_slices, max_slices);
4092 multi_tx = device_getenv_int(sc->dev, "multi_tx", mxge_multi_tx);
4094 sc->num_tx_rings = sc->num_slices;
4097 device_printf(sc->dev, "using %d slices, max %d\n",
4098 sc->num_slices, max_slices);
4101 if (sc->num_slices == 1)
/* Failure path: restore the single-slice firmware */
4106 sc->fw_name = old_fw;
4107 mxge_load_firmware(sc, 0);
/*
 * mxge_setup_serialize - build the serializer array used by the ifnet
 * serialize methods: main serializer first, then all rx serializers,
 * then all tx serializers (2 * num_slices + 1 total).  Order is what
 * mxge_serialize_skipmain relies on (skip index 0 = main).
 */
4111 mxge_setup_serialize(struct mxge_softc *sc)
4115 /* Main + rx + tx */
4116 sc->nserialize = (2 * sc->num_slices) + 1;
4118 kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
4119 M_DEVBUF, M_WAITOK | M_ZERO);
4124 * NOTE: Order is critical
4127 KKASSERT(i < sc->nserialize);
4128 sc->serializes[i++] = &sc->main_serialize;
4130 for (slice = 0; slice < sc->num_slices; ++slice) {
4131 KKASSERT(i < sc->nserialize);
4132 sc->serializes[i++] = &sc->ss[slice].rx_data.rx_serialize;
4135 for (slice = 0; slice < sc->num_slices; ++slice) {
4136 KKASSERT(i < sc->nserialize);
4137 sc->serializes[i++] = &sc->ss[slice].tx.tx_serialize;
4140 KKASSERT(i == sc->nserialize);
/*
 * ifnet.if_serialize method: enter the requested subset of this
 * device's serializer array (main/RX/TX, selected by 'slz').
 */
4144 mxge_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
4146 struct mxge_softc *sc = ifp->if_softc;
4148 ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
/*
 * ifnet.if_deserialize method: release the serializers taken by the
 * matching mxge_serialize() call.
 */
4152 mxge_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4154 struct mxge_softc *sc = ifp->if_softc;
4156 ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
/*
 * ifnet.if_tryserialize method: non-blocking attempt to enter the
 * serializer array; returns non-zero on success (per the
 * ifnet_serialize_array_try() contract).
 */
4160 mxge_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4162 struct mxge_softc *sc = ifp->if_softc;
4164 return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
/*
 * INVARIANTS-only ifnet.if_serialize_assert method: assert that the
 * selected serializers are (or are not) currently held.
 */
4170 mxge_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
4171 boolean_t serialized)
4173 struct mxge_softc *sc = ifp->if_softc;
4175 ifnet_serialize_array_assert(sc->serializes, sc->nserialize,
4179 #endif /* INVARIANTS */
4181 #ifdef IFPOLL_ENABLE
/*
 * Per-slice ifpoll RX handler.  Drains up to 'cycle' entries from the
 * slice's RX done ring, then hands the RX interrupt token back to the
 * firmware via the irq_claim register.
 */
4184 mxge_npoll_rx(struct ifnet *ifp, void *xss, int cycle)
4186 struct mxge_slice_state *ss = xss;
4187 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
/* Caller (ifpoll) must already hold this slice's RX serializer. */
4189 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);
/* Non-zero length marks a valid (unprocessed) RX completion entry. */
4191 if (rx_done->entry[rx_done->idx].length != 0) {
4192 mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, cycle);
4196 * This register writing obviously has cost,
4197 * however, if we don't hand back the rx token,
4198 * the upcoming packets may suffer ridiculously
4199 * large delay, as observed on 8AL-C using ping(8).
4201 *ss->irq_claim = be32toh(3);
/*
 * ifnet.if_npoll method: register (or, presumably when 'info' is
 * NULL, deregister) the per-slice polling handlers with ifpoll.
 * Only RX is polled; per the comment below, TX/status polling did
 * not work on this hardware.
 */
4206 mxge_npoll(struct ifnet *ifp, struct ifpoll_info *info)
4208 struct mxge_softc *sc = ifp->if_softc;
4215 * Only poll rx; polling tx and status don't seem to work
4217 for (i = 0; i < sc->num_slices; ++i) {
4218 struct mxge_slice_state *ss = &sc->ss[i];
/* Bind each slice's poll handler to its interrupt CPU. */
4219 int idx = ss->intr_cpuid;
4221 KKASSERT(idx < ncpus2);
4222 info->ifpi_rx[idx].poll_func = mxge_npoll_rx;
4223 info->ifpi_rx[idx].arg = ss;
4224 info->ifpi_rx[idx].serializer = &ss->rx_data.rx_serialize;
4228 #endif /* IFPOLL_ENABLE */
/*
 * Device attach method.  Maps the board, parses the EEPROM strings,
 * allocates DMA scratch buffers, selects and loads firmware, sets up
 * slices/interrupts/serializers/rings, and finally attaches the
 * ethernet interface and starts the periodic tick callout.
 * Error paths branch to cleanup labels not visible in this excerpt.
 */
4231 mxge_attach(device_t dev)
4233 mxge_softc_t *sc = device_get_softc(dev);
4234 struct ifnet *ifp = &sc->arpcom.ac_if;
4238 * Avoid rewriting half the lines in this file to use
4239 * &sc->arpcom.ac_if instead
4243 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4245 /* IFM_ETH_FORCEPAUSE can't be changed */
4246 ifmedia_init(&sc->media, IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE,
4247 mxge_media_change, mxge_media_status);
4249 lwkt_serialize_init(&sc->main_serialize);
4251 mxge_fetch_tunables(sc);
/* Parent DMA tag covering the full 32-bit space; children derive
 * their own constraints from it. */
4253 err = bus_dma_tag_create(NULL, /* parent */
4256 BUS_SPACE_MAXADDR, /* low */
4257 BUS_SPACE_MAXADDR, /* high */
4258 NULL, NULL, /* filter */
4259 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
4261 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
4263 &sc->parent_dmat); /* tag */
4265 device_printf(dev, "Err %d allocating parent dmat\n", err);
4269 callout_init_mp(&sc->co_hdl);
4271 mxge_setup_cfg_space(sc);
4274 * Map the board into the kernel
4277 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
4279 if (sc->mem_res == NULL) {
4280 device_printf(dev, "could not map memory\n");
4285 sc->sram = rman_get_virtual(sc->mem_res);
/* 2MB SRAM minus reserved regions; magic layout is
 * NIC-specific -- TODO confirm against Myri10GE docs. */
4286 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4287 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4288 device_printf(dev, "impossible memory region size %ld\n",
4289 rman_get_size(sc->mem_res));
4295 * Make NULL terminated copy of the EEPROM strings section of
4298 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4299 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4300 rman_get_bushandle(sc->mem_res),
4301 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4302 sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE - 2);
4303 err = mxge_parse_strings(sc);
4305 device_printf(dev, "parse EEPROM string failed\n");
4310 * Enable write combining for efficient use of PCIe bus
4315 * Allocate the out of band DMA memory
4317 err = mxge_dma_alloc(sc, &sc->cmd_dma, sizeof(mxge_cmd_t), 64);
4319 device_printf(dev, "alloc cmd DMA buf failed\n");
4322 sc->cmd = sc->cmd_dma.dmem_addr;
4324 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4326 device_printf(dev, "alloc zeropad DMA buf failed\n");
4330 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4332 device_printf(dev, "alloc dmabench DMA buf failed\n");
4336 /* Select & load the firmware */
4337 err = mxge_select_firmware(sc);
4339 device_printf(dev, "select firmware failed\n");
4343 mxge_slice_probe(sc);
4344 err = mxge_alloc_slices(sc);
4346 device_printf(dev, "alloc slices failed\n");
4350 err = mxge_alloc_intr(sc);
4352 device_printf(dev, "alloc intr failed\n");
4356 /* Setup serializes */
4357 mxge_setup_serialize(sc);
4359 err = mxge_reset(sc, 0);
4361 device_printf(dev, "reset failed\n");
4365 err = mxge_alloc_rings(sc);
4367 device_printf(dev, "failed to allocate rings\n");
/* Advertise capabilities: 10Gbps, checksum offload, TSO, VLAN. */
4371 ifp->if_baudrate = IF_Gbps(10UL);
4372 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO;
4373 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4375 ifp->if_capabilities |= IFCAP_VLAN_MTU;
4377 /* Well, its software, sigh */
4378 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
4380 ifp->if_capenable = ifp->if_capabilities;
4383 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4384 ifp->if_init = mxge_init;
4385 ifp->if_ioctl = mxge_ioctl;
4386 ifp->if_start = mxge_start;
4387 #ifdef IFPOLL_ENABLE
/* Polling is only hooked up for MSI/MSI-X interrupt types. */
4388 if (sc->intr_type != PCI_INTR_TYPE_LEGACY)
4389 ifp->if_npoll = mxge_npoll;
4391 ifp->if_serialize = mxge_serialize;
4392 ifp->if_deserialize = mxge_deserialize;
4393 ifp->if_tryserialize = mxge_tryserialize;
4395 ifp->if_serialize_assert = mxge_serialize_assert;
4398 /* Increase TSO burst length */
4399 ifp->if_tsolen = (32 * ETHERMTU);
4401 /* Initialise the ifmedia structure */
4402 mxge_media_init(sc);
4403 mxge_media_probe(sc);
4405 ether_ifattach(ifp, sc->mac_addr, NULL);
4407 /* Setup TX rings and subqueues */
4408 for (i = 0; i < sc->num_tx_rings; ++i) {
4409 struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
4410 struct mxge_slice_state *ss = &sc->ss[i];
4412 ifsq_set_cpuid(ifsq, ss->intr_cpuid);
4413 ifsq_set_hw_serialize(ifsq, &ss->tx.tx_serialize);
4414 ifsq_set_priv(ifsq, &ss->tx);
4417 ifsq_watchdog_init(&ss->tx.watchdog, ifsq, mxge_watchdog);
4422 * We are not ready to do "gather" jumbo frame, so
4423 * limit MTU to MJUMPAGESIZE
4425 sc->max_mtu = MJUMPAGESIZE -
4426 ETHER_HDR_LEN - EVL_ENCAPLEN - MXGEFW_PAD - 1;
/* NOTE: intr setup after ether_ifattach; failure must detach. */
4429 err = mxge_setup_intr(sc);
4431 device_printf(dev, "alloc and setup intr failed\n");
4432 ether_ifdetach(ifp);
4436 mxge_add_sysctls(sc);
4438 /* Increase non-cluster mbuf limit; used by small RX rings */
4439 mb_inclimit(ifp->if_nmbclusters);
/* Periodic housekeeping tick, pinned to slice 0's interrupt CPU. */
4441 callout_reset_bycpu(&sc->co_hdl, mxge_ticks, mxge_tick, sc,
4442 sc->ss[0].intr_cpuid);
/*
 * Device detach method.  Tears down in roughly reverse attach order:
 * stop the interface and callout, tear down interrupts, detach the
 * ethernet layer, then release DMA buffers and bus resources.
 */
4451 mxge_detach(device_t dev)
4453 mxge_softc_t *sc = device_get_softc(dev);
4455 if (device_is_attached(dev)) {
4456 struct ifnet *ifp = sc->ifp;
4457 int mblimit = ifp->if_nmbclusters;
/* Quiesce the interface under full serialization. */
4459 ifnet_serialize_all(ifp);
4462 if (ifp->if_flags & IFF_RUNNING)
4464 callout_stop(&sc->co_hdl);
4466 mxge_teardown_intr(sc, sc->num_slices);
4468 ifnet_deserialize_all(ifp);
/* callout_terminate() must run unserialized; waits for the
 * tick handler to finish. */
4470 callout_terminate(&sc->co_hdl);
4472 ether_ifdetach(ifp);
4474 /* Decrease non-cluster mbuf limit increased by us */
4475 mb_inclimit(-mblimit);
4477 ifmedia_removeall(&sc->media);
/* Only quiesce firmware DMA if the scratch buffers exist. */
4479 if (sc->cmd != NULL && sc->zeropad_dma.dmem_addr != NULL &&
4481 mxge_dummy_rdma(sc, 0);
4484 mxge_rem_sysctls(sc);
4485 mxge_free_rings(sc);
4487 /* MUST after sysctls, intr and rings are freed */
4488 mxge_free_slices(sc);
4490 if (sc->dmabench_dma.dmem_addr != NULL)
4491 mxge_dma_free(&sc->dmabench_dma);
4492 if (sc->zeropad_dma.dmem_addr != NULL)
4493 mxge_dma_free(&sc->zeropad_dma);
4494 if (sc->cmd_dma.dmem_addr != NULL)
4495 mxge_dma_free(&sc->cmd_dma);
4497 if (sc->msix_table_res != NULL) {
4498 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(2),
4499 sc->msix_table_res);
4501 if (sc->mem_res != NULL) {
4502 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS,
4506 if (sc->parent_dmat != NULL)
4507 bus_dma_tag_destroy(sc->parent_dmat);
/* Device shutdown method.  NOTE(review): body not visible in this
 * excerpt -- presumably a no-op or minimal quiesce; confirm in the
 * full source. */
4513 mxge_shutdown(device_t dev)
/*
 * Release all MSI-X resources: per-slice IRQ resources and vectors,
 * and (when 'setup' is set) the MSI-X table state itself.
 * Only called in multi-slice (MSI-X) configurations.
 */
4519 mxge_free_msix(struct mxge_softc *sc, boolean_t setup)
4523 KKASSERT(sc->num_slices > 1);
4525 for (i = 0; i < sc->num_slices; ++i) {
4526 struct mxge_slice_state *ss = &sc->ss[i];
4528 if (ss->intr_res != NULL) {
4529 bus_release_resource(sc->dev, SYS_RES_IRQ,
4530 ss->intr_rid, ss->intr_res);
/* rid >= 0 means a vector was actually allocated for this slice. */
4532 if (ss->intr_rid >= 0)
4533 pci_release_msix_vector(sc->dev, ss->intr_rid);
4536 pci_teardown_msix(sc->dev);
/*
 * Allocate one MSI-X vector per slice and bind each to a CPU.
 * Slice 0 handles the combined interrupt on the base CPU offset;
 * slices 1..n-1 handle RX (or RX+TX when multiple TX rings exist)
 * on consecutive CPUs.  On any failure, falls through to the
 * mxge_free_msix() cleanup at the bottom.
 */
4540 mxge_alloc_msix(struct mxge_softc *sc)
4542 struct mxge_slice_state *ss;
4543 int offset, rid, error, i;
4544 boolean_t setup = FALSE;
4546 KKASSERT(sc->num_slices > 1);
4548 if (sc->num_slices == ncpus2) {
/* Spread different units across CPUs by default. */
4553 offset_def = (sc->num_slices * device_get_unit(sc->dev)) %
/* Tunable CPU offset; fall back to the computed default when the
 * user-supplied value is out of range or misaligned. */
4556 offset = device_getenv_int(sc->dev, "msix.offset", offset_def);
4557 if (offset >= ncpus2 ||
4558 offset % sc->num_slices != 0) {
4559 device_printf(sc->dev, "invalid msix.offset %d, "
4560 "use %d\n", offset, offset_def);
4561 offset = offset_def;
/* Slice 0: combined handler under the main serializer. */
4567 ss->intr_serialize = &sc->main_serialize;
4568 ss->intr_func = mxge_msi;
4569 ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
4570 "%s comb", device_get_nameunit(sc->dev));
4571 ss->intr_desc = ss->intr_desc0;
4572 ss->intr_cpuid = offset;
/* Remaining slices: RX-only or RX+TX handlers, one CPU each. */
4574 for (i = 1; i < sc->num_slices; ++i) {
4577 ss->intr_serialize = &ss->rx_data.rx_serialize;
4578 if (sc->num_tx_rings == 1) {
4579 ss->intr_func = mxge_msix_rx;
4580 ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
4581 "%s rx", device_get_nameunit(sc->dev));
4583 ss->intr_func = mxge_msix_rxtx;
4584 ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
4585 "%s rxtx", device_get_nameunit(sc->dev));
4587 ss->intr_desc = ss->intr_desc0;
4588 ss->intr_cpuid = offset + i;
/* MSI-X table lives behind BAR2 -- see the release in detach. */
4592 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4594 if (sc->msix_table_res == NULL) {
4595 device_printf(sc->dev, "couldn't alloc MSI-X table res\n");
4599 error = pci_setup_msix(sc->dev);
4601 device_printf(sc->dev, "could not setup MSI-X\n");
/* Allocate a vector and IRQ resource per slice on its CPU. */
4606 for (i = 0; i < sc->num_slices; ++i) {
4609 error = pci_alloc_msix_vector(sc->dev, i, &ss->intr_rid,
4612 device_printf(sc->dev, "could not alloc "
4613 "MSI-X %d on cpu%d\n", i, ss->intr_cpuid);
4617 ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4618 &ss->intr_rid, RF_ACTIVE);
4619 if (ss->intr_res == NULL) {
4620 device_printf(sc->dev, "could not alloc "
4621 "MSI-X %d resource\n", i);
4627 pci_enable_msix(sc->dev);
4628 sc->intr_type = PCI_INTR_TYPE_MSIX;
/* Error path: undo whatever was allocated so far. */
4631 mxge_free_msix(sc, setup);
/*
 * Allocate interrupt resources: MSI-X when multiple slices are
 * configured, otherwise a single legacy/MSI interrupt on slice 0.
 */
4636 mxge_alloc_intr(struct mxge_softc *sc)
4638 struct mxge_slice_state *ss;
4641 if (sc->num_slices > 1) {
4644 error = mxge_alloc_msix(sc);
4647 KKASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX);
/* Single-slice path: one interrupt (MSI if enabled by tunable). */
4653 sc->intr_type = pci_alloc_1intr(sc->dev, mxge_msi_enable,
4654 &ss->intr_rid, &irq_flags);
4656 ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4657 &ss->intr_rid, irq_flags);
4658 if (ss->intr_res == NULL) {
4659 device_printf(sc->dev, "could not alloc interrupt\n");
/* Legacy interrupts need the shared-capable handler. */
4663 if (sc->intr_type == PCI_INTR_TYPE_LEGACY)
4664 ss->intr_func = mxge_legacy;
4666 ss->intr_func = mxge_msi;
4667 ss->intr_serialize = &sc->main_serialize;
4668 ss->intr_cpuid = rman_get_cpuid(ss->intr_res);
/*
 * Hook up the interrupt handler for every slice.  On failure, tears
 * down the handlers already installed (first 'i' slices) and returns
 * the error.
 */
4674 mxge_setup_intr(struct mxge_softc *sc)
4678 for (i = 0; i < sc->num_slices; ++i) {
4679 struct mxge_slice_state *ss = &sc->ss[i];
4682 error = bus_setup_intr_descr(sc->dev, ss->intr_res,
4683 INTR_MPSAFE, ss->intr_func, ss, &ss->intr_hand,
4684 ss->intr_serialize, ss->intr_desc);
4686 device_printf(sc->dev, "can't setup %dth intr\n", i);
/* Partial rollback: only the slices set up so far. */
4687 mxge_teardown_intr(sc, i);
/*
 * Disconnect the interrupt handlers for the first 'cnt' slices.
 * 'cnt' may be less than num_slices when unwinding a partial
 * mxge_setup_intr().
 */
4695 mxge_teardown_intr(struct mxge_softc *sc, int cnt)
4702 for (i = 0; i < cnt; ++i) {
4703 struct mxge_slice_state *ss = &sc->ss[i];
4705 bus_teardown_intr(sc->dev, ss->intr_res, ss->intr_hand);
4710 mxge_free_intr(struct mxge_softc *sc)
4715 if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
4716 struct mxge_slice_state *ss = &sc->ss[0];
4718 if (ss->intr_res != NULL) {
4719 bus_release_resource(sc->dev, SYS_RES_IRQ,
4720 ss->intr_rid, ss->intr_res);
4722 if (sc->intr_type == PCI_INTR_TYPE_MSI)
4723 pci_release_msi(sc->dev);
4725 mxge_free_msix(sc, TRUE);