/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
/*__FBSDID("$FreeBSD: src/sys/dev/mxge/if_mxge.c,v 1.63 2009/06/26 11:45:06 rwatson Exp $");*/

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

/* count xmits ourselves, rather than via drbr */
#define NO_SLOW_STATS
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/vlan/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <bus/pci/pcireg.h>
#include <bus/pci/pcivar.h>
#include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h> /* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/netif/mxge/mxge_mcp.h>
#include <dev/netif/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/netif/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
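
/*
 * Firmware image naming: the "eth" images assume aligned PCIe
 * completions, the "ethp" images work around unaligned completions,
 * and the "rss_" variants are the multi-slice (RSS) builds.  See the
 * completion-alignment discussion above mxge_firmware_probe() below
 * for how aligned vs. unaligned is chosen.
 */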

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;
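
	/*
	 * For a page-aligned request larger than a page, let a single
	 * segment span the whole allocation and lift the boundary
	 * restriction; otherwise cap segments at 4KB so no segment
	 * can cross a 4KB boundary.
	 */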
	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
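			/*
			 * ptr still points at the 'M' of "MAC=".  The
			 * "ptr += 1" below plus the "ptr += 3" at the
			 * top of the loop together skip the 4-byte
			 * "MAC=" prefix to land on the first hex pair;
			 * each later "ptr += 3" steps over one "xx:"
			 * group.
			 */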
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves. This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

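	/*
	 * Standard PCIe extended config (ECAM) layout: each bus gets
	 * 1MB of config space and each of the 8 functions per device
	 * gets 4KB, so this device's window lives at
	 * base + bus * 1MB + (slot * 8 + func) * 4KB, which is what
	 * the arithmetic below computes.
	 */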
	off =  base
		+ 0x00100000UL * (unsigned long) bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_mapdev failed\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests. The
	 * results are returned in cmd.data0. The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
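	/*
	 * Bandwidth math: transfers * len bytes move in
	 * (ticks * 0.5us), i.e. (transfers * len * 2) / ticks bytes
	 * per microsecond, which is numerically MB/s; that is the
	 * expression stored in sc->read_dma and friends below.  The
	 * read/write test moves twice the data per transfer, hence
	 * its extra factor of 2.
	 */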

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary. Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;

	if (mxge_force_firmware != 0) {
		if (mxge_force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}

union qualhack
{
	const char *ro_char;
	char *rw_char;
};

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
		&sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;

}

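/*
 * zlib allocator shims: the firmware image is stored gzip-compressed
 * in the kernel, and zlib's inflate needs malloc/free hooks, so back
 * them with kmalloc/kfree from M_TEMP.
 */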
static void *
z_alloc(void *nil, u_int items, u_int size)
{
	void *ptr;

	ptr = kmalloc(items * size, M_TEMP, M_NOWAIT);
	return ptr;
}

static void
z_free(void *nil, void *ptr)
{
	kfree(ptr, M_TEMP);
}

static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}

	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = kmalloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL) {
		status = ENOMEM;
		goto abort_with_zs;
	}
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file\n");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	kfree(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address. The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);		/* dummy addr MSW */
	buf[4] = htobe32(dma_low);		/* dummy addr LSW */
	buf[5] = htobe32(enable);		/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

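	/*
	 * Command protocol: build an 8-byte-aligned mcp_cmd_t holding
	 * the arguments and the DMA address of the response block,
	 * PIO it to the MXGEFW_ETH_CMD mailbox in NIC SRAM, then poll
	 * the response block (which the firmware DMAs back to host
	 * memory) until the result changes from the 0xffffffff
	 * sentinel written below.
	 */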
	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	lockmgr(&sc->cmd_lock, LK_EXCLUSIVE);
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	lockmgr(&sc->cmd_lock, LK_RELEASE);
	return err;
}

static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = kmalloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not kmalloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	kfree(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}

static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ". For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address. The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8); /* where the code starts */
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
				      " %d\n", err);
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}
1157static int
1158mxge_max_mtu(mxge_softc_t *sc)
1159{
1160 mxge_cmd_t cmd;
1161 int status;
1162
1163 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1164 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1165
1166 /* try to set nbufs to see if it we can
1167 use virtually contiguous jumbos */
1168 cmd.data0 = 0;
1169 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1170 &cmd);
1171 if (status == 0)
1172 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1173
1174 /* otherwise, we're limited to MJUMPAGESIZE */
1175 return MJUMPAGESIZE - MXGEFW_PAD;
1176}

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0. It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			ss->fw_stats->valid = 0;
			ss->fw_stats->send_done_count = 0;
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	lockmgr(&sc->driver_lock, LK_RELEASE);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
	err = mxge_change_pause(sc, enabled);
	lockmgr(&sc->driver_lock, LK_RELEASE);
	return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}
	return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	if (lro_cnt > 128)
		return EINVAL;

	lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
	err = mxge_change_lro_locked(sc, lro_cnt);
	lockmgr(&sc->driver_lock, LK_RELEASE);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = &sc->sysctl_ctx;
	sysctl_ctx_init(ctx);
	sc->sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw),
					  OID_AUTO,
					  device_get_nameunit(sc->dev),
					  CTLFLAG_RD, 0, "");
	if (sc->sysctl_tree == NULL) {
		device_printf(sc->dev, "can't add sysctl node\n");
		return;
	}

	children = SYSCTL_CHILDREN(sc->sysctl_tree);
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, &sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, &sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, &sc->product_code_string,
			  0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control (pause frames)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"lro_cnt",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_lro,
			"I", "number of lro merge queues");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		ksprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}

/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic. We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */
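/*
 * The ring-wrap path above copies the trailing descriptors
 * highest-index-first and leaves the first descriptor for last; since
 * the first descriptor's flags stay cleared until the final 32-bit
 * store below, the NIC never parses a partially written chain.
 */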

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */
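	/* For example, with a 14-byte Ethernet header, a 20-byte IP
	 * header and a 20-byte TCP header, cum_len starts at -54 and
	 * crosses zero exactly at the first payload byte; from then
	 * on it counts payload modulo mss (see the send loop below).
	 */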

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), ss->scratch);
		/* the headers now live in the scratch buffer */
		ip = (struct ip *)(ss->scratch + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = ip_off + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		kprintf("tx->max_desc exceeded via TSO!\n");
		kprintf("mss = %d, %ld, %d!\n", mss,
			(long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */
1935
1936#ifdef MXGE_NEW_VLAN_API
1937/*
1938 * We reproduce the software vlan tag insertion from
1939 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
1940 * vlan tag insertion. We need to advertise this in order to have the
1941 * vlan interface respect our csum offload flags.
1942 */
1943static struct mbuf *
1944mxge_vlan_tag_insert(struct mbuf *m)
1945{
1946 struct ether_vlan_header *evl;
1947
1db10a72 1948 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, MB_DONTWAIT);
8892ea20
AE
1949 if (__predict_false(m == NULL))
1950 return NULL;
1951 if (m->m_len < sizeof(*evl)) {
1952 m = m_pullup(m, sizeof(*evl));
1953 if (__predict_false(m == NULL))
1954 return NULL;
1955 }
1956 /*
1957 * Transform the Ethernet header into an Ethernet header
1958 * with 802.1Q encapsulation.
1959 */
1960 evl = mtod(m, struct ether_vlan_header *);
1961 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
1962 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
1963 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
1964 evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
1965 m->m_flags &= ~M_VLANTAG;
1966 return m;
1967}
1968#endif /* MXGE_NEW_VLAN_API */
1969
1970static void
1971mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
1972{
1973 mxge_softc_t *sc;
1974 mcp_kreq_ether_send_t *req;
1975 bus_dma_segment_t *seg;
1976 struct mbuf *m_tmp;
1977 struct ifnet *ifp;
1978 mxge_tx_ring_t *tx;
1979 struct ip *ip;
1980 int cnt, cum_len, err, i, idx, odd_flag, ip_off;
1981 uint16_t pseudo_hdr_offset;
1982 uint8_t flags, cksum_offset;
1983
1984
1985 sc = ss->sc;
1986 ifp = sc->ifp;
1987 tx = &ss->tx;
1988
1989 ip_off = sizeof (struct ether_header);
1990#ifdef MXGE_NEW_VLAN_API
1991 if (m->m_flags & M_VLANTAG) {
1992 m = mxge_vlan_tag_insert(m);
1993 if (__predict_false(m == NULL))
1994 goto drop;
1995 ip_off += ETHER_VLAN_ENCAP_LEN;
1996 }
1997#endif
1998 /* (try to) map the frame for DMA */
1999 idx = tx->req & tx->mask;
2000 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2001 m, tx->seg_list, &cnt,
2002 BUS_DMA_NOWAIT);
2003 if (__predict_false(err == EFBIG)) {
2004 /* Too many segments in the chain. Try
2005 to defrag */
2006 m_tmp = m_defrag(m, M_NOWAIT);
2007 if (m_tmp == NULL) {
2008 goto drop;
2009 }
2010 ss->tx.defrag++;
2011 m = m_tmp;
2012 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2013 tx->info[idx].map,
2014 m, tx->seg_list, &cnt,
2015 BUS_DMA_NOWAIT);
2016 }
2017 if (__predict_false(err != 0)) {
2018 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2019 " packet len = %d\n", err, m->m_pkthdr.len);
2020 goto drop;
2021 }
2022 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2023 BUS_DMASYNC_PREWRITE);
2024 tx->info[idx].m = m;
2025
2026#if IFCAP_TSO4
2027 /* TSO is different enough, we handle it in another routine */
2028 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2029 mxge_encap_tso(ss, m, cnt, ip_off);
2030 return;
2031 }
2032#endif
2033
2034 req = tx->req_list;
2035 cksum_offset = 0;
2036 pseudo_hdr_offset = 0;
2037 flags = MXGEFW_FLAGS_NO_TSO;
2038
2039 /* checksum offloading? */
2040 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
2041 /* ensure ip header is in first mbuf, copy
2042 it to a scratch buffer if not */
2043 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
2044 m_copydata(m, 0, ip_off + sizeof (*ip),
2045 ss->scratch);
2046 ip = (struct ip *)(ss->scratch + ip_off);
2047 } else {
2048 ip = (struct ip *)(mtod(m, char *) + ip_off);
2049 }
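		/*
		 * Tell the firmware where checksumming starts
		 * (cksum_offset, the first byte of the L4 header) and
		 * where to store the result: csum_data is the checksum
		 * field's offset within the L4 header, so the sum below
		 * is that field's offset within the frame.
		 */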
2050 cksum_offset = ip_off + (ip->ip_hl << 2);
2051 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2052 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2053 req->cksum_offset = cksum_offset;
2054 flags |= MXGEFW_FLAGS_CKSUM;
2055 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2056 } else {
2057 odd_flag = 0;
2058 }
2059 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2060 flags |= MXGEFW_FLAGS_SMALL;
2061
2062 /* convert segments into a request list */
2063 cum_len = 0;
2064 seg = tx->seg_list;
2065 req->flags = MXGEFW_FLAGS_FIRST;
2066 for (i = 0; i < cnt; i++) {
2067 req->addr_low =
2068 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2069 req->addr_high =
2070 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2071 req->length = htobe16(seg->ds_len);
2072 req->cksum_offset = cksum_offset;
2073 if (cksum_offset > seg->ds_len)
2074 cksum_offset -= seg->ds_len;
2075 else
2076 cksum_offset = 0;
2077 req->pseudo_hdr_offset = pseudo_hdr_offset;
2078 req->pad = 0; /* complete solid 16-byte block */
2079 req->rdma_count = 1;
2080 req->flags |= flags | ((cum_len & 1) * odd_flag);
2081 cum_len += seg->ds_len;
2082 seg++;
2083 req++;
2084 req->flags = 0;
2085 }
2086 req--;
2087 /* pad runts to 60 bytes */
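	/*
	 * The extra descriptor points at zeropad_dma, a preallocated
	 * buffer that (as the name suggests) reads back as zeroes, so
	 * short frames are padded without touching the mbuf chain.
	 */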
2088 if (cum_len < 60) {
2089 req++;
2090 req->addr_low =
2091 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2092 req->addr_high =
2093 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2094 req->length = htobe16(60 - cum_len);
2095 req->cksum_offset = 0;
2096 req->pseudo_hdr_offset = pseudo_hdr_offset;
2097 req->pad = 0; /* complete solid 16-byte block */
2098 req->rdma_count = 1;
2099 req->flags |= flags | ((cum_len & 1) * odd_flag);
2100 cnt++;
2101 }
2102
2103 tx->req_list[0].rdma_count = cnt;
2104#if 0
2105 /* print what the firmware will see */
2106 for (i = 0; i < cnt; i++) {
 2107 kprintf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2108 "cso:%d, flags:0x%x, rdma:%d\n",
2109 i, (int)ntohl(tx->req_list[i].addr_high),
2110 (int)ntohl(tx->req_list[i].addr_low),
2111 (int)ntohs(tx->req_list[i].length),
2112 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2113 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2114 tx->req_list[i].rdma_count);
2115 }
 2116 kprintf("--------------\n");
2117#endif
2118 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2119 mxge_submit_req(tx, tx->req_list, cnt);
2120#ifdef IFNET_BUF_RING
2121 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2122 /* tell the NIC to start polling this slice */
2123 *tx->send_go = 1;
2124 tx->queue_active = 1;
2125 tx->activate++;
2126 wmb();
2127 }
2128#endif
2129 return;
2130
2131drop:
2132 m_freem(m);
2133 ss->oerrors++;
2134 return;
2135}
2136
2137#ifdef IFNET_BUF_RING
2138static void
2139mxge_qflush(struct ifnet *ifp)
2140{
2141 mxge_softc_t *sc = ifp->if_softc;
2142 mxge_tx_ring_t *tx;
2143 struct mbuf *m;
2144 int slice;
2145
2146 for (slice = 0; slice < sc->num_slices; slice++) {
2147 tx = &sc->ss[slice].tx;
 2148 lockmgr(&tx->lock, LK_EXCLUSIVE);
2149 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2150 m_freem(m);
 2151 lockmgr(&tx->lock, LK_RELEASE);
2152 }
2153 if_qflush(ifp);
2154}
2155
2156static inline void
2157mxge_start_locked(struct mxge_slice_state *ss)
2158{
2159 mxge_softc_t *sc;
2160 struct mbuf *m;
2161 struct ifnet *ifp;
2162 mxge_tx_ring_t *tx;
2163
2164 sc = ss->sc;
2165 ifp = sc->ifp;
2166 tx = &ss->tx;
2167
2168 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2169 m = drbr_dequeue(ifp, tx->br);
2170 if (m == NULL) {
2171 return;
2172 }
2173 /* let BPF see it */
2174 BPF_MTAP(ifp, m);
2175
2176 /* give it to the nic */
2177 mxge_encap(ss, m);
2178 }
2179 /* ran out of transmit slots */
2180 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2181 && (!drbr_empty(ifp, tx->br))) {
2182 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2183 tx->stall++;
2184 }
2185}
2186
2187static int
2188mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2189{
2190 mxge_softc_t *sc;
2191 struct ifnet *ifp;
2192 mxge_tx_ring_t *tx;
2193 int err;
2194
2195 sc = ss->sc;
2196 ifp = sc->ifp;
2197 tx = &ss->tx;
2198
2199 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2200 IFF_DRV_RUNNING) {
2201 err = drbr_enqueue(ifp, tx->br, m);
2202 return (err);
2203 }
2204
2205 if (drbr_empty(ifp, tx->br) &&
2206 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2207 /* let BPF see it */
2208 BPF_MTAP(ifp, m);
2209 /* give it to the nic */
2210 mxge_encap(ss, m);
2211 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2212 return (err);
2213 }
2214 if (!drbr_empty(ifp, tx->br))
2215 mxge_start_locked(ss);
2216 return (0);
2217}
2218
2219static int
2220mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2221{
2222 mxge_softc_t *sc = ifp->if_softc;
2223 struct mxge_slice_state *ss;
2224 mxge_tx_ring_t *tx;
2225 int err = 0;
2226 int slice;
2227
2228 slice = m->m_pkthdr.flowid;
2229 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2230
2231 ss = &sc->ss[slice];
2232 tx = &ss->tx;
2233
 /* trylock: lockmgr returns 0 on success, EBUSY with LK_NOWAIT */
 2234 if (lockmgr(&tx->lock, LK_EXCLUSIVE|LK_NOWAIT) == 0) {
 2235 err = mxge_transmit_locked(ss, m);
 2236 lockmgr(&tx->lock, LK_RELEASE);
2237 } else {
2238 err = drbr_enqueue(ifp, tx->br, m);
2239 }
2240
2241 return (err);
2242}
2243
2244#else
2245
2246static inline void
2247mxge_start_locked(struct mxge_slice_state *ss)
2248{
2249 mxge_softc_t *sc;
2250 struct mbuf *m;
2251 struct ifnet *ifp;
2252 mxge_tx_ring_t *tx;
2253
2254 sc = ss->sc;
2255 ifp = sc->ifp;
2256 tx = &ss->tx;
2257 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2258 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2259 if (m == NULL) {
2260 return;
2261 }
2262 /* let BPF see it */
2263 BPF_MTAP(ifp, m);
2264
2265 /* give it to the nic */
2266 mxge_encap(ss, m);
2267 }
2268 /* ran out of transmit slots */
2269 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2270 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2271 tx->stall++;
2272 }
2273}
2274#endif
2275static void
2276mxge_start(struct ifnet *ifp)
2277{
2278 mxge_softc_t *sc = ifp->if_softc;
2279 struct mxge_slice_state *ss;
2280
2281 /* only use the first slice for now */
2282 ss = &sc->ss[0];
 2283 lockmgr(&ss->tx.lock, LK_EXCLUSIVE);
 2284 mxge_start_locked(ss);
 2285 lockmgr(&ss->tx.lock, LK_RELEASE);
2286}
2287
2288/*
2289 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2290 * at most 32 bytes at a time, so as to avoid involving the software
2291 * pio handler in the nic. We re-write the first segment's low
2292 * DMA address to mark it valid only after we write the entire chunk
2293 * in a burst
2294 */
2295static inline void
2296mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2297 mcp_kreq_ether_recv_t *src)
2298{
2299 uint32_t low;
2300
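	/*
	 * Writing 0xffffffff to the first entry's addr_low marks the
	 * group invalid while the burst is copied; restoring the real
	 * address afterwards publishes all 8 entries to the NIC at
	 * once.
	 */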
2301 low = src->addr_low;
2302 src->addr_low = 0xffffffff;
2303 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2304 wmb();
2305 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2306 wmb();
2307 src->addr_low = low;
2308 dst->addr_low = low;
2309 wmb();
2310}
2311
2312static int
2313mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2314{
2315 bus_dma_segment_t seg;
2316 struct mbuf *m;
2317 mxge_rx_ring_t *rx = &ss->rx_small;
2318 int cnt, err;
2319
 2320 m = m_gethdr(MB_DONTWAIT, MT_DATA);
2321 if (m == NULL) {
2322 rx->alloc_fail++;
2323 err = ENOBUFS;
2324 goto done;
2325 }
2326 m->m_len = MHLEN;
2327 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2328 &seg, &cnt, BUS_DMA_NOWAIT);
2329 if (err != 0) {
2330 m_free(m);
2331 goto done;
2332 }
2333 rx->info[idx].m = m;
2334 rx->shadow[idx].addr_low =
2335 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2336 rx->shadow[idx].addr_high =
2337 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2338
2339done:
2340 if ((idx & 7) == 7)
2341 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2342 return err;
2343}
2344
2345static int
2346mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2347{
2348 bus_dma_segment_t seg[3];
2349 struct mbuf *m;
2350 mxge_rx_ring_t *rx = &ss->rx_big;
2351 int cnt, err, i;
2352
2353 if (rx->cl_size == MCLBYTES)
 2354 m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
 2355 else
 2356 m = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2357 if (m == NULL) {
2358 rx->alloc_fail++;
2359 err = ENOBUFS;
2360 goto done;
2361 }
2362 m->m_len = rx->mlen;
2363 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2364 seg, &cnt, BUS_DMA_NOWAIT);
2365 if (err != 0) {
2366 m_free(m);
2367 goto done;
2368 }
2369 rx->info[idx].m = m;
2370 rx->shadow[idx].addr_low =
2371 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2372 rx->shadow[idx].addr_high =
2373 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2374
2375#if MXGE_VIRT_JUMBOS
2376 for (i = 1; i < cnt; i++) {
2377 rx->shadow[idx + i].addr_low =
2378 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2379 rx->shadow[idx + i].addr_high =
2380 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2381 }
2382#endif
2383
2384done:
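	/*
	 * Buffers are posted to the NIC eight at a time (see
	 * mxge_submit_8rx), so a group is pushed only once its last
	 * slot (idx & 7 == 7) has been filled.
	 */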
2385 for (i = 0; i < rx->nbufs; i++) {
2386 if ((idx & 7) == 7) {
2387 mxge_submit_8rx(&rx->lanai[idx - 7],
2388 &rx->shadow[idx - 7]);
2389 }
2390 idx++;
2391 }
2392 return err;
2393}
2394
2395/*
2396 * Myri10GE hardware checksums are not valid if the sender
2397 * padded the frame with non-zero padding. This is because
2398 * the firmware just does a simple 16-bit 1s complement
2399 * checksum across the entire frame, excluding the first 14
 2400 bytes. It is best to simply check the checksum and
 2401 tell the stack about it only if the checksum is good.
2402 */
2403
2404static inline uint16_t
2405mxge_rx_csum(struct mbuf *m, int csum)
2406{
2407 struct ether_header *eh;
2408 struct ip *ip;
2409 uint16_t c;
2410
2411 eh = mtod(m, struct ether_header *);
2412
2413 /* only deal with IPv4 TCP & UDP for now */
2414 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2415 return 1;
2416 ip = (struct ip *)(eh + 1);
2417 if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2418 ip->ip_p != IPPROTO_UDP))
2419 return 1;
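	/*
	 * The firmware checksums everything past the Ethernet header,
	 * IP header included.  A valid IP header sums to 0xffff and
	 * drops out; adding the pseudo-header (whose length term is
	 * ip_len minus the IP header length) should then also give
	 * 0xffff, so the XOR below yields 0 for a good checksum.
	 */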
2420#ifdef INET
2421 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 2422 htonl(ntohs(csum) + ntohs(ip->ip_len) -
 2423 (ip->ip_hl << 2) + ip->ip_p));
2424#else
2425 c = 1;
2426#endif
2427 c ^= 0xffff;
2428 return (c);
2429}
2430
2431static void
2432mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2433{
2434 struct ether_vlan_header *evl;
2435 struct ether_header *eh;
2436 uint32_t partial;
2437
2438 evl = mtod(m, struct ether_vlan_header *);
2439 eh = mtod(m, struct ether_header *);
2440
2441 /*
2442 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2443 * after what the firmware thought was the end of the ethernet
2444 * header.
2445 */
2446
2447 /* put checksum into host byte order */
2448 *csum = ntohs(*csum);
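	/*
	 * Ones-complement arithmetic: adding ~partial subtracts the
	 * 4 bytes of VLAN encapsulation from the sum, the compare
	 * adds back the end-around carry, and the two folds collapse
	 * any remaining carries into the low 16 bits.
	 */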
2449 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2450 (*csum) += ~partial;
2451 (*csum) += ((*csum) < ~partial);
2452 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2453 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2454
2455 /* restore checksum to network byte order;
2456 later consumers expect this */
2457 *csum = htons(*csum);
2458
2459 /* save the tag */
2460#ifdef MXGE_NEW_VLAN_API
2461 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2462#else
2463 {
2464 struct m_tag *mtag;
2465 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2466 M_NOWAIT);
2467 if (mtag == NULL)
2468 return;
2469 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2470 m_tag_prepend(m, mtag);
2471 }
2472
2473#endif
2474 m->m_flags |= M_VLANTAG;
2475
2476 /*
2477 * Remove the 802.1q header by copying the Ethernet
2478 * addresses over it and adjusting the beginning of
2479 * the data in the mbuf. The encapsulated Ethernet
2480 * type field is already in place.
2481 */
2482 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2483 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2484 m_adj(m, ETHER_VLAN_ENCAP_LEN);
2485}
2486
2487
2488static inline void
2489mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2490{
2491 mxge_softc_t *sc;
2492 struct ifnet *ifp;
2493 struct mbuf *m;
2494 struct ether_header *eh;
2495 mxge_rx_ring_t *rx;
2496 bus_dmamap_t old_map;
2497 int idx;
2498 uint16_t tcpudp_csum;
2499
2500 sc = ss->sc;
2501 ifp = sc->ifp;
2502 rx = &ss->rx_big;
2503 idx = rx->cnt & rx->mask;
2504 rx->cnt += rx->nbufs;
2505 /* save a pointer to the received mbuf */
2506 m = rx->info[idx].m;
2507 /* try to replace the received mbuf */
2508 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2509 /* drop the frame -- the old mbuf is re-cycled */
2510 ifp->if_ierrors++;
2511 return;
2512 }
2513
2514 /* unmap the received buffer */
2515 old_map = rx->info[idx].map;
2516 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2517 bus_dmamap_unload(rx->dmat, old_map);
2518
2519 /* swap the bus_dmamap_t's */
2520 rx->info[idx].map = rx->extra_map;
2521 rx->extra_map = old_map;
2522
2523 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2524 * aligned */
2525 m->m_data += MXGEFW_PAD;
2526
2527 m->m_pkthdr.rcvif = ifp;
2528 m->m_len = m->m_pkthdr.len = len;
2529 ss->ipackets++;
2530 eh = mtod(m, struct ether_header *);
2531 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2532 mxge_vlan_tag_remove(m, &csum);
2533 }
2534 /* if the checksum is valid, mark it in the mbuf header */
2535 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2536 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2537 return;
2538 /* otherwise, it was a UDP frame, or a TCP frame which
2539 we could not do LRO on. Tell the stack that the
2540 checksum is good */
2541 m->m_pkthdr.csum_data = 0xffff;
2542 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2543 }
2544 /* flowid only valid if RSS hashing is enabled */
2545 if (sc->num_slices > 1) {
2546 m->m_pkthdr.flowid = (ss - sc->ss);
2547 m->m_flags |= M_FLOWID;
2548 }
2549 /* pass the frame up the stack */
2550 (*ifp->if_input)(ifp, m);
2551}
2552
2553static inline void
2554mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2555{
2556 mxge_softc_t *sc;
2557 struct ifnet *ifp;
2558 struct ether_header *eh;
2559 struct mbuf *m;
2560 mxge_rx_ring_t *rx;
2561 bus_dmamap_t old_map;
2562 int idx;
2563 uint16_t tcpudp_csum;
2564
2565 sc = ss->sc;
2566 ifp = sc->ifp;
2567 rx = &ss->rx_small;
2568 idx = rx->cnt & rx->mask;
2569 rx->cnt++;
2570 /* save a pointer to the received mbuf */
2571 m = rx->info[idx].m;
2572 /* try to replace the received mbuf */
2573 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2574 /* drop the frame -- the old mbuf is re-cycled */
2575 ifp->if_ierrors++;
2576 return;
2577 }
2578
2579 /* unmap the received buffer */
2580 old_map = rx->info[idx].map;
2581 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2582 bus_dmamap_unload(rx->dmat, old_map);
2583
2584 /* swap the bus_dmamap_t's */
2585 rx->info[idx].map = rx->extra_map;
2586 rx->extra_map = old_map;
2587
2588 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2589 * aligned */
2590 m->m_data += MXGEFW_PAD;
2591
2592 m->m_pkthdr.rcvif = ifp;
2593 m->m_len = m->m_pkthdr.len = len;
2594 ss->ipackets++;
2595 eh = mtod(m, struct ether_header *);
2596 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2597 mxge_vlan_tag_remove(m, &csum);
2598 }
2599 /* if the checksum is valid, mark it in the mbuf header */
2600 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2601 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2602 return;
2603 /* otherwise, it was a UDP frame, or a TCP frame which
2604 we could not do LRO on. Tell the stack that the
2605 checksum is good */
2606 m->m_pkthdr.csum_data = 0xffff;
2607 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2608 }
2609 /* flowid only valid if RSS hashing is enabled */
2610 if (sc->num_slices > 1) {
2611 m->m_pkthdr.flowid = (ss - sc->ss);
2612 m->m_flags |= M_FLOWID;
2613 }
2614 /* pass the frame up the stack */
2615 (*ifp->if_input)(ifp, m);
2616}
2617
2618static inline void
2619mxge_clean_rx_done(struct mxge_slice_state *ss)
2620{
2621 mxge_rx_done_t *rx_done = &ss->rx_done;
2622 int limit = 0;
2623 uint16_t length;
2624 uint16_t checksum;
2625
2626
2627 while (rx_done->entry[rx_done->idx].length != 0) {
2628 length = ntohs(rx_done->entry[rx_done->idx].length);
2629 rx_done->entry[rx_done->idx].length = 0;
2630 checksum = rx_done->entry[rx_done->idx].checksum;
2631 if (length <= (MHLEN - MXGEFW_PAD))
2632 mxge_rx_done_small(ss, length, checksum);
2633 else
2634 mxge_rx_done_big(ss, length, checksum);
2635 rx_done->cnt++;
2636 rx_done->idx = rx_done->cnt & rx_done->mask;
2637
2638 /* limit potential for livelock */
2639 if (__predict_false(++limit > rx_done->mask / 2))
2640 break;
2641 }
2642#ifdef INET
2643 while (!SLIST_EMPTY(&ss->lro_active)) {
2644 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
2645 SLIST_REMOVE_HEAD(&ss->lro_active, next);
2646 mxge_lro_flush(ss, lro);
2647 }
2648#endif
2649}
2650
2651
2652static inline void
2653mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2654{
2655 struct ifnet *ifp;
2656 mxge_tx_ring_t *tx;
2657 struct mbuf *m;
2658 bus_dmamap_t map;
2659 int idx;
2660 int *flags;
2661
2662 tx = &ss->tx;
2663 ifp = ss->sc->ifp;
2664 while (tx->pkt_done != mcp_idx) {
2665 idx = tx->done & tx->mask;
2666 tx->done++;
2667 m = tx->info[idx].m;
2668 /* mbuf and DMA map only attached to the first
2669 segment per-mbuf */
2670 if (m != NULL) {
2671 ss->obytes += m->m_pkthdr.len;
2672 if (m->m_flags & M_MCAST)
2673 ss->omcasts++;
2674 ss->opackets++;
2675 tx->info[idx].m = NULL;
2676 map = tx->info[idx].map;
2677 bus_dmamap_unload(tx->dmat, map);
2678 m_freem(m);
2679 }
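		/*
		 * info[].flag was set on the last descriptor of each
		 * submitted request, so each flag seen here advances
		 * pkt_done by one completed send.
		 */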
2680 if (tx->info[idx].flag) {
2681 tx->info[idx].flag = 0;
2682 tx->pkt_done++;
2683 }
2684 }
2685
 2686 /* If we have space, clear IFF_OACTIVE to tell the stack that
 2687 it's OK to send packets */
2688#ifdef IFNET_BUF_RING
2689 flags = &ss->if_drv_flags;
2690#else
2691 flags = &ifp->if_drv_flags;
2692#endif
 2693 lockmgr(&ss->tx.lock, LK_EXCLUSIVE);
2694 if ((*flags) & IFF_DRV_OACTIVE &&
2695 tx->req - tx->done < (tx->mask + 1)/4) {
2696 *(flags) &= ~IFF_DRV_OACTIVE;
2697 ss->tx.wake++;
2698 mxge_start_locked(ss);
2699 }
2700#ifdef IFNET_BUF_RING
2701 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2702 /* let the NIC stop polling this queue, since there
2703 * are no more transmits pending */
2704 if (tx->req == tx->done) {
2705 *tx->send_stop = 1;
2706 tx->queue_active = 0;
2707 tx->deactivate++;
2708 wmb();
2709 }
2710 }
2711#endif
 2712 lockmgr(&ss->tx.lock, LK_RELEASE);
2713
2714}
2715
2716static struct mxge_media_type mxge_xfp_media_types[] =
2717{
2718 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2719 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2720 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2721 {0, (1 << 5), "10GBASE-ER"},
2722 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2723 {0, (1 << 3), "10GBASE-SW"},
2724 {0, (1 << 2), "10GBASE-LW"},
2725 {0, (1 << 1), "10GBASE-EW"},
2726 {0, (1 << 0), "Reserved"}
2727};
2728static struct mxge_media_type mxge_sfp_media_types[] =
2729{
2730 {0, (1 << 7), "Reserved"},
2731 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2732 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2733 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}
2734};
2735
2736static void
2737mxge_set_media(mxge_softc_t *sc, int type)
2738{
2739 sc->media_flags |= type;
2740 ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
2741 ifmedia_set(&sc->media, sc->media_flags);
2742}
2743
2744
2745/*
2746 * Determine the media type for a NIC. Some XFPs will identify
2747 * themselves only when their link is up, so this is initiated via a
2748 * link up interrupt. However, this can potentially take up to
2749 * several milliseconds, so it is run via the watchdog routine, rather
2750 * than in the interrupt handler itself. This need only be done
2751 * once, not each time the link is up.
2752 */
2753static void
2754mxge_media_probe(mxge_softc_t *sc)
2755{
2756 mxge_cmd_t cmd;
2757 char *cage_type;
2758 char *ptr;
2759 struct mxge_media_type *mxge_media_types = NULL;
2760 int i, err, ms, mxge_media_type_entries;
2761 uint32_t byte;
2762
2763 sc->need_media_probe = 0;
2764
2765 /* if we've already set a media type, we're done */
2766 if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
2767 return;
2768
2769 /*
 2770 * parse the product code to determine the interface type
2771 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2772 * after the 3rd dash in the driver's cached copy of the
2773 * EEPROM's product code string.
2774 */
2775 ptr = sc->product_code_string;
 2776 if (ptr == NULL) {
 2777 device_printf(sc->dev, "Missing product code\n");
 return;
 2778 }
2779
2780 for (i = 0; i < 3; i++, ptr++) {
2781 ptr = index(ptr, '-');
2782 if (ptr == NULL) {
2783 device_printf(sc->dev,
2784 "only %d dashes in PC?!?\n", i);
2785 return;
2786 }
2787 }
2788 if (*ptr == 'C') {
2789 /* -C is CX4 */
2790 mxge_set_media(sc, IFM_10G_CX4);
2791 return;
2792 }
2793 else if (*ptr == 'Q') {
2794 /* -Q is Quad Ribbon Fiber */
2795 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2796 /* FreeBSD has no media type for Quad ribbon fiber */
2797 return;
2798 }
2799
2800 if (*ptr == 'R') {
2801 /* -R is XFP */
2802 mxge_media_types = mxge_xfp_media_types;
2803 mxge_media_type_entries =
2804 sizeof (mxge_xfp_media_types) /
2805 sizeof (mxge_xfp_media_types[0]);
2806 byte = MXGE_XFP_COMPLIANCE_BYTE;
2807 cage_type = "XFP";
2808 }
2809
2810 if (*ptr == 'S' || *(ptr +1) == 'S') {
2811 /* -S or -2S is SFP+ */
2812 mxge_media_types = mxge_sfp_media_types;
2813 mxge_media_type_entries =
2814 sizeof (mxge_sfp_media_types) /
2815 sizeof (mxge_sfp_media_types[0]);
2816 cage_type = "SFP+";
2817 byte = 3;
2818 }
2819
2820 if (mxge_media_types == NULL) {
2821 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2822 return;
2823 }
2824
2825 /*
2826 * At this point we know the NIC has an XFP cage, so now we
2827 * try to determine what is in the cage by using the
 2828 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2829 * register. We read just one byte, which may take over
2830 * a millisecond
2831 */
2832
2833 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2834 cmd.data1 = byte;
2835 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2836 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2837 device_printf(sc->dev, "failed to read XFP\n");
2838 }
2839 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2840 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2841 }
2842 if (err != MXGEFW_CMD_OK) {
2843 return;
2844 }
2845
2846 /* now we wait for the data to be cached */
2847 cmd.data0 = byte;
2848 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2849 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2850 DELAY(1000);
2851 cmd.data0 = byte;
2852 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2853 }
2854 if (err != MXGEFW_CMD_OK) {
2855 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2856 cage_type, err, ms);
2857 return;
2858 }
2859
2860 if (cmd.data0 == mxge_media_types[0].bitmask) {
2861 if (mxge_verbose)
2862 device_printf(sc->dev, "%s:%s\n", cage_type,
2863 mxge_media_types[0].name);
2864 mxge_set_media(sc, IFM_10G_CX4);
2865 return;
2866 }
2867 for (i = 1; i < mxge_media_type_entries; i++) {
2868 if (cmd.data0 & mxge_media_types[i].bitmask) {
2869 if (mxge_verbose)
2870 device_printf(sc->dev, "%s:%s\n",
2871 cage_type,
2872 mxge_media_types[i].name);
2873
2874 mxge_set_media(sc, mxge_media_types[i].flag);
2875 return;
2876 }
2877 }
2878 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type,
2879 cmd.data0);
2880
2881 return;
2882}
2883
2884static void
2885mxge_intr(void *arg)
2886{
2887 struct mxge_slice_state *ss = arg;
2888 mxge_softc_t *sc = ss->sc;
2889 mcp_irq_data_t *stats = ss->fw_stats;
2890 mxge_tx_ring_t *tx = &ss->tx;
2891 mxge_rx_done_t *rx_done = &ss->rx_done;
2892 uint32_t send_done_count;
2893 uint8_t valid;
2894
2895
2896#ifndef IFNET_BUF_RING
2897 /* an interrupt on a non-zero slice is implicitly valid
2898 since MSI-X irqs are not shared */
2899 if (ss != sc->ss) {
2900 mxge_clean_rx_done(ss);
2901 *ss->irq_claim = be32toh(3);
2902 return;
2903 }
2904#endif
2905
2906 /* make sure the DMA has finished */
2907 if (!stats->valid) {
2908 return;
2909 }
2910 valid = stats->valid;
2911
2912 if (sc->legacy_irq) {
2913 /* lower legacy IRQ */
2914 *sc->irq_deassert = 0;
2915 if (!mxge_deassert_wait)
2916 /* don't wait for conf. that irq is low */
2917 stats->valid = 0;
2918 } else {
2919 stats->valid = 0;
2920 }
2921
2922 /* loop while waiting for legacy irq deassertion */
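	/*
	 * stats->valid is the NIC's DMA handshake byte.  When
	 * mxge_deassert_wait is set, the firmware re-DMAs a zero to
	 * it once the legacy IRQ line is actually low; that is the
	 * condition the loop below polls for before exiting.
	 */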
2923 do {
2924 /* check for transmit completes and receives */
2925 send_done_count = be32toh(stats->send_done_count);
2926 while ((send_done_count != tx->pkt_done) ||
2927 (rx_done->entry[rx_done->idx].length != 0)) {
2928 if (send_done_count != tx->pkt_done)
2929 mxge_tx_done(ss, (int)send_done_count);
2930 mxge_clean_rx_done(ss);
2931 send_done_count = be32toh(stats->send_done_count);
2932 }
2933 if (sc->legacy_irq && mxge_deassert_wait)
2934 wmb();
2935 } while (*((volatile uint8_t *) &stats->valid));
2936
2937 /* fw link & error stats meaningful only on the first slice */
2938 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
2939 if (sc->link_state != stats->link_up) {
2940 sc->link_state = stats->link_up;
2941 if (sc->link_state) {
 2942 sc->ifp->if_link_state = LINK_STATE_UP;
 2943 if_link_state_change(sc->ifp);
2944 if (mxge_verbose)
2945 device_printf(sc->dev, "link up\n");
2946 } else {
 2947 sc->ifp->if_link_state = LINK_STATE_DOWN;
 2948 if_link_state_change(sc->ifp);
2949 if (mxge_verbose)
2950 device_printf(sc->dev, "link down\n");
2951 }
2952 sc->need_media_probe = 1;
2953 }
2954 if (sc->rdma_tags_available !=
2955 be32toh(stats->rdma_tags_available)) {
2956 sc->rdma_tags_available =
2957 be32toh(stats->rdma_tags_available);
2958 device_printf(sc->dev, "RDMA timed out! %d tags "
2959 "left\n", sc->rdma_tags_available);
2960 }
2961
2962 if (stats->link_down) {
2963 sc->down_cnt += stats->link_down;
2964 sc->link_state = 0;
 2965 sc->ifp->if_link_state = LINK_STATE_DOWN;
 if_link_state_change(sc->ifp);
2966 }
2967 }
2968
2969 /* check to see if we have rx token to pass back */
2970 if (valid & 0x1)
2971 *ss->irq_claim = be32toh(3);
2972 *(ss->irq_claim + 1) = be32toh(3);
2973}
2974
2975static void
2976mxge_init(void *arg)
2977{
2978}
2979
2980
2981
2982static void
2983mxge_free_slice_mbufs(struct mxge_slice_state *ss)
2984{
2985 struct lro_entry *lro_entry;
2986 int i;
2987
2988 while (!SLIST_EMPTY(&ss->lro_free)) {
2989 lro_entry = SLIST_FIRST(&ss->lro_free);
2990 SLIST_REMOVE_HEAD(&ss->lro_free, next);
 2991 kfree(lro_entry, M_DEVBUF);
2992 }
2993
2994 for (i = 0; i <= ss->rx_big.mask; i++) {
2995 if (ss->rx_big.info[i].m == NULL)
2996 continue;
2997 bus_dmamap_unload(ss->rx_big.dmat,
2998 ss->rx_big.info[i].map);
2999 m_freem(ss->rx_big.info[i].m);
3000 ss->rx_big.info[i].m = NULL;
3001 }
3002
3003 for (i = 0; i <= ss->rx_small.mask; i++) {
3004 if (ss->rx_small.info[i].m == NULL)
3005 continue;
3006 bus_dmamap_unload(ss->rx_small.dmat,
3007 ss->rx_small.info[i].map);
3008 m_freem(ss->rx_small.info[i].m);
3009 ss->rx_small.info[i].m = NULL;
3010 }
3011
3012 /* transmit ring used only on the first slice */
3013 if (ss->tx.info == NULL)
3014 return;
3015
3016 for (i = 0; i <= ss->tx.mask; i++) {
3017 ss->tx.info[i].flag = 0;
3018 if (ss->tx.info[i].m == NULL)
3019 continue;
3020 bus_dmamap_unload(ss->tx.dmat,
3021 ss->tx.info[i].map);
3022 m_freem(ss->tx.info[i].m);
3023 ss->tx.info[i].m = NULL;
3024 }
3025}
3026
3027static void
3028mxge_free_mbufs(mxge_softc_t *sc)
3029{
3030 int slice;
3031
3032 for (slice = 0; slice < sc->num_slices; slice++)
3033 mxge_free_slice_mbufs(&sc->ss[slice]);
3034}
3035
3036static void
3037mxge_free_slice_rings(struct mxge_slice_state *ss)
3038{
3039 int i;
3040
3041
3042 if (ss->rx_done.entry != NULL)
3043 mxge_dma_free(&ss->rx_done.dma);
3044 ss->rx_done.entry = NULL;
3045
3046 if (ss->tx.req_bytes != NULL)
 3047 kfree(ss->tx.req_bytes, M_DEVBUF);
3048 ss->tx.req_bytes = NULL;
3049
3050 if (ss->tx.seg_list != NULL)
 3051 kfree(ss->tx.seg_list, M_DEVBUF);
3052 ss->tx.seg_list = NULL;
3053
3054 if (ss->rx_small.shadow != NULL)
 3055 kfree(ss->rx_small.shadow, M_DEVBUF);
3056 ss->rx_small.shadow = NULL;
3057
3058 if (ss->rx_big.shadow != NULL)
 3059 kfree(ss->rx_big.shadow, M_DEVBUF);
3060 ss->rx_big.shadow = NULL;
3061
3062 if (ss->tx.info != NULL) {
3063 if (ss->tx.dmat != NULL) {
3064 for (i = 0; i <= ss->tx.mask; i++) {
3065 bus_dmamap_destroy(ss->tx.dmat,
3066 ss->tx.info[i].map);
3067 }
3068 bus_dma_tag_destroy(ss->tx.dmat);
3069 }
 3070 kfree(ss->tx.info, M_DEVBUF);
3071 }
3072 ss->tx.info = NULL;
3073
3074 if (ss->rx_small.info != NULL) {
3075 if (ss->rx_small.dmat != NULL) {
3076 for (i = 0; i <= ss->rx_small.mask; i++) {
3077 bus_dmamap_destroy(ss->rx_small.dmat,
3078 ss->rx_small.info[i].map);
3079 }
3080 bus_dmamap_destroy(ss->rx_small.dmat,
3081 ss->rx_small.extra_map);
3082 bus_dma_tag_destroy(ss->rx_small.dmat);
3083 }
 3084 kfree(ss->rx_small.info, M_DEVBUF);
3085 }
3086 ss->rx_small.info = NULL;
3087
3088 if (ss->rx_big.info != NULL) {
3089 if (ss->rx_big.dmat != NULL) {
3090 for (i = 0; i <= ss->rx_big.mask; i++) {
3091 bus_dmamap_destroy(ss->rx_big.dmat,
3092 ss->rx_big.info[i].map);
3093 }
3094 bus_dmamap_destroy(ss->rx_big.dmat,
3095 ss->rx_big.extra_map);
3096 bus_dma_tag_destroy(ss->rx_big.dmat);
3097 }
 3098 kfree(ss->rx_big.info, M_DEVBUF);
3099 }
3100 ss->rx_big.info = NULL;
3101}
3102
3103static void
3104mxge_free_rings(mxge_softc_t *sc)
3105{
3106 int slice;
3107
3108 for (slice = 0; slice < sc->num_slices; slice++)
3109 mxge_free_slice_rings(&sc->ss[slice]);
3110}
3111
3112static int
3113mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3114 int tx_ring_entries)
3115{
3116 mxge_softc_t *sc = ss->sc;
3117 size_t bytes;
3118 int err, i;
3119
3120 err = ENOMEM;
3121
3122 /* allocate per-slice receive resources */
3123
3124 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3125 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3126
3127 /* allocate the rx shadow rings */
3128 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
 3129 ss->rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
 3130 if (ss->rx_small.shadow == NULL)
 3131 return err;
3132
3133 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
 3134 ss->rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
 3135 if (ss->rx_big.shadow == NULL)
 3136 return err;
3137
3138 /* allocate the rx host info rings */
3139 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
 3140 ss->rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
 3141 if (ss->rx_small.info == NULL)
 3142 return err;
3143
3144 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
 3145 ss->rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
 3146 if (ss->rx_big.info == NULL)
 3147 return err;
3148
3149 /* allocate the rx busdma resources */
3150 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3151 1, /* alignment */
3152 4096, /* boundary */
3153 BUS_SPACE_MAXADDR, /* low */
3154 BUS_SPACE_MAXADDR, /* high */
3155 NULL, NULL, /* filter */
3156 MHLEN, /* maxsize */
3157 1, /* num segs */
3158 MHLEN, /* maxsegsize */
3159 BUS_DMA_ALLOCNOW, /* flags */
3160 NULL, NULL, /* lock */
3161 &ss->rx_small.dmat); /* tag */
3162 if (err != 0) {
3163 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3164 err);
 3165 return err;
3166 }
3167
3168 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3169 1, /* alignment */
3170#if MXGE_VIRT_JUMBOS
3171 4096, /* boundary */
3172#else
3173 0, /* boundary */
3174#endif
3175 BUS_SPACE_MAXADDR, /* low */
3176 BUS_SPACE_MAXADDR, /* high */
3177 NULL, NULL, /* filter */
3178 3*4096, /* maxsize */
3179#if MXGE_VIRT_JUMBOS
3180 3, /* num segs */
3181 4096, /* maxsegsize*/
3182#else
3183 1, /* num segs */
3184 MJUM9BYTES, /* maxsegsize*/
3185#endif
3186 BUS_DMA_ALLOCNOW, /* flags */
3187 NULL, NULL, /* lock */
3188 &ss->rx_big.dmat); /* tag */
3189 if (err != 0) {
3190 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3191 err);
 3192 return err;
3193 }
3194 for (i = 0; i <= ss->rx_small.mask; i++) {
3195 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3196 &ss->rx_small.info[i].map);
3197 if (err != 0) {
3198 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3199 err);
 3200 return err;
3201 }
3202 }
3203 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3204 &ss->rx_small.extra_map);
3205 if (err != 0) {
3206 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3207 err);
 3208 return err;
3209 }
3210
3211 for (i = 0; i <= ss->rx_big.mask; i++) {
3212 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3213 &ss->rx_big.info[i].map);
3214 if (err != 0) {
3215 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3216 err);
 3217 return err;
3218 }
3219 }
3220 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3221 &ss->rx_big.extra_map);
3222 if (err != 0) {
3223 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3224 err);
 3225 return err;
3226 }
3227
 3228 /* now allocate TX resources */
3229
3230#ifndef IFNET_BUF_RING
3231 /* only use a single TX ring for now */
3232 if (ss != ss->sc->ss)
3233 return 0;
3234#endif
3235
3236 ss->tx.mask = tx_ring_entries - 1;
3237 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3238
3239
3240 /* allocate the tx request copy block */
3241 bytes = 8 +
3242 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
 3243 ss->tx.req_bytes = kmalloc(bytes, M_DEVBUF, M_WAITOK);
 3244 if (ss->tx.req_bytes == NULL)
 3245 return err;
3246 /* ensure req_list entries are aligned to 8 bytes */
3247 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3248 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3249
3250 /* allocate the tx busdma segment list */
3251 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3252 ss->tx.seg_list = (bus_dma_segment_t *)
 3253 kmalloc(bytes, M_DEVBUF, M_WAITOK);
 3254 if (ss->tx.seg_list == NULL)
 3255 return err;
3256
3257 /* allocate the tx host info ring */
3258 bytes = tx_ring_entries * sizeof (*ss->tx.info);
 3259 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
 3260 if (ss->tx.info == NULL)
 3261 return err;
3262
3263 /* allocate the tx busdma resources */
3264 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3265 1, /* alignment */
3266 sc->tx_boundary, /* boundary */
3267 BUS_SPACE_MAXADDR, /* low */
3268 BUS_SPACE_MAXADDR, /* high */
3269 NULL, NULL, /* filter */
3270 65536 + 256, /* maxsize */
3271 ss->tx.max_desc - 2, /* num segs */
3272 sc->tx_boundary, /* maxsegsz */
3273 BUS_DMA_ALLOCNOW, /* flags */
3274 NULL, NULL, /* lock */
3275 &ss->tx.dmat); /* tag */
3276
3277 if (err != 0) {
3278 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3279 err);
 3280 return err;
3281 }
3282
3283 /* now use these tags to setup dmamaps for each slot
3284 in the ring */
3285 for (i = 0; i <= ss->tx.mask; i++) {
3286 err = bus_dmamap_create(ss->tx.dmat, 0,
3287 &ss->tx.info[i].map);
3288 if (err != 0) {
3289 device_printf(sc->dev, "Err %d tx dmamap\n",
3290 err);
 3291 return err;
3292 }
3293 }
3294 return 0;
3295
3296}
3297
3298static int
3299mxge_alloc_rings(mxge_softc_t *sc)
3300{
3301 mxge_cmd_t cmd;
3302 int tx_ring_size;
3303 int tx_ring_entries, rx_ring_entries;
3304 int err, slice;
3305
3306 /* get ring sizes */
3307 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3308 tx_ring_size = cmd.data0;
3309 if (err != 0) {
3310 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3311 goto abort;
3312 }
3313
3314 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3315 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3316 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3317 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3318 IFQ_SET_READY(&sc->ifp->if_snd);
3319
3320 for (slice = 0; slice < sc->num_slices; slice++) {
3321 err = mxge_alloc_slice_rings(&sc->ss[slice],
3322 rx_ring_entries,
3323 tx_ring_entries);
3324 if (err != 0)
3325 goto abort;
3326 }
3327 return 0;
3328
3329abort:
3330 mxge_free_rings(sc);
3331 return err;
3332
3333}
3334
3335
3336static void
3337mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3338{
3339 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
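	/*
	 * Worked example: a 9000-byte MTU gives a bufsize of 9020,
	 * too big for MCLBYTES and (with 4KB pages) MJUMPAGESIZE, so
	 * it falls through to the MJUM9BYTES cases below.
	 */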
3340
3341 if (bufsize < MCLBYTES) {
3342 /* easy, everything fits in a single buffer */
3343 *big_buf_size = MCLBYTES;
3344 *cl_size = MCLBYTES;
3345 *nbufs = 1;
3346 return;
3347 }
3348
3349 if (bufsize < MJUMPAGESIZE) {
3350 /* still easy, everything still fits in a single buffer */
3351 *big_buf_size = MJUMPAGESIZE;
3352 *cl_size = MJUMPAGESIZE;
3353 *nbufs = 1;
3354 return;
3355 }
3356#if MXGE_VIRT_JUMBOS
3357 /* now we need to use virtually contiguous buffers */
3358 *cl_size = MJUM9BYTES;
3359 *big_buf_size = 4096;
3360 *nbufs = mtu / 4096 + 1;
3361 /* needs to be a power of two, so round up */
3362 if (*nbufs == 3)
3363 *nbufs = 4;
3364#else
3365 *cl_size = MJUM9BYTES;
3366 *big_buf_size = MJUM9BYTES;
3367 *nbufs = 1;
3368#endif
3369}
3370
3371static int
3372mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3373{
3374 mxge_softc_t *sc;
3375 mxge_cmd_t cmd;
3376 bus_dmamap_t map;
3377 struct lro_entry *lro_entry;
3378 int err, i, slice;
3379
3380
3381 sc = ss->sc;
3382 slice = ss - sc->ss;
3383
3384 SLIST_INIT(&ss->lro_free);
3385 SLIST_INIT(&ss->lro_active);
3386
3387 for (i = 0; i < sc->lro_cnt; i++) {
3388 lro_entry = (struct lro_entry *)
 3389 kmalloc(sizeof (*lro_entry), M_DEVBUF,
3390 M_NOWAIT | M_ZERO);
3391 if (lro_entry == NULL) {
3392 sc->lro_cnt = i;
3393 break;
3394 }
3395 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
3396 }
3397 /* get the lanai pointers to the send and receive rings */
3398
3399 err = 0;
3400#ifndef IFNET_BUF_RING
3401 /* We currently only send from the first slice */
3402 if (slice == 0) {
3403#endif
3404 cmd.data0 = slice;
3405 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3406 ss->tx.lanai =
3407 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3408 ss->tx.send_go = (volatile uint32_t *)
3409 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3410 ss->tx.send_stop = (volatile uint32_t *)
3411 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3412#ifndef IFNET_BUF_RING
3413 }
3414#endif
3415 cmd.data0 = slice;
3416 err |= mxge_send_cmd(sc,
3417 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3418 ss->rx_small.lanai =
3419 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3420 cmd.data0 = slice;
3421 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3422 ss->rx_big.lanai =
3423 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3424
3425 if (err != 0) {
3426 device_printf(sc->dev,
3427 "failed to get ring sizes or locations\n");
3428 return EIO;
3429 }
3430
3431 /* stock receive rings */
3432 for (i = 0; i <= ss->rx_small.mask; i++) {
3433 map = ss->rx_small.info[i].map;
3434 err = mxge_get_buf_small(ss, map, i);
3435 if (err) {
3436 device_printf(sc->dev, "alloced %d/%d smalls\n",
3437 i, ss->rx_small.mask + 1);
3438 return ENOMEM;
3439 }
3440 }
3441 for (i = 0; i <= ss->rx_big.mask; i++) {
3442 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3443 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3444 }
3445 ss->rx_big.nbufs = nbufs;
3446 ss->rx_big.cl_size = cl_size;
3447 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3448 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3449 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3450 map = ss->rx_big.info[i].map;
3451 err = mxge_get_buf_big(ss, map, i);
3452 if (err) {
3453 device_printf(sc->dev, "alloced %d/%d bigs\n",
3454 i, ss->rx_big.mask + 1);
3455 return ENOMEM;
3456 }
3457 }
3458 return 0;
3459}
3460
3461static int
3462mxge_open(mxge_softc_t *sc)
3463{
3464 mxge_cmd_t cmd;
3465 int err, big_bytes, nbufs, slice, cl_size, i;
3466 bus_addr_t bus;
3467 volatile uint8_t *itable;
3468 struct mxge_slice_state *ss;
3469
3470 /* Copy the MAC address in case it was overridden */
3471 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3472
3473 err = mxge_reset(sc, 1);
3474 if (err != 0) {
3475 device_printf(sc->dev, "failed to reset\n");
3476 return EIO;
3477 }
3478
3479 if (sc->num_slices > 1) {
3480 /* setup the indirection table */
3481 cmd.data0 = sc->num_slices;
3482 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3483 &cmd);
3484
3485 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3486 &cmd);
3487 if (err != 0) {
3488 device_printf(sc->dev,
3489 "failed to setup rss tables\n");
3490 return err;
3491 }
3492
3493 /* just enable an identity mapping */
3494 itable = sc->sram + cmd.data0;
3495 for (i = 0; i < sc->num_slices; i++)
3496 itable[i] = (uint8_t)i;
3497
3498 cmd.data0 = 1;
3499 cmd.data1 = mxge_rss_hash_type;
3500 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3501 if (err != 0) {
3502 device_printf(sc->dev, "failed to enable slices\n");
3503 return err;
3504 }
3505 }
3506
3507
3508 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3509
3510 cmd.data0 = nbufs;
3511 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3512 &cmd);
3513 /* error is only meaningful if we're trying to set
3514 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3515 if (err && nbufs > 1) {
3516 device_printf(sc->dev,
3517 "Failed to set alway-use-n to %d\n",
3518 nbufs);
3519 return EIO;
3520 }
3521 /* Give the firmware the mtu and the big and small buffer
3522 sizes. The firmware wants the big buf size to be a power
3523 of two. Luckily, FreeBSD's clusters are powers of two */
3524 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3525 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3526 cmd.data0 = MHLEN - MXGEFW_PAD;
3527 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3528 &cmd);
3529 cmd.data0 = big_bytes;
3530 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3531
3532 if (err != 0) {
3533 device_printf(sc->dev, "failed to setup params\n");
3534 goto abort;
3535 }
3536
3537 /* Now give him the pointer to the stats block */
3538 for (slice = 0;
3539#ifdef IFNET_BUF_RING
3540 slice < sc->num_slices;
3541#else
3542 slice < 1;
3543#endif
3544 slice++) {
3545 ss = &sc->ss[slice];
3546 cmd.data0 =
3547 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3548 cmd.data1 =
3549 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3550 cmd.data2 = sizeof(struct mcp_irq_data);
3551 cmd.data2 |= (slice << 16);
3552 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3553 }
3554
3555 if (err != 0) {
3556 bus = sc->ss->fw_stats_dma.bus_addr;
3557 bus += offsetof(struct mcp_irq_data, send_done_count);
3558 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3559 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3560 err = mxge_send_cmd(sc,
3561 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3562 &cmd);
3563 /* Firmware cannot support multicast without STATS_DMA_V2 */
3564 sc->fw_multicast_support = 0;
3565 } else {
3566 sc->fw_multicast_support = 1;
3567 }
3568
3569 if (err != 0) {
3570 device_printf(sc->dev, "failed to setup params\n");
3571 goto abort;
3572 }
3573
3574 for (slice = 0; slice < sc->num_slices; slice++) {
3575 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3576 if (err != 0) {
3577 device_printf(sc->dev, "couldn't open slice %d\n",
3578 slice);
3579 goto abort;
3580 }
3581 }
3582
3583 /* Finally, start the firmware running */
3584 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3585 if (err) {
3586 device_printf(sc->dev, "Couldn't bring up link\n");
3587 goto abort;
3588 }
3589#ifdef IFNET_BUF_RING
3590 for (slice = 0; slice < sc->num_slices; slice++) {
3591 ss = &sc->ss[slice];
3592 ss->if_drv_flags |= IFF_DRV_RUNNING;
3593 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3594 }
3595#endif
3596 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3597 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3598 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3599
3600 return 0;
3601
3602
3603abort:
3604 mxge_free_mbufs(sc);
3605
3606 return err;
3607}
3608
3609static int
3610mxge_close(mxge_softc_t *sc)
3611{
3612 mxge_cmd_t cmd;
3613 int err, old_down_cnt;
3614#ifdef IFNET_BUF_RING
3615 struct mxge_slice_state *ss;
3616 int slice;
3617#endif
3618
3619 callout_stop(&sc->co_hdl);
3620#ifdef IFNET_BUF_RING
3621 for (slice = 0; slice < sc->num_slices; slice++) {
3622 ss = &sc->ss[slice];
3623 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3624 }
3625#endif
3626 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3627 old_down_cnt = sc->down_cnt;
3628 wmb();
3629 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3630 if (err) {
3631 device_printf(sc->dev, "Couldn't bring down link\n");
3632 }
3633 if (old_down_cnt == sc->down_cnt) {
3634 /* wait for down irq */
3635 DELAY(10 * sc->intr_coal_delay);
3636 }
3637 wmb();
3638 if (old_down_cnt == sc->down_cnt) {
3639 device_printf(sc->dev, "never got down irq\n");
3640 }
3641
3642 mxge_free_mbufs(sc);
3643
3644 return 0;
3645}
3646
3647static void
3648mxge_setup_cfg_space(mxge_softc_t *sc)
3649{
3650 device_t dev = sc->dev;
3651 int reg;
3652 uint16_t cmd, lnk, pectl;
3653
3654 /* find the PCIe link width and set max read request to 4KB*/
3655 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3656 lnk = pci_read_config(dev, reg + 0x12, 2);
3657 sc->link_width = (lnk >> 4) & 0x3f;
3658
3659 pectl = pci_read_config(dev, reg + 0x8, 2);
3660 pectl = (pectl & ~0x7000) | (5 << 12);
3661 pci_write_config(dev, reg + 0x8, pectl, 2);
3662 }
3663
3664 /* Enable DMA and Memory space access */
3665 pci_enable_busmaster(dev);
3666 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3667 cmd |= PCIM_CMD_MEMEN;
3668 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3669}
3670
3671static uint32_t
3672mxge_read_reboot(mxge_softc_t *sc)
3673{
3674 device_t dev = sc->dev;
3675 uint32_t vs;
3676
3677 /* find the vendor specific offset */
3678 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3679 device_printf(sc->dev,
3680 "could not find vendor specific offset\n");
3681 return (uint32_t)-1;
3682 }
3683 /* enable read32 mode */
3684 pci_write_config(dev, vs + 0x10, 0x3, 1);
3685 /* tell NIC which register to read */
3686 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3687 return (pci_read_config(dev, vs + 0x14, 4));
3688}
3689
3690static int
3691mxge_watchdog_reset(mxge_softc_t *sc, int slice)
3692{
3693 struct pci_devinfo *dinfo;
3694 mxge_tx_ring_t *tx;
3695 int err;
3696 uint32_t reboot;
3697 uint16_t cmd;
3698
3699 err = ENXIO;
3700
3701 device_printf(sc->dev, "Watchdog reset!\n");
3702
3703 /*
3704 * check to see if the NIC rebooted. If it did, then all of
3705 * PCI config space has been reset, and things like the
3706 * busmaster bit will be zero. If this is the case, then we
3707 * must restore PCI config space before the NIC can be used
3708 * again
3709 */
3710 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3711 if (cmd == 0xffff) {
3712 /*
3713 * maybe the watchdog caught the NIC rebooting; wait
3714 * up to 100ms for it to finish. If it does not come
3715 * back, then give up
3716 */
3717 DELAY(1000*100);
3718 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3719 if (cmd == 0xffff) {
3720 device_printf(sc->dev, "NIC disappeared!\n");
3721 return (err);
3722 }
3723 }
3724 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3725 /* print the reboot status */
3726 reboot = mxge_read_reboot(sc);
3727 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3728 reboot);
3729 /* restore PCI configuration space */
3730 dinfo = device_get_ivars(sc->dev);
3731 pci_cfg_restore(sc->dev, dinfo);
3732
3733 /* and redo any changes we made to our config space */
3734 mxge_setup_cfg_space(sc);
3735
3736 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) {
3737 mxge_close(sc);
3738 err = mxge_open(sc);
3739 }
3740 } else {
3741 tx = &sc->ss[slice].tx;
3742 device_printf(sc->dev,
3743 "NIC did not reboot, slice %d ring state:\n",
3744 slice);
3745 device_printf(sc->dev,
3746 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3747 tx->req, tx->done, tx->queue_active);
3748 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3749 tx->activate, tx->deactivate);
3750 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3751 tx->pkt_done,
3752 be32toh(sc->ss->fw_stats->send_done_count));
3753 device_printf(sc->dev, "not resetting\n");
3754 }
3755 return (err);
3756}
3757
3758static int
3759mxge_watchdog(mxge_softc_t *sc)
3760{
3761 mxge_tx_ring_t *tx;
3762 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3763 int i, err = 0;
3764
3765 /* see if we have outstanding transmits, which
3766 have been pending for more than mxge_ticks */
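	/*
	 * A slice is considered hung when it still has unreclaimed
	 * descriptors (req != done), has made no progress since the
	 * last tick (done == watchdog_done), and had already been
	 * seen with work outstanding (watchdog_req != watchdog_done).
	 */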
3767 for (i = 0;
3768#ifdef IFNET_BUF_RING
3769 (i < sc->num_slices) && (err == 0);
3770#else
3771 (i < 1) && (err == 0);
3772#endif
3773 i++) {
3774 tx = &sc->ss[i].tx;
3775 if (tx->req != tx->done &&
3776 tx->watchdog_req != tx->watchdog_done &&
3777 tx->done == tx->watchdog_done) {
3778 /* check for pause blocking before resetting */
3779 if (tx->watchdog_rx_pause == rx_pause)
3780 err = mxge_watchdog_reset(sc, i);
3781 else
3782 device_printf(sc->dev, "Flow control blocking "
3783 "xmits, check link partner\n");
3784 }
3785
3786 tx->watchdog_req = tx->req;
3787 tx->watchdog_done = tx->done;
3788 tx->watchdog_rx_pause = rx_pause;
3789 }
3790
3791 if (sc->need_media_probe)
3792 mxge_media_probe(sc);
3793 return (err);
3794}
3795
3796static void
3797mxge_update_stats(mxge_softc_t *sc)
3798{
3799 struct mxge_slice_state *ss;
3800 u_long ipackets = 0;
3801 u_long opackets = 0;
3802#ifdef IFNET_BUF_RING
3803 u_long obytes = 0;
3804 u_long omcasts = 0;
3805 u_long odrops = 0;
3806#endif
3807 u_long oerrors = 0;
3808 int slice;
3809
3810 for (slice = 0; slice < sc->num_slices; slice++) {
3811 ss = &sc->ss[slice];
3812 ipackets += ss->ipackets;
3813 opackets += ss->opackets;
3814#ifdef IFNET_BUF_RING
3815 obytes += ss->obytes;
3816 omcasts += ss->omcasts;
3817 odrops += ss->tx.br->br_drops;
3818#endif
3819 oerrors += ss->oerrors;
3820 }
3821 sc->ifp->if_ipackets = ipackets;
3822 sc->ifp->if_opackets = opackets;
3823#ifdef IFNET_BUF_RING
3824 sc->ifp->if_obytes = obytes;
3825 sc->ifp->if_omcasts = omcasts;
3826 sc->ifp->if_snd.ifq_drops = odrops;
3827#endif
3828 sc->ifp->if_oerrors = oerrors;
3829}
3830
3831static void
3832mxge_tick(void *arg)
3833{
3834 mxge_softc_t *sc = arg;
3835 int err = 0;
3836
 3837 lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3838 /* aggregate stats from different slices */
3839 mxge_update_stats(sc);
3840 if (!sc->watchdog_countdown) {
3841 err = mxge_watchdog(sc);
3842 sc->watchdog_countdown = 4;
3843 }
3844 sc->watchdog_countdown--;
3845 if (err == 0)
3846 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
 3847 lockmgr(&sc->driver_lock, LK_RELEASE);
3848}
3849
3850static int
3851mxge_media_change(struct ifnet *ifp)
3852{
3853 return EINVAL;
3854}
3855
3856static int
3857mxge_change_mtu(mxge_softc_t *sc, int mtu)
3858{
3859 struct ifnet *ifp = sc->ifp;
3860 int real_mtu, old_mtu;
3861 int err = 0;
3862
3863
3864 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3865 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
3866 return EINVAL;
 3867 lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3868 old_mtu = ifp->if_mtu;
3869 ifp->if_mtu = mtu;
3870 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3871 mxge_close(sc);
3872 err = mxge_open(sc);
3873 if (err != 0) {
3874 ifp->if_mtu = old_mtu;
3875 mxge_close(sc);
3876 (void) mxge_open(sc);
3877 }
3878 }
 3879 lockmgr(&sc->driver_lock, LK_RELEASE);
3880 return err;
3881}
3882
3883static void
3884mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3885{
3886 mxge_softc_t *sc = ifp->if_softc;
3887
3888
3889 if (sc == NULL)
3890 return;
3891 ifmr->ifm_status = IFM_AVALID;
3892 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
3893 ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
3894 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
3895}
3896
3897static int
 3898 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
3899{
3900 mxge_softc_t *sc = ifp->if_softc;
3901 struct ifreq *ifr = (struct ifreq *)data;
3902 int err, mask;
3903
 3904 (void)cr;
3905 err = 0;
3906 switch (command) {
3907 case SIOCSIFADDR:
3908 case SIOCGIFADDR:
3909 err = ether_ioctl(ifp, command, data);
3910 break;
3911
3912 case SIOCSIFMTU:
3913 err = mxge_change_mtu(sc, ifr->ifr_mtu);
3914 break;
3915
3916 case SIOCSIFFLAGS:
 3917 lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
 3918 if (sc->dying) {
 3919 lockmgr(&sc->driver_lock, LK_RELEASE);
3920 return EINVAL;
3921 }
3922 if (ifp->if_flags & IFF_UP) {
3923 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3924 err = mxge_open(sc);
3925 } else {
 3926 /* take care of promisc and allmulti
 3927 flag changes */
3928 mxge_change_promisc(sc,
3929 ifp->if_flags & IFF_PROMISC);
3930 mxge_set_multicast_list(sc);
3931 }
3932 } else {
3933 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3934 mxge_close(sc);
3935 }
3936 }
 3937 lockmgr(&sc->driver_lock, LK_RELEASE);
3938 break;
3939
3940 case SIOCADDMULTI:
3941 case SIOCDELMULTI:
 3942 lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
 3943 mxge_set_multicast_list(sc);
 3944 lockmgr(&sc->driver_lock, LK_RELEASE);
3945 break;
3946
3947 case SIOCSIFCAP:
 3948 lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3949 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3950 if (mask & IFCAP_TXCSUM) {
3951 if (IFCAP_TXCSUM & ifp->if_capenable) {
3952 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
3953 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
3954 | CSUM_TSO);
3955 } else {
3956 ifp->if_capenable |= IFCAP_TXCSUM;
3957 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
3958 }
3959 } else if (mask & IFCAP_RXCSUM) {
3960 if (IFCAP_RXCSUM & ifp->if_capenable) {
3961 ifp->if_capenable &= ~IFCAP_RXCSUM;
3962 sc->csum_flag = 0;
3963 } else {
3964 ifp->if_capenable |= IFCAP_RXCSUM;
3965 sc->csum_flag = 1;
3966 }
3967 }
3968 if (mask & IFCAP_TSO4) {
3969 if (IFCAP_TSO4 & ifp->if_capenable) {
3970 ifp->if_capenable &= ~IFCAP_TSO4;
3971 ifp->if_hwassist &= ~CSUM_TSO;
3972 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
3973 ifp->if_capenable |= IFCAP_TSO4;
3974 ifp->if_hwassist |= CSUM_TSO;
3975 } else {
 3976 kprintf("mxge requires tx checksum offload"
3977 " be enabled to use TSO\n");
3978 err = EINVAL;
3979 }
3980 }
3981 if (mask & IFCAP_LRO) {
3982 if (IFCAP_LRO & ifp->if_capenable)
3983 err = mxge_change_lro_locked(sc, 0);
3984 else
3985 err = mxge_change_lro_locked(sc, mxge_lro_cnt);
3986 }
3987 if (mask & IFCAP_VLAN_HWTAGGING)
3988 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
 3989 lockmgr(&sc->driver_lock, LK_RELEASE);
3990 VLAN_CAPABILITIES(ifp);
3991
3992 break;
3993
3994 case SIOCGIFMEDIA:
3995 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
3996 &sc->media, command);
3997 break;
3998
3999 default:
4000 err = ENOTTY;
4001 }
4002 return err;
4003}
4004
4005static void
4006mxge_fetch_tunables(mxge_softc_t *sc)
4007{
4008
4009 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4010 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4011 &mxge_flow_control);
4012 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4013 &mxge_intr_coal_delay);
4014 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4015 &mxge_nvidia_ecrc_enable);
4016 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4017 &mxge_force_firmware);
4018 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4019 &mxge_deassert_wait);
4020 TUNABLE_INT_FETCH("hw.mxge.verbose",
4021 &mxge_verbose);
4022 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4023 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
4024 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4025 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4026 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4027 if (sc->lro_cnt != 0)
4028 mxge_lro_cnt = sc->lro_cnt;
4029
4030 if (bootverbose)
4031 mxge_verbose = 1;
4032 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4033 mxge_intr_coal_delay = 30;
4034 if (mxge_ticks == 0)
4035 mxge_ticks = hz / 2;
4036 sc->pause = mxge_flow_control;
4037 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4038 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4039 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
4040 }
4041 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4042 mxge_initial_mtu < ETHER_MIN_LEN)
4043 mxge_initial_mtu = ETHERMTU_JUMBO;
4044}
4045
4046
4047static void
4048mxge_free_slices(mxge_softc_t *sc)
4049{
4050 struct mxge_slice_state *ss;
4051 int i;
4052
4053
4054 if (sc->ss == NULL)
4055 return;
4056
4057 for (i = 0; i < sc->num_slices; i++) {
4058 ss = &sc->ss[i];
4059 if (ss->fw_stats != NULL) {