get mxge to build, stage 25/many
[dragonfly.git] / sys / dev / netif / mxge / if_mxge.c
/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
/*__FBSDID("$FreeBSD: src/sys/dev/mxge/if_mxge.c,v 1.63 2009/06/26 11:45:06 rwatson Exp $");*/

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/in_cksum.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

/* count xmits ourselves, rather than via drbr */
#define NO_SLOW_STATS
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ifq_var.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/vlan/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <sys/bus.h>
#include <sys/rman.h>

#include <bus/pci/pcireg.h>
#include <bus/pci/pcivar.h>
#include <bus/pci/pci_private.h>	/* XXX for pci_cfg_restore */

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/netif/mxge/mxge_mcp.h>
#include <dev/netif/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/netif/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"
/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);
static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n", rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if 0
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
#else
	sc->wc = 0;	/* TBD: PAT support */
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}
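
/*
 * Usage sketch (illustrative only, not a verbatim call site from this
 * driver): a caller that needs a 4KB-aligned shared region would do
 *
 *	mxge_dma_t dma;
 *	if (mxge_dma_alloc(sc, &dma, 4096, 4096) == 0) {
 *		... use dma.addr (KVA) and dma.bus_addr (device address) ...
 *		mxge_dma_free(&dma);
 *	}
 *
 * The tag/alloc/load triple above is the standard busdma pattern; for a
 * single, already-wired coherent buffer like this, the load callback is
 * expected to run synchronously and simply records the bus address.
 */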

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			/*
			 * Step past "MAC=" one byte here plus three bytes
			 * per loop iteration below: the first "ptr += 3"
			 * lands on the first hex octet, each later one
			 * steps over an "xx:" group.
			 */
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x0378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves. This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off =  base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests. The
	 * results are returned in cmd.data0. The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}
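
/*
 * A worked example of the bandwidth arithmetic above (numbers are
 * illustrative): if the firmware returns cmd.data0 = 0x01000100, the
 * upper half (0x0100 = 256) says 256 transfers of "len" bytes
 * completed, and the lower half (0x0100 = 256) says they took 256
 * half-microsecond ticks.  MB/s is bytes per microsecond, so with
 * len = 4096 the read test computes
 *
 *	(256 * 4096 * 2) / 256 = 8192 MB/s
 *
 * where the "* 2" converts 0.5us ticks to microseconds.  The
 * read/write test carries an extra factor of two because data moves
 * in both directions.
 */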

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary. Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;

	if (mxge_force_firmware != 0) {
		if (mxge_force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}

union qualhack
{
	const char *ro_char;
	char *rw_char;
};

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{
	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
		&sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;
}

static void *
z_alloc(void *nil, u_int items, u_int size)
{
	void *ptr;

	ptr = kmalloc(items * size, M_TEMP, M_NOWAIT);
	return ptr;
}

static void
z_free(void *nil, void *ptr)
{
	kfree(ptr, M_TEMP);
}
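
/*
 * z_alloc()/z_free() are the zalloc/zfree hooks handed to zlib's
 * inflateInit() by the firmware decompression path (currently #if 0'd
 * out below); they simply bridge zlib's allocator interface to
 * kmalloc/kfree.
 */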

static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	struct fw_image *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_image_load(sc->fw_name, NULL);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}
#if 0
	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = kmalloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL)
		goto abort_with_zs;
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}
#endif
	/*
	 * XXX With the zlib path above disabled, fw_len was left
	 * uninitialized; take the length of the raw image instead
	 * (assuming fw_imglen is the image size reported by the
	 * DragonFly firmware(9) layer, alongside fw_image used below).
	 */
	fw_len = fw->fw_imglen;

	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (fw->fw_image + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file");
		status = EIO;
		goto abort_with_fw;
	}
	hdr = (const void*)(fw->fw_image + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_fw;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      fw->fw_image + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
#if 0
abort_with_buffer:
	kfree(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
#endif
abort_with_fw:
	firmware_image_unload(fw);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address. The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	lockmgr(&sc->cmd_lock, LK_EXCLUSIVE);
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	lockmgr(&sc->cmd_lock, LK_RELEASE);
	return err;
}
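
/*
 * Typical call pattern (a minimal sketch; MXGEFW_CMD_RESET is one of
 * the real command codes used elsewhere in this file):
 *
 *	mxge_cmd_t cmd;
 *	memset(&cmd, 0, sizeof(cmd));
 *	if (mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd) == 0)
 *		... cmd.data0 now holds the firmware's reply, host order ...
 *
 * The command itself travels to the NIC by PIO; the reply comes back
 * by DMA into sc->cmd, which is why the polling loop re-syncs the DMA
 * map on each iteration before checking response->result.
 */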

static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = kmalloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not kmalloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	kfree(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}

static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ". For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address. The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			      confirm, *confirm);
		return ENXIO;
	}
	return 0;
}
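
/*
 * The 7-word handoff block written above is consumed by the bootstrap
 * MCP as: confirm address (MSW, LSW), confirm data, source offset of
 * the code in SRAM, code length, destination offset, and jump address.
 * The +8/-8 adjustments implement the skip described in the FIX
 * comment: the earliest boards leave the bottom 8 bytes of SRAM
 * write-protected at handoff time.
 */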

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	if_maddr_rlock(ifp);
	LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
				      " %d\n", err);
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
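
/*
 * The value returned above is the largest MTU the receive path can
 * honor: either the firmware's hard limit, or the largest page-sized
 * cluster when the firmware refuses virtually contiguous jumbos, in
 * both cases minus MXGEFW_PAD, the small alignment pad the firmware
 * prepends to each received frame.  E.g. with 4KB pages a NIC limited
 * to MJUMPAGESIZE clusters reports an MTU of 4096 - MXGEFW_PAD.
 */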

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0. It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			ss->fw_stats->valid = 0;
			ss->fw_stats->send_done_count = 0;
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	lockmgr(&sc->driver_lock, LK_RELEASE);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
	err = mxge_change_pause(sc, enabled);
	lockmgr(&sc->driver_lock, LK_RELEASE);
	return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
	if (ifp->if_flags & IFF_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}
	return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	if (lro_cnt > 128)
		return EINVAL;

	lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
	err = mxge_change_lro_locked(sc, lro_cnt);
	lockmgr(&sc->driver_lock, LK_RELEASE);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}
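
/*
 * mxge_handle_be32() reads a big-endian firmware counter and reports
 * it through sysctl: the byte-swapped value is moved into arg2 and
 * arg1 is cleared, which makes sysctl_handle_int() treat the value as
 * a read-only constant instead of dereferencing a pointer.  Every
 * firmware statistic exported below goes through this shim.
 */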

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = &sc->sysctl_ctx;
	sysctl_ctx_init(ctx);
	sc->sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw),
					  OID_AUTO,
					  device_get_nameunit(sc->dev),
					  CTLFLAG_RD, 0, "");
	if (sc->sysctl_tree == NULL) {
		device_printf(sc->dev, "can't add sysctl node\n");
		return;
	}

	children = SYSCTL_CHILDREN(sc->sysctl_tree);
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, &sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, &sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, &sc->product_code_string,
			  0, "product code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control (pause frames)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"lro_cnt",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_lro,
			"I", "number of lro merge queues");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		ksprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}
1692/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1693 backwards one at a time and handle ring wraps */
1694
1695static inline void
1696mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1697 mcp_kreq_ether_send_t *src, int cnt)
1698{
1699 int idx, starting_slot;
1700 starting_slot = tx->req;
1701 while (cnt > 1) {
1702 cnt--;
1703 idx = (starting_slot + cnt) & tx->mask;
1704 mxge_pio_copy(&tx->lanai[idx],
1705 &src[cnt], sizeof(*src));
1706 wmb();
1707 }
1708}
1709
1710/*
1711 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1712 * at most 32 bytes at a time, so as to avoid involving the software
1713 * pio handler in the nic. We re-write the first segment's flags
1714 * to mark them valid only after writing the entire chain
1715 */
1716
1717static inline void
1718mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1719 int cnt)
1720{
1721 int idx, i;
1722 uint32_t *src_ints;
1723 volatile uint32_t *dst_ints;
1724 mcp_kreq_ether_send_t *srcp;
1725 volatile mcp_kreq_ether_send_t *dstp, *dst;
1726 uint8_t last_flags;
1727
1728 idx = tx->req & tx->mask;
1729
1730 last_flags = src->flags;
1731 src->flags = 0;
1732 wmb();
1733 dst = dstp = &tx->lanai[idx];
1734 srcp = src;
1735
1736 if ((idx + cnt) < tx->mask) {
1737 for (i = 0; i < (cnt - 1); i += 2) {
1738 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1739 wmb(); /* force write every 32 bytes */
1740 srcp += 2;
1741 dstp += 2;
1742 }
1743 } else {
1744 /* submit all but the first request, and ensure
1745 that it is submitted below */
1746 mxge_submit_req_backwards(tx, src, cnt);
1747 i = 0;
1748 }
1749 if (i < cnt) {
1750 /* submit the first request */
1751 mxge_pio_copy(dstp, srcp, sizeof(*src));
1752 wmb(); /* barrier before setting valid flag */
1753 }
1754
1755 /* re-write the last 32-bits with the valid flags */
1756 src->flags = last_flags;
1757 src_ints = (uint32_t *)src;
1758 src_ints+=3;
1759 dst_ints = (volatile uint32_t *)dst;
1760 dst_ints+=3;
1761 *dst_ints = *src_ints;
1762 tx->req += cnt;
1763 wmb();
1764}
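
/*
 * The ordering protocol above matters: the NIC treats a descriptor as
 * valid once its flags byte is non-zero, so the first descriptor is
 * written with flags = 0, the rest of the chain is PIO'd out, and only
 * then is the final 32-bit word of the first descriptor (which holds
 * the flags byte) rewritten with its real value.  Until that last
 * store lands, the firmware cannot observe a partially written chain.
 */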

#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), ss->scratch);
		/* the headers were just copied, so point into scratch */
		ip = (struct ip *)(ss->scratch + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = ip_off + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		kprintf("tx->max_desc exceeded via TSO!\n");
		kprintf("mss = %d, %ld, %d!\n", mss,
			(long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;
}

#endif /* IFCAP_TSO4 */
1937
1938#ifdef MXGE_NEW_VLAN_API
1939/*
1940 * We reproduce the software vlan tag insertion from
1941 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
1942 * vlan tag insertion. We need to advertise this in order to have the
1943 * vlan interface respect our csum offload flags.
1944 */
1945static struct mbuf *
1946mxge_vlan_tag_insert(struct mbuf *m)
1947{
1948 struct ether_vlan_header *evl;
1949
b915556e 1950 M_PREPEND(m, EVL_ENCAPLEN, MB_DONTWAIT);
8892ea20
AE
1951 if (__predict_false(m == NULL))
1952 return NULL;
1953 if (m->m_len < sizeof(*evl)) {
1954 m = m_pullup(m, sizeof(*evl));
1955 if (__predict_false(m == NULL))
1956 return NULL;
1957 }
1958 /*
1959 * Transform the Ethernet header into an Ethernet header
1960 * with 802.1Q encapsulation.
1961 */
1962 evl = mtod(m, struct ether_vlan_header *);
1963	bcopy((char *)evl + EVL_ENCAPLEN,
1964 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
1965 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
1966	evl->evl_tag = htons(m->m_pkthdr.ether_vlantag);
1967 m->m_flags &= ~M_VLANTAG;
1968 return m;
1969}
1970#endif /* MXGE_NEW_VLAN_API */
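
/*
 * A sketch of the transformation done by mxge_vlan_tag_insert(), starting
 * from a plain Ethernet header:
 *	[ dst(6) | src(6) | type(2) | payload ]
 * M_PREPEND() grows the mbuf by EVL_ENCAPLEN (4) bytes, the bcopy() slides
 * dst/src down over the new space, and the tag fields are filled in:
 *	[ dst(6) | src(6) | 0x8100(2) | tag(2) | type(2) | payload ]
 * with the original type left in place as the encapsulated protocol.
 */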
1971
1972static void
1973mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
1974{
1975 mxge_softc_t *sc;
1976 mcp_kreq_ether_send_t *req;
1977 bus_dma_segment_t *seg;
1978 struct mbuf *m_tmp;
1979 struct ifnet *ifp;
1980 mxge_tx_ring_t *tx;
1981 struct ip *ip;
1982 int cnt, cum_len, err, i, idx, odd_flag, ip_off;
1983 uint16_t pseudo_hdr_offset;
1984 uint8_t flags, cksum_offset;
1985
1986
1987 sc = ss->sc;
1988 ifp = sc->ifp;
1989 tx = &ss->tx;
1990
1991 ip_off = sizeof (struct ether_header);
1992#ifdef MXGE_NEW_VLAN_API
1993 if (m->m_flags & M_VLANTAG) {
1994 m = mxge_vlan_tag_insert(m);
1995 if (__predict_false(m == NULL))
1996 goto drop;
1997		ip_off += EVL_ENCAPLEN;
1998 }
1999#endif
2000 /* (try to) map the frame for DMA */
2001 idx = tx->req & tx->mask;
2002 err = bus_dmamap_load_mbuf_segment(tx->dmat, tx->info[idx].map,
2003 m, tx->seg_list, 1, &cnt,
2004 BUS_DMA_NOWAIT);
2005 if (__predict_false(err == EFBIG)) {
2006 /* Too many segments in the chain. Try
2007 to defrag */
2008 m_tmp = m_defrag(m, M_NOWAIT);
2009 if (m_tmp == NULL) {
2010 goto drop;
2011 }
2012 ss->tx.defrag++;
2013 m = m_tmp;
2014		err = bus_dmamap_load_mbuf_segment(tx->dmat,
2015						   tx->info[idx].map,
2016						   m, tx->seg_list, 1, &cnt,
2017 BUS_DMA_NOWAIT);
2018 }
2019 if (__predict_false(err != 0)) {
2020		device_printf(sc->dev, "bus_dmamap_load_mbuf_segment returned %d"
2021 " packet len = %d\n", err, m->m_pkthdr.len);
2022 goto drop;
2023 }
2024 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2025 BUS_DMASYNC_PREWRITE);
2026 tx->info[idx].m = m;
2027
2028#if IFCAP_TSO4
2029 /* TSO is different enough, we handle it in another routine */
2030 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2031 mxge_encap_tso(ss, m, cnt, ip_off);
2032 return;
2033 }
2034#endif
2035
2036 req = tx->req_list;
2037 cksum_offset = 0;
2038 pseudo_hdr_offset = 0;
2039 flags = MXGEFW_FLAGS_NO_TSO;
2040
2041 /* checksum offloading? */
2042 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
2043 /* ensure ip header is in first mbuf, copy
2044 it to a scratch buffer if not */
2045 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
2046 m_copydata(m, 0, ip_off + sizeof (*ip),
2047 ss->scratch);
2048 ip = (struct ip *)(ss->scratch + ip_off);
2049 } else {
2050 ip = (struct ip *)(mtod(m, char *) + ip_off);
2051 }
2052 cksum_offset = ip_off + (ip->ip_hl << 2);
2053 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2054 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2055 req->cksum_offset = cksum_offset;
2056 flags |= MXGEFW_FLAGS_CKSUM;
2057 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2058 } else {
2059 odd_flag = 0;
2060 }
2061 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2062 flags |= MXGEFW_FLAGS_SMALL;
2063
2064 /* convert segments into a request list */
2065 cum_len = 0;
2066 seg = tx->seg_list;
2067 req->flags = MXGEFW_FLAGS_FIRST;
2068 for (i = 0; i < cnt; i++) {
2069 req->addr_low =
2070 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2071 req->addr_high =
2072 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2073 req->length = htobe16(seg->ds_len);
2074 req->cksum_offset = cksum_offset;
2075 if (cksum_offset > seg->ds_len)
2076 cksum_offset -= seg->ds_len;
2077 else
2078 cksum_offset = 0;
2079 req->pseudo_hdr_offset = pseudo_hdr_offset;
2080 req->pad = 0; /* complete solid 16-byte block */
2081 req->rdma_count = 1;
2082 req->flags |= flags | ((cum_len & 1) * odd_flag);
2083 cum_len += seg->ds_len;
2084 seg++;
2085 req++;
2086 req->flags = 0;
2087 }
2088 req--;
2089 /* pad runts to 60 bytes */
2090 if (cum_len < 60) {
2091 req++;
2092 req->addr_low =
2093 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2094 req->addr_high =
2095 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2096 req->length = htobe16(60 - cum_len);
2097 req->cksum_offset = 0;
2098 req->pseudo_hdr_offset = pseudo_hdr_offset;
2099 req->pad = 0; /* complete solid 16-byte block */
2100 req->rdma_count = 1;
2101 req->flags |= flags | ((cum_len & 1) * odd_flag);
2102 cnt++;
2103 }
2104
2105 tx->req_list[0].rdma_count = cnt;
2106#if 0
2107 /* print what the firmware will see */
2108 for (i = 0; i < cnt; i++) {
2109		kprintf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2110 "cso:%d, flags:0x%x, rdma:%d\n",
2111 i, (int)ntohl(tx->req_list[i].addr_high),
2112 (int)ntohl(tx->req_list[i].addr_low),
2113 (int)ntohs(tx->req_list[i].length),
2114 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2115 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2116 tx->req_list[i].rdma_count);
2117 }
2118	kprintf("--------------\n");
2119#endif
2120 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2121 mxge_submit_req(tx, tx->req_list, cnt);
2122#ifdef IFNET_BUF_RING
2123 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2124 /* tell the NIC to start polling this slice */
2125 *tx->send_go = 1;
2126 tx->queue_active = 1;
2127 tx->activate++;
2128 wmb();
2129 }
2130#endif
2131 return;
2132
2133drop:
2134 m_freem(m);
2135 ss->oerrors++;
2136 return;
2137}
2138
2139#ifdef IFNET_BUF_RING
2140static void
2141mxge_qflush(struct ifnet *ifp)
2142{
2143 mxge_softc_t *sc = ifp->if_softc;
2144 mxge_tx_ring_t *tx;
2145 struct mbuf *m;
2146 int slice;
2147
2148 for (slice = 0; slice < sc->num_slices; slice++) {
2149 tx = &sc->ss[slice].tx;
2150		lockmgr(&tx->lock, LK_EXCLUSIVE);
2151 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2152 m_freem(m);
2153		lockmgr(&tx->lock, LK_RELEASE);
2154 }
2155 if_qflush(ifp);
2156}
2157
2158static inline void
2159mxge_start_locked(struct mxge_slice_state *ss)
2160{
2161 mxge_softc_t *sc;
2162 struct mbuf *m;
2163 struct ifnet *ifp;
2164 mxge_tx_ring_t *tx;
2165
2166 sc = ss->sc;
2167 ifp = sc->ifp;
2168 tx = &ss->tx;
2169
2170 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2171 m = drbr_dequeue(ifp, tx->br);
2172 if (m == NULL) {
2173 return;
2174 }
2175 /* let BPF see it */
2176 BPF_MTAP(ifp, m);
2177
2178 /* give it to the nic */
2179 mxge_encap(ss, m);
2180 }
2181 /* ran out of transmit slots */
2182	if (((ss->if_flags & IFF_OACTIVE) == 0)
2183	    && (!drbr_empty(ifp, tx->br))) {
2184		ss->if_flags |= IFF_OACTIVE;
2185 tx->stall++;
2186 }
2187}
2188
2189static int
2190mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2191{
2192 mxge_softc_t *sc;
2193 struct ifnet *ifp;
2194 mxge_tx_ring_t *tx;
2195 int err;
2196
2197 sc = ss->sc;
2198 ifp = sc->ifp;
2199 tx = &ss->tx;
2200
2201 if ((ss->if_flags & (IFF_RUNNING|IFF_OACTIVE)) !=
2202 IFF_RUNNING) {
2203 err = drbr_enqueue(ifp, tx->br, m);
2204 return (err);
2205 }
2206
2207 if (drbr_empty(ifp, tx->br) &&
2208 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2209 /* let BPF see it */
2210 BPF_MTAP(ifp, m);
2211 /* give it to the nic */
2212 mxge_encap(ss, m);
2213 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2214 return (err);
2215 }
2216 if (!drbr_empty(ifp, tx->br))
2217 mxge_start_locked(ss);
2218 return (0);
2219}
2220
2221static int
2222mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2223{
2224 mxge_softc_t *sc = ifp->if_softc;
2225 struct mxge_slice_state *ss;
2226 mxge_tx_ring_t *tx;
2227 int err = 0;
2228 int slice;
2229
2230#if 0
2231	slice = m->m_pkthdr.flowid;
2232#endif
	slice = 0;	/* XXX flowid not ported yet; always use slice 0 */
2233	slice &= (sc->num_slices - 1);	/* num_slices always power of 2 */
2234
2235 ss = &sc->ss[slice];
2236 tx = &ss->tx;
2237
	/* note: lockmgr() returns 0 when the lock is acquired */
2238	if (lockmgr(&tx->lock, LK_EXCLUSIVE|LK_NOWAIT) == 0) {
2239		err = mxge_transmit_locked(ss, m);
2240		lockmgr(&tx->lock, LK_RELEASE);
2241 } else {
2242 err = drbr_enqueue(ifp, tx->br, m);
2243 }
2244
2245 return (err);
2246}
2247
2248#else
2249
2250static inline void
2251mxge_start_locked(struct mxge_slice_state *ss)
2252{
2253 mxge_softc_t *sc;
2254 struct mbuf *m;
2255 struct ifnet *ifp;
2256 mxge_tx_ring_t *tx;
2257
2258 sc = ss->sc;
2259 ifp = sc->ifp;
2260 tx = &ss->tx;
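	/*
	 * tx->req - tx->done is the number of descriptors the NIC still
	 * owns; keep dequeuing only while more than max_desc free slots
	 * remain, so even a maximally fragmented packet is sure to fit.
	 */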
2261 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2262		m = ifq_dequeue(&ifp->if_snd, NULL);
2263 if (m == NULL) {
2264 return;
2265 }
2266 /* let BPF see it */
2267 BPF_MTAP(ifp, m);
2268
2269 /* give it to the nic */
2270 mxge_encap(ss, m);
2271 }
2272 /* ran out of transmit slots */
2273 if ((sc->ifp->if_flags & IFF_OACTIVE) == 0) {
2274 sc->ifp->if_flags |= IFF_OACTIVE;
2275 tx->stall++;
2276 }
2277}
2278#endif
2279static void
2280mxge_start(struct ifnet *ifp)
2281{
2282 mxge_softc_t *sc = ifp->if_softc;
2283 struct mxge_slice_state *ss;
2284
2285 /* only use the first slice for now */
2286 ss = &sc->ss[0];
2287	lockmgr(&ss->tx.lock, LK_EXCLUSIVE);
2288	mxge_start_locked(ss);
2289	lockmgr(&ss->tx.lock, LK_RELEASE);
2290}
2291
2292/*
2293 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2294 * at most 32 bytes at a time, so as to avoid involving the software
2295 * pio handler in the nic. We re-write the first segment's low
2296 * DMA address to mark it valid only after we write the entire chunk
2297 * in a burst
2298 */
2299static inline void
2300mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2301 mcp_kreq_ether_recv_t *src)
2302{
2303 uint32_t low;
2304
2305 low = src->addr_low;
2306 src->addr_low = 0xffffffff;
2307 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2308 wmb();
2309 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2310 wmb();
2311 src->addr_low = low;
2312 dst->addr_low = low;
2313 wmb();
2314}
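
/*
 * The 0xffffffff sentinel written above is what keeps the group invisible:
 * the firmware treats an all-ones addr_low as "slot not yet valid", so the
 * first slot's real low address is restored and copied to the NIC only
 * after both 32-byte bursts have landed, making the 8 buffers appear
 * atomically.
 */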
2315
2316static int
2317mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2318{
2319 bus_dma_segment_t seg;
2320 struct mbuf *m;
2321 mxge_rx_ring_t *rx = &ss->rx_small;
2322 int cnt, err;
2323
2324	m = m_gethdr(MB_DONTWAIT, MT_DATA);
2325 if (m == NULL) {
2326 rx->alloc_fail++;
2327 err = ENOBUFS;
2328 goto done;
2329 }
2330 m->m_len = MHLEN;
2331 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2332 &seg, 1, &cnt, BUS_DMA_NOWAIT);
2333 if (err != 0) {
2334 m_free(m);
2335 goto done;
2336 }
2337 rx->info[idx].m = m;
2338 rx->shadow[idx].addr_low =
2339 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2340 rx->shadow[idx].addr_high =
2341 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2342
2343done:
2344 if ((idx & 7) == 7)
2345 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2346 return err;
2347}
2348
2349static int
2350mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2351{
2352 bus_dma_segment_t seg[3];
2353 struct mbuf *m;
2354 mxge_rx_ring_t *rx = &ss->rx_big;
2355 int cnt, err, i;
2356
2357 if (rx->cl_size == MCLBYTES)
2358		m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
2359	else
2360		m = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2361 if (m == NULL) {
2362 rx->alloc_fail++;
2363 err = ENOBUFS;
2364 goto done;
2365 }
2366 m->m_len = rx->mlen;
2367 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2368 seg, 1, &cnt, BUS_DMA_NOWAIT);
2369 if (err != 0) {
2370 m_free(m);
2371 goto done;
2372 }
2373 rx->info[idx].m = m;
2374 rx->shadow[idx].addr_low =
2375 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2376 rx->shadow[idx].addr_high =
2377 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2378
2379#if MXGE_VIRT_JUMBOS
2380 for (i = 1; i < cnt; i++) {
2381 rx->shadow[idx + i].addr_low =
2382 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2383 rx->shadow[idx + i].addr_high =
2384 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2385 }
2386#endif
2387
2388done:
2389 for (i = 0; i < rx->nbufs; i++) {
2390 if ((idx & 7) == 7) {
2391 mxge_submit_8rx(&rx->lanai[idx - 7],
2392 &rx->shadow[idx - 7]);
2393 }
2394 idx++;
2395 }
2396 return err;
2397}
2398
2399/*
2400 * Myri10GE hardware checksums are not valid if the sender
2401 * padded the frame with non-zero padding. This is because
2402 * the firmware just does a simple 16-bit 1s complement
2403 * checksum across the entire frame, excluding the first 14
2404 * bytes. It is best to simply check the checksum and
2405 * tell the stack about it only if the checksum is good.
2406 */
2407
2408static inline uint16_t
2409mxge_rx_csum(struct mbuf *m, int csum)
2410{
2411 struct ether_header *eh;
2412 struct ip *ip;
2413 uint16_t c;
2414
2415 eh = mtod(m, struct ether_header *);
2416
2417 /* only deal with IPv4 TCP & UDP for now */
2418 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2419 return 1;
2420 ip = (struct ip *)(eh + 1);
2421 if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2422 ip->ip_p != IPPROTO_UDP))
2423 return 1;
2424#ifdef INET
2425 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2426		      htonl(ntohs(csum) + ntohs(ip->ip_len) -
2427			    (ip->ip_hl << 2) + ip->ip_p));
2428#else
2429 c = 1;
2430#endif
2431 c ^= 0xffff;
2432 return (c);
2433}
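
/*
 * Why the math above works: in 1s complement arithmetic a frame with a
 * correct TCP/UDP checksum sums to 0xffff over pseudo-header + transport
 * header + payload, and a valid IP header folds to 0xffff on its own,
 * contributing nothing.  in_pseudo() therefore only needs to add the
 * pseudo-header fields to the firmware's partial sum; XORing the result
 * with 0xffff yields 0 exactly when the checksum is good.
 */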
2434
2435static void
2436mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2437{
2438 struct ether_vlan_header *evl;
2439 struct ether_header *eh;
2440 uint32_t partial;
2441
2442 evl = mtod(m, struct ether_vlan_header *);
2443 eh = mtod(m, struct ether_header *);
2444
2445 /*
2446	 * fix checksum by subtracting EVL_ENCAPLEN bytes
2447 * after what the firmware thought was the end of the ethernet
2448 * header.
2449 */
2450
2451 /* put checksum into host byte order */
2452 *csum = ntohs(*csum);
2453 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2454 (*csum) += ~partial;
2455 (*csum) += ((*csum) < ~partial);
2456 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2457 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2458
2459 /* restore checksum to network byte order;
2460 later consumers expect this */
2461 *csum = htons(*csum);
2462
2463 /* save the tag */
2464#ifdef MXGE_NEW_VLAN_API
2465	m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
2466#else
2467 {
2468 struct m_tag *mtag;
2469 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2470				   MB_DONTWAIT);
2471 if (mtag == NULL)
2472 return;
2473 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2474 m_tag_prepend(m, mtag);
2475 }
2476
2477#endif
2478 m->m_flags |= M_VLANTAG;
2479
2480 /*
2481 * Remove the 802.1q header by copying the Ethernet
2482 * addresses over it and adjusting the beginning of
2483 * the data in the mbuf. The encapsulated Ethernet
2484 * type field is already in place.
2485 */
2486	bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN,
2487	      ETHER_HDR_LEN - ETHER_TYPE_LEN);
2488	m_adj(m, EVL_ENCAPLEN);
2489}
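
/*
 * A sketch of the 1s complement fixup above: subtracting the 4 tag bytes
 * from the sum is done by adding their complement (~partial) plus the
 * end-around carry ((*csum) < ~partial), then folding the 32-bit value
 * back to 16 bits twice.  For instance, removing partial = 0x81000001
 * adds ~partial = 0x7efffffe, which is exactly -partial modulo 0xffff.
 */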
2490
2491
2492static inline void
2493mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2494{
2495 mxge_softc_t *sc;
2496 struct ifnet *ifp;
2497 struct mbuf *m;
2498 struct ether_header *eh;
2499 mxge_rx_ring_t *rx;
2500 bus_dmamap_t old_map;
2501 int idx;
2502 uint16_t tcpudp_csum;
2503
2504 sc = ss->sc;
2505 ifp = sc->ifp;
2506 rx = &ss->rx_big;
2507 idx = rx->cnt & rx->mask;
2508 rx->cnt += rx->nbufs;
2509 /* save a pointer to the received mbuf */
2510 m = rx->info[idx].m;
2511 /* try to replace the received mbuf */
2512 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2513 /* drop the frame -- the old mbuf is re-cycled */
2514 ifp->if_ierrors++;
2515 return;
2516 }
2517
2518 /* unmap the received buffer */
2519 old_map = rx->info[idx].map;
2520 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2521 bus_dmamap_unload(rx->dmat, old_map);
2522
2523 /* swap the bus_dmamap_t's */
2524 rx->info[idx].map = rx->extra_map;
2525 rx->extra_map = old_map;
2526
2527 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2528 * aligned */
2529 m->m_data += MXGEFW_PAD;
2530
2531 m->m_pkthdr.rcvif = ifp;
2532 m->m_len = m->m_pkthdr.len = len;
2533 ss->ipackets++;
2534 eh = mtod(m, struct ether_header *);
2535 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2536 mxge_vlan_tag_remove(m, &csum);
2537 }
2538 /* if the checksum is valid, mark it in the mbuf header */
2539 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2540 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2541 return;
2542 /* otherwise, it was a UDP frame, or a TCP frame which
2543 we could not do LRO on. Tell the stack that the
2544 checksum is good */
2545 m->m_pkthdr.csum_data = 0xffff;
2546 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2547 }
2548#if 0
2549 /* flowid only valid if RSS hashing is enabled */
2550 if (sc->num_slices > 1) {
2551 m->m_pkthdr.flowid = (ss - sc->ss);
2552 m->m_flags |= M_FLOWID;
2553 }
2554#endif
2555 /* pass the frame up the stack */
2556 (*ifp->if_input)(ifp, m);
2557}
2558
2559static inline void
2560mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2561{
2562 mxge_softc_t *sc;
2563 struct ifnet *ifp;
2564 struct ether_header *eh;
2565 struct mbuf *m;
2566 mxge_rx_ring_t *rx;
2567 bus_dmamap_t old_map;
2568 int idx;
2569 uint16_t tcpudp_csum;
2570
2571 sc = ss->sc;
2572 ifp = sc->ifp;
2573 rx = &ss->rx_small;
2574 idx = rx->cnt & rx->mask;
2575 rx->cnt++;
2576 /* save a pointer to the received mbuf */
2577 m = rx->info[idx].m;
2578 /* try to replace the received mbuf */
2579 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2580 /* drop the frame -- the old mbuf is re-cycled */
2581 ifp->if_ierrors++;
2582 return;
2583 }
2584
2585 /* unmap the received buffer */
2586 old_map = rx->info[idx].map;
2587 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2588 bus_dmamap_unload(rx->dmat, old_map);
2589
2590 /* swap the bus_dmamap_t's */
2591 rx->info[idx].map = rx->extra_map;
2592 rx->extra_map = old_map;
2593
2594 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2595 * aligned */
2596 m->m_data += MXGEFW_PAD;
2597
2598 m->m_pkthdr.rcvif = ifp;
2599 m->m_len = m->m_pkthdr.len = len;
2600 ss->ipackets++;
2601 eh = mtod(m, struct ether_header *);
2602 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2603 mxge_vlan_tag_remove(m, &csum);
2604 }
2605 /* if the checksum is valid, mark it in the mbuf header */
2606 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2607 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2608 return;
2609 /* otherwise, it was a UDP frame, or a TCP frame which
2610 we could not do LRO on. Tell the stack that the
2611 checksum is good */
2612 m->m_pkthdr.csum_data = 0xffff;
2613 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2614 }
2615#if 0
2616 /* flowid only valid if RSS hashing is enabled */
2617 if (sc->num_slices > 1) {
2618 m->m_pkthdr.flowid = (ss - sc->ss);
2619 m->m_flags |= M_FLOWID;
2620 }
2621#endif
2622 /* pass the frame up the stack */
2623 (*ifp->if_input)(ifp, m);
2624}
2625
2626static inline void
2627mxge_clean_rx_done(struct mxge_slice_state *ss)
2628{
2629 mxge_rx_done_t *rx_done = &ss->rx_done;
2630 int limit = 0;
2631 uint16_t length;
2632 uint16_t checksum;
2633
2634
2635 while (rx_done->entry[rx_done->idx].length != 0) {
2636 length = ntohs(rx_done->entry[rx_done->idx].length);
2637 rx_done->entry[rx_done->idx].length = 0;
2638 checksum = rx_done->entry[rx_done->idx].checksum;
2639 if (length <= (MHLEN - MXGEFW_PAD))
2640 mxge_rx_done_small(ss, length, checksum);
2641 else
2642 mxge_rx_done_big(ss, length, checksum);
2643 rx_done->cnt++;
2644 rx_done->idx = rx_done->cnt & rx_done->mask;
2645
2646 /* limit potential for livelock */
2647 if (__predict_false(++limit > rx_done->mask / 2))
2648 break;
2649 }
2650#ifdef INET
2651 while (!SLIST_EMPTY(&ss->lro_active)) {
2652 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
2653 SLIST_REMOVE_HEAD(&ss->lro_active, next);
2654 mxge_lro_flush(ss, lro);
2655 }
2656#endif
2657}
2658
2659
2660static inline void
2661mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2662{
2663 struct ifnet *ifp;
2664 mxge_tx_ring_t *tx;
2665 struct mbuf *m;
2666 bus_dmamap_t map;
2667 int idx;
2668 int *flags;
2669
2670 tx = &ss->tx;
2671 ifp = ss->sc->ifp;
2672 while (tx->pkt_done != mcp_idx) {
2673 idx = tx->done & tx->mask;
2674 tx->done++;
2675 m = tx->info[idx].m;
2676 /* mbuf and DMA map only attached to the first
2677 segment per-mbuf */
2678 if (m != NULL) {
2679 ss->obytes += m->m_pkthdr.len;
2680 if (m->m_flags & M_MCAST)
2681 ss->omcasts++;
2682 ss->opackets++;
2683 tx->info[idx].m = NULL;
2684 map = tx->info[idx].map;
2685 bus_dmamap_unload(tx->dmat, map);
2686 m_freem(m);
2687 }
2688 if (tx->info[idx].flag) {
2689 tx->info[idx].flag = 0;
2690 tx->pkt_done++;
2691 }
2692 }
2693
2694 /* If we have space, clear IFF_OACTIVE to tell the stack that
2695	   it's OK to send packets */
2696#ifdef IFNET_BUF_RING
2697	flags = &ss->if_flags;
2698#else
2699	flags = &ifp->if_flags;
2700#endif
2701	lockmgr(&ss->tx.lock, LK_EXCLUSIVE);
2702	if ((*flags) & IFF_OACTIVE &&
2703	    tx->req - tx->done < (tx->mask + 1)/4) {
2704		*(flags) &= ~IFF_OACTIVE;
2705 ss->tx.wake++;
2706 mxge_start_locked(ss);
2707 }
2708#ifdef IFNET_BUF_RING
2709 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2710 /* let the NIC stop polling this queue, since there
2711 * are no more transmits pending */
2712 if (tx->req == tx->done) {
2713 *tx->send_stop = 1;
2714 tx->queue_active = 0;
2715 tx->deactivate++;
2716 wmb();
2717 }
2718 }
2719#endif
2720	lockmgr(&ss->tx.lock, LK_RELEASE);
2721
2722}
2723
2724static struct mxge_media_type mxge_xfp_media_types[] =
2725{
2726 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2727 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2728 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2729 {0, (1 << 5), "10GBASE-ER"},
2730 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2731 {0, (1 << 3), "10GBASE-SW"},
2732 {0, (1 << 2), "10GBASE-LW"},
2733 {0, (1 << 1), "10GBASE-EW"},
2734 {0, (1 << 0), "Reserved"}
2735};
2736static struct mxge_media_type mxge_sfp_media_types[] =
2737{
2738 {0, (1 << 7), "Reserved"},
2739 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2740 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2741 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}
2742};
2743
2744static void
2745mxge_set_media(mxge_softc_t *sc, int type)
2746{
2747 sc->media_flags |= type;
2748 ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
2749 ifmedia_set(&sc->media, sc->media_flags);
2750}
2751
2752
2753/*
2754 * Determine the media type for a NIC. Some XFPs will identify
2755 * themselves only when their link is up, so this is initiated via a
2756 * link up interrupt. However, this can potentially take up to
2757 * several milliseconds, so it is run via the watchdog routine, rather
2758 * than in the interrupt handler itself. This need only be done
2759 * once, not each time the link is up.
2760 */
2761static void
2762mxge_media_probe(mxge_softc_t *sc)
2763{
2764 mxge_cmd_t cmd;
2765 char *cage_type;
2766 char *ptr;
2767 struct mxge_media_type *mxge_media_types = NULL;
2768 int i, err, ms, mxge_media_type_entries;
2769 uint32_t byte;
2770
2771 sc->need_media_probe = 0;
2772
2773 /* if we've already set a media type, we're done */
2774 if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
2775 return;
2776
2777 /*
2778	 * parse the product code to determine the interface type
2779 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2780 * after the 3rd dash in the driver's cached copy of the
2781 * EEPROM's product code string.
2782 */
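	/*
	 * For example, a product code shaped like "10G-PCIE-8A-C" (an
	 * illustrative form, not an exhaustive list) carries 'C' after
	 * its third dash and is classified as CX4 below, while 'R'
	 * selects XFP and 'S' (or "2S") selects SFP+.
	 */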
2783 ptr = sc->product_code_string;
2784	if (ptr == NULL) {
2785		device_printf(sc->dev, "Missing product code\n");
		return;	/* nothing to parse */
2786	}
2787
2788 for (i = 0; i < 3; i++, ptr++) {
2789 ptr = index(ptr, '-');
2790 if (ptr == NULL) {
2791 device_printf(sc->dev,
2792 "only %d dashes in PC?!?\n", i);
2793 return;
2794 }
2795 }
2796 if (*ptr == 'C') {
2797 /* -C is CX4 */
2798 mxge_set_media(sc, IFM_10G_CX4);
2799 return;
2800 }
2801 else if (*ptr == 'Q') {
2802 /* -Q is Quad Ribbon Fiber */
2803 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2804 /* FreeBSD has no media type for Quad ribbon fiber */
2805 return;
2806 }
2807
2808 if (*ptr == 'R') {
2809 /* -R is XFP */
2810 mxge_media_types = mxge_xfp_media_types;
2811 mxge_media_type_entries =
2812 sizeof (mxge_xfp_media_types) /
2813 sizeof (mxge_xfp_media_types[0]);
2814 byte = MXGE_XFP_COMPLIANCE_BYTE;
2815 cage_type = "XFP";
2816 }
2817
2818	if (*ptr == 'S' || *(ptr + 1) == 'S') {
2819 /* -S or -2S is SFP+ */
2820 mxge_media_types = mxge_sfp_media_types;
2821 mxge_media_type_entries =
2822 sizeof (mxge_sfp_media_types) /
2823 sizeof (mxge_sfp_media_types[0]);
2824 cage_type = "SFP+";
2825 byte = 3;
2826 }
2827
2828 if (mxge_media_types == NULL) {
2829 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2830 return;
2831 }
2832
2833 /*
2834 * At this point we know the NIC has an XFP cage, so now we
2835 * try to determine what is in the cage by using the
2836	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2837 * register. We read just one byte, which may take over
2838 * a millisecond
2839 */
2840
2841 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2842 cmd.data1 = byte;
2843 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2844 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2845 device_printf(sc->dev, "failed to read XFP\n");
2846 }
2847 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2848 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2849 }
2850 if (err != MXGEFW_CMD_OK) {
2851 return;
2852 }
2853
2854 /* now we wait for the data to be cached */
2855 cmd.data0 = byte;
2856 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2857 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2858 DELAY(1000);
2859 cmd.data0 = byte;
2860 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2861 }
2862 if (err != MXGEFW_CMD_OK) {
2863 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2864 cage_type, err, ms);
2865 return;
2866 }
2867
2868 if (cmd.data0 == mxge_media_types[0].bitmask) {
2869 if (mxge_verbose)
2870 device_printf(sc->dev, "%s:%s\n", cage_type,
2871 mxge_media_types[0].name);
2872 mxge_set_media(sc, IFM_10G_CX4);
2873 return;
2874 }
2875 for (i = 1; i < mxge_media_type_entries; i++) {
2876 if (cmd.data0 & mxge_media_types[i].bitmask) {
2877 if (mxge_verbose)
2878 device_printf(sc->dev, "%s:%s\n",
2879 cage_type,
2880 mxge_media_types[i].name);
2881
2882 mxge_set_media(sc, mxge_media_types[i].flag);
2883 return;
2884 }
2885 }
2886 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type,
2887 cmd.data0);
2888
2889 return;
2890}
2891
2892static void
2893mxge_intr(void *arg)
2894{
2895 struct mxge_slice_state *ss = arg;
2896 mxge_softc_t *sc = ss->sc;
2897 mcp_irq_data_t *stats = ss->fw_stats;
2898 mxge_tx_ring_t *tx = &ss->tx;
2899 mxge_rx_done_t *rx_done = &ss->rx_done;
2900 uint32_t send_done_count;
2901 uint8_t valid;
2902
2903
2904#ifndef IFNET_BUF_RING
2905 /* an interrupt on a non-zero slice is implicitly valid
2906 since MSI-X irqs are not shared */
2907 if (ss != sc->ss) {
2908 mxge_clean_rx_done(ss);
2909 *ss->irq_claim = be32toh(3);
2910 return;
2911 }
2912#endif
2913
2914 /* make sure the DMA has finished */
2915 if (!stats->valid) {
2916 return;
2917 }
2918 valid = stats->valid;
2919
2920 if (sc->legacy_irq) {
2921 /* lower legacy IRQ */
2922 *sc->irq_deassert = 0;
2923 if (!mxge_deassert_wait)
2924 /* don't wait for conf. that irq is low */
2925 stats->valid = 0;
2926 } else {
2927 stats->valid = 0;
2928 }
2929
2930 /* loop while waiting for legacy irq deassertion */
2931 do {
2932 /* check for transmit completes and receives */
2933 send_done_count = be32toh(stats->send_done_count);
2934 while ((send_done_count != tx->pkt_done) ||
2935 (rx_done->entry[rx_done->idx].length != 0)) {
2936 if (send_done_count != tx->pkt_done)
2937 mxge_tx_done(ss, (int)send_done_count);
2938 mxge_clean_rx_done(ss);
2939 send_done_count = be32toh(stats->send_done_count);
2940 }
2941 if (sc->legacy_irq && mxge_deassert_wait)
2942 wmb();
2943 } while (*((volatile uint8_t *) &stats->valid));
2944
2945 /* fw link & error stats meaningful only on the first slice */
2946 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
2947 if (sc->link_state != stats->link_up) {
2948 sc->link_state = stats->link_up;
2949 if (sc->link_state) {
2950 sc->ifp->if_link_state = LINK_STATE_UP;
2951 if_link_state_change(sc->ifp);
2952 if (mxge_verbose)
2953 device_printf(sc->dev, "link up\n");
2954 } else {
2955 sc->ifp->if_link_state = LINK_STATE_DOWN;
2956 if_link_state_change(sc->ifp);
2957 if (mxge_verbose)
2958 device_printf(sc->dev, "link down\n");
2959 }
2960 sc->need_media_probe = 1;
2961 }
2962 if (sc->rdma_tags_available !=
2963 be32toh(stats->rdma_tags_available)) {
2964 sc->rdma_tags_available =
2965 be32toh(stats->rdma_tags_available);
2966 device_printf(sc->dev, "RDMA timed out! %d tags "
2967 "left\n", sc->rdma_tags_available);
2968 }
2969
2970 if (stats->link_down) {
2971 sc->down_cnt += stats->link_down;
2972 sc->link_state = 0;
2973 sc->ifp->if_link_state = LINK_STATE_DOWN;
2974 if_link_state_change(sc->ifp);
2975 }
2976 }
2977
2978 /* check to see if we have rx token to pass back */
2979 if (valid & 0x1)
2980 *ss->irq_claim = be32toh(3);
2981 *(ss->irq_claim + 1) = be32toh(3);
2982}
2983
2984static void
2985mxge_init(void *arg)
2986{
2987}
2988
2989
2990
2991static void
2992mxge_free_slice_mbufs(struct mxge_slice_state *ss)
2993{
2994 struct lro_entry *lro_entry;
2995 int i;
2996
2997 while (!SLIST_EMPTY(&ss->lro_free)) {
2998 lro_entry = SLIST_FIRST(&ss->lro_free);
2999 SLIST_REMOVE_HEAD(&ss->lro_free, next);
3000		kfree(lro_entry, M_DEVBUF);
3001 }
3002
3003 for (i = 0; i <= ss->rx_big.mask; i++) {
3004 if (ss->rx_big.info[i].m == NULL)
3005 continue;
3006 bus_dmamap_unload(ss->rx_big.dmat,
3007 ss->rx_big.info[i].map);
3008 m_freem(ss->rx_big.info[i].m);
3009 ss->rx_big.info[i].m = NULL;
3010 }
3011
3012 for (i = 0; i <= ss->rx_small.mask; i++) {
3013 if (ss->rx_small.info[i].m == NULL)
3014 continue;
3015 bus_dmamap_unload(ss->rx_small.dmat,
3016 ss->rx_small.info[i].map);
3017 m_freem(ss->rx_small.info[i].m);
3018 ss->rx_small.info[i].m = NULL;
3019 }
3020
3021 /* transmit ring used only on the first slice */
3022 if (ss->tx.info == NULL)
3023 return;
3024
3025 for (i = 0; i <= ss->tx.mask; i++) {
3026 ss->tx.info[i].flag = 0;
3027 if (ss->tx.info[i].m == NULL)
3028 continue;
3029 bus_dmamap_unload(ss->tx.dmat,
3030 ss->tx.info[i].map);
3031 m_freem(ss->tx.info[i].m);
3032 ss->tx.info[i].m = NULL;
3033 }
3034}
3035
3036static void
3037mxge_free_mbufs(mxge_softc_t *sc)
3038{
3039 int slice;
3040
3041 for (slice = 0; slice < sc->num_slices; slice++)
3042 mxge_free_slice_mbufs(&sc->ss[slice]);
3043}
3044
3045static void
3046mxge_free_slice_rings(struct mxge_slice_state *ss)
3047{
3048 int i;
3049
3050
3051 if (ss->rx_done.entry != NULL)
3052 mxge_dma_free(&ss->rx_done.dma);
3053 ss->rx_done.entry = NULL;
3054
3055 if (ss->tx.req_bytes != NULL)
3056		kfree(ss->tx.req_bytes, M_DEVBUF);
3057 ss->tx.req_bytes = NULL;
3058
3059 if (ss->tx.seg_list != NULL)
3060		kfree(ss->tx.seg_list, M_DEVBUF);
3061 ss->tx.seg_list = NULL;
3062
3063 if (ss->rx_small.shadow != NULL)
3064		kfree(ss->rx_small.shadow, M_DEVBUF);
3065 ss->rx_small.shadow = NULL;
3066
3067 if (ss->rx_big.shadow != NULL)
3068		kfree(ss->rx_big.shadow, M_DEVBUF);
3069 ss->rx_big.shadow = NULL;
3070
3071 if (ss->tx.info != NULL) {
3072 if (ss->tx.dmat != NULL) {
3073 for (i = 0; i <= ss->tx.mask; i++) {
3074 bus_dmamap_destroy(ss->tx.dmat,
3075 ss->tx.info[i].map);
3076 }
3077 bus_dma_tag_destroy(ss->tx.dmat);
3078 }
3079		kfree(ss->tx.info, M_DEVBUF);
3080 }
3081 ss->tx.info = NULL;
3082
3083 if (ss->rx_small.info != NULL) {
3084 if (ss->rx_small.dmat != NULL) {
3085 for (i = 0; i <= ss->rx_small.mask; i++) {
3086 bus_dmamap_destroy(ss->rx_small.dmat,
3087 ss->rx_small.info[i].map);
3088 }
3089 bus_dmamap_destroy(ss->rx_small.dmat,
3090 ss->rx_small.extra_map);
3091 bus_dma_tag_destroy(ss->rx_small.dmat);
3092 }
3093		kfree(ss->rx_small.info, M_DEVBUF);
3094 }
3095 ss->rx_small.info = NULL;
3096
3097 if (ss->rx_big.info != NULL) {
3098 if (ss->rx_big.dmat != NULL) {
3099 for (i = 0; i <= ss->rx_big.mask; i++) {
3100 bus_dmamap_destroy(ss->rx_big.dmat,
3101 ss->rx_big.info[i].map);
3102 }
3103 bus_dmamap_destroy(ss->rx_big.dmat,
3104 ss->rx_big.extra_map);
3105 bus_dma_tag_destroy(ss->rx_big.dmat);
3106 }
3107		kfree(ss->rx_big.info, M_DEVBUF);
3108 }
3109 ss->rx_big.info = NULL;
3110}
3111
3112static void
3113mxge_free_rings(mxge_softc_t *sc)
3114{
3115 int slice;
3116
3117 for (slice = 0; slice < sc->num_slices; slice++)
3118 mxge_free_slice_rings(&sc->ss[slice]);
3119}
3120
3121static int
3122mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3123 int tx_ring_entries)
3124{
3125 mxge_softc_t *sc = ss->sc;
3126 size_t bytes;
3127 int err, i;
3128
3129 err = ENOMEM;
3130
3131 /* allocate per-slice receive resources */
3132
3133 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3134 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3135
3136 /* allocate the rx shadow rings */
3137 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3138	ss->rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3139	if (ss->rx_small.shadow == NULL)
3140		return err;
3141
3142 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3143	ss->rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3144	if (ss->rx_big.shadow == NULL)
3145		return err;
3146
3147 /* allocate the rx host info rings */
3148 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3149	ss->rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3150	if (ss->rx_small.info == NULL)
3151		return err;
3152
3153 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3154	ss->rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3155	if (ss->rx_big.info == NULL)
3156		return err;
3157
3158 /* allocate the rx busdma resources */
3159 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3160 1, /* alignment */
3161 4096, /* boundary */
3162 BUS_SPACE_MAXADDR, /* low */
3163 BUS_SPACE_MAXADDR, /* high */
3164 NULL, NULL, /* filter */
3165 MHLEN, /* maxsize */
3166 1, /* num segs */
3167 MHLEN, /* maxsegsize */
3168 BUS_DMA_ALLOCNOW, /* flags */
3169 &ss->rx_small.dmat); /* tag */
3170 if (err != 0) {
3171 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3172 err);
3173		return err;
3174 }
3175
3176 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3177 1, /* alignment */
3178#if MXGE_VIRT_JUMBOS
3179 4096, /* boundary */
3180#else
3181 0, /* boundary */
3182#endif
3183 BUS_SPACE_MAXADDR, /* low */
3184 BUS_SPACE_MAXADDR, /* high */
3185 NULL, NULL, /* filter */
3186 3*4096, /* maxsize */
3187#if MXGE_VIRT_JUMBOS
3188 3, /* num segs */
3189 4096, /* maxsegsize*/
3190#else
3191 1, /* num segs */
3192 MJUM9BYTES, /* maxsegsize*/
3193#endif
3194 BUS_DMA_ALLOCNOW, /* flags */
3195 &ss->rx_big.dmat); /* tag */
3196 if (err != 0) {
3197 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3198 err);
3199		return err;
3200 }
3201 for (i = 0; i <= ss->rx_small.mask; i++) {
3202 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3203 &ss->rx_small.info[i].map);
3204 if (err != 0) {
3205 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3206 err);
3207			return err;
3208 }
3209 }
3210 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3211 &ss->rx_small.extra_map);
3212 if (err != 0) {
3213 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3214 err);
3215		return err;
3216 }
3217
3218 for (i = 0; i <= ss->rx_big.mask; i++) {
3219 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3220 &ss->rx_big.info[i].map);
3221 if (err != 0) {
3222 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3223 err);
3224			return err;
3225 }
3226 }
3227 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3228 &ss->rx_big.extra_map);
3229 if (err != 0) {
3230 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3231 err);
3232		return err;
3233 }
3234
3235	/* now allocate TX resources */
3236
3237#ifndef IFNET_BUF_RING
3238 /* only use a single TX ring for now */
3239 if (ss != ss->sc->ss)
3240 return 0;
3241#endif
3242
3243 ss->tx.mask = tx_ring_entries - 1;
3244 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3245
3246
3247 /* allocate the tx request copy block */
3248 bytes = 8 +
3249 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3250	ss->tx.req_bytes = kmalloc(bytes, M_DEVBUF, M_WAITOK);
3251	if (ss->tx.req_bytes == NULL)
3252		return err;
3253 /* ensure req_list entries are aligned to 8 bytes */
3254 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3255 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
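	/*
	 * Rounding up to the next 8-byte boundary: if req_bytes were,
	 * say, 0x1003, then (0x1003 + 7) & ~7UL = 0x1008, while an
	 * already-aligned 0x1008 maps to itself.  The 8 spare bytes in
	 * the allocation above keep the aligned pointer inside the block.
	 */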
3256
3257 /* allocate the tx busdma segment list */
3258 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3259 ss->tx.seg_list = (bus_dma_segment_t *)
3260		kmalloc(bytes, M_DEVBUF, M_WAITOK);
3261	if (ss->tx.seg_list == NULL)
3262		return err;
3263
3264 /* allocate the tx host info ring */
3265 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3266	ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3267	if (ss->tx.info == NULL)
3268		return err;
3269
3270 /* allocate the tx busdma resources */
3271 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3272 1, /* alignment */
3273 sc->tx_boundary, /* boundary */
3274 BUS_SPACE_MAXADDR, /* low */
3275 BUS_SPACE_MAXADDR, /* high */
3276 NULL, NULL, /* filter */
3277 65536 + 256, /* maxsize */
3278 ss->tx.max_desc - 2, /* num segs */
3279 sc->tx_boundary, /* maxsegsz */
3280 BUS_DMA_ALLOCNOW, /* flags */
3281 &ss->tx.dmat); /* tag */
3282
3283 if (err != 0) {
3284 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3285 err);
3286		return err;
3287 }
3288
3289 /* now use these tags to setup dmamaps for each slot
3290 in the ring */
3291 for (i = 0; i <= ss->tx.mask; i++) {
3292 err = bus_dmamap_create(ss->tx.dmat, 0,
3293 &ss->tx.info[i].map);
3294 if (err != 0) {
3295 device_printf(sc->dev, "Err %d tx dmamap\n",
3296 err);
3297			return err;
3298 }
3299 }
3300 return 0;
3301
3302}
3303
3304static int
3305mxge_alloc_rings(mxge_softc_t *sc)
3306{
3307 mxge_cmd_t cmd;
3308 int tx_ring_size;
3309 int tx_ring_entries, rx_ring_entries;
3310 int err, slice;
3311
3312 /* get ring sizes */
3313 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3314 tx_ring_size = cmd.data0;
3315 if (err != 0) {
3316 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3317 goto abort;
3318 }
3319
3320 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3321 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3322 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1);
3323 ifq_set_ready(&sc->ifp->if_snd);
3324
3325 for (slice = 0; slice < sc->num_slices; slice++) {
3326 err = mxge_alloc_slice_rings(&sc->ss[slice],
3327 rx_ring_entries,
3328 tx_ring_entries);
3329 if (err != 0)
3330 goto abort;
3331 }
3332 return 0;
3333
3334abort:
3335 mxge_free_rings(sc);
3336 return err;
3337
3338}
3339
3340
3341static void
3342mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3343{
3344	int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD;
3345
3346 if (bufsize < MCLBYTES) {
3347 /* easy, everything fits in a single buffer */
3348 *big_buf_size = MCLBYTES;
3349 *cl_size = MCLBYTES;
3350 *nbufs = 1;
3351 return;
3352 }
3353
3354 if (bufsize < MJUMPAGESIZE) {
3355 /* still easy, everything still fits in a single buffer */
3356 *big_buf_size = MJUMPAGESIZE;
3357 *cl_size = MJUMPAGESIZE;
3358 *nbufs = 1;
3359 return;
3360 }
3361#if MXGE_VIRT_JUMBOS
3362 /* now we need to use virtually contiguous buffers */
3363 *cl_size = MJUM9BYTES;
3364 *big_buf_size = 4096;
3365 *nbufs = mtu / 4096 + 1;
3366 /* needs to be a power of two, so round up */
3367 if (*nbufs == 3)
3368 *nbufs = 4;
3369#else
3370 *cl_size = MJUM9BYTES;
3371 *big_buf_size = MJUM9BYTES;
3372 *nbufs = 1;
3373#endif
3374}
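
/*
 * Worked examples, assuming the usual MCLBYTES = 2048 and MJUMPAGESIZE =
 * 4096: a 1500-byte MTU needs 1500 + 14 + 4 + 2 = 1520 bytes, so one
 * standard 2KB cluster suffices; a 9000-byte MTU needs 9020 bytes, which
 * lands in MJUM9BYTES clusters (or, with MXGE_VIRT_JUMBOS, nbufs =
 * 9000/4096 + 1 = 3, rounded up to the power of two 4).
 */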
3375
3376static int
3377mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3378{
3379 mxge_softc_t *sc;
3380 mxge_cmd_t cmd;
3381 bus_dmamap_t map;
3382 struct lro_entry *lro_entry;
3383 int err, i, slice;
3384
3385
3386 sc = ss->sc;
3387 slice = ss - sc->ss;
3388
3389 SLIST_INIT(&ss->lro_free);
3390 SLIST_INIT(&ss->lro_active);
3391
3392 for (i = 0; i < sc->lro_cnt; i++) {
3393 lro_entry = (struct lro_entry *)
3394			kmalloc(sizeof (*lro_entry), M_DEVBUF,
3395 M_NOWAIT | M_ZERO);
3396 if (lro_entry == NULL) {
3397 sc->lro_cnt = i;
3398 break;
3399 }
3400 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
3401 }
3402 /* get the lanai pointers to the send and receive rings */
3403
3404 err = 0;
3405#ifndef IFNET_BUF_RING
3406 /* We currently only send from the first slice */
3407 if (slice == 0) {
3408#endif
3409 cmd.data0 = slice;
3410 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3411 ss->tx.lanai =
3412 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3413 ss->tx.send_go = (volatile uint32_t *)
3414 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3415 ss->tx.send_stop = (volatile uint32_t *)
3416 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3417#ifndef IFNET_BUF_RING
3418 }
3419#endif
3420 cmd.data0 = slice;
3421 err |= mxge_send_cmd(sc,
3422 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3423 ss->rx_small.lanai =
3424 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3425 cmd.data0 = slice;
3426 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3427 ss->rx_big.lanai =
3428 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3429
3430 if (err != 0) {
3431 device_printf(sc->dev,
3432 "failed to get ring sizes or locations\n");
3433 return EIO;
3434 }
3435
3436 /* stock receive rings */
3437 for (i = 0; i <= ss->rx_small.mask; i++) {
3438 map = ss->rx_small.info[i].map;
3439 err = mxge_get_buf_small(ss, map, i);
3440 if (err) {
3441 device_printf(sc->dev, "alloced %d/%d smalls\n",
3442 i, ss->rx_small.mask + 1);
3443 return ENOMEM;
3444 }
3445 }
3446 for (i = 0; i <= ss->rx_big.mask; i++) {
3447 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3448 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3449 }
3450 ss->rx_big.nbufs = nbufs;
3451 ss->rx_big.cl_size = cl_size;
3452 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3453		EVL_ENCAPLEN + MXGEFW_PAD;
3454 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3455 map = ss->rx_big.info[i].map;
3456 err = mxge_get_buf_big(ss, map, i);
3457 if (err) {
3458 device_printf(sc->dev, "alloced %d/%d bigs\n",
3459 i, ss->rx_big.mask + 1);
3460 return ENOMEM;
3461 }
3462 }
3463 return 0;
3464}
3465
3466static int
3467mxge_open(mxge_softc_t *sc)
3468{
3469 mxge_cmd_t cmd;
3470 int err, big_bytes, nbufs, slice, cl_size, i;
3471 bus_addr_t bus;
3472 volatile uint8_t *itable;
3473 struct mxge_slice_state *ss;
3474
3475 /* Copy the MAC address in case it was overridden */
3476 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3477
3478 err = mxge_reset(sc, 1);
3479 if (err != 0) {
3480 device_printf(sc->dev, "failed to reset\n");
3481 return EIO;
3482 }
3483
3484 if (sc->num_slices > 1) {
3485 /* setup the indirection table */
3486 cmd.data0 = sc->num_slices;
3487 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3488 &cmd);
3489
3490 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3491 &cmd);
3492 if (err != 0) {
3493 device_printf(sc->dev,
3494 "failed to setup rss tables\n");
3495 return err;
3496 }
3497
3498 /* just enable an identity mapping */
3499 itable = sc->sram + cmd.data0;
3500 for (i = 0; i < sc->num_slices; i++)
3501 itable[i] = (uint8_t)i;
3502
3503 cmd.data0 = 1;
3504 cmd.data1 = mxge_rss_hash_type;
3505 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3506 if (err != 0) {
3507 device_printf(sc->dev, "failed to enable slices\n");
3508 return err;
3509 }
3510 }
3511
3512
3513 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3514
3515 cmd.data0 = nbufs;
3516 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3517 &cmd);
3518 /* error is only meaningful if we're trying to set
3519 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3520 if (err && nbufs > 1) {
3521 device_printf(sc->dev,
3522			      "Failed to set always-use-n to %d\n",
3523 nbufs);
3524 return EIO;
3525 }
3526 /* Give the firmware the mtu and the big and small buffer
3527 sizes. The firmware wants the big buf size to be a power
3528 of two. Luckily, FreeBSD's clusters are powers of two */
3529	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3530 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3531 cmd.data0 = MHLEN - MXGEFW_PAD;
3532 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3533 &cmd);
3534 cmd.data0 = big_bytes;
3535 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3536
3537 if (err != 0) {
3538 device_printf(sc->dev, "failed to setup params\n");
3539 goto abort;
3540 }
3541
3542 /* Now give him the pointer to the stats block */
3543 for (slice = 0;
3544#ifdef IFNET_BUF_RING
3545 slice < sc->num_slices;
3546#else
3547 slice < 1;
3548#endif
3549 slice++) {
3550 ss = &sc->ss[slice];
3551 cmd.data0 =
3552 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3553 cmd.data1 =
3554 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3555 cmd.data2 = sizeof(struct mcp_irq_data);
3556 cmd.data2 |= (slice << 16);
3557 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3558 }
3559
3560 if (err != 0) {
3561 bus = sc->ss->fw_stats_dma.bus_addr;
3562 bus += offsetof(struct mcp_irq_data, send_done_count);
3563 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3564 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3565 err = mxge_send_cmd(sc,
3566 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3567 &cmd);
3568 /* Firmware cannot support multicast without STATS_DMA_V2 */
3569 sc->fw_multicast_support = 0;
3570 } else {
3571 sc->fw_multicast_support = 1;
3572 }
3573
3574 if (err != 0) {
3575 device_printf(sc->dev, "failed to setup params\n");
3576 goto abort;
3577 }
3578
3579 for (slice = 0; slice < sc->num_slices; slice++) {
3580 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3581 if (err != 0) {
3582 device_printf(sc->dev, "couldn't open slice %d\n",
3583 slice);
3584 goto abort;
3585 }
3586 }
3587
3588 /* Finally, start the firmware running */
3589 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3590 if (err) {
3591 device_printf(sc->dev, "Couldn't bring up link\n");
3592 goto abort;
3593 }
3594#ifdef IFNET_BUF_RING
3595 for (slice = 0; slice < sc->num_slices; slice++) {
3596 ss = &sc->ss[slice];
3597 ss->if_flags |= IFF_RUNNING;
3598 ss->if_flags &= ~IFF_OACTIVE;
3599 }
3600#endif
3601 sc->ifp->if_flags |= IFF_RUNNING;
3602 sc->ifp->if_flags &= ~IFF_OACTIVE;
3603 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3604
3605 return 0;
3606
3607
3608abort:
3609 mxge_free_mbufs(sc);
3610
3611 return err;
3612}
3613
3614static int
3615mxge_close(mxge_softc_t *sc)
3616{
3617 mxge_cmd_t cmd;
3618 int err, old_down_cnt;
3619#ifdef IFNET_BUF_RING
3620 struct mxge_slice_state *ss;
3621 int slice;
3622#endif
3623
3624 callout_stop(&sc->co_hdl);
3625#ifdef IFNET_BUF_RING
3626 for (slice = 0; slice < sc->num_slices; slice++) {
3627 ss = &sc->ss[slice];
3628		ss->if_flags &= ~IFF_RUNNING;
3629 }
3630#endif
3631	sc->ifp->if_flags &= ~IFF_RUNNING;
3632 old_down_cnt = sc->down_cnt;
3633 wmb();
3634 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3635 if (err) {
3636 device_printf(sc->dev, "Couldn't bring down link\n");
3637 }
3638 if (old_down_cnt == sc->down_cnt) {
3639 /* wait for down irq */
3640 DELAY(10 * sc->intr_coal_delay);
3641 }
3642 wmb();
3643 if (old_down_cnt == sc->down_cnt) {
3644 device_printf(sc->dev, "never got down irq\n");
3645 }
3646
3647 mxge_free_mbufs(sc);
3648
3649 return 0;
3650}
3651
3652static void
3653mxge_setup_cfg_space(mxge_softc_t *sc)
3654{
3655 device_t dev = sc->dev;
3656 int reg;
3657 uint16_t cmd, lnk, pectl;
3658
3659 /* find the PCIe link width and set max read request to 4KB*/
3660 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3661 lnk = pci_read_config(dev, reg + 0x12, 2);
3662 sc->link_width = (lnk >> 4) & 0x3f;
3663
3664 pectl = pci_read_config(dev, reg + 0x8, 2);
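		/*
		 * Bits 14:12 of the PCIe Device Control register (offset
		 * 0x8 into the express capability) hold
		 * Max_Read_Request_Size; the encoding 5 selects 4096
		 * bytes, the 4KB request size promised above.
		 */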
3665 pectl = (pectl & ~0x7000) | (5 << 12);
3666 pci_write_config(dev, reg + 0x8, pectl, 2);
3667 }
3668
3669 /* Enable DMA and Memory space access */
3670 pci_enable_busmaster(dev);
3671 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3672 cmd |= PCIM_CMD_MEMEN;
3673 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3674}
3675
3676static uint32_t
3677mxge_read_reboot(mxge_softc_t *sc)
3678{
3679 device_t dev = sc->dev;
3680 uint32_t vs;
3681
3682 /* find the vendor specific offset */
3683 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3684 device_printf(sc->dev,
3685 "could not find vendor specific offset\n");
3686 return (uint32_t)-1;
3687 }
3688 /* enable read32 mode */
3689 pci_write_config(dev, vs + 0x10, 0x3, 1);
3690 /* tell NIC which register to read */
3691 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3692 return (pci_read_config(dev, vs + 0x14, 4));
3693}
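
/*
 * The vendor-specific writes above form a small window protocol: vs + 0x10
 * set to 0x3 enables 32-bit reads, vs + 0x18 selects which NIC-internal
 * address to expose (0xfffffff0 holds the reboot status), and vs + 0x14 is
 * the data port through which the answer is read back.
 */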
3694
3695static int
3696mxge_watchdog_reset(mxge_softc_t *sc, int slice)
3697{
3698 struct pci_devinfo *dinfo;
3699 mxge_tx_ring_t *tx;
3700 int err;
3701 uint32_t reboot;
3702 uint16_t cmd;
3703
3704 err = ENXIO;
3705
3706 device_printf(sc->dev, "Watchdog reset!\n");
3707
3708 /*
3709 * check to see if the NIC rebooted. If it did, then all of
3710 * PCI config space has been reset, and things like the
3711 * busmaster bit will be zero. If this is the case, then we
3712 * must restore PCI config space before the NIC can be used
3713 * again
3714 */
3715 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3716 if (cmd == 0xffff) {
3717 /*
3718 * maybe the watchdog caught the NIC rebooting; wait
3719 * up to 100ms for it to finish. If it does not come
3720 * back, then give up
3721 */
3722 DELAY(1000*100);
3723 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3724 if (cmd == 0xffff) {
3725 device_printf(sc->dev, "NIC disappeared!\n");
3726 return (err);
3727 }
3728 }
3729 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3730 /* print the reboot status */
3731 reboot = mxge_read_reboot(sc);
3732 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3733 reboot);
3734 /* restore PCI configuration space */
3735 dinfo = device_get_ivars(sc->dev);
3736 pci_cfg_restore(sc->dev, dinfo);
3737
3738 /* and redo any changes we made to our config space */
3739 mxge_setup_cfg_space(sc);
3740
3741		if (sc->ifp->if_flags & IFF_RUNNING) {
3742 mxge_close(sc);
3743 err = mxge_open(sc);
3744 }
3745 } else {
3746 tx = &sc->ss[slice].tx;
3747 device_printf(sc->dev,
3748 "NIC did not reboot, slice %d ring state:\n",
3749 slice);
3750 device_printf(sc->dev,
3751 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3752 tx->req, tx->done, tx->queue_active);
3753 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3754 tx->activate, tx->deactivate);
3755 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3756 tx->pkt_done,
3757 be32toh(sc->ss->fw_stats->send_done_count));
3758 device_printf(sc->dev, "not resetting\n");
3759 }
3760 return (err);
3761}
3762
3763static int
3764mxge_watchdog(mxge_softc_t *sc)
3765{
3766 mxge_tx_ring_t *tx;
3767 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3768 int i, err = 0;
3769
3770 /* see if we have outstanding transmits, which
3771 have been pending for more than mxge_ticks */
3772 for (i = 0;
3773#ifdef IFNET_BUF_RING
3774 (i < sc->num_slices) && (err == 0);
3775#else
3776 (i < 1) && (err == 0);
3777#endif
3778 i++) {
3779 tx = &sc->ss[i].tx;
3780 if (tx->req != tx->done &&
3781 tx->watchdog_req != tx->watchdog_done &&
3782 tx->done == tx->watchdog_done) {
3783 /* check for pause blocking before resetting */
3784 if (tx->watchdog_rx_pause == rx_pause)
3785 err = mxge_watchdog_reset(sc, i);
3786 else
3787 device_printf(sc->dev, "Flow control blocking "
3788 "xmits, check link partner\n");
3789 }
3790
3791 tx->watchdog_req = tx->req;
3792 tx->watchdog_done = tx->done;
3793 tx->watchdog_rx_pause = rx_pause;
3794 }
3795
3796 if (sc->need_media_probe)
3797 mxge_media_probe(sc);
3798 return (err);
3799}
3800
3801static void
3802mxge_update_stats(mxge_softc_t *sc)
3803{
3804 struct mxge_slice_state *ss;
3805 u_long ipackets = 0;
3806 u_long opackets = 0;
3807#ifdef IFNET_BUF_RING
3808 u_long obytes = 0;
3809 u_long omcasts = 0;
3810 u_long odrops = 0;
3811#endif
3812 u_long oerrors = 0;
3813 int slice;
3814
3815 for (slice = 0; slice < sc->num_slices; slice++) {
3816 ss = &sc->ss[slice];
3817 ipackets += ss->ipackets;
3818 opackets += ss->opackets;
3819#ifdef IFNET_BUF_RING
3820 obytes += ss->obytes;
3821 omcasts += ss->omcasts;
3822 odrops += ss->tx.br->br_drops;
3823#endif
3824 oerrors += ss->oerrors;
3825 }
3826 sc->ifp->if_ipackets = ipackets;
3827 sc->ifp->if_opackets = opackets;
3828#ifdef IFNET_BUF_RING
3829 sc->ifp->if_obytes = obytes;
3830 sc->ifp->if_omcasts = omcasts;
3831 sc->ifp->if_snd.ifq_drops = odrops;
3832#endif
3833 sc->ifp->if_oerrors = oerrors;
3834}
3835
3836static void
3837mxge_tick(void *arg)
3838{
3839 mxge_softc_t *sc = arg;
3840 int err = 0;
3841
3842	lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3843 /* aggregate stats from different slices */
3844 mxge_update_stats(sc);
3845 if (!sc->watchdog_countdown) {
3846 err = mxge_watchdog(sc);
3847 sc->watchdog_countdown = 4;
3848 }
3849 sc->watchdog_countdown--;
3850 if (err == 0)
3851 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3852	lockmgr(&sc->driver_lock, LK_RELEASE);
3853}
3854
3855static int
3856mxge_media_change(struct ifnet *ifp)
3857{
3858 return EINVAL;
3859}
3860
3861static int
3862mxge_change_mtu(mxge_softc_t *sc, int mtu)
3863{
3864 struct ifnet *ifp = sc->ifp;
3865 int real_mtu, old_mtu;
3866 int err = 0;
3867
3868
3869	real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3870 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
3871 return EINVAL;
3872	lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3873 old_mtu = ifp->if_mtu;
3874 ifp->if_mtu = mtu;
3875	if (ifp->if_flags & IFF_RUNNING) {
3876 mxge_close(sc);
3877 err = mxge_open(sc);
3878 if (err != 0) {
3879 ifp->if_mtu = old_mtu;
3880 mxge_close(sc);
3881 (void) mxge_open(sc);
3882 }
3883 }
3884	lockmgr(&sc->driver_lock, LK_RELEASE);
3885 return err;
3886}
3887
3888static void
3889mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3890{
3891 mxge_softc_t *sc = ifp->if_softc;
3892
3893
3894 if (sc == NULL)
3895 return;
3896 ifmr->ifm_status = IFM_AVALID;
3897 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
3898 ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
3899 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
3900}
3901
3902static int
3903 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
3904{
3905 mxge_softc_t *sc = ifp->if_softc;
3906 struct ifreq *ifr = (struct ifreq *)data;
3907 int err, mask;
3908
3909	(void)cr;
3910 err = 0;
3911 switch (command) {
3912 case SIOCSIFADDR:
3913 case SIOCGIFADDR:
3914 err = ether_ioctl(ifp, command, data);
3915 break;
3916
3917 case SIOCSIFMTU:
3918 err = mxge_change_mtu(sc, ifr->ifr_mtu);
3919 break;
3920
3921 case SIOCSIFFLAGS:
3922		lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3923		if (sc->dying) {
3924			lockmgr(&sc->driver_lock, LK_RELEASE);
3925 return EINVAL;
3926 }
3927 if (ifp->if_flags & IFF_UP) {
3928			if (!(ifp->if_flags & IFF_RUNNING)) {
3929 err = mxge_open(sc);
3930 } else {
3931				/* take care of promisc and allmulti
3932				   flag changes */
3933 mxge_change_promisc(sc,
3934 ifp->if_flags & IFF_PROMISC);
3935 mxge_set_multicast_list(sc);
3936 }
3937 } else {
3938			if (ifp->if_flags & IFF_RUNNING) {
3939 mxge_close(sc);
3940 }
3941 }
3942		lockmgr(&sc->driver_lock, LK_RELEASE);
3943 break;
3944
3945 case SIOCADDMULTI:
3946 case SIOCDELMULTI:
3947		lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3948		mxge_set_multicast_list(sc);
3949		lockmgr(&sc->driver_lock, LK_RELEASE);
3950 break;
3951
3952 case SIOCSIFCAP:
3953		lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3954 mask = ifr->ifr_reqcap ^ ifp->if_capenable;