if: Move IFF_OACTIVE bit into ifaltq; prepare multiple TX queues support
[dragonfly.git] / sys/dev/netif/mxge/if_mxge.c
/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

$FreeBSD: src/sys/dev/mxge/if_mxge.c,v 1.63 2009/06/26 11:45:06 rwatson Exp $

***************************************************************************/

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/in_cksum.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/serialize.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

/* count xmits ourselves, rather than via drbr */
#define NO_SLOW_STATS
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ifq_var.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/vlan/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <sys/bus.h>
#include <sys/rman.h>

#include <bus/pci/pcireg.h>
#include <bus/pci/pcivar.h>
#include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__x86_64)
#include <machine/specialreg.h>
#endif

#include <dev/netif/mxge/mxge_mcp.h>
#include <dev/netif/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/netif/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;
/* XXX: not yet */
/* static int mxge_initial_mtu = ETHERMTU_JUMBO; */
static int mxge_initial_mtu = ETHERMTU;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, NULL, NULL);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

/* XXX: we don't have Large Receive Offload support yet */
inline int
mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
{
	(void)ss;
	(void)m_head;
	(void)csum;
	return 1;
}

inline void
mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
{
	(void)ss;
	(void)lro;
}

static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if 0
#if defined(__i386) || defined(__x86_64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
#else
	sc->wc = 0;	/* TBD: PAT support */
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

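	/*
	 * The tag below allows only a single segment, so a page-aligned
	 * allocation larger than a page cannot honor a 4KB boundary;
	 * drop the boundary and let one segment span the whole region.
	 * Smaller allocations are kept within a single 4KB page.
	 */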
	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)
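/* NB: the macro ignores its argument and advances the local variable
   'ptr' just past the next NUL terminator */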

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves. This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

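	/* extended config space layout: 1MB per bus, 4KB per function,
	   with functions numbered func + 8 * slot within a bus */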
	off = base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_mapdev() failed\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/x86_64!?!?!\n");
	return;
}
#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests. The
	 * results are returned in cmd.data0. The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;		/* read test */
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
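	/* speed in MB/s is bytes per microsecond:
	 * (transfers * len) / (ticks * 0.5us)
	 *	== ((data0 >> 16) * len * 2) / (data0 & 0xffff)
	 */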
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;			/* write test */
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;		/* read+write test */
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary. Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
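		/* bits 14:12 of the PCIe device control register encode
		   the max read request size; 101b means 4096 bytes */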
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;

	if (mxge_force_firmware != 0) {
		if (mxge_force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}

union qualhack
{
	const char *ro_char;
	char *rw_char;
};

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
		&sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;
}

static void *
z_alloc(void *nil, u_int items, u_int size)
{
	void *ptr;

	ptr = kmalloc(items * size, M_TEMP, M_NOWAIT);
	return ptr;
}

static void
z_free(void *nil, void *ptr)
{
	kfree(ptr, M_TEMP);
}

static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}

	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = kmalloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL) {
		status = ENOMEM;
		goto abort_with_zs;
	}
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	kfree(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

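	/* carve an 8-byte-aligned command block out of buf_bytes,
	   same trick as in mxge_send_cmd() below */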
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address. The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/*
	 * We may be called during attach, before if_serializer is available.
	 * This is not a fast path, just check for NULL
	 */
	if (sc->ifp->if_serializer)
		ASSERT_SERIALIZED(sc->ifp->if_serializer);

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);

	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	return err;
}

static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = kmalloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not kmalloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	kfree(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}

static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ". For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address. The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8); /* where the code starts */
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			      confirm, *confirm);
		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (sc->ifp->if_serializer)
		ASSERT_SERIALIZED(sc->ifp->if_serializer);
	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	if (ifp->if_serializer)
		ASSERT_SERIALIZED(ifp->if_serializer);

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */
	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status: "
				      "%d\n", err);
			/* abort, leaving multicast filtering off */
			return;
		}
	}
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0. It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */
	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			ss->fw_stats->valid = 0;
			ss->fw_stats->send_done_count = 0;
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

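	/* intr_coal_delay_ptr points into NIC SRAM (set up in
	   mxge_reset()); rewrite it under the interface serializer */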
	lwkt_serialize_enter(sc->ifp->if_serializer);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;
	lwkt_serialize_exit(sc->ifp->if_serializer);

	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	lwkt_serialize_enter(sc->ifp->if_serializer);
	err = mxge_change_pause(sc, enabled);
	lwkt_serialize_exit(sc->ifp->if_serializer);
	return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
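	/* a new lro count only takes effect once the rings are
	   rebuilt, so bounce the interface if it is running */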
	if (ifp->if_flags & IFF_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}
	return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	if (lro_cnt > 128)
		return EINVAL;

	lwkt_serialize_enter(sc->ifp->if_serializer);
	err = mxge_change_lro_locked(sc, lro_cnt);
	lwkt_serialize_exit(sc->ifp->if_serializer);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
	sysctl_ctx_free(&sc->sysctl_ctx);
	sc->sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = &sc->sysctl_ctx;
	sysctl_ctx_init(ctx);
	sc->sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw),
					  OID_AUTO,
					  device_get_nameunit(sc->dev),
					  CTLFLAG_RD, 0, "");
	if (sc->sysctl_tree == NULL) {
		device_printf(sc->dev, "can't add sysctl node\n");
		return;
	}

	children = SYSCTL_CHILDREN(sc->sysctl_tree);
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, &sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, &sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, &sc->product_code_string,
			  0, "product code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control (pause frames)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"lro_cnt",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_lro,
			"I", "number of lro merge queues");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		ksprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}

/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic. We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), ss->scratch);
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = ip_off + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */
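	/*
	 * The (req - rdma_count)->rdma_count assignments below are
	 * those retroactive fill-ins: they reach back to the request
	 * that started the current run of RDMAs and store the length
	 * of that run.
	 */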
1876
1877 while (busdma_seg_cnt) {
1878 /* Break the busdma segment up into pieces*/
1879 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1880 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1881 len = seg->ds_len;
1882
1883 while (len) {
1884 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1885 seglen = len;
1886 cum_len_next = cum_len + seglen;
1887 (req-rdma_count)->rdma_count = rdma_count + 1;
1888 if (__predict_true(cum_len >= 0)) {
1889 /* payload */
1890 chop = (cum_len_next > mss);
1891 cum_len_next = cum_len_next % mss;
1892 next_is_first = (cum_len_next == 0);
1893 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1894 flags_next |= next_is_first *
1895 MXGEFW_FLAGS_FIRST;
1896 rdma_count |= -(chop | next_is_first);
1897 rdma_count += chop & !next_is_first;
1898 } else if (cum_len_next >= 0) {
1899 /* header ends */
1900 rdma_count = -1;
1901 cum_len_next = 0;
1902 seglen = -cum_len;
1903 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1904 flags_next = MXGEFW_FLAGS_TSO_PLD |
1905 MXGEFW_FLAGS_FIRST |
1906 (small * MXGEFW_FLAGS_SMALL);
1907 }
1908
1909 req->addr_high = high_swapped;
1910 req->addr_low = htobe32(low);
1911 req->pseudo_hdr_offset = pseudo_hdr_offset;
1912 req->pad = 0;
1913 req->rdma_count = 1;
1914 req->length = htobe16(seglen);
1915 req->cksum_offset = cksum_offset;
1916 req->flags = flags | ((cum_len & 1) *
1917 MXGEFW_FLAGS_ALIGN_ODD);
1918 low += seglen;
1919 len -= seglen;
1920 cum_len = cum_len_next;
1921 flags = flags_next;
1922 req++;
1923 cnt++;
1924 rdma_count++;
1925 if (__predict_false(cksum_offset > seglen))
1926 cksum_offset -= seglen;
1927 else
1928 cksum_offset = 0;
1929 if (__predict_false(cnt > tx->max_desc))
1930 goto drop;
1931 }
1932 busdma_seg_cnt--;
1933 seg++;
1934 }
1935 (req-rdma_count)->rdma_count = rdma_count;
1936
1937 do {
1938 req--;
1939 req->flags |= MXGEFW_FLAGS_TSO_LAST;
1940 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1941
1942 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
1943 mxge_submit_req(tx, tx->req_list, cnt);
1944#ifdef IFNET_BUF_RING
1945 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
1946 /* tell the NIC to start polling this slice */
1947 *tx->send_go = 1;
1948 tx->queue_active = 1;
1949 tx->activate++;
1950 wmb();
1951 }
1952#endif
1953 return;
1954
1955drop:
1956 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
1957 m_freem(m);
1958 ss->oerrors++;
1959 if (!once) {
1960 kprintf("tx->max_desc exceeded via TSO!\n");
1961 kprintf("mss = %d, %ld, %d!\n", mss,
1962 (long)seg - (long)tx->seg_list, tx->max_desc);
1963 once = 1;
1964 }
1965 return;
1966
1967}
1968
1969#endif /* IFCAP_TSO4 */
1970
1971#ifdef MXGE_NEW_VLAN_API
1972/*
1973 * We reproduce the software vlan tag insertion from
1974 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
1975 * vlan tag insertion. We need to advertise this in order to have the
1976 * vlan interface respect our csum offload flags.
1977 */
1978static struct mbuf *
1979mxge_vlan_tag_insert(struct mbuf *m)
1980{
1981 struct ether_vlan_header *evl;
1982
b915556e 1983 M_PREPEND(m, EVL_ENCAPLEN, MB_DONTWAIT);
1984 if (__predict_false(m == NULL))
1985 return NULL;
1986 if (m->m_len < sizeof(*evl)) {
1987 m = m_pullup(m, sizeof(*evl));
1988 if (__predict_false(m == NULL))
1989 return NULL;
1990 }
1991 /*
1992 * Transform the Ethernet header into an Ethernet header
1993 * with 802.1Q encapsulation.
1994 */
1995 evl = mtod(m, struct ether_vlan_header *);
b915556e 1996 bcopy((char *)evl + EVL_ENCAPLEN,
1997 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
1998 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
b915556e 1999 evl->evl_tag = htons(m->m_pkthdr.ether_vlantag);
2000 m->m_flags &= ~M_VLANTAG;
2001 return m;
2002}
2003#endif /* MXGE_NEW_VLAN_API */
2004
2005static void
2006mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2007{
2008 mxge_softc_t *sc;
2009 mcp_kreq_ether_send_t *req;
2010 bus_dma_segment_t *seg;
2011 struct mbuf *m_tmp;
2012 struct ifnet *ifp;
2013 mxge_tx_ring_t *tx;
2014 struct ip *ip;
2015 int cnt, cum_len, err, i, idx, odd_flag, ip_off;
2016 uint16_t pseudo_hdr_offset;
2017 uint8_t flags, cksum_offset;
2018
2019
2020 sc = ss->sc;
2021 ifp = sc->ifp;
2022 tx = &ss->tx;
2023
2024 ip_off = sizeof (struct ether_header);
2025#ifdef MXGE_NEW_VLAN_API
2026 if (m->m_flags & M_VLANTAG) {
2027 m = mxge_vlan_tag_insert(m);
2028 if (__predict_false(m == NULL))
2029 goto drop;
b915556e 2030 ip_off += EVL_ENCAPLEN;
2031 }
2032#endif
2033 /* (try to) map the frame for DMA */
2034 idx = tx->req & tx->mask;
2035 err = bus_dmamap_load_mbuf_segment(tx->dmat, tx->info[idx].map,
2036 m, tx->seg_list, 1, &cnt,
2037 BUS_DMA_NOWAIT);
2038 if (__predict_false(err == EFBIG)) {
2039 /* Too many segments in the chain. Try
2040 to defrag */
d776df92 2041 m_tmp = m_defrag(m, MB_DONTWAIT);
2042 if (m_tmp == NULL) {
2043 goto drop;
2044 }
2045 ss->tx.defrag++;
2046 m = m_tmp;
7d8771d4 2047 err = bus_dmamap_load_mbuf_segment(tx->dmat,
8892ea20 2048 tx->info[idx].map,
7d8771d4 2049 m, tx->seg_list, 1, &cnt,
2050 BUS_DMA_NOWAIT);
2051 }
2052 if (__predict_false(err != 0)) {
7d8771d4 2053 device_printf(sc->dev, "bus_dmamap_load_mbuf_segment returned %d"
2054 " packet len = %d\n", err, m->m_pkthdr.len);
2055 goto drop;
2056 }
2057 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2058 BUS_DMASYNC_PREWRITE);
2059 tx->info[idx].m = m;
2060
2061#if IFCAP_TSO4
2062 /* TSO is different enough, we handle it in another routine */
2063 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2064 mxge_encap_tso(ss, m, cnt, ip_off);
2065 return;
2066 }
2067#endif
2068
2069 req = tx->req_list;
2070 cksum_offset = 0;
2071 pseudo_hdr_offset = 0;
2072 flags = MXGEFW_FLAGS_NO_TSO;
2073
2074 /* checksum offloading? */
2075 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
2076 /* ensure ip header is in first mbuf, copy
2077 it to a scratch buffer if not */
2078 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
2079 m_copydata(m, 0, ip_off + sizeof (*ip),
2080 ss->scratch);
2081 ip = (struct ip *)(ss->scratch + ip_off);
2082 } else {
2083 ip = (struct ip *)(mtod(m, char *) + ip_off);
2084 }
2085 cksum_offset = ip_off + (ip->ip_hl << 2);
2086 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2087 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2088 req->cksum_offset = cksum_offset;
2089 flags |= MXGEFW_FLAGS_CKSUM;
2090 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2091 } else {
2092 odd_flag = 0;
2093 }
2094 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2095 flags |= MXGEFW_FLAGS_SMALL;
2096
2097 /* convert segments into a request list */
2098 cum_len = 0;
2099 seg = tx->seg_list;
2100 req->flags = MXGEFW_FLAGS_FIRST;
2101 for (i = 0; i < cnt; i++) {
2102 req->addr_low =
2103 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2104 req->addr_high =
2105 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2106 req->length = htobe16(seg->ds_len);
2107 req->cksum_offset = cksum_offset;
2108 if (cksum_offset > seg->ds_len)
2109 cksum_offset -= seg->ds_len;
2110 else
2111 cksum_offset = 0;
2112 req->pseudo_hdr_offset = pseudo_hdr_offset;
2113 req->pad = 0; /* complete solid 16-byte block */
2114 req->rdma_count = 1;
2115 req->flags |= flags | ((cum_len & 1) * odd_flag);
2116 cum_len += seg->ds_len;
2117 seg++;
2118 req++;
2119 req->flags = 0;
2120 }
2121 req--;
2122 /* pad runts to 60 bytes */
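	/* (60 is ETHER_MIN_LEN minus the 4-byte FCS the NIC appends;
	 * the extra descriptor below pads from the zeropad block) */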
2123 if (cum_len < 60) {
2124 req++;
2125 req->addr_low =
2126 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2127 req->addr_high =
2128 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2129 req->length = htobe16(60 - cum_len);
2130 req->cksum_offset = 0;
2131 req->pseudo_hdr_offset = pseudo_hdr_offset;
2132 req->pad = 0; /* complete solid 16-byte block */
2133 req->rdma_count = 1;
2134 req->flags |= flags | ((cum_len & 1) * odd_flag);
2135 cnt++;
2136 }
2137
2138 tx->req_list[0].rdma_count = cnt;
2139#if 0
2140 /* print what the firmware will see */
2141 for (i = 0; i < cnt; i++) {
6c348da6 2142 kprintf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2143 "cso:%d, flags:0x%x, rdma:%d\n",
2144 i, (int)ntohl(tx->req_list[i].addr_high),
2145 (int)ntohl(tx->req_list[i].addr_low),
2146 (int)ntohs(tx->req_list[i].length),
2147 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2148 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2149 tx->req_list[i].rdma_count);
2150 }
6c348da6 2151 kprintf("--------------\n");
2152#endif
2153 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2154 mxge_submit_req(tx, tx->req_list, cnt);
2155#ifdef IFNET_BUF_RING
2156 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2157 /* tell the NIC to start polling this slice */
2158 *tx->send_go = 1;
2159 tx->queue_active = 1;
2160 tx->activate++;
2161 wmb();
2162 }
2163#endif
2164 return;
2165
2166drop:
2167 m_freem(m);
2168 ss->oerrors++;
2169 return;
2170}
2171
2172static inline void
2173mxge_start_locked(struct mxge_slice_state *ss)
2174{
2175 mxge_softc_t *sc;
2176 struct mbuf *m;
2177 struct ifnet *ifp;
2178 mxge_tx_ring_t *tx;
2179
2180 sc = ss->sc;
2181 ifp = sc->ifp;
2182 tx = &ss->tx;
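	/* keep dequeuing while a worst-case packet (max_desc
	 * descriptors) still fits; req - done is the number of
	 * descriptors currently outstanding in the ring */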
2183 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
f2f758df 2184 m = ifq_dequeue(&ifp->if_snd, NULL);
2185 if (m == NULL) {
2186 return;
2187 }
2188 /* let BPF see it */
2189 BPF_MTAP(ifp, m);
2190
2191 /* give it to the nic */
2192 mxge_encap(ss, m);
2193 }
2194 /* ran out of transmit slots */
2195 if (!ifq_is_oactive(&ifp->if_snd)) {
2196 ifq_set_oactive(&ifp->if_snd);
2197 tx->stall++;
2198 }
2199}
9ed293e0 2200
2201static void
2202mxge_start(struct ifnet *ifp)
2203{
2204 mxge_softc_t *sc = ifp->if_softc;
2205 struct mxge_slice_state *ss;
2206
cd0543ff 2207 ASSERT_SERIALIZED(sc->ifp->if_serializer);
2208 /* only use the first slice for now */
2209 ss = &sc->ss[0];
8892ea20 2210 mxge_start_locked(ss);
2211}
2212
2213/*
2214 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2215 * at most 32 bytes at a time, so as to avoid involving the software
2216 * pio handler in the nic. We re-write the first segment's low
2217 * DMA address to mark it valid only after we write the entire chunk
2218 * in a burst
2219 */
2220static inline void
2221mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2222 mcp_kreq_ether_recv_t *src)
2223{
2224 uint32_t low;
2225
2226 low = src->addr_low;
2227 src->addr_low = 0xffffffff;
2228 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2229 wmb();
2230 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2231 wmb();
2232 src->addr_low = low;
2233 dst->addr_low = low;
2234 wmb();
2235}
2236
2237static int
2238mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2239{
2240 bus_dma_segment_t seg;
2241 struct mbuf *m;
2242 mxge_rx_ring_t *rx = &ss->rx_small;
2243 int cnt, err;
2244
17eb0737 2245 m = m_gethdr(MB_DONTWAIT, MT_DATA);
2246 if (m == NULL) {
2247 rx->alloc_fail++;
2248 err = ENOBUFS;
2249 goto done;
2250 }
2823b018 2251 m->m_len = m->m_pkthdr.len = MHLEN;
2252 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2253 &seg, 1, &cnt, BUS_DMA_NOWAIT);
8892ea20 2254 if (err != 0) {
2823b018 2255 kprintf("can't dmamap small (%d)\n", err);
2256 m_free(m);
2257 goto done;
2258 }
2259 rx->info[idx].m = m;
2260 rx->shadow[idx].addr_low =
2261 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2262 rx->shadow[idx].addr_high =
2263 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2264
2265done:
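	/* buffers are handed to the NIC in groups of 8: once the last
	 * slot of an 8-aligned group is (re)filled, burst the whole
	 * group of receive descriptors out via mxge_submit_8rx() */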
2266 if ((idx & 7) == 7)
2267 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2268 return err;
2269}
2270
87353c03 2271
2272static int
2273mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2274{
2275 bus_dma_segment_t seg[3];
2276 struct mbuf *m;
2277 mxge_rx_ring_t *rx = &ss->rx_big;
2278 int cnt, err, i;
2279
2280 if (rx->cl_size == MCLBYTES)
17eb0737 2281 m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
2282 else {
2283#if 0
17eb0737 2284 m = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
0a61435d 2285#else
2286 /*
2287 * XXX: allocate normal sized buffers for big buffers.
2288 * We should be fine as long as we don't get any jumbo frames
2289 */
2290 m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
2291#endif
2292 }
2293 if (m == NULL) {
2294 rx->alloc_fail++;
2295 err = ENOBUFS;
2296 goto done;
2297 }
2298 m->m_pkthdr.len = 0;
2299 m->m_len = m->m_pkthdr.len = rx->mlen;
2300 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2301 seg, 1, &cnt, BUS_DMA_NOWAIT);
8892ea20 2302 if (err != 0) {
2823b018 2303 kprintf("can't dmamap big (%d)\n", err);
2304 m_free(m);
2305 goto done;
2306 }
2307 rx->info[idx].m = m;
2308 rx->shadow[idx].addr_low =
2309 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2310 rx->shadow[idx].addr_high =
2311 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2312
2313#if MXGE_VIRT_JUMBOS
2314 for (i = 1; i < cnt; i++) {
2315 rx->shadow[idx + i].addr_low =
2316 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2317 rx->shadow[idx + i].addr_high =
2318 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2319 }
2320#endif
2321
2322done:
2323 for (i = 0; i < rx->nbufs; i++) {
2324 if ((idx & 7) == 7) {
2325 mxge_submit_8rx(&rx->lanai[idx - 7],
2326 &rx->shadow[idx - 7]);
2327 }
2328 idx++;
2329 }
2330 return err;
2331}
2332
2333/*
2334 * Myri10GE hardware checksums are not valid if the sender
2335 * padded the frame with non-zero padding. This is because
2336 * the firmware just does a simple 16-bit 1s complement
2337 * checksum across the entire frame, excluding the first 14
2338	 * bytes. It is best to simply check the checksum and
2339 * tell the stack about it only if the checksum is good
2340 */
2341
2342static inline uint16_t
2343mxge_rx_csum(struct mbuf *m, int csum)
2344{
2345 struct ether_header *eh;
2346 struct ip *ip;
2347 uint16_t c;
2348
2349 eh = mtod(m, struct ether_header *);
2350
2351 /* only deal with IPv4 TCP & UDP for now */
2352 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2353 return 1;
2354 ip = (struct ip *)(eh + 1);
2355 if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2356 ip->ip_p != IPPROTO_UDP))
2357 return 1;
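	/* the firmware csum is a raw one's complement sum over
	 * everything past the Ethernet header.  A correct IP header
	 * sums to 0xffff (one's complement zero), so folding in the
	 * pseudo header (addresses, protocol, L4 length = ip_len minus
	 * the IP header) must also give 0xffff for a good packet;
	 * after the xor below, returning 0 means the checksum is OK */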
2358#ifdef INET
2359 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2360		htonl(ntohs(csum) + ntohs(ip->ip_len) -
2361		      (ip->ip_hl << 2) + ip->ip_p));
2362#else
2363 c = 1;
2364#endif
2365 c ^= 0xffff;
2366 return (c);
2367}
2368
2369static void
2370mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2371{
2372 struct ether_vlan_header *evl;
2373 struct ether_header *eh;
2374 uint32_t partial;
2375
2376 evl = mtod(m, struct ether_vlan_header *);
2377 eh = mtod(m, struct ether_header *);
2378
2379 /*
b915556e 2380 * fix checksum by subtracting EVL_ENCAPLEN bytes
2381 * after what the firmware thought was the end of the ethernet
2382 * header.
2383 */
2384
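	/* this is one's complement subtraction: adding ~partial plus
	 * the end-around carry removes the EVL_ENCAPLEN extra bytes
	 * from the sum, and the two fold steps below reduce the 32-bit
	 * accumulator back to 16 bits */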
2385 /* put checksum into host byte order */
2386 *csum = ntohs(*csum);
2387 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2388 (*csum) += ~partial;
2389 (*csum) += ((*csum) < ~partial);
2390 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2391 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2392
2393 /* restore checksum to network byte order;
2394 later consumers expect this */
2395 *csum = htons(*csum);
2396
2397 /* save the tag */
2398#ifdef MXGE_NEW_VLAN_API
b915556e 2399 m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
8892ea20
AE
2400#else
2401 {
2402 struct m_tag *mtag;
2403 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
b915556e 2404 MB_DONTWAIT);
2405 if (mtag == NULL)
2406 return;
2407 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2408 m_tag_prepend(m, mtag);
2409 }
2410
2411#endif
2412 m->m_flags |= M_VLANTAG;
2413
2414 /*
2415 * Remove the 802.1q header by copying the Ethernet
2416 * addresses over it and adjusting the beginning of
2417 * the data in the mbuf. The encapsulated Ethernet
2418 * type field is already in place.
2419 */
b915556e 2420 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN,
8892ea20 2421 ETHER_HDR_LEN - ETHER_TYPE_LEN);
b915556e 2422 m_adj(m, EVL_ENCAPLEN);
2423}
2424
2425
2426static inline void
eda7db08 2427mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2428{
2429 mxge_softc_t *sc;
2430 struct ifnet *ifp;
2431 struct mbuf *m;
2432 struct ether_header *eh;
2433 mxge_rx_ring_t *rx;
2434 bus_dmamap_t old_map;
2435 int idx;
2436 uint16_t tcpudp_csum;
2437
2438 sc = ss->sc;
2439 ifp = sc->ifp;
2440 rx = &ss->rx_big;
2441 idx = rx->cnt & rx->mask;
2442 rx->cnt += rx->nbufs;
2443 /* save a pointer to the received mbuf */
2444 m = rx->info[idx].m;
2445 /* try to replace the received mbuf */
2446 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2447 /* drop the frame -- the old mbuf is re-cycled */
2448 ifp->if_ierrors++;
2449 return;
2450 }
2451
2452 /* unmap the received buffer */
2453 old_map = rx->info[idx].map;
2454 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2455 bus_dmamap_unload(rx->dmat, old_map);
2456
2457 /* swap the bus_dmamap_t's */
2458 rx->info[idx].map = rx->extra_map;
2459 rx->extra_map = old_map;
2460
2461 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2462 * aligned */
2463 m->m_data += MXGEFW_PAD;
2464
2465 m->m_pkthdr.rcvif = ifp;
2466 m->m_len = m->m_pkthdr.len = len;
2467 ss->ipackets++;
2468 eh = mtod(m, struct ether_header *);
2469 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2470 mxge_vlan_tag_remove(m, &csum);
2471 }
2472 /* if the checksum is valid, mark it in the mbuf header */
2473 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2474 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2475 return;
2476 /* otherwise, it was a UDP frame, or a TCP frame which
2477 we could not do LRO on. Tell the stack that the
2478 checksum is good */
2479 m->m_pkthdr.csum_data = 0xffff;
2480 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2481 }
deef6e3e 2482#if 0
2483 /* flowid only valid if RSS hashing is enabled */
2484 if (sc->num_slices > 1) {
2485 m->m_pkthdr.flowid = (ss - sc->ss);
2486 m->m_flags |= M_FLOWID;
2487 }
deef6e3e 2488#endif
eda7db08 2489 ifp->if_input(ifp, m);
2490}
2491
2492static inline void
eda7db08 2493mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2494{
2495 mxge_softc_t *sc;
2496 struct ifnet *ifp;
2497 struct ether_header *eh;
2498 struct mbuf *m;
2499 mxge_rx_ring_t *rx;
2500 bus_dmamap_t old_map;
2501 int idx;
2502 uint16_t tcpudp_csum;
2503
2504 sc = ss->sc;
2505 ifp = sc->ifp;
2506 rx = &ss->rx_small;
2507 idx = rx->cnt & rx->mask;
2508 rx->cnt++;
2509 /* save a pointer to the received mbuf */
2510 m = rx->info[idx].m;
2511 /* try to replace the received mbuf */
2512 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2513 /* drop the frame -- the old mbuf is re-cycled */
2514 ifp->if_ierrors++;
2515 return;
2516 }
2517
2518 /* unmap the received buffer */
2519 old_map = rx->info[idx].map;
2520 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2521 bus_dmamap_unload(rx->dmat, old_map);
2522
2523 /* swap the bus_dmamap_t's */
2524 rx->info[idx].map = rx->extra_map;
2525 rx->extra_map = old_map;
2526
2527 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2528 * aligned */
2529 m->m_data += MXGEFW_PAD;
2530
2531 m->m_pkthdr.rcvif = ifp;
2532 m->m_len = m->m_pkthdr.len = len;
2533 ss->ipackets++;
2534 eh = mtod(m, struct ether_header *);
2535 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2536 mxge_vlan_tag_remove(m, &csum);
2537 }
2538 /* if the checksum is valid, mark it in the mbuf header */
2539 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2540 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2541 return;
2542 /* otherwise, it was a UDP frame, or a TCP frame which
2543 we could not do LRO on. Tell the stack that the
2544 checksum is good */
2545 m->m_pkthdr.csum_data = 0xffff;
2546 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2547 }
deef6e3e 2548#if 0
2549 /* flowid only valid if RSS hashing is enabled */
2550 if (sc->num_slices > 1) {
2551 m->m_pkthdr.flowid = (ss - sc->ss);
2552 m->m_flags |= M_FLOWID;
2553 }
deef6e3e 2554#endif
eda7db08 2555 ifp->if_input(ifp, m);
2556}
2557
2558/*
2559 * XXX
2560 *
2561 * Inlining the call to this function causes mxge_intr() to grow too large
2562 * for GCC's stack size limits (which shouldn't take into account inlining
2563 * of leaf functions at one call site anyway). Inlining is definitely a
2564 * good idea in this case though, so mark the function appropriately.
2565 */
00008747 2566static inline __always_inline void
2567mxge_clean_rx_done(struct mxge_slice_state *ss)
2568{
2569 mxge_rx_done_t *rx_done = &ss->rx_done;
2570 int limit = 0;
2571 uint16_t length;
2572 uint16_t checksum;
2573
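	/* rx_done entries are DMAed in by the firmware; a non-zero
	 * length marks a valid completion, and zeroing it after use
	 * hands the slot back so this test terminates the scan */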
2574 while (rx_done->entry[rx_done->idx].length != 0) {
2575 length = ntohs(rx_done->entry[rx_done->idx].length);
2576 rx_done->entry[rx_done->idx].length = 0;
2577 checksum = rx_done->entry[rx_done->idx].checksum;
2578 if (length <= (MHLEN - MXGEFW_PAD))
eda7db08 2579 mxge_rx_done_small(ss, length, checksum);
8892ea20 2580 else
eda7db08 2581 mxge_rx_done_big(ss, length, checksum);
2582 rx_done->cnt++;
2583 rx_done->idx = rx_done->cnt & rx_done->mask;
2584
2585 /* limit potential for livelock */
2586 if (__predict_false(++limit > rx_done->mask / 2))
2587 break;
2588 }
2589#ifdef INET
2590 while (!SLIST_EMPTY(&ss->lro_active)) {
2591 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
2592 SLIST_REMOVE_HEAD(&ss->lro_active, next);
2593 mxge_lro_flush(ss, lro);
2594 }
2595#endif
2596}
2597
2598
2599static inline void
2600mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2601{
2602 struct ifnet *ifp;
2603 mxge_tx_ring_t *tx;
2604 struct mbuf *m;
2605 bus_dmamap_t map;
2606 int idx;
2607
2608 tx = &ss->tx;
2609 ifp = ss->sc->ifp;
cd0543ff 2610 ASSERT_SERIALIZED(ifp->if_serializer);
2611 while (tx->pkt_done != mcp_idx) {
2612 idx = tx->done & tx->mask;
2613 tx->done++;
2614 m = tx->info[idx].m;
2615 /* mbuf and DMA map only attached to the first
2616 segment per-mbuf */
2617 if (m != NULL) {
2618 ss->obytes += m->m_pkthdr.len;
2619 if (m->m_flags & M_MCAST)
2620 ss->omcasts++;
2621 ss->opackets++;
2622 tx->info[idx].m = NULL;
2623 map = tx->info[idx].map;
2624 bus_dmamap_unload(tx->dmat, map);
2625 m_freem(m);
2626 }
2627 if (tx->info[idx].flag) {
2628 tx->info[idx].flag = 0;
2629 tx->pkt_done++;
2630 }
2631 }
2632
9ed293e0 2633 /* If we have space, clear OACTIVE to tell the stack that
8892ea20 2634	   it's OK to send packets */
9ed293e0 2635 if (ifq_is_oactive(&ifp->if_snd) &&
8892ea20 2636 tx->req - tx->done < (tx->mask + 1)/4) {
9ed293e0 2637 ifq_clr_oactive(&ifp->if_snd);
2638 ss->tx.wake++;
2639 mxge_start_locked(ss);
2640 }
2641#ifdef IFNET_BUF_RING
2642 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2643 /* let the NIC stop polling this queue, since there
2644 * are no more transmits pending */
2646		*tx->send_stop = 1;
2647		tx->queue_active = 0;
2648		tx->deactivate++;
2649		wmb();
2651 }
2652#endif
2653
2654}
2655
2656static struct mxge_media_type mxge_xfp_media_types[] =
2657{
2658 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2659 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2660 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2661 {0, (1 << 5), "10GBASE-ER"},
2662 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2663 {0, (1 << 3), "10GBASE-SW"},
2664 {0, (1 << 2), "10GBASE-LW"},
2665 {0, (1 << 1), "10GBASE-EW"},
2666 {0, (1 << 0), "Reserved"}
2667};
2668static struct mxge_media_type mxge_sfp_media_types[] =
2669{
2670 {0, (1 << 7), "Reserved"},
2671 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2672 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2673 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}
2674};
2675
2676static void
2677mxge_set_media(mxge_softc_t *sc, int type)
2678{
2679 sc->media_flags |= type;
2680 ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
2681 ifmedia_set(&sc->media, sc->media_flags);
2682}
2683
2684
2685/*
2686 * Determine the media type for a NIC. Some XFPs will identify
2687 * themselves only when their link is up, so this is initiated via a
2688 * link up interrupt. However, this can potentially take up to
2689 * several milliseconds, so it is run via the watchdog routine, rather
2690 * than in the interrupt handler itself. This need only be done
2691 * once, not each time the link is up.
2692 */
2693static void
2694mxge_media_probe(mxge_softc_t *sc)
2695{
2696 mxge_cmd_t cmd;
2697 char *cage_type;
2698 char *ptr;
2699 struct mxge_media_type *mxge_media_types = NULL;
2700 int i, err, ms, mxge_media_type_entries;
2701 uint32_t byte;
2702
2703 sc->need_media_probe = 0;
2704
2705 /* if we've already set a media type, we're done */
2706 if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
2707 return;
2708
2709 /*
2710	 * parse the product code to determine the interface type
2711 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2712 * after the 3rd dash in the driver's cached copy of the
2713 * EEPROM's product code string.
2714 */
2715 ptr = sc->product_code_string;
2716 if (ptr == NULL) {
2717		device_printf(sc->dev, "Missing product code\n");
		return;
2718	}
2719
2720 for (i = 0; i < 3; i++, ptr++) {
2721 ptr = index(ptr, '-');
2722 if (ptr == NULL) {
2723 device_printf(sc->dev,
2724 "only %d dashes in PC?!?\n", i);
2725 return;
2726 }
2727 }
2728 if (*ptr == 'C') {
2729 /* -C is CX4 */
2730 mxge_set_media(sc, IFM_10G_CX4);
2731 return;
2732 }
2733 else if (*ptr == 'Q') {
2734 /* -Q is Quad Ribbon Fiber */
2735 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2736 /* FreeBSD has no media type for Quad ribbon fiber */
2737 return;
2738 }
2739
2740 if (*ptr == 'R') {
2741 /* -R is XFP */
2742 mxge_media_types = mxge_xfp_media_types;
b370aff7 2743 mxge_media_type_entries = NELEM(mxge_xfp_media_types);
2744 byte = MXGE_XFP_COMPLIANCE_BYTE;
2745 cage_type = "XFP";
2746 }
2747
2748	if (*ptr == 'S' || *(ptr + 1) == 'S') {
2749 /* -S or -2S is SFP+ */
2750 mxge_media_types = mxge_sfp_media_types;
b370aff7 2751 mxge_media_type_entries = NELEM(mxge_sfp_media_types);
2752 cage_type = "SFP+";
2753 byte = 3;
2754 }
2755
2756 if (mxge_media_types == NULL) {
2757 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2758 return;
2759 }
2760
2761 /*
2762 * At this point we know the NIC has an XFP cage, so now we
2763 * try to determine what is in the cage by using the
2764	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2765 * register. We read just one byte, which may take over
2766 * a millisecond
2767 */
2768
2769 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2770 cmd.data1 = byte;
2771 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2772 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2773 device_printf(sc->dev, "failed to read XFP\n");
2774 }
2775 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2776 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2777 }
2778 if (err != MXGEFW_CMD_OK) {
2779 return;
2780 }
2781
2782 /* now we wait for the data to be cached */
2783 cmd.data0 = byte;
2784 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2785 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2786 DELAY(1000);
2787 cmd.data0 = byte;
2788 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2789 }
2790 if (err != MXGEFW_CMD_OK) {
2791 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2792 cage_type, err, ms);
2793 return;
2794 }
2795
2796 if (cmd.data0 == mxge_media_types[0].bitmask) {
2797 if (mxge_verbose)
2798 device_printf(sc->dev, "%s:%s\n", cage_type,
2799 mxge_media_types[0].name);
2800 mxge_set_media(sc, IFM_10G_CX4);
2801 return;
2802 }
2803 for (i = 1; i < mxge_media_type_entries; i++) {
2804 if (cmd.data0 & mxge_media_types[i].bitmask) {
2805 if (mxge_verbose)
2806 device_printf(sc->dev, "%s:%s\n",
2807 cage_type,
2808 mxge_media_types[i].name);
2809
2810 mxge_set_media(sc, mxge_media_types[i].flag);
2811 return;
2812 }
2813 }
2814 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type,
2815 cmd.data0);
2816
2817 return;
2818}
2819
2820static void
2821mxge_intr(void *arg)
2822{
2823 struct mxge_slice_state *ss = arg;
2824 mxge_softc_t *sc = ss->sc;
2825 mcp_irq_data_t *stats = ss->fw_stats;
2826 mxge_tx_ring_t *tx = &ss->tx;
2827 mxge_rx_done_t *rx_done = &ss->rx_done;
2828 uint32_t send_done_count;
2829 uint8_t valid;
2830
2831
2832#ifndef IFNET_BUF_RING
2833 /* an interrupt on a non-zero slice is implicitly valid
2834 since MSI-X irqs are not shared */
2835 if (ss != sc->ss) {
2836 mxge_clean_rx_done(ss);
2837 *ss->irq_claim = be32toh(3);
2838 return;
2839 }
2840#endif
2841
2842 /* make sure the DMA has finished */
2843 if (!stats->valid) {
2844 return;
2845 }
2846 valid = stats->valid;
2847
2848 if (sc->legacy_irq) {
2849 /* lower legacy IRQ */
2850 *sc->irq_deassert = 0;
2851 if (!mxge_deassert_wait)
2852 /* don't wait for conf. that irq is low */
2853 stats->valid = 0;
2854 } else {
2855 stats->valid = 0;
2856 }
2857
2858 /* loop while waiting for legacy irq deassertion */
2859 do {
2860 /* check for transmit completes and receives */
2861 send_done_count = be32toh(stats->send_done_count);
2862 while ((send_done_count != tx->pkt_done) ||
2863 (rx_done->entry[rx_done->idx].length != 0)) {
2864 if (send_done_count != tx->pkt_done)
2865 mxge_tx_done(ss, (int)send_done_count);
2866 mxge_clean_rx_done(ss);
2867 send_done_count = be32toh(stats->send_done_count);
2868 }
2869 if (sc->legacy_irq && mxge_deassert_wait)
2870 wmb();
2871 } while (*((volatile uint8_t *) &stats->valid));
2872
2873 /* fw link & error stats meaningful only on the first slice */
2874 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
2875 if (sc->link_state != stats->link_up) {
2876 sc->link_state = stats->link_up;
2877 if (sc->link_state) {
2878 sc->ifp->if_link_state = LINK_STATE_UP;
2879 if_link_state_change(sc->ifp);
2880 if (mxge_verbose)
2881 device_printf(sc->dev, "link up\n");
2882 } else {
2883 sc->ifp->if_link_state = LINK_STATE_DOWN;
2884 if_link_state_change(sc->ifp);
2885 if (mxge_verbose)
2886 device_printf(sc->dev, "link down\n");
2887 }
2888 sc->need_media_probe = 1;
2889 }
2890 if (sc->rdma_tags_available !=
2891 be32toh(stats->rdma_tags_available)) {
2892 sc->rdma_tags_available =
2893 be32toh(stats->rdma_tags_available);
2894 device_printf(sc->dev, "RDMA timed out! %d tags "
2895 "left\n", sc->rdma_tags_available);
2896 }
2897
2898 if (stats->link_down) {
2899 sc->down_cnt += stats->link_down;
2900 sc->link_state = 0;
2901 sc->ifp->if_link_state = LINK_STATE_DOWN;
2902 if_link_state_change(sc->ifp);
2903 }
2904 }
2905
2906 /* check to see if we have rx token to pass back */
2907 if (valid & 0x1)
2908 *ss->irq_claim = be32toh(3);
2909 *(ss->irq_claim + 1) = be32toh(3);
2910}
2911
2912static void
2913mxge_init(void *arg)
2914{
2915}
2916
2917
2918
2919static void
2920mxge_free_slice_mbufs(struct mxge_slice_state *ss)
2921{
2922 struct lro_entry *lro_entry;
2923 int i;
2924
2925 while (!SLIST_EMPTY(&ss->lro_free)) {
2926 lro_entry = SLIST_FIRST(&ss->lro_free);
2927 SLIST_REMOVE_HEAD(&ss->lro_free, next);
d777b84f 2928 kfree(lro_entry, M_DEVBUF);
2929 }
2930
2931 for (i = 0; i <= ss->rx_big.mask; i++) {
2932 if (ss->rx_big.info[i].m == NULL)
2933 continue;
2934 bus_dmamap_unload(ss->rx_big.dmat,
2935 ss->rx_big.info[i].map);
2936 m_freem(ss->rx_big.info[i].m);
2937 ss->rx_big.info[i].m = NULL;
2938 }
2939
2940 for (i = 0; i <= ss->rx_small.mask; i++) {
2941 if (ss->rx_small.info[i].m == NULL)
2942 continue;
2943 bus_dmamap_unload(ss->rx_small.dmat,
2944 ss->rx_small.info[i].map);
2945 m_freem(ss->rx_small.info[i].m);
2946 ss->rx_small.info[i].m = NULL;
2947 }
2948
2949 /* transmit ring used only on the first slice */
2950 if (ss->tx.info == NULL)
2951 return;
2952
2953 for (i = 0; i <= ss->tx.mask; i++) {
2954 ss->tx.info[i].flag = 0;
2955 if (ss->tx.info[i].m == NULL)
2956 continue;
2957 bus_dmamap_unload(ss->tx.dmat,
2958 ss->tx.info[i].map);
2959 m_freem(ss->tx.info[i].m);
2960 ss->tx.info[i].m = NULL;
2961 }
2962}
2963
2964static void
2965mxge_free_mbufs(mxge_softc_t *sc)
2966{
2967 int slice;
2968
2969 for (slice = 0; slice < sc->num_slices; slice++)
2970 mxge_free_slice_mbufs(&sc->ss[slice]);
2971}
2972
2973static void
2974mxge_free_slice_rings(struct mxge_slice_state *ss)
2975{
2976 int i;
2977
2978
2979 if (ss->rx_done.entry != NULL)
2980 mxge_dma_free(&ss->rx_done.dma);
2981 ss->rx_done.entry = NULL;
2982
2983 if (ss->tx.req_bytes != NULL)
d777b84f 2984 kfree(ss->tx.req_bytes, M_DEVBUF);
2985 ss->tx.req_bytes = NULL;
2986
2987 if (ss->tx.seg_list != NULL)
d777b84f 2988 kfree(ss->tx.seg_list, M_DEVBUF);
2989 ss->tx.seg_list = NULL;
2990
2991 if (ss->rx_small.shadow != NULL)
d777b84f 2992 kfree(ss->rx_small.shadow, M_DEVBUF);
2993 ss->rx_small.shadow = NULL;
2994
2995 if (ss->rx_big.shadow != NULL)
d777b84f 2996 kfree(ss->rx_big.shadow, M_DEVBUF);
2997 ss->rx_big.shadow = NULL;
2998
2999 if (ss->tx.info != NULL) {
3000 if (ss->tx.dmat != NULL) {
3001 for (i = 0; i <= ss->tx.mask; i++) {
3002 bus_dmamap_destroy(ss->tx.dmat,
3003 ss->tx.info[i].map);
3004 }
3005 bus_dma_tag_destroy(ss->tx.dmat);
3006 }
d777b84f 3007 kfree(ss->tx.info, M_DEVBUF);
3008 }
3009 ss->tx.info = NULL;
3010
3011 if (ss->rx_small.info != NULL) {
3012 if (ss->rx_small.dmat != NULL) {
3013 for (i = 0; i <= ss->rx_small.mask; i++) {
3014 bus_dmamap_destroy(ss->rx_small.dmat,
3015 ss->rx_small.info[i].map);
3016 }
3017 bus_dmamap_destroy(ss->rx_small.dmat,
3018 ss->rx_small.extra_map);
3019 bus_dma_tag_destroy(ss->rx_small.dmat);
3020 }
d777b84f 3021 kfree(ss->rx_small.info, M_DEVBUF);
3022 }
3023 ss->rx_small.info = NULL;
3024
3025 if (ss->rx_big.info != NULL) {
3026 if (ss->rx_big.dmat != NULL) {
3027 for (i = 0; i <= ss->rx_big.mask; i++) {
3028 bus_dmamap_destroy(ss->rx_big.dmat,
3029 ss->rx_big.info[i].map);
3030 }
3031 bus_dmamap_destroy(ss->rx_big.dmat,
3032 ss->rx_big.extra_map);
3033 bus_dma_tag_destroy(ss->rx_big.dmat);
3034 }
d777b84f 3035 kfree(ss->rx_big.info, M_DEVBUF);
3036 }
3037 ss->rx_big.info = NULL;
3038}
3039
3040static void
3041mxge_free_rings(mxge_softc_t *sc)
3042{
3043 int slice;
3044
3045 for (slice = 0; slice < sc->num_slices; slice++)
3046 mxge_free_slice_rings(&sc->ss[slice]);
3047}
3048
3049static int
3050mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3051 int tx_ring_entries)
3052{
3053 mxge_softc_t *sc = ss->sc;
3054 size_t bytes;
3055 int err, i;
3056
3057 err = ENOMEM;
3058
3059 /* allocate per-slice receive resources */
3060
3061 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3062 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3063
3064 /* allocate the rx shadow rings */
3065 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
d777b84f 3066 ss->rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3067
3068 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
d777b84f 3069 ss->rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3070
3071 /* allocate the rx host info rings */
3072 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
d777b84f 3073 ss->rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3074
3075 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
d777b84f 3076 ss->rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3077
3078 /* allocate the rx busdma resources */
3079 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3080 1, /* alignment */
3081 4096, /* boundary */
3082 BUS_SPACE_MAXADDR, /* low */
3083 BUS_SPACE_MAXADDR, /* high */
3084 NULL, NULL, /* filter */
3085 MHLEN, /* maxsize */
3086 1, /* num segs */
3087 MHLEN, /* maxsegsize */
3088 BUS_DMA_ALLOCNOW, /* flags */
3089 &ss->rx_small.dmat); /* tag */
3090 if (err != 0) {
3091 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3092 err);
3598cc14 3093 return err;
3094 }
3095
3096 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3097 1, /* alignment */
3098#if MXGE_VIRT_JUMBOS
3099 4096, /* boundary */
3100#else
3101 0, /* boundary */
3102#endif
3103 BUS_SPACE_MAXADDR, /* low */
3104 BUS_SPACE_MAXADDR, /* high */
3105 NULL, NULL, /* filter */
3106 3*4096, /* maxsize */
3107#if MXGE_VIRT_JUMBOS
3108 3, /* num segs */
3109 4096, /* maxsegsize*/
3110#else
3111 1, /* num segs */
3112 MJUM9BYTES, /* maxsegsize*/
3113#endif
3114 BUS_DMA_ALLOCNOW, /* flags */
3115 &ss->rx_big.dmat); /* tag */
3116 if (err != 0) {
3117 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3118 err);
3598cc14 3119 return err;
3120 }
3121 for (i = 0; i <= ss->rx_small.mask; i++) {
3122 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3123 &ss->rx_small.info[i].map);
3124 if (err != 0) {
3125 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3126 err);
3598cc14 3127 return err;
3128 }
3129 }
3130 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3131 &ss->rx_small.extra_map);
3132 if (err != 0) {
3133 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3134 err);
3598cc14 3135 return err;
3136 }
3137
3138 for (i = 0; i <= ss->rx_big.mask; i++) {
3139 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3140 &ss->rx_big.info[i].map);
3141 if (err != 0) {
3142 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3143 err);
3598cc14 3144 return err;
3145 }
3146 }
3147 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3148 &ss->rx_big.extra_map);
3149 if (err != 0) {
3150 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3151 err);
3598cc14 3152 return err;
3153 }
3154
3155	/* now allocate TX resources */
3156
3157#ifndef IFNET_BUF_RING
3158 /* only use a single TX ring for now */
3159 if (ss != ss->sc->ss)
3160 return 0;
3161#endif
3162
3163 ss->tx.mask = tx_ring_entries - 1;
3164 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3165
3166
3167 /* allocate the tx request copy block */
3168 bytes = 8 +
3169 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
d777b84f 3170 ss->tx.req_bytes = kmalloc(bytes, M_DEVBUF, M_WAITOK);
3171 /* ensure req_list entries are aligned to 8 bytes */
3172 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3173 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3174
3175 /* allocate the tx busdma segment list */
3176 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3177 ss->tx.seg_list = (bus_dma_segment_t *)
d777b84f 3178 kmalloc(bytes, M_DEVBUF, M_WAITOK);
8892ea20 3179 if (ss->tx.seg_list == NULL)
3598cc14 3180		return ENOMEM;
3181
3182 /* allocate the tx host info ring */
3183 bytes = tx_ring_entries * sizeof (*ss->tx.info);
d777b84f 3184 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3185
3186 /* allocate the tx busdma resources */
3187 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3188 1, /* alignment */
3189 sc->tx_boundary, /* boundary */
3190 BUS_SPACE_MAXADDR, /* low */
3191 BUS_SPACE_MAXADDR, /* high */
3192 NULL, NULL, /* filter */
3193 65536 + 256, /* maxsize */
3194 ss->tx.max_desc - 2, /* num segs */
3195 sc->tx_boundary, /* maxsegsz */
3196 BUS_DMA_ALLOCNOW, /* flags */
3197 &ss->tx.dmat); /* tag */
3198
3199 if (err != 0) {
3200 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3201 err);
3598cc14 3202 return err;
3203 }
3204
3205 /* now use these tags to setup dmamaps for each slot
3206 in the ring */
3207 for (i = 0; i <= ss->tx.mask; i++) {
3208 err = bus_dmamap_create(ss->tx.dmat, 0,
3209 &ss->tx.info[i].map);
3210 if (err != 0) {
3211 device_printf(sc->dev, "Err %d tx dmamap\n",
3212 err);
3598cc14 3213 return err;
3214 }
3215 }
3216 return 0;
3217
3218}
3219
3220static int
3221mxge_alloc_rings(mxge_softc_t *sc)
3222{
3223 mxge_cmd_t cmd;
3224 int tx_ring_size;
3225 int tx_ring_entries, rx_ring_entries;
3226 int err, slice;
3227
3228 /* get ring sizes */
3229 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3230 tx_ring_size = cmd.data0;
3231 if (err != 0) {
3232 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3233 goto abort;
3234 }
3235
3236 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3237 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3238 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1);
3239 ifq_set_ready(&sc->ifp->if_snd);
3240
3241 for (slice = 0; slice < sc->num_slices; slice++) {
3242 err = mxge_alloc_slice_rings(&sc->ss[slice],
3243 rx_ring_entries,
3244 tx_ring_entries);
3245 if (err != 0)
3246 goto abort;
3247 }
3248 return 0;
3249
3250abort:
3251 mxge_free_rings(sc);
3252 return err;
3253
3254}
3255
3256
3257static void
3258mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3259{
b915556e 3260 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD;
3261
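	/* example (hypothetical 9000-byte jumbo MTU): bufsize is
	 * 9000 + 14 + 4 + 2 = 9020, larger than both MCLBYTES and
	 * MJUMPAGESIZE, so the 9KB-cluster cases below apply */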
3262 if (bufsize < MCLBYTES) {
3263 /* easy, everything fits in a single buffer */
3264 *big_buf_size = MCLBYTES;
3265 *cl_size = MCLBYTES;
3266 *nbufs = 1;
3267 return;
3268 }
3269
3270 if (bufsize < MJUMPAGESIZE) {
3271 /* still easy, everything still fits in a single buffer */
3272 *big_buf_size = MJUMPAGESIZE;
3273 *cl_size = MJUMPAGESIZE;
3274 *nbufs = 1;
3275 return;
3276 }
3277#if MXGE_VIRT_JUMBOS
3278 /* now we need to use virtually contiguous buffers */
3279 *cl_size = MJUM9BYTES;
3280 *big_buf_size = 4096;
3281 *nbufs = mtu / 4096 + 1;
3282 /* needs to be a power of two, so round up */
3283 if (*nbufs == 3)
3284 *nbufs = 4;
3285#else
3286 *cl_size = MJUM9BYTES;
3287 *big_buf_size = MJUM9BYTES;
3288 *nbufs = 1;
3289#endif
3290}
3291
3292static int
3293mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3294{
3295 mxge_softc_t *sc;
3296 mxge_cmd_t cmd;
3297 bus_dmamap_t map;
3298 struct lro_entry *lro_entry;
3299 int err, i, slice;
3300
3301
3302 sc = ss->sc;
3303 slice = ss - sc->ss;
3304
3305 SLIST_INIT(&ss->lro_free);
3306 SLIST_INIT(&ss->lro_active);
3307
3308 for (i = 0; i < sc->lro_cnt; i++) {
3309 lro_entry = (struct lro_entry *)
d777b84f 3310 kmalloc(sizeof (*lro_entry), M_DEVBUF,
3311 M_NOWAIT | M_ZERO);
3312 if (lro_entry == NULL) {
3313 sc->lro_cnt = i;
3314 break;
3315 }
3316 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
3317 }
3318 /* get the lanai pointers to the send and receive rings */
3319
3320 err = 0;
3321#ifndef IFNET_BUF_RING
3322 /* We currently only send from the first slice */
3323 if (slice == 0) {
3324#endif
3325 cmd.data0 = slice;
3326 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3327 ss->tx.lanai =
3328 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3329 ss->tx.send_go = (volatile uint32_t *)
3330 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3331 ss->tx.send_stop = (volatile uint32_t *)
3332 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3333#ifndef IFNET_BUF_RING
3334 }
3335#endif
3336 cmd.data0 = slice;
3337 err |= mxge_send_cmd(sc,
3338 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3339 ss->rx_small.lanai =
3340 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3341 cmd.data0 = slice;
3342 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3343 ss->rx_big.lanai =
3344 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3345
3346 if (err != 0) {
3347 device_printf(sc->dev,
3348 "failed to get ring sizes or locations\n");
3349 return EIO;
3350 }
3351
3352 /* stock receive rings */
3353 for (i = 0; i <= ss->rx_small.mask; i++) {
3354 map = ss->rx_small.info[i].map;
3355 err = mxge_get_buf_small(ss, map, i);
3356 if (err) {
3357 device_printf(sc->dev, "alloced %d/%d smalls\n",
3358 i, ss->rx_small.mask + 1);
3359 return ENOMEM;
3360 }
3361 }
3362 for (i = 0; i <= ss->rx_big.mask; i++) {
3363 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3364 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3365 }
3366 ss->rx_big.nbufs = nbufs;
3367 ss->rx_big.cl_size = cl_size;
3368 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
b915556e 3369 EVL_ENCAPLEN + MXGEFW_PAD;
3370 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3371 map = ss->rx_big.info[i].map;
3372 err = mxge_get_buf_big(ss, map, i);
3373 if (err) {
3374 device_printf(sc->dev, "alloced %d/%d bigs\n",
3375 i, ss->rx_big.mask + 1);
3376 return ENOMEM;
3377 }
3378 }
3379 return 0;
3380}
3381
3382static int
3383mxge_open(mxge_softc_t *sc)
3384{
3385 mxge_cmd_t cmd;
3386 int err, big_bytes, nbufs, slice, cl_size, i;
3387 bus_addr_t bus;
3388 volatile uint8_t *itable;
3389 struct mxge_slice_state *ss;
3390
cd0543ff 3391 ASSERT_SERIALIZED(sc->ifp->if_serializer);
3392 /* Copy the MAC address in case it was overridden */
3393 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3394
3395 err = mxge_reset(sc, 1);
3396 if (err != 0) {
3397 device_printf(sc->dev, "failed to reset\n");
3398 return EIO;
3399 }
3400
3401 if (sc->num_slices > 1) {
3402 /* setup the indirection table */
3403 cmd.data0 = sc->num_slices;
3404 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3405 &cmd);
3406
3407 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3408 &cmd);
3409 if (err != 0) {
3410 device_printf(sc->dev,
3411 "failed to setup rss tables\n");
3412 return err;
3413 }
3414
3415 /* just enable an identity mapping */
3416 itable = sc->sram + cmd.data0;
3417 for (i = 0; i < sc->num_slices; i++)
3418 itable[i] = (uint8_t)i;
3419
3420 cmd.data0 = 1;
3421 cmd.data1 = mxge_rss_hash_type;
3422 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3423 if (err != 0) {
3424 device_printf(sc->dev, "failed to enable slices\n");
3425 return err;
3426 }
3427 }
3428
3429
3430 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3431
3432 cmd.data0 = nbufs;
3433 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3434 &cmd);
3435 /* error is only meaningful if we're trying to set
3436 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3437 if (err && nbufs > 1) {
3438 device_printf(sc->dev,
3439 "Failed to set alway-use-n to %d\n",
3440 nbufs);
3441 return EIO;
3442 }
3443 /* Give the firmware the mtu and the big and small buffer
3444 sizes. The firmware wants the big buf size to be a power
3445 of two. Luckily, FreeBSD's clusters are powers of two */
b915556e 3446 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3447 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3448 cmd.data0 = MHLEN - MXGEFW_PAD;
3449 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3450 &cmd);
3451 cmd.data0 = big_bytes;
3452 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3453
3454 if (err != 0) {
3455 device_printf(sc->dev, "failed to setup params\n");
3456 goto abort;
3457 }
3458
3459 /* Now give him the pointer to the stats block */
3460 for (slice = 0;
3461#ifdef IFNET_BUF_RING
3462 slice < sc->num_slices;
3463#else
3464 slice < 1;
3465#endif
3466 slice++) {
3467 ss = &sc->ss[slice];
3468 cmd.data0 =
3469 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3470 cmd.data1 =
3471 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3472 cmd.data2 = sizeof(struct mcp_irq_data);
3473 cmd.data2 |= (slice << 16);
3474 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3475 }
3476
3477 if (err != 0) {
3478 bus = sc->ss->fw_stats_dma.bus_addr;
3479 bus += offsetof(struct mcp_irq_data, send_done_count);
3480 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3481 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3482 err = mxge_send_cmd(sc,
3483 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3484 &cmd);
3485 /* Firmware cannot support multicast without STATS_DMA_V2 */
3486 sc->fw_multicast_support = 0;
3487 } else {
3488 sc->fw_multicast_support = 1;
3489 }
3490
3491 if (err != 0) {
3492 device_printf(sc->dev, "failed to setup params\n");
3493 goto abort;
3494 }
3495
3496 for (slice = 0; slice < sc->num_slices; slice++) {
3497 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3498 if (err != 0) {
3499 device_printf(sc->dev, "couldn't open slice %d\n",
3500 slice);
3501 goto abort;
3502 }
3503 }
3504
3505 /* Finally, start the firmware running */
3506 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3507 if (err) {
3508 device_printf(sc->dev, "Couldn't bring up link\n");
3509 goto abort;
3510 }
2ab1b8a9 3511 sc->ifp->if_flags |= IFF_RUNNING;
9ed293e0 3512 ifq_clr_oactive(&sc->ifp->if_snd);
3513 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3514
3515 return 0;
3516
3517
3518abort:
3519 mxge_free_mbufs(sc);
3520
3521 return err;
3522}
3523
3524static int
3525mxge_close(mxge_softc_t *sc)
3526{
3527 mxge_cmd_t cmd;
3528 int err, old_down_cnt;
3529#ifdef IFNET_BUF_RING
3530 struct mxge_slice_state *ss;
3531 int slice;
3532#endif
3533
cd0543ff 3534 ASSERT_SERIALIZED(sc->ifp->if_serializer);
3535 callout_stop(&sc->co_hdl);
3536#ifdef IFNET_BUF_RING
3537 for (slice = 0; slice < sc->num_slices; slice++) {
3538 ss = &sc->ss[slice];
2ab1b8a9 3539 ss->if_flags &= ~IFF_RUNNING;
3540 }
3541#endif
2ab1b8a9 3542 sc->ifp->if_flags &= ~IFF_RUNNING;
3543 old_down_cnt = sc->down_cnt;
3544 wmb();
3545 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3546 if (err) {
3547 device_printf(sc->dev, "Couldn't bring down link\n");
3548 }
3549 if (old_down_cnt == sc->down_cnt) {
3550 /* wait for down irq */
3551 DELAY(10 * sc->intr_coal_delay);
3552 }
3553 wmb();
3554 if (old_down_cnt == sc->down_cnt) {
3555 device_printf(sc->dev, "never got down irq\n");
3556 }
3557
3558 mxge_free_mbufs(sc);
3559
3560 return 0;
3561}
3562
3563static void
3564mxge_setup_cfg_space(mxge_softc_t *sc)
3565{
3566 device_t dev = sc->dev;
3567 int reg;
3568 uint16_t cmd, lnk, pectl;
3569
3570	/* find the PCIe link width and set max read request to 4KB */
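	/* offset 0x12 in the PCIe capability is the Link Status
	 * register (negotiated width in bits 9:4); offset 0x8 is
	 * Device Control, where bits 14:12 encode the max read request
	 * size as 128 << n, so 5 selects 4096 bytes */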
3571 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3572 lnk = pci_read_config(dev, reg + 0x12, 2);
3573 sc->link_width = (lnk >> 4) & 0x3f;
3574
3575 pectl = pci_read_config(dev, reg + 0x8, 2);
3576 pectl = (pectl & ~0x7000) | (5 << 12);
3577 pci_write_config(dev, reg + 0x8, pectl, 2);
3578 }
3579
3580 /* Enable DMA and Memory space access */
3581 pci_enable_busmaster(dev);
3582 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3583 cmd |= PCIM_CMD_MEMEN;
3584 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3585}
3586
3587static uint32_t
3588mxge_read_reboot(mxge_softc_t *sc)
3589{
3590 device_t dev = sc->dev;
3591 uint32_t vs;
3592
3593 /* find the vendor specific offset */
3594 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3595 device_printf(sc->dev,
3596 "could not find vendor specific offset\n");
3597 return (uint32_t)-1;
3598 }
3599 /* enable read32 mode */
3600 pci_write_config(dev, vs + 0x10, 0x3, 1);
3601 /* tell NIC which register to read */
3602 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3603 return (pci_read_config(dev, vs + 0x14, 4));
3604}
3605
3606static int
3607mxge_watchdog_reset(mxge_softc_t *sc, int slice)
3608{
3609 struct pci_devinfo *dinfo;
3610 mxge_tx_ring_t *tx;
3611 int err;
3612 uint32_t reboot;
3613 uint16_t cmd;
3614
3615 err = ENXIO;
3616
3617 device_printf(sc->dev, "Watchdog reset!\n");
3618
3619 /*
3620 * check to see if the NIC rebooted. If it did, then all of
3621 * PCI config space has been reset, and things like the
3622 * busmaster bit will be zero. If this is the case, then we
3623 * must restore PCI config space before the NIC can be used
3624 * again
3625 */
3626 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3627 if (cmd == 0xffff) {
3628 /*
3629 * maybe the watchdog caught the NIC rebooting; wait
3630 * up to 100ms for it to finish. If it does not come
3631 * back, then give up
3632 */
3633 DELAY(1000*100);
3634 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3635 if (cmd == 0xffff) {
3636 device_printf(sc->dev, "NIC disappeared!\n");
3637 return (err);
3638 }
3639 }
3640 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3641 /* print the reboot status */
3642 reboot = mxge_read_reboot(sc);
3643 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3644 reboot);
3645 /* restore PCI configuration space */
3646 dinfo = device_get_ivars(sc->dev);
3647 pci_cfg_restore(sc->dev, dinfo);
3648
3649 /* and redo any changes we made to our config space */
3650 mxge_setup_cfg_space(sc);
3651
2ab1b8a9 3652 if (sc->ifp->if_flags & IFF_RUNNING) {
3653 mxge_close(sc);
3654 err = mxge_open(sc);
3655 }
3656 } else {
3657 tx = &sc->ss[slice].tx;
3658 device_printf(sc->dev,
3659 "NIC did not reboot, slice %d ring state:\n",
3660 slice);
3661 device_printf(sc->dev,
3662 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3663 tx->req, tx->done, tx->queue_active);
3664 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3665 tx->activate, tx->deactivate);
3666 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3667 tx->pkt_done,
3668 be32toh(sc->ss->fw_stats->send_done_count));
3669 device_printf(sc->dev, "not resetting\n");
3670 }
3671 return (err);
3672}
3673
3674static int
3675mxge_watchdog(mxge_softc_t *sc)
3676{
3677 mxge_tx_ring_t *tx;
3678 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3679 int i, err = 0;
3680
3681 /* see if we have outstanding transmits, which
3682 have been pending for more than mxge_ticks */
3683 for (i = 0;
3684#ifdef IFNET_BUF_RING
3685 (i < sc->num_slices) && (err == 0);
3686#else
3687 (i < 1) && (err == 0);
3688#endif
3689 i++) {
3690 tx = &sc->ss[i].tx;
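		/* a slice is deemed hung when transmits are pending now,
		 * were already pending at the previous tick, and no
		 * completions arrived in between (done unchanged) */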
3691 if (tx->req != tx->done &&
3692 tx->watchdog_req != tx->watchdog_done &&
3693 tx->done == tx->watchdog_done) {
3694 /* check for pause blocking before resetting */
3695 if (tx->watchdog_rx_pause == rx_pause)
3696 err = mxge_watchdog_reset(sc, i);
3697 else
3698 device_printf(sc->dev, "Flow control blocking "
3699 "xmits, check link partner\n");
3700 }
3701
3702 tx->watchdog_req = tx->req;
3703 tx->watchdog_done = tx->done;
3704 tx->watchdog_rx_pause = rx_pause;
3705 }
3706
3707 if (sc->need_media_probe)
3708 mxge_media_probe(sc);
3709 return (err);
3710}
3711
3712static void
3713mxge_update_stats(mxge_softc_t *sc)
3714{
3715 struct mxge_slice_state *ss;
3716 u_long ipackets = 0;
3717 u_long opackets = 0;
3718#ifdef IFNET_BUF_RING
3719 u_long obytes = 0;
3720 u_long omcasts = 0;
3721 u_long odrops = 0;
3722#endif
3723 u_long oerrors = 0;
3724 int slice;
3725
3726 for (slice = 0; slice < sc->num_slices; slice++) {
3727 ss = &sc->ss[slice];
3728 ipackets += ss->ipackets;
3729 opackets += ss->opackets;
3730#ifdef IFNET_BUF_RING
3731 obytes += ss->obytes;
3732 omcasts += ss->omcasts;
3733 odrops += ss->tx.br->br_drops;
3734#endif
3735 oerrors += ss->oerrors;
3736 }
3737 sc->ifp->if_ipackets = ipackets;
3738 sc->ifp->if_opackets = opackets;
3739#ifdef IFNET_BUF_RING
3740 sc->ifp->if_obytes = obytes;
3741 sc->ifp->if_omcasts = omcasts;
3742 sc->ifp->if_snd.ifq_drops = odrops;
3743#endif
3744 sc->ifp->if_oerrors = oerrors;
3745}
3746
3747static void
3748mxge_tick(void *arg)
3749{
3750 mxge_softc_t *sc = arg;
3751 int err = 0;
3752
2e8181d0 3753 lwkt_serialize_enter(sc->ifp->if_serializer);
3754 /* aggregate stats from different slices */
3755 mxge_update_stats(sc);
3756 if (!sc->watchdog_countdown) {
3757 err = mxge_watchdog(sc);
3758 sc->watchdog_countdown = 4;
3759 }
3760 sc->watchdog_countdown--;
3761 if (err == 0)
3762 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
2e8181d0 3763 lwkt_serialize_exit(sc->ifp->if_serializer);
3764}
3765
3766static int
3767mxge_media_change(struct ifnet *ifp)
3768{
3769 return EINVAL;
3770}
3771
3772static int
3773mxge_change_mtu(mxge_softc_t *sc, int mtu)
3774{
3775 struct ifnet *ifp = sc->ifp;
3776 int real_mtu, old_mtu;
3777 int err = 0;
3778
3779 if (ifp->if_serializer)
3780 ASSERT_SERIALIZED(ifp->if_serializer);
8892ea20 3781
b915556e 3782 real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3783 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
3784 return EINVAL;
3785 old_mtu = ifp->if_mtu;
3786 ifp->if_mtu = mtu;
2ab1b8a9 3787 if (ifp->if_flags & IFF_RUNNING) {
3788 mxge_close(sc);
3789 err = mxge_open(sc);
3790 if (err != 0) {
3791 ifp->if_mtu = old_mtu;
3792 mxge_close(sc);
3793 (void) mxge_open(sc);
3794 }
3795 }
3796 return err;
3797}
3798
3799static void
3800mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3801{
3802 mxge_softc_t *sc = ifp->if_softc;
3803
3804
3805 if (sc == NULL)
3806 return;
3807 ifmr->ifm_status = IFM_AVALID;
3808 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
3809 ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
3810 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
3811}
3812
3813static int
137195a6 3814mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
3815{
3816 mxge_softc_t *sc = ifp->if_softc;
3817 struct ifreq *ifr = (struct ifreq *)data;
3818 int err, mask;
3819
137195a6 3820 (void)cr;
8892ea20 3821 err = 0;
23811d63 3822 ASSERT_SERIALIZED(ifp->if_serializer);
3823 switch (command) {
3824 case SIOCSIFADDR:
3825 case SIOCGIFADDR:
3826 err = ether_ioctl(ifp, command, data);
3827 break;
3828
3829 case SIOCSIFMTU:
3830 err = mxge_change_mtu(sc, ifr->ifr_mtu);
3831 break;
3832
3833 case SIOCSIFFLAGS:
8892ea20 3834 if (sc->dying) {
3835 return EINVAL;
3836 }
3837 if (ifp->if_flags & IFF_UP) {
2ab1b8a9 3838 if (!(ifp->if_flags & IFF_RUNNING)) {
3839 err = mxge_open(sc);
3840 } else {
3841				/* take care of promisc and allmulti
3842				   flag changes */
3843 mxge_change_promisc(sc,
3844 ifp->if_flags & IFF_PROMISC);
3845 mxge_set_multicast_list(sc);
3846 }
3847 } else {
2ab1b8a9 3848 if (ifp->if_flags & IFF_RUNNING) {
3849 mxge_close(sc);
3850 }
3851 }
3852 break;
3853
3854 case SIOCADDMULTI:
3855 case SIOCDELMULTI:
8892ea20 3856 mxge_set_multicast_list(sc);
3857 break;
3858
3859 case SIOCSIFCAP:
3860 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3861 if (mask & IFCAP_TXCSUM) {
3862 if (IFCAP_TXCSUM & ifp->if_capenable) {
3863 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
3864 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
3865 | CSUM_TSO);
3866 } else {
3867 ifp->if_capenable |= IFCAP_TXCSUM;
3868 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
3869 }
3870 } else if (mask & IFCAP_RXCSUM) {
3871 if (IFCAP_RXCSUM & ifp->if_capenable) {
3872 ifp->if_capenable &= ~IFCAP_RXCSUM;
3873 sc->csum_flag = 0;
3874 } else {
3875 ifp->if_capenable |= IFCAP_RXCSUM;
3876 sc->csum_flag = 1;
3877 }
3878 }
3879 if (mask & IFCAP_TSO4) {
3880 if (IFCAP_TSO4 & ifp->if_capenable) {
3881 ifp->if_capenable &= ~IFCAP_TSO4;
3882 ifp->if_hwassist &= ~CSUM_TSO;
3883 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
3884 ifp->if_capenable |= IFCAP_TSO4;
3885 ifp->if_hwassist |= CSUM_TSO;
3886 } else {
6c348da6 3887 kprintf("mxge requires tx checksum offload"
3888 " be enabled to use TSO\n");
3889 err = EINVAL;
3890 }
3891 }
3892 if (mask & IFCAP_LRO) {
3893 if (IFCAP_LRO & ifp->if_capenable)
3894 err = mxge_change_lro_locked(sc, 0);
3895 else
3896 err = mxge_change_lro_locked(sc, mxge_lro_cnt);
3897 }
3898 if (mask & IFCAP_VLAN_HWTAGGING)
3899 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3900 VLAN_CAPABILITIES(ifp);
3901
3902 break;
3903
3904 case SIOCGIFMEDIA:
3905 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
3906