/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/sys/dev/mxge/if_mxge.c,v 1.63 2009/06/26 11:45:06 rwatson Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>

/* count xmits ourselves, rather than via drbr */
#define NO_SLOW_STATS
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/smp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h> /* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
    /* Device interface */
    DEVMETHOD(device_probe, mxge_probe),
    DEVMETHOD(device_attach, mxge_attach),
    DEVMETHOD(device_detach, mxge_detach),
    DEVMETHOD(device_shutdown, mxge_shutdown),
    {0, 0}
};

static driver_t mxge_driver =
{
    "mxge",
    mxge_methods,
    sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
    int rev;

    if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
        ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
         (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
        rev = pci_get_revid(dev);
        switch (rev) {
        case MXGE_PCI_REV_Z8E:
            device_set_desc(dev, "Myri10G-PCIE-8A");
            break;
        case MXGE_PCI_REV_Z8ES:
            device_set_desc(dev, "Myri10G-PCIE-8B");
            break;
        default:
            device_set_desc(dev, "Myri10G-PCIE-8??");
            device_printf(dev, "Unrecognized rev %d NIC\n", rev);
            break;
        }
        return 0;
    }
    return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
    vm_offset_t len;
    int err;

    sc->wc = 1;
    len = rman_get_size(sc->mem_res);
    err = pmap_change_attr((vm_offset_t) sc->sram,
                           len, PAT_WRITE_COMBINING);
    if (err != 0) {
        device_printf(sc->dev, "pmap_change_attr failed, %d\n",
                      err);
        sc->wc = 0;
    }
#endif
}


/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
                     int error)
{
    if (error == 0) {
        *(bus_addr_t *) arg = segs->ds_addr;
    }
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
               bus_size_t alignment)
{
    int err;
    device_t dev = sc->dev;
    bus_size_t boundary, maxsegsize;

    if (bytes > 4096 && alignment == 4096) {
        boundary = 0;
        maxsegsize = bytes;
    } else {
        boundary = 4096;
        maxsegsize = 4096;
    }

    /* allocate DMAable memory tags */
    err = bus_dma_tag_create(sc->parent_dmat,   /* parent */
                             alignment,         /* alignment */
                             boundary,          /* boundary */
                             BUS_SPACE_MAXADDR, /* low */
                             BUS_SPACE_MAXADDR, /* high */
                             NULL, NULL,        /* filter */
                             bytes,             /* maxsize */
                             1,                 /* num segs */
                             maxsegsize,        /* maxsegsize */
                             BUS_DMA_COHERENT,  /* flags */
                             NULL, NULL,        /* lock */
                             &dma->dmat);       /* tag */
    if (err != 0) {
        device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
        return err;
    }

    /* allocate DMAable memory & map */
    err = bus_dmamem_alloc(dma->dmat, &dma->addr,
                           (BUS_DMA_WAITOK | BUS_DMA_COHERENT
                            | BUS_DMA_ZERO), &dma->map);
    if (err != 0) {
        device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
        goto abort_with_dmat;
    }

    /* load the memory */
    err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
                          mxge_dmamap_callback,
                          (void *)&dma->bus_addr, 0);
    if (err != 0) {
        device_printf(dev, "couldn't load map (err = %d)\n", err);
        goto abort_with_mem;
    }
    return 0;

abort_with_mem:
    bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
    (void)bus_dma_tag_destroy(dma->dmat);
    return err;
}


static void
mxge_dma_free(mxge_dma_t *dma)
{
    bus_dmamap_unload(dma->dmat, dma->map);
    bus_dmamem_free(dma->dmat, dma->addr, dma->map);
    (void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */
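/*
 * For illustration (hypothetical contents, not from a real board), the
 * string section might look like:
 *
 *   "SN=123456\0MAC=00:60:dd:47:ab:cd\0PC=M3F-PCIXE-2\0\0"
 *
 * mxge_parse_strings() below walks these NUL-terminated records in place.
 */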

static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

    char *ptr, *limit;
    int i, found_mac;

    ptr = sc->eeprom_strings;
    limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
    found_mac = 0;
    while (ptr < limit && *ptr != '\0') {
        if (memcmp(ptr, "MAC=", 4) == 0) {
            ptr += 1;
            sc->mac_addr_string = ptr;
            for (i = 0; i < 6; i++) {
                ptr += 3;
                if ((ptr + 2) > limit)
                    goto abort;
                sc->mac_addr[i] = strtoul(ptr, NULL, 16);
                found_mac = 1;
            }
        } else if (memcmp(ptr, "PC=", 3) == 0) {
            ptr += 3;
            strncpy(sc->product_code_string, ptr,
                    sizeof (sc->product_code_string) - 1);
        } else if (memcmp(ptr, "SN=", 3) == 0) {
            ptr += 3;
            strncpy(sc->serial_number_string, ptr,
                    sizeof (sc->serial_number_string) - 1);
        }
        MXGE_NEXT_STRING(ptr);
    }

    if (found_mac)
        return 0;

abort:
    device_printf(sc->dev, "failed to parse eeprom_strings\n");

    return ENXIO;
}

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
    uint32_t val;
    unsigned long base, off;
    char *va, *cfgptr;
    device_t pdev, mcp55;
    uint16_t vendor_id, device_id, word;
    uintptr_t bus, slot, func, ivend, idev;
    uint32_t *ptr32;

    if (!mxge_nvidia_ecrc_enable)
        return;

    pdev = device_get_parent(device_get_parent(sc->dev));
    if (pdev == NULL) {
        device_printf(sc->dev, "could not find parent?\n");
        return;
    }
    vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
    device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

    if (vendor_id != 0x10de)
        return;

    base = 0;

    if (device_id == 0x005d) {
        /* ck804, base address is magic */
        base = 0xe0000000UL;
    } else if (device_id >= 0x0374 && device_id <= 0x378) {
        /* mcp55, base address stored in chipset */
        mcp55 = pci_find_bsf(0, 0, 0);
        if (mcp55 &&
            0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
            0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
            word = pci_read_config(mcp55, 0x90, 2);
            base = ((unsigned long)word & 0x7ffeU) << 25;
        }
    }
    if (!base)
        return;

    /* XXXX
       Test below is commented because it is believed that doing
       config read/write beyond 0xff will access the config space
       for the next larger function. Uncomment this and remove
       the hacky pmap_mapdev() way of accessing config space when
       FreeBSD grows support for extended pcie config space access
    */
#if 0
    /* See if we can, by some miracle, access the extended
       config space */
    val = pci_read_config(pdev, 0x178, 4);
    if (val != 0xffffffff) {
        val |= 0x40;
        pci_write_config(pdev, 0x178, val, 4);
        return;
    }
#endif
    /* Rather than using normal pci config space writes, we must
     * map the Nvidia config space ourselves. This is because on
     * opteron/nvidia class machine the 0xe000000 mapping is
     * handled by the nvidia chipset, that means the internal PCI
     * device (the on-chip northbridge), or the amd-8131 bridge
     * and things behind them are not visible by this method.
     */

    BUS_READ_IVAR(device_get_parent(pdev), pdev,
                  PCI_IVAR_BUS, &bus);
    BUS_READ_IVAR(device_get_parent(pdev), pdev,
                  PCI_IVAR_SLOT, &slot);
    BUS_READ_IVAR(device_get_parent(pdev), pdev,
                  PCI_IVAR_FUNCTION, &func);
    BUS_READ_IVAR(device_get_parent(pdev), pdev,
                  PCI_IVAR_VENDOR, &ivend);
    BUS_READ_IVAR(device_get_parent(pdev), pdev,
                  PCI_IVAR_DEVICE, &idev);

    off = base
        + 0x00100000UL * (unsigned long)bus
        + 0x00001000UL * (unsigned long)(func
                                         + 8 * slot);
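    /*
     * This is the standard PCIe "ECAM" config-space layout: 1MB per
     * bus and 4KB per (device, function). For example, with base
     * 0xe0000000, bus 5, slot 2, func 0 (hypothetical values):
     * off = 0xe0000000 + 5 * 0x100000 + (0 + 8 * 2) * 0x1000
     *     = 0xe0510000.
     */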

    /* map it into the kernel */
    va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

    if (va == NULL) {
        device_printf(sc->dev, "pmap_mapdev() failed\n");
        return;
    }
    /* get a pointer to the config space mapped into the kernel */
    cfgptr = va + (off & PAGE_MASK);

    /* make sure that we can really access it */
    vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
    device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
    if (! (vendor_id == ivend && device_id == idev)) {
        device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
                      vendor_id, device_id);
        pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
        return;
    }

    ptr32 = (uint32_t*)(cfgptr + 0x178);
    val = *ptr32;

    if (val == 0xffffffff) {
        device_printf(sc->dev, "extended mapping failed\n");
        pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
        return;
    }
    *ptr32 = val | 0x40;
    pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
    if (mxge_verbose)
        device_printf(sc->dev,
                      "Enabled ECRC on upstream Nvidia bridge "
                      "at %d:%d:%d\n",
                      (int)bus, (int)slot, (int)func);
    return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
    device_printf(sc->dev,
                  "Nforce 4 chipset on non-x86/amd64!?!?!\n");
    return;
}
#endif


static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
    mxge_cmd_t cmd;
    bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
    int status;
    uint32_t len;
    char *test = " ";

    /* Run a small DMA test.
     * The magic multipliers to the length tell the firmware
     * to do DMA read, write, or read+write tests. The
     * results are returned in cmd.data0. The upper 16
     * bits of the return is the number of transfers completed.
     * The lower 16 bits is the time in 0.5us ticks that the
     * transfers took to complete.
     */
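    /*
     * Worked example (made-up numbers): with len = 4096, a result of
     * cmd.data0 = 0x01000800 means 0x0100 = 256 transfers completed
     * in 0x0800 = 2048 half-microsecond ticks. The math below,
     * (transfers * len * 2) / ticks, converts that to MB/s:
     * (256 * 4096 * 2) / 2048 = 1024 MB/s.
     */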

    len = sc->tx_boundary;

    cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
    cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
    cmd.data2 = len * 0x10000;
    status = mxge_send_cmd(sc, test_type, &cmd);
    if (status != 0) {
        test = "read";
        goto abort;
    }
    sc->read_dma = ((cmd.data0>>16) * len * 2) /
        (cmd.data0 & 0xffff);
    cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
    cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
    cmd.data2 = len * 0x1;
    status = mxge_send_cmd(sc, test_type, &cmd);
    if (status != 0) {
        test = "write";
        goto abort;
    }
    sc->write_dma = ((cmd.data0>>16) * len * 2) /
        (cmd.data0 & 0xffff);

    cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
    cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
    cmd.data2 = len * 0x10001;
    status = mxge_send_cmd(sc, test_type, &cmd);
    if (status != 0) {
        test = "read/write";
        goto abort;
    }
    sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
        (cmd.data0 & 0xffff);

abort:
    if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
        device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
                      test, status);

    return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary. Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */
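/*
 * In short, the selection logic implemented below:
 *
 *   completions known aligned   -> mxge_fw_aligned ("mxge_eth_z8e"),
 *                                  tx_boundary = 4096
 *   unaligned (or unverifiable) -> mxge_fw_unaligned ("mxge_ethp_z8e"),
 *                                  tx_boundary = 2048
 */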

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
    device_t dev = sc->dev;
    int reg, status;
    uint16_t pectl;

    sc->tx_boundary = 4096;
    /*
     * Verify the max read request size was set to 4KB
     * before trying the test with 4KB.
     */
    if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
        pectl = pci_read_config(dev, reg + 0x8, 2);
        if ((pectl & (5 << 12)) != (5 << 12)) {
557 | device_printf(dev, "Max Read Req. size != 4k (0x%x\n", | |
558 | pectl); | |
            sc->tx_boundary = 2048;
        }
    }

    /*
     * load the optimized firmware (which assumes aligned PCIe
     * completions) in order to see if it works on this host.
     */
    sc->fw_name = mxge_fw_aligned;
    status = mxge_load_firmware(sc, 1);
    if (status != 0) {
        return status;
    }

    /*
     * Enable ECRC if possible
     */
    mxge_enable_nvidia_ecrc(sc);

    /*
     * Run a DMA test which watches for unaligned completions and
     * aborts on the first one seen.
     */

    status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
    if (status == 0)
        return 0; /* keep the aligned firmware */

    if (status != E2BIG)
        device_printf(dev, "DMA test failed: %d\n", status);
    if (status == ENOSYS)
        device_printf(dev, "Falling back to ethp! "
                      "Please install up to date fw\n");
    return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
    int aligned = 0;

    if (mxge_force_firmware != 0) {
        if (mxge_force_firmware == 1)
            aligned = 1;
        else
            aligned = 0;
        if (mxge_verbose)
            device_printf(sc->dev,
                          "Assuming %s completions (forced)\n",
                          aligned ? "aligned" : "unaligned");
        goto abort;
    }

    /* if the PCIe link width is 4 or less, we can use the aligned
       firmware and skip any checks */
    if (sc->link_width != 0 && sc->link_width <= 4) {
        device_printf(sc->dev,
                      "PCIe x%d Link, expect reduced performance\n",
                      sc->link_width);
        aligned = 1;
        goto abort;
    }

    if (0 == mxge_firmware_probe(sc))
        return 0;

abort:
    if (aligned) {
        sc->fw_name = mxge_fw_aligned;
        sc->tx_boundary = 4096;
    } else {
        sc->fw_name = mxge_fw_unaligned;
        sc->tx_boundary = 2048;
    }
    return (mxge_load_firmware(sc, 0));
}

union qualhack
{
    const char *ro_char;
    char *rw_char;
};

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

    if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
        device_printf(sc->dev, "Bad firmware type: 0x%x\n",
                      be32toh(hdr->mcp_type));
        return EIO;
    }

    /* save firmware version for sysctl */
    strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
    if (mxge_verbose)
        device_printf(sc->dev, "firmware id: %s\n", hdr->version);

    sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
           &sc->fw_ver_minor, &sc->fw_ver_tiny);

    if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
          && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
        device_printf(sc->dev, "Found firmware version %s\n",
                      sc->fw_version);
        device_printf(sc->dev, "Driver needs %d.%d\n",
                      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
        return EINVAL;
    }
    return 0;

}

static void *
z_alloc(void *nil, u_int items, u_int size)
{
    void *ptr;

    ptr = malloc(items * size, M_TEMP, M_NOWAIT);
    return ptr;
}

static void
z_free(void *nil, void *ptr)
{
    free(ptr, M_TEMP);
}


static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
    z_stream zs;
    char *inflate_buffer;
    const struct firmware *fw;
    const mcp_gen_header_t *hdr;
    unsigned hdr_offset;
    int status;
    unsigned int i;
    char dummy;
    size_t fw_len;

    fw = firmware_get(sc->fw_name);
    if (fw == NULL) {
        device_printf(sc->dev, "Could not find firmware image %s\n",
                      sc->fw_name);
        return ENOENT;
    }

    /* setup zlib and decompress f/w */
    bzero(&zs, sizeof (zs));
    zs.zalloc = z_alloc;
    zs.zfree = z_free;
    status = inflateInit(&zs);
    if (status != Z_OK) {
        status = EIO;
        goto abort_with_fw;
    }

    /* the uncompressed size is stored as the firmware version,
       which would otherwise go unused */
    fw_len = (size_t) fw->version;
    inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
    if (inflate_buffer == NULL) {
        status = ENOMEM;
        goto abort_with_zs;
    }
    zs.avail_in = fw->datasize;
    zs.next_in = __DECONST(char *, fw->data);
    zs.avail_out = fw_len;
    zs.next_out = inflate_buffer;
    status = inflate(&zs, Z_FINISH);
    if (status != Z_STREAM_END) {
        device_printf(sc->dev, "zlib %d\n", status);
        status = EIO;
        goto abort_with_buffer;
    }

    /* check id */
    hdr_offset = htobe32(*(const uint32_t *)
                         (inflate_buffer + MCP_HEADER_PTR_OFFSET));
    if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
        device_printf(sc->dev, "Bad firmware file");
        status = EIO;
        goto abort_with_buffer;
    }
    hdr = (const void*)(inflate_buffer + hdr_offset);

    status = mxge_validate_firmware(sc, hdr);
    if (status != 0)
        goto abort_with_buffer;

    /* Copy the inflated firmware to NIC SRAM. */
    for (i = 0; i < fw_len; i += 256) {
        mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
                      inflate_buffer + i,
                      min(256U, (unsigned)(fw_len - i)));
        wmb();
        dummy = *sc->sram;
        wmb();
    }

    *limit = fw_len;
    status = 0;
abort_with_buffer:
    free(inflate_buffer, M_TEMP);
abort_with_zs:
    inflateEnd(&zs);
abort_with_fw:
    firmware_put(fw, FIRMWARE_UNLOAD);
    return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
    char buf_bytes[72];
    volatile uint32_t *confirm;
    volatile char *submit;
    uint32_t *buf, dma_low, dma_high;
    int i;

    buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
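    /*
     * buf now points at the first 8-byte-aligned address inside
     * buf_bytes; the spare stack bytes exist only to allow this
     * round-up (the same trick mxge_send_cmd() uses for its
     * command block).
     */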

    /* clear confirmation addr */
    confirm = (volatile uint32_t *)sc->cmd;
    *confirm = 0;
    wmb();

    /* send an rdma command to the PCIe engine, and wait for the
       response in the confirmation address. The firmware should
       write a -1 there to indicate it is alive and well
    */

    dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
    dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
    buf[0] = htobe32(dma_high);     /* confirm addr MSW */
    buf[1] = htobe32(dma_low);      /* confirm addr LSW */
    buf[2] = htobe32(0xffffffff);   /* confirm data */
    dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
    dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
    buf[3] = htobe32(dma_high);     /* dummy addr MSW */
    buf[4] = htobe32(dma_low);      /* dummy addr LSW */
    buf[5] = htobe32(enable);       /* enable? */

    submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

    mxge_pio_copy(submit, buf, 64);
    wmb();
    DELAY(1000);
    wmb();
    i = 0;
    while (*confirm != 0xffffffff && i < 20) {
        DELAY(1000);
        i++;
    }
    if (*confirm != 0xffffffff) {
        device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
                      (enable ? "enable" : "disable"), confirm,
                      *confirm);
    }
    return;
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
    mcp_cmd_t *buf;
    char buf_bytes[sizeof(*buf) + 8];
    volatile mcp_cmd_response_t *response = sc->cmd;
    volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
    uint32_t dma_low, dma_high;
    int err, sleep_total = 0;

    /* ensure buf is aligned to 8 bytes */
    buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

    buf->data0 = htobe32(data->data0);
    buf->data1 = htobe32(data->data1);
    buf->data2 = htobe32(data->data2);
    buf->cmd = htobe32(cmd);
    dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
    dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

    buf->response_addr.low = htobe32(dma_low);
    buf->response_addr.high = htobe32(dma_high);
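    /*
     * Command/response handshake: the command block is PIO-written
     * to the MXGEFW_ETH_CMD mailbox in NIC SRAM, and the firmware
     * DMAs a mcp_cmd_response_t back into host memory at
     * response_addr (sc->cmd_dma). result stays 0xffffffff until the
     * firmware has answered, which is what the polling loop below
     * waits for.
     */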
    mtx_lock(&sc->cmd_mtx);
    response->result = 0xffffffff;
    wmb();
    mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

    /* wait up to 20ms */
    err = EAGAIN;
    for (sleep_total = 0; sleep_total < 20; sleep_total++) {
        bus_dmamap_sync(sc->cmd_dma.dmat,
                        sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
        wmb();
        switch (be32toh(response->result)) {
        case 0:
            data->data0 = be32toh(response->data);
            err = 0;
            break;
        case 0xffffffff:
            DELAY(1000);
            break;
        case MXGEFW_CMD_UNKNOWN:
            err = ENOSYS;
            break;
        case MXGEFW_CMD_ERROR_UNALIGNED:
            err = E2BIG;
            break;
        case MXGEFW_CMD_ERROR_BUSY:
            err = EBUSY;
            break;
        default:
            device_printf(sc->dev,
                          "mxge: command %d "
                          "failed, result = %d\n",
                          cmd, be32toh(response->result));
            err = ENXIO;
            break;
        }
        if (err != EAGAIN)
            break;
    }
    if (err == EAGAIN)
        device_printf(sc->dev, "mxge: command %d timed out, "
                      "result = %d\n",
                      cmd, be32toh(response->result));
    mtx_unlock(&sc->cmd_mtx);
    return err;
}

static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
    struct mcp_gen_header *hdr;
    const size_t bytes = sizeof (struct mcp_gen_header);
    size_t hdr_offset;
    int status;

    /* find running firmware header */
    hdr_offset = htobe32(*(volatile uint32_t *)
                         (sc->sram + MCP_HEADER_PTR_OFFSET));

    if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
        device_printf(sc->dev,
                      "Running firmware has bad header offset (%d)\n",
                      (int)hdr_offset);
        return EIO;
    }

    /* copy header of running firmware from SRAM to host memory to
     * validate firmware */
    hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
    if (hdr == NULL) {
        device_printf(sc->dev, "could not malloc firmware hdr\n");
        return ENOMEM;
    }
    bus_space_read_region_1(rman_get_bustag(sc->mem_res),
                            rman_get_bushandle(sc->mem_res),
                            hdr_offset, (char *)hdr, bytes);
    status = mxge_validate_firmware(sc, hdr);
    free(hdr, M_DEVBUF);

    /*
     * check to see if adopted firmware has bug where adopting
     * it will cause broadcasts to be filtered unless the NIC
     * is kept in ALLMULTI mode
     */
    if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
        sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
        sc->adopted_rx_filter_bug = 1;
        device_printf(sc->dev, "Adopting fw %d.%d.%d: "
                      "working around rx filter bug\n",
                      sc->fw_ver_major, sc->fw_ver_minor,
                      sc->fw_ver_tiny);
    }

    return status;
}


static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
    volatile uint32_t *confirm;
    volatile char *submit;
    char buf_bytes[72];
    uint32_t *buf, size, dma_low, dma_high;
    int status, i;

    buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

    size = sc->sram_size;
    status = mxge_load_firmware_helper(sc, &size);
    if (status) {
        if (!adopt)
            return status;
        /* Try to use the currently running firmware, if
           it is new enough */
        status = mxge_adopt_running_firmware(sc);
        if (status) {
            device_printf(sc->dev,
                          "failed to adopt running firmware\n");
            return status;
        }
        device_printf(sc->dev,
                      "Successfully adopted running firmware\n");
        if (sc->tx_boundary == 4096) {
            device_printf(sc->dev,
                          "Using firmware currently running on NIC"
                          ". For optimal\n");
            device_printf(sc->dev,
                          "performance consider loading optimized "
                          "firmware\n");
        }
        sc->fw_name = mxge_fw_unaligned;
        sc->tx_boundary = 2048;
        return 0;
    }
    /* clear confirmation addr */
    confirm = (volatile uint32_t *)sc->cmd;
    *confirm = 0;
    wmb();
    /* send a reload command to the bootstrap MCP, and wait for the
       response in the confirmation address. The firmware should
       write a -1 there to indicate it is alive and well
    */

    dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
    dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

    buf[0] = htobe32(dma_high);     /* confirm addr MSW */
    buf[1] = htobe32(dma_low);      /* confirm addr LSW */
    buf[2] = htobe32(0xffffffff);   /* confirm data */

    /* FIX: All newest firmware should un-protect the bottom of
       the sram before handoff. However, the very first interfaces
       do not. Therefore the handoff copy must skip the first 8 bytes
    */
    /* where the code starts*/
    buf[3] = htobe32(MXGE_FW_OFFSET + 8);
    buf[4] = htobe32(size - 8);     /* length of code */
    buf[5] = htobe32(8);            /* where to copy to */
    buf[6] = htobe32(0);            /* where to jump to */

    submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
    mxge_pio_copy(submit, buf, 64);
    wmb();
    DELAY(1000);
    wmb();
    i = 0;
    while (*confirm != 0xffffffff && i < 20) {
        DELAY(1000*10);
        i++;
        bus_dmamap_sync(sc->cmd_dma.dmat,
                        sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
    }
    if (*confirm != 0xffffffff) {
        device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
                      confirm, *confirm);
        return ENXIO;
    }
    return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
    mxge_cmd_t cmd;
    uint8_t *addr = sc->mac_addr;
    int status;

    cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
                 | (addr[2] << 8) | addr[3]);

    cmd.data1 = ((addr[4] << 8) | (addr[5]));

    status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
    return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
    mxge_cmd_t cmd;
    int status;

    if (pause)
        status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
                               &cmd);
    else
        status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
                               &cmd);

    if (status) {
        device_printf(sc->dev, "Failed to set flow control mode\n");
        return ENXIO;
    }
    sc->pause = pause;
    return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
    mxge_cmd_t cmd;
    int status;

    if (mxge_always_promisc)
        promisc = 1;

    if (promisc)
        status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
                               &cmd);
    else
        status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
                               &cmd);

    if (status) {
        device_printf(sc->dev, "Failed to set promisc mode\n");
    }
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
    mxge_cmd_t cmd;
    struct ifmultiaddr *ifma;
    struct ifnet *ifp = sc->ifp;
    int err;

    /* This firmware is known to not support multicast */
    if (!sc->fw_multicast_support)
        return;

    /* Disable multicast filtering while we play with the lists */
    err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
    if (err != 0) {
        device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
                      " error status: %d\n", err);
        return;
    }

    if (sc->adopted_rx_filter_bug)
        return;

    if (ifp->if_flags & IFF_ALLMULTI)
        /* request to disable multicast filtering, so quit here */
        return;

    /* Flush all the filters */

    err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
    if (err != 0) {
        device_printf(sc->dev,
                      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
                      ", error status: %d\n", err);
        return;
    }

    /* Walk the multicast list, and add each address */

    if_maddr_rlock(ifp);
    TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
        if (ifma->ifma_addr->sa_family != AF_LINK)
            continue;
        bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
              &cmd.data0, 4);
        bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
              &cmd.data1, 2);
        cmd.data0 = htonl(cmd.data0);
        cmd.data1 = htonl(cmd.data1);
        err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
        if (err != 0) {
            device_printf(sc->dev, "Failed "
                          "MXGEFW_JOIN_MULTICAST_GROUP, error "
                          "status: %d\n", err);
            /* abort, leaving multicast filtering off */
            if_maddr_runlock(ifp);
            return;
        }
    }
    if_maddr_runlock(ifp);
    /* Enable multicast filtering */
    err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
    if (err != 0) {
        device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
                      ", error status: %d\n", err);
    }
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
    mxge_cmd_t cmd;
    int status;

    if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
        return MXGEFW_MAX_MTU - MXGEFW_PAD;

    /* try to set nbufs to see if we can
       use virtually contiguous jumbos */
    cmd.data0 = 0;
    status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
                           &cmd);
    if (status == 0)
        return MXGEFW_MAX_MTU - MXGEFW_PAD;

    /* otherwise, we're limited to MJUMPAGESIZE */
    return MJUMPAGESIZE - MXGEFW_PAD;
}

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
    struct mxge_slice_state *ss;
    mxge_rx_done_t *rx_done;
    volatile uint32_t *irq_claim;
    mxge_cmd_t cmd;
    int slice, status;

    /* try to send a reset command to the card to see if it
       is alive */
    memset(&cmd, 0, sizeof (cmd));
    status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
    if (status != 0) {
        device_printf(sc->dev, "failed reset\n");
        return ENXIO;
    }

    mxge_dummy_rdma(sc, 1);

    /* set the intrq size */
    cmd.data0 = sc->rx_ring_size;
    status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

    /*
     * Even though we already know how many slices are supported
     * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
     * has magic side effects, and must be called after a reset.
     * It must be called prior to calling any RSS related cmds,
     * including assigning an interrupt queue for anything but
     * slice 0. It must also be called *after*
     * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
     * the firmware to compute offsets.
     */
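    /*
     * So, the required command ordering on the multi-slice path is:
     * RESET -> SET_INTRQ_SIZE -> GET_MAX_RSS_QUEUES ->
     * ENABLE_RSS_QUEUES -> SET_INTRQ_DMA (per slice), which is
     * exactly the sequence the code below follows.
     */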

    if (sc->num_slices > 1) {
        /* ask the maximum number of slices it supports */
        status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
                               &cmd);
        if (status != 0) {
            device_printf(sc->dev,
                          "failed to get number of slices\n");
            return status;
        }
        /*
         * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
         * to setting up the interrupt queue DMA
         */
        cmd.data0 = sc->num_slices;
        cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
        cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
        status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
                               &cmd);
        if (status != 0) {
            device_printf(sc->dev,
                          "failed to set number of slices\n");
            return status;
        }
    }

    if (interrupts_setup) {
        /* Now exchange information about interrupts */
        for (slice = 0; slice < sc->num_slices; slice++) {
            rx_done = &sc->ss[slice].rx_done;
            memset(rx_done->entry, 0, sc->rx_ring_size);
            cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
            cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
            cmd.data2 = slice;
            status |= mxge_send_cmd(sc,
                                    MXGEFW_CMD_SET_INTRQ_DMA,
                                    &cmd);
        }
    }

    status |= mxge_send_cmd(sc,
                            MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

    sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

    status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
    irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

    status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
                            &cmd);
    sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
    if (status != 0) {
        device_printf(sc->dev, "failed set interrupt parameters\n");
        return status;
    }

    *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

    /* run a DMA benchmark */
    (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

    for (slice = 0; slice < sc->num_slices; slice++) {
        ss = &sc->ss[slice];

        ss->irq_claim = irq_claim + (2 * slice);
        /* reset mcp/driver shared state back to 0 */
        ss->rx_done.idx = 0;
        ss->rx_done.cnt = 0;
        ss->tx.req = 0;
        ss->tx.done = 0;
        ss->tx.pkt_done = 0;
        ss->tx.queue_active = 0;
        ss->tx.activate = 0;
        ss->tx.deactivate = 0;
        ss->tx.wake = 0;
        ss->tx.defrag = 0;
        ss->tx.stall = 0;
        ss->rx_big.cnt = 0;
        ss->rx_small.cnt = 0;
        ss->lro_bad_csum = 0;
        ss->lro_queued = 0;
        ss->lro_flushed = 0;
        if (ss->fw_stats != NULL) {
            ss->fw_stats->valid = 0;
            ss->fw_stats->send_done_count = 0;
        }
    }
    sc->rdma_tags_available = 15;
    status = mxge_update_mac_address(sc);
    mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
    mxge_change_pause(sc, sc->pause);
    mxge_set_multicast_list(sc);
    return status;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
    mxge_softc_t *sc;
    unsigned int intr_coal_delay;
    int err;

    sc = arg1;
    intr_coal_delay = sc->intr_coal_delay;
    err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
    if (err != 0) {
        return err;
    }
    if (intr_coal_delay == sc->intr_coal_delay)
        return 0;

    if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
        return EINVAL;

    mtx_lock(&sc->driver_mtx);
    *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
    sc->intr_coal_delay = intr_coal_delay;

    mtx_unlock(&sc->driver_mtx);
    return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
    mxge_softc_t *sc;
    unsigned int enabled;
    int err;

    sc = arg1;
    enabled = sc->pause;
    err = sysctl_handle_int(oidp, &enabled, arg2, req);
    if (err != 0) {
        return err;
    }
    if (enabled == sc->pause)
        return 0;

    mtx_lock(&sc->driver_mtx);
    err = mxge_change_pause(sc, enabled);
    mtx_unlock(&sc->driver_mtx);
    return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
    struct ifnet *ifp;
    int err = 0;

    ifp = sc->ifp;
    if (lro_cnt == 0)
        ifp->if_capenable &= ~IFCAP_LRO;
    else
        ifp->if_capenable |= IFCAP_LRO;
    sc->lro_cnt = lro_cnt;
    if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
        mxge_close(sc);
        err = mxge_open(sc);
    }
    return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
    mxge_softc_t *sc;
    unsigned int lro_cnt;
    int err;

    sc = arg1;
    lro_cnt = sc->lro_cnt;
    err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
    if (err != 0)
        return err;

    if (lro_cnt == sc->lro_cnt)
        return 0;

    if (lro_cnt > 128)
        return EINVAL;

    mtx_lock(&sc->driver_mtx);
    err = mxge_change_lro_locked(sc, lro_cnt);
    mtx_unlock(&sc->driver_mtx);
    return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
    int err;

    if (arg1 == NULL)
        return EFAULT;
    arg2 = be32toh(*(int *)arg1);
    arg1 = NULL;
    err = sysctl_handle_int(oidp, arg1, arg2, req);

    return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
    struct mxge_slice_state *ss;
    int slice;

    if (sc->slice_sysctl_tree == NULL)
        return;

    for (slice = 0; slice < sc->num_slices; slice++) {
        ss = &sc->ss[slice];
        if (ss == NULL || ss->sysctl_tree == NULL)
            continue;
        sysctl_ctx_free(&ss->sysctl_ctx);
        ss->sysctl_tree = NULL;
    }
    sysctl_ctx_free(&sc->slice_sysctl_ctx);
    sc->slice_sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
    struct sysctl_ctx_list *ctx;
    struct sysctl_oid_list *children;
    mcp_irq_data_t *fw;
    struct mxge_slice_state *ss;
    int slice;
    char slice_num[8];

    ctx = device_get_sysctl_ctx(sc->dev);
    children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
    fw = sc->ss[0].fw_stats;

    /* random information */
    SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
                      "firmware_version",
                      CTLFLAG_RD, &sc->fw_version,
                      0, "firmware version");
    SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
                      "serial_number",
                      CTLFLAG_RD, &sc->serial_number_string,
                      0, "serial number");
    SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
                      "product_code",
                      CTLFLAG_RD, &sc->product_code_string,
                      0, "product_code");
    SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                   "pcie_link_width",
                   CTLFLAG_RD, &sc->link_width,
                   0, "PCIe link width");
    SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                   "tx_boundary",
                   CTLFLAG_RD, &sc->tx_boundary,
                   0, "tx_boundary");
    SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                   "write_combine",
                   CTLFLAG_RD, &sc->wc,
                   0, "write combining PIO?");
    SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                   "read_dma_MBs",
                   CTLFLAG_RD, &sc->read_dma,
                   0, "DMA Read speed in MB/s");
    SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                   "write_dma_MBs",
                   CTLFLAG_RD, &sc->write_dma,
                   0, "DMA Write speed in MB/s");
    SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                   "read_write_dma_MBs",
                   CTLFLAG_RD, &sc->read_write_dma,
                   0, "DMA concurrent Read/Write speed in MB/s");

    /* performance related tunables */
    SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                    "intr_coal_delay",
                    CTLTYPE_INT|CTLFLAG_RW, sc,
                    0, mxge_change_intr_coal,
                    "I", "interrupt coalescing delay in usecs");

    SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                    "flow_control_enabled",
                    CTLTYPE_INT|CTLFLAG_RW, sc,
                    0, mxge_change_flow_control,
                    "I", "enable flow control for this interface");

    SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                   "deassert_wait",
                   CTLFLAG_RW, &mxge_deassert_wait,
                   0, "Wait for IRQ line to go low in ihandler");

    /* stats block from firmware is in network byte order.
       Need to swap it */
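    /*
     * Each of these goes through mxge_handle_be32() above: e.g. a raw
     * in-memory value of 0x01000000 read from the big-endian stats
     * block on a little-endian host is reported to userland as 1.
     */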
    SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                    "link_up",
                    CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
                    0, mxge_handle_be32,
                    "I", "link up");
    SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                    "rdma_tags_available",
                    CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
                    0, mxge_handle_be32,
                    "I", "rdma_tags_available");
    SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                    "dropped_bad_crc32",
                    CTLTYPE_INT|CTLFLAG_RD,
                    &fw->dropped_bad_crc32,
                    0, mxge_handle_be32,
                    "I", "dropped_bad_crc32");
    SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                    "dropped_bad_phy",
                    CTLTYPE_INT|CTLFLAG_RD,
                    &fw->dropped_bad_phy,
                    0, mxge_handle_be32,
                    "I", "dropped_bad_phy");
    SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                    "dropped_link_error_or_filtered",
                    CTLTYPE_INT|CTLFLAG_RD,
                    &fw->dropped_link_error_or_filtered,
                    0, mxge_handle_be32,
                    "I", "dropped_link_error_or_filtered");
    SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                    "dropped_link_overflow",
                    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
                    0, mxge_handle_be32,
                    "I", "dropped_link_overflow");
    SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                    "dropped_multicast_filtered",
                    CTLTYPE_INT|CTLFLAG_RD,
                    &fw->dropped_multicast_filtered,
                    0, mxge_handle_be32,
                    "I", "dropped_multicast_filtered");
    SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                    "dropped_no_big_buffer",
                    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
                    0, mxge_handle_be32,
                    "I", "dropped_no_big_buffer");
    SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                    "dropped_no_small_buffer",
                    CTLTYPE_INT|CTLFLAG_RD,
                    &fw->dropped_no_small_buffer,
                    0, mxge_handle_be32,
                    "I", "dropped_no_small_buffer");
    SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                    "dropped_overrun",
                    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
                    0, mxge_handle_be32,
                    "I", "dropped_overrun");
    SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                    "dropped_pause",
                    CTLTYPE_INT|CTLFLAG_RD,
                    &fw->dropped_pause,
                    0, mxge_handle_be32,
                    "I", "dropped_pause");
    SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                    "dropped_runt",
                    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
                    0, mxge_handle_be32,
                    "I", "dropped_runt");

    SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                    "dropped_unicast_filtered",
                    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
                    0, mxge_handle_be32,
                    "I", "dropped_unicast_filtered");

    /* verbose printing? */
    SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                   "verbose",
                   CTLFLAG_RW, &mxge_verbose,
                   0, "verbose printing");

    /* lro */
    SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                    "lro_cnt",
                    CTLTYPE_INT|CTLFLAG_RW, sc,
                    0, mxge_change_lro,
                    "I", "number of lro merge queues");

    /* add counters exported for debugging from all slices */
    sysctl_ctx_init(&sc->slice_sysctl_ctx);
    sc->slice_sysctl_tree =
        SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
                        "slice", CTLFLAG_RD, 0, "");

    for (slice = 0; slice < sc->num_slices; slice++) {
        ss = &sc->ss[slice];
        sysctl_ctx_init(&ss->sysctl_ctx);
        ctx = &ss->sysctl_ctx;
        children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
        sprintf(slice_num, "%d", slice);
        ss->sysctl_tree =
            SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
                            CTLFLAG_RD, 0, "");
        children = SYSCTL_CHILDREN(ss->sysctl_tree);
        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                       "rx_small_cnt",
                       CTLFLAG_RD, &ss->rx_small.cnt,
                       0, "rx_small_cnt");
        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                       "rx_big_cnt",
                       CTLFLAG_RD, &ss->rx_big.cnt,
                       0, "rx_big_cnt");
        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
                       0, "number of lro merge queues flushed");

        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
                       0, "number of frames appended to lro merge "
                       "queues");

#ifndef IFNET_BUF_RING
        /* only transmit from slice 0 for now */
        if (slice > 0)
            continue;
#endif
        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                       "tx_req",
                       CTLFLAG_RD, &ss->tx.req,
                       0, "tx_req");

        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                       "tx_done",
                       CTLFLAG_RD, &ss->tx.done,
                       0, "tx_done");
        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                       "tx_pkt_done",
                       CTLFLAG_RD, &ss->tx.pkt_done,
                       0, "tx_pkt_done");
1658 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1659 | "tx_stall", | |
1660 | CTLFLAG_RD, &ss->tx.stall, | |
1661 | 0, "tx_stall"); | |
1662 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1663 | "tx_wake", | |
1664 | CTLFLAG_RD, &ss->tx.wake, | |
1665 | 0, "tx_wake"); | |
1666 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1667 | "tx_defrag", | |
1668 | CTLFLAG_RD, &ss->tx.defrag, | |
1669 | 0, "tx_defrag"); | |
1670 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1671 | "tx_queue_active", | |
1672 | CTLFLAG_RD, &ss->tx.queue_active, | |
1673 | 0, "tx_queue_active"); | |
1674 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1675 | "tx_activate", | |
1676 | CTLFLAG_RD, &ss->tx.activate, | |
1677 | 0, "tx_activate"); | |
1678 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1679 | "tx_deactivate", | |
1680 | CTLFLAG_RD, &ss->tx.deactivate, | |
1681 | 0, "tx_deactivate"); | |
1682 | } | |
1683 | } | |
1684 | ||
1685 | /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy | |
1686 | backwards one at a time and handle ring wraps */ | |
1687 | ||
1688 | static inline void | |
1689 | mxge_submit_req_backwards(mxge_tx_ring_t *tx, | |
1690 | mcp_kreq_ether_send_t *src, int cnt) | |
1691 | { | |
1692 | int idx, starting_slot; | |
1693 | starting_slot = tx->req; | |
1694 | while (cnt > 1) { | |
1695 | cnt--; | |
1696 | idx = (starting_slot + cnt) & tx->mask; | |
1697 | mxge_pio_copy(&tx->lanai[idx], | |
1698 | &src[cnt], sizeof(*src)); | |
1699 | wmb(); | |
1700 | } | |
1701 | } | |
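 | /* | |
 | * A worked example of the wrap handling above: with mask = 255, | |
 | * tx->req = 254 and cnt = 4, the loop writes slots | |
 | * (254+3)&255 = 1, then 0, then 255, in descending order; slot | |
 | * 254 (the first request) is deliberately left for | |
 | * mxge_submit_req() so that its valid flags can be written last. | |
 | */ | |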
1702 | ||
1703 | /* | |
1704 | * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy | |
1705 | * at most 32 bytes at a time, so as to avoid involving the software | |
1706 | * pio handler in the nic. We re-write the first segment's flags | |
1707 | * to mark them valid only after writing the entire chain | |
1708 | */ | |
1709 | ||
1710 | static inline void | |
1711 | mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, | |
1712 | int cnt) | |
1713 | { | |
1714 | int idx, i; | |
1715 | uint32_t *src_ints; | |
1716 | volatile uint32_t *dst_ints; | |
1717 | mcp_kreq_ether_send_t *srcp; | |
1718 | volatile mcp_kreq_ether_send_t *dstp, *dst; | |
1719 | uint8_t last_flags; | |
1720 | ||
1721 | idx = tx->req & tx->mask; | |
1722 | ||
1723 | last_flags = src->flags; | |
1724 | src->flags = 0; | |
1725 | wmb(); | |
1726 | dst = dstp = &tx->lanai[idx]; | |
1727 | srcp = src; | |
1728 | ||
1729 | if ((idx + cnt) < tx->mask) { | |
1730 | for (i = 0; i < (cnt - 1); i += 2) { | |
1731 | mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); | |
1732 | wmb(); /* force write every 32 bytes */ | |
1733 | srcp += 2; | |
1734 | dstp += 2; | |
1735 | } | |
1736 | } else { | |
1737 | /* submit all but the first request, and ensure | |
1738 | that it is submitted below */ | |
1739 | mxge_submit_req_backwards(tx, src, cnt); | |
1740 | i = 0; | |
1741 | } | |
1742 | if (i < cnt) { | |
1743 | /* submit the first request */ | |
1744 | mxge_pio_copy(dstp, srcp, sizeof(*src)); | |
1745 | wmb(); /* barrier before setting valid flag */ | |
1746 | } | |
1747 | ||
1748 | /* re-write the last 32 bits with the valid flags */ | |
1749 | src->flags = last_flags; | |
1750 | src_ints = (uint32_t *)src; | |
1751 | src_ints+=3; | |
1752 | dst_ints = (volatile uint32_t *)dst; | |
1753 | dst_ints+=3; | |
1754 | *dst_ints = *src_ints; | |
1755 | tx->req += cnt; | |
1756 | wmb(); | |
1757 | } | |
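 | /* | |
 | * In brief: the first descriptor's flags byte is cleared before | |
 | * anything is copied, so the NIC ignores the chain while it is | |
 | * being written; the final 4-byte store then publishes the whole | |
 | * chain at once. The += 3 on src_ints/dst_ints assumes a 16-byte | |
 | * mcp_kreq_ether_send_t, selecting bytes 12-15 of the first | |
 | * descriptor, the word that holds the flags. | |
 | */ | |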
1758 | ||
1759 | #if IFCAP_TSO4 | |
1760 | ||
1761 | static void | |
1762 | mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, | |
1763 | int busdma_seg_cnt, int ip_off) | |
1764 | { | |
1765 | mxge_tx_ring_t *tx; | |
1766 | mcp_kreq_ether_send_t *req; | |
1767 | bus_dma_segment_t *seg; | |
1768 | struct ip *ip; | |
1769 | struct tcphdr *tcp; | |
1770 | uint32_t low, high_swapped; | |
1771 | int len, seglen, cum_len, cum_len_next; | |
1772 | int next_is_first, chop, cnt, rdma_count, small; | |
1773 | uint16_t pseudo_hdr_offset, cksum_offset, mss; | |
1774 | uint8_t flags, flags_next; | |
1775 | static int once; | |
1776 | ||
1777 | mss = m->m_pkthdr.tso_segsz; | |
1778 | ||
1779 | /* negative cum_len signifies to the | |
1780 | * send loop that we are still in the | |
1781 | * header portion of the TSO packet. | |
1782 | */ | |
1783 | ||
1784 | /* ensure we have the ethernet, IP and TCP | |
1785 | header together in the first mbuf, copy | |
1786 | it to a scratch buffer if not */ | |
1787 | if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { | |
1788 | m_copydata(m, 0, ip_off + sizeof (*ip), | |
1789 | ss->scratch); | |
1790 | ip = (struct ip *)(ss->scratch + ip_off); | |
1791 | } else { | |
1792 | ip = (struct ip *)(mtod(m, char *) + ip_off); | |
1793 | } | |
1794 | if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) | |
1795 | + sizeof (*tcp))) { | |
1796 | m_copydata(m, 0, ip_off + (ip->ip_hl << 2) | |
1797 | + sizeof (*tcp), ss->scratch); | |
1798 | ip = (struct ip *)(ss->scratch + ip_off); | |
1799 | } | |
1800 | ||
1801 | tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); | |
1802 | cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); | |
1803 | ||
1804 | /* TSO implies checksum offload on this hardware */ | |
1805 | cksum_offset = ip_off + (ip->ip_hl << 2); | |
1806 | flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; | |
1807 | ||
1808 | ||
1809 | /* for TSO, pseudo_hdr_offset holds mss. | |
1810 | * The firmware figures out where to put | |
1811 | * the checksum by parsing the header. */ | |
1812 | pseudo_hdr_offset = htobe16(mss); | |
1813 | ||
1814 | tx = &ss->tx; | |
1815 | req = tx->req_list; | |
1816 | seg = tx->seg_list; | |
1817 | cnt = 0; | |
1818 | rdma_count = 0; | |
1819 | /* "rdma_count" is the number of RDMAs belonging to the | |
1820 | * current packet BEFORE the current send request. For | |
1821 | * non-TSO packets, this is equal to "count". | |
1822 | * For TSO packets, rdma_count needs to be reset | |
1823 | * to 0 after a segment cut. | |
1824 | * | |
1825 | * The rdma_count field of the send request is | |
1826 | * the number of RDMAs of the packet starting at | |
1827 | * that request. For TSO send requests with one or more cuts | |
1828 | * in the middle, this is the number of RDMAs starting | |
1829 | * after the last cut in the request. All previous | |
1830 | * segments before the last cut implicitly have 1 RDMA. | |
1831 | * | |
1832 | * Since the number of RDMAs is not known beforehand, | |
1833 | * it must be filled-in retroactively - after each | |
1834 | * segmentation cut or at the end of the entire packet. | |
1835 | */ | |
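 | /* | |
 | * A sketch of the bit tricks used below: "chop" and | |
 | * "next_is_first" are 0/1 values, so | |
 | * rdma_count |= -(chop | next_is_first) forces rdma_count to -1 | |
 | * whenever a cut or segment boundary occurs; the rdma_count++ at | |
 | * the bottom of the loop then restarts the tally, and the | |
 | * (req - rdma_count)->rdma_count stores write it back | |
 | * retroactively, as described above. | |
 | */ | |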
1836 | ||
1837 | while (busdma_seg_cnt) { | |
1838 | /* Break the busdma segment up into pieces*/ | |
1839 | low = MXGE_LOWPART_TO_U32(seg->ds_addr); | |
1840 | high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); | |
1841 | len = seg->ds_len; | |
1842 | ||
1843 | while (len) { | |
1844 | flags_next = flags & ~MXGEFW_FLAGS_FIRST; | |
1845 | seglen = len; | |
1846 | cum_len_next = cum_len + seglen; | |
1847 | (req-rdma_count)->rdma_count = rdma_count + 1; | |
1848 | if (__predict_true(cum_len >= 0)) { | |
1849 | /* payload */ | |
1850 | chop = (cum_len_next > mss); | |
1851 | cum_len_next = cum_len_next % mss; | |
1852 | next_is_first = (cum_len_next == 0); | |
1853 | flags |= chop * MXGEFW_FLAGS_TSO_CHOP; | |
1854 | flags_next |= next_is_first * | |
1855 | MXGEFW_FLAGS_FIRST; | |
1856 | rdma_count |= -(chop | next_is_first); | |
1857 | rdma_count += chop & !next_is_first; | |
1858 | } else if (cum_len_next >= 0) { | |
1859 | /* header ends */ | |
1860 | rdma_count = -1; | |
1861 | cum_len_next = 0; | |
1862 | seglen = -cum_len; | |
1863 | small = (mss <= MXGEFW_SEND_SMALL_SIZE); | |
1864 | flags_next = MXGEFW_FLAGS_TSO_PLD | | |
1865 | MXGEFW_FLAGS_FIRST | | |
1866 | (small * MXGEFW_FLAGS_SMALL); | |
1867 | } | |
1868 | ||
1869 | req->addr_high = high_swapped; | |
1870 | req->addr_low = htobe32(low); | |
1871 | req->pseudo_hdr_offset = pseudo_hdr_offset; | |
1872 | req->pad = 0; | |
1873 | req->rdma_count = 1; | |
1874 | req->length = htobe16(seglen); | |
1875 | req->cksum_offset = cksum_offset; | |
1876 | req->flags = flags | ((cum_len & 1) * | |
1877 | MXGEFW_FLAGS_ALIGN_ODD); | |
1878 | low += seglen; | |
1879 | len -= seglen; | |
1880 | cum_len = cum_len_next; | |
1881 | flags = flags_next; | |
1882 | req++; | |
1883 | cnt++; | |
1884 | rdma_count++; | |
1885 | if (__predict_false(cksum_offset > seglen)) | |
1886 | cksum_offset -= seglen; | |
1887 | else | |
1888 | cksum_offset = 0; | |
1889 | if (__predict_false(cnt > tx->max_desc)) | |
1890 | goto drop; | |
1891 | } | |
1892 | busdma_seg_cnt--; | |
1893 | seg++; | |
1894 | } | |
1895 | (req-rdma_count)->rdma_count = rdma_count; | |
1896 | ||
1897 | do { | |
1898 | req--; | |
1899 | req->flags |= MXGEFW_FLAGS_TSO_LAST; | |
1900 | } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); | |
1901 | ||
1902 | tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; | |
1903 | mxge_submit_req(tx, tx->req_list, cnt); | |
1904 | #ifdef IFNET_BUF_RING | |
1905 | if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { | |
1906 | /* tell the NIC to start polling this slice */ | |
1907 | *tx->send_go = 1; | |
1908 | tx->queue_active = 1; | |
1909 | tx->activate++; | |
1910 | wmb(); | |
1911 | } | |
1912 | #endif | |
1913 | return; | |
1914 | ||
1915 | drop: | |
1916 | bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); | |
1917 | m_freem(m); | |
1918 | ss->oerrors++; | |
1919 | if (!once) { | |
1920 | printf("tx->max_desc exceeded via TSO!\n"); | |
1921 | printf("mss = %d, %ld, %d!\n", mss, | |
1922 | (long)seg - (long)tx->seg_list, tx->max_desc); | |
1923 | once = 1; | |
1924 | } | |
1925 | return; | |
1926 | ||
1927 | } | |
1928 | ||
1929 | #endif /* IFCAP_TSO4 */ | |
1930 | ||
1931 | #ifdef MXGE_NEW_VLAN_API | |
1932 | /* | |
1933 | * We reproduce the software vlan tag insertion from | |
1934 | * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" | |
1935 | * vlan tag insertion. We need to advertise this in order to have the | |
1936 | * vlan interface respect our csum offload flags. | |
1937 | */ | |
1938 | static struct mbuf * | |
1939 | mxge_vlan_tag_insert(struct mbuf *m) | |
1940 | { | |
1941 | struct ether_vlan_header *evl; | |
1942 | ||
1943 | M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); | |
1944 | if (__predict_false(m == NULL)) | |
1945 | return NULL; | |
1946 | if (m->m_len < sizeof(*evl)) { | |
1947 | m = m_pullup(m, sizeof(*evl)); | |
1948 | if (__predict_false(m == NULL)) | |
1949 | return NULL; | |
1950 | } | |
1951 | /* | |
1952 | * Transform the Ethernet header into an Ethernet header | |
1953 | * with 802.1Q encapsulation. | |
1954 | */ | |
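 | /* | |
 | * After M_PREPEND the mbuf begins with 4 fresh bytes followed by | |
 | * the original 14-byte Ethernet header. The bcopy below slides | |
 | * the destination and source MACs (ETHER_HDR_LEN - | |
 | * ETHER_TYPE_LEN = 12 bytes) down over that space, leaving a | |
 | * 4-byte gap before the original ether_type where the TPID and | |
 | * tag are then written. | |
 | */ | |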
1955 | evl = mtod(m, struct ether_vlan_header *); | |
1956 | bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, | |
1957 | (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); | |
1958 | evl->evl_encap_proto = htons(ETHERTYPE_VLAN); | |
1959 | evl->evl_tag = htons(m->m_pkthdr.ether_vtag); | |
1960 | m->m_flags &= ~M_VLANTAG; | |
1961 | return m; | |
1962 | } | |
1963 | #endif /* MXGE_NEW_VLAN_API */ | |
1964 | ||
1965 | static void | |
1966 | mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) | |
1967 | { | |
1968 | mxge_softc_t *sc; | |
1969 | mcp_kreq_ether_send_t *req; | |
1970 | bus_dma_segment_t *seg; | |
1971 | struct mbuf *m_tmp; | |
1972 | struct ifnet *ifp; | |
1973 | mxge_tx_ring_t *tx; | |
1974 | struct ip *ip; | |
1975 | int cnt, cum_len, err, i, idx, odd_flag, ip_off; | |
1976 | uint16_t pseudo_hdr_offset; | |
1977 | uint8_t flags, cksum_offset; | |
1978 | ||
1979 | ||
1980 | sc = ss->sc; | |
1981 | ifp = sc->ifp; | |
1982 | tx = &ss->tx; | |
1983 | ||
1984 | ip_off = sizeof (struct ether_header); | |
1985 | #ifdef MXGE_NEW_VLAN_API | |
1986 | if (m->m_flags & M_VLANTAG) { | |
1987 | m = mxge_vlan_tag_insert(m); | |
1988 | if (__predict_false(m == NULL)) | |
1989 | goto drop; | |
1990 | ip_off += ETHER_VLAN_ENCAP_LEN; | |
1991 | } | |
1992 | #endif | |
1993 | /* (try to) map the frame for DMA */ | |
1994 | idx = tx->req & tx->mask; | |
1995 | err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, | |
1996 | m, tx->seg_list, &cnt, | |
1997 | BUS_DMA_NOWAIT); | |
1998 | if (__predict_false(err == EFBIG)) { | |
1999 | /* Too many segments in the chain. Try | |
2000 | to defrag */ | |
2001 | m_tmp = m_defrag(m, M_NOWAIT); | |
2002 | if (m_tmp == NULL) { | |
2003 | goto drop; | |
2004 | } | |
2005 | ss->tx.defrag++; | |
2006 | m = m_tmp; | |
2007 | err = bus_dmamap_load_mbuf_sg(tx->dmat, | |
2008 | tx->info[idx].map, | |
2009 | m, tx->seg_list, &cnt, | |
2010 | BUS_DMA_NOWAIT); | |
2011 | } | |
2012 | if (__predict_false(err != 0)) { | |
2013 | device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" | |
2014 | " packet len = %d\n", err, m->m_pkthdr.len); | |
2015 | goto drop; | |
2016 | } | |
2017 | bus_dmamap_sync(tx->dmat, tx->info[idx].map, | |
2018 | BUS_DMASYNC_PREWRITE); | |
2019 | tx->info[idx].m = m; | |
2020 | ||
2021 | #if IFCAP_TSO4 | |
2022 | /* TSO is different enough, we handle it in another routine */ | |
2023 | if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { | |
2024 | mxge_encap_tso(ss, m, cnt, ip_off); | |
2025 | return; | |
2026 | } | |
2027 | #endif | |
2028 | ||
2029 | req = tx->req_list; | |
2030 | cksum_offset = 0; | |
2031 | pseudo_hdr_offset = 0; | |
2032 | flags = MXGEFW_FLAGS_NO_TSO; | |
2033 | ||
2034 | /* checksum offloading? */ | |
2035 | if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { | |
2036 | /* ensure ip header is in first mbuf, copy | |
2037 | it to a scratch buffer if not */ | |
2038 | if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { | |
2039 | m_copydata(m, 0, ip_off + sizeof (*ip), | |
2040 | ss->scratch); | |
2041 | ip = (struct ip *)(ss->scratch + ip_off); | |
2042 | } else { | |
2043 | ip = (struct ip *)(mtod(m, char *) + ip_off); | |
2044 | } | |
2045 | cksum_offset = ip_off + (ip->ip_hl << 2); | |
2046 | pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; | |
2047 | pseudo_hdr_offset = htobe16(pseudo_hdr_offset); | |
2048 | req->cksum_offset = cksum_offset; | |
2049 | flags |= MXGEFW_FLAGS_CKSUM; | |
2050 | odd_flag = MXGEFW_FLAGS_ALIGN_ODD; | |
2051 | } else { | |
2052 | odd_flag = 0; | |
2053 | } | |
2054 | if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) | |
2055 | flags |= MXGEFW_FLAGS_SMALL; | |
2056 | ||
2057 | /* convert segments into a request list */ | |
2058 | cum_len = 0; | |
2059 | seg = tx->seg_list; | |
2060 | req->flags = MXGEFW_FLAGS_FIRST; | |
2061 | for (i = 0; i < cnt; i++) { | |
2062 | req->addr_low = | |
2063 | htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); | |
2064 | req->addr_high = | |
2065 | htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); | |
2066 | req->length = htobe16(seg->ds_len); | |
2067 | req->cksum_offset = cksum_offset; | |
2068 | if (cksum_offset > seg->ds_len) | |
2069 | cksum_offset -= seg->ds_len; | |
2070 | else | |
2071 | cksum_offset = 0; | |
2072 | req->pseudo_hdr_offset = pseudo_hdr_offset; | |
2073 | req->pad = 0; /* complete solid 16-byte block */ | |
2074 | req->rdma_count = 1; | |
2075 | req->flags |= flags | ((cum_len & 1) * odd_flag); | |
2076 | cum_len += seg->ds_len; | |
2077 | seg++; | |
2078 | req++; | |
2079 | req->flags = 0; | |
2080 | } | |
2081 | req--; | |
2082 | /* pad runts to 60 bytes */ | |
2083 | if (cum_len < 60) { | |
2084 | req++; | |
2085 | req->addr_low = | |
2086 | htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); | |
2087 | req->addr_high = | |
2088 | htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); | |
2089 | req->length = htobe16(60 - cum_len); | |
2090 | req->cksum_offset = 0; | |
2091 | req->pseudo_hdr_offset = pseudo_hdr_offset; | |
2092 | req->pad = 0; /* complete solid 16-byte block */ | |
2093 | req->rdma_count = 1; | |
2094 | req->flags |= flags | ((cum_len & 1) * odd_flag); | |
2095 | cnt++; | |
2096 | } | |
2097 | ||
2098 | tx->req_list[0].rdma_count = cnt; | |
2099 | #if 0 | |
2100 | /* print what the firmware will see */ | |
2101 | for (i = 0; i < cnt; i++) { | |
2102 | printf("%d: addr: 0x%x 0x%x len:%d pso%d," | |
2103 | "cso:%d, flags:0x%x, rdma:%d\n", | |
2104 | i, (int)ntohl(tx->req_list[i].addr_high), | |
2105 | (int)ntohl(tx->req_list[i].addr_low), | |
2106 | (int)ntohs(tx->req_list[i].length), | |
2107 | (int)ntohs(tx->req_list[i].pseudo_hdr_offset), | |
2108 | tx->req_list[i].cksum_offset, tx->req_list[i].flags, | |
2109 | tx->req_list[i].rdma_count); | |
2110 | } | |
2111 | printf("--------------\n"); | |
2112 | #endif | |
2113 | tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; | |
2114 | mxge_submit_req(tx, tx->req_list, cnt); | |
2115 | #ifdef IFNET_BUF_RING | |
2116 | if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { | |
2117 | /* tell the NIC to start polling this slice */ | |
2118 | *tx->send_go = 1; | |
2119 | tx->queue_active = 1; | |
2120 | tx->activate++; | |
2121 | wmb(); | |
2122 | } | |
2123 | #endif | |
2124 | return; | |
2125 | ||
2126 | drop: | |
2127 | m_freem(m); | |
2128 | ss->oerrors++; | |
2129 | return; | |
2130 | } | |
2131 | ||
2132 | #ifdef IFNET_BUF_RING | |
2133 | static void | |
2134 | mxge_qflush(struct ifnet *ifp) | |
2135 | { | |
2136 | mxge_softc_t *sc = ifp->if_softc; | |
2137 | mxge_tx_ring_t *tx; | |
2138 | struct mbuf *m; | |
2139 | int slice; | |
2140 | ||
2141 | for (slice = 0; slice < sc->num_slices; slice++) { | |
2142 | tx = &sc->ss[slice].tx; | |
2143 | mtx_lock(&tx->mtx); | |
2144 | while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) | |
2145 | m_freem(m); | |
2146 | mtx_unlock(&tx->mtx); | |
2147 | } | |
2148 | if_qflush(ifp); | |
2149 | } | |
2150 | ||
2151 | static inline void | |
2152 | mxge_start_locked(struct mxge_slice_state *ss) | |
2153 | { | |
2154 | mxge_softc_t *sc; | |
2155 | struct mbuf *m; | |
2156 | struct ifnet *ifp; | |
2157 | mxge_tx_ring_t *tx; | |
2158 | ||
2159 | sc = ss->sc; | |
2160 | ifp = sc->ifp; | |
2161 | tx = &ss->tx; | |
2162 | ||
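 | /* | |
 | * tx->req and tx->done are free-running counters, so | |
 | * (tx->req - tx->done) is the in-flight descriptor count and the | |
 | * loop condition checks that more than tx->max_desc slots remain | |
 | * free. E.g. with a 1024-entry ring (mask 1023), req = 1500 and | |
 | * done = 900: 1023 - 600 = 423 free slots, enough for any | |
 | * worst-case packet. | |
 | */ | |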
2163 | while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { | |
2164 | m = drbr_dequeue(ifp, tx->br); | |
2165 | if (m == NULL) { | |
2166 | return; | |
2167 | } | |
2168 | /* let BPF see it */ | |
2169 | BPF_MTAP(ifp, m); | |
2170 | ||
2171 | /* give it to the nic */ | |
2172 | mxge_encap(ss, m); | |
2173 | } | |
2174 | /* ran out of transmit slots */ | |
2175 | if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) | |
2176 | && (!drbr_empty(ifp, tx->br))) { | |
2177 | ss->if_drv_flags |= IFF_DRV_OACTIVE; | |
2178 | tx->stall++; | |
2179 | } | |
2180 | } | |
2181 | ||
2182 | static int | |
2183 | mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) | |
2184 | { | |
2185 | mxge_softc_t *sc; | |
2186 | struct ifnet *ifp; | |
2187 | mxge_tx_ring_t *tx; | |
2188 | int err; | |
2189 | ||
2190 | sc = ss->sc; | |
2191 | ifp = sc->ifp; | |
2192 | tx = &ss->tx; | |
2193 | ||
2194 | if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != | |
2195 | IFF_DRV_RUNNING) { | |
2196 | err = drbr_enqueue(ifp, tx->br, m); | |
2197 | return (err); | |
2198 | } | |
2199 | ||
2200 | if (drbr_empty(ifp, tx->br) && | |
2201 | ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { | |
2202 | /* let BPF see it */ | |
2203 | BPF_MTAP(ifp, m); | |
2204 | /* give it to the nic */ | |
2205 | mxge_encap(ss, m); | |
2206 | } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { | |
2207 | return (err); | |
2208 | } | |
2209 | if (!drbr_empty(ifp, tx->br)) | |
2210 | mxge_start_locked(ss); | |
2211 | return (0); | |
2212 | } | |
2213 | ||
2214 | static int | |
2215 | mxge_transmit(struct ifnet *ifp, struct mbuf *m) | |
2216 | { | |
2217 | mxge_softc_t *sc = ifp->if_softc; | |
2218 | struct mxge_slice_state *ss; | |
2219 | mxge_tx_ring_t *tx; | |
2220 | int err = 0; | |
2221 | int slice; | |
2222 | ||
2223 | slice = m->m_pkthdr.flowid; | |
2224 | slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ | |
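 | /* e.g. with 4 slices, flowid 13 & 3 == 1; the mask is only a | |
 | cheap modulo because num_slices is a power of 2 */ | |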
2225 | ||
2226 | ss = &sc->ss[slice]; | |
2227 | tx = &ss->tx; | |
2228 | ||
2229 | if (mtx_trylock(&tx->mtx)) { | |
2230 | err = mxge_transmit_locked(ss, m); | |
2231 | mtx_unlock(&tx->mtx); | |
2232 | } else { | |
2233 | err = drbr_enqueue(ifp, tx->br, m); | |
2234 | } | |
2235 | ||
2236 | return (err); | |
2237 | } | |
2238 | ||
2239 | #else | |
2240 | ||
2241 | static inline void | |
2242 | mxge_start_locked(struct mxge_slice_state *ss) | |
2243 | { | |
2244 | mxge_softc_t *sc; | |
2245 | struct mbuf *m; | |
2246 | struct ifnet *ifp; | |
2247 | mxge_tx_ring_t *tx; | |
2248 | ||
2249 | sc = ss->sc; | |
2250 | ifp = sc->ifp; | |
2251 | tx = &ss->tx; | |
2252 | while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { | |
2253 | IFQ_DRV_DEQUEUE(&ifp->if_snd, m); | |
2254 | if (m == NULL) { | |
2255 | return; | |
2256 | } | |
2257 | /* let BPF see it */ | |
2258 | BPF_MTAP(ifp, m); | |
2259 | ||
2260 | /* give it to the nic */ | |
2261 | mxge_encap(ss, m); | |
2262 | } | |
2263 | /* ran out of transmit slots */ | |
2264 | if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { | |
2265 | sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; | |
2266 | tx->stall++; | |
2267 | } | |
2268 | } | |
2269 | #endif | |
2270 | static void | |
2271 | mxge_start(struct ifnet *ifp) | |
2272 | { | |
2273 | mxge_softc_t *sc = ifp->if_softc; | |
2274 | struct mxge_slice_state *ss; | |
2275 | ||
2276 | /* only use the first slice for now */ | |
2277 | ss = &sc->ss[0]; | |
2278 | mtx_lock(&ss->tx.mtx); | |
2279 | mxge_start_locked(ss); | |
2280 | mtx_unlock(&ss->tx.mtx); | |
2281 | } | |
2282 | ||
2283 | /* | |
2284 | * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy | |
2285 | * at most 32 bytes at a time, so as to avoid involving the software | |
2286 | * pio handler in the nic. We re-write the first segment's low | |
2287 | * DMA address to mark it valid only after we write the entire chunk | |
2288 | * in a burst | |
2289 | */ | |
2290 | static inline void | |
2291 | mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, | |
2292 | mcp_kreq_ether_recv_t *src) | |
2293 | { | |
2294 | uint32_t low; | |
2295 | ||
2296 | low = src->addr_low; | |
2297 | src->addr_low = 0xffffffff; | |
2298 | mxge_pio_copy(dst, src, 4 * sizeof (*src)); | |
2299 | wmb(); | |
2300 | mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); | |
2301 | wmb(); | |
2302 | src->addr_low = low; | |
2303 | dst->addr_low = low; | |
2304 | wmb(); | |
2305 | } | |
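 | /* | |
 | * Assuming mcp_kreq_ether_recv_t is the 8-byte | |
 | * addr_high/addr_low pair, the 8 descriptors span 64 bytes and | |
 | * go out as two 32-byte bursts; parking addr_low at 0xffffffff | |
 | * keeps the first descriptor invalid until the final store | |
 | * publishes the real address. | |
 | */ | |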
2306 | ||
2307 | static int | |
2308 | mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) | |
2309 | { | |
2310 | bus_dma_segment_t seg; | |
2311 | struct mbuf *m; | |
2312 | mxge_rx_ring_t *rx = &ss->rx_small; | |
2313 | int cnt, err; | |
2314 | ||
2315 | m = m_gethdr(M_DONTWAIT, MT_DATA); | |
2316 | if (m == NULL) { | |
2317 | rx->alloc_fail++; | |
2318 | err = ENOBUFS; | |
2319 | goto done; | |
2320 | } | |
2321 | m->m_len = MHLEN; | |
2322 | err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, | |
2323 | &seg, &cnt, BUS_DMA_NOWAIT); | |
2324 | if (err != 0) { | |
2325 | m_free(m); | |
2326 | goto done; | |
2327 | } | |
2328 | rx->info[idx].m = m; | |
2329 | rx->shadow[idx].addr_low = | |
2330 | htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); | |
2331 | rx->shadow[idx].addr_high = | |
2332 | htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); | |
2333 | ||
2334 | done: | |
2335 | if ((idx & 7) == 7) | |
2336 | mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); | |
2337 | return err; | |
2338 | } | |
2339 | ||
2340 | static int | |
2341 | mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) | |
2342 | { | |
2343 | bus_dma_segment_t seg[3]; | |
2344 | struct mbuf *m; | |
2345 | mxge_rx_ring_t *rx = &ss->rx_big; | |
2346 | int cnt, err, i; | |
2347 | ||
2348 | if (rx->cl_size == MCLBYTES) | |
2349 | m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); | |
2350 | else | |
2351 | m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); | |
2352 | if (m == NULL) { | |
2353 | rx->alloc_fail++; | |
2354 | err = ENOBUFS; | |
2355 | goto done; | |
2356 | } | |
2357 | m->m_len = rx->mlen; | |
2358 | err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, | |
2359 | seg, &cnt, BUS_DMA_NOWAIT); | |
2360 | if (err != 0) { | |
2361 | m_free(m); | |
2362 | goto done; | |
2363 | } | |
2364 | rx->info[idx].m = m; | |
2365 | rx->shadow[idx].addr_low = | |
2366 | htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); | |
2367 | rx->shadow[idx].addr_high = | |
2368 | htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); | |
2369 | ||
2370 | #if MXGE_VIRT_JUMBOS | |
2371 | for (i = 1; i < cnt; i++) { | |
2372 | rx->shadow[idx + i].addr_low = | |
2373 | htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); | |
2374 | rx->shadow[idx + i].addr_high = | |
2375 | htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); | |
2376 | } | |
2377 | #endif | |
2378 | ||
2379 | done: | |
2380 | for (i = 0; i < rx->nbufs; i++) { | |
2381 | if ((idx & 7) == 7) { | |
2382 | mxge_submit_8rx(&rx->lanai[idx - 7], | |
2383 | &rx->shadow[idx - 7]); | |
2384 | } | |
2385 | idx++; | |
2386 | } | |
2387 | return err; | |
2388 | } | |
2389 | ||
2390 | /* | |
2391 | * Myri10GE hardware checksums are not valid if the sender | |
2392 | * padded the frame with non-zero padding. This is because | |
2393 | * the firmware just does a simple 16-bit 1s complement | |
2394 | * checksum across the entire frame, excluding the first 14 | |
2395 | * bytes. It is best to simply check the checksum and | |
2396 | * tell the stack about it only if the checksum is good | |
2397 | */ | |
2398 | ||
2399 | static inline uint16_t | |
2400 | mxge_rx_csum(struct mbuf *m, int csum) | |
2401 | { | |
2402 | struct ether_header *eh; | |
2403 | struct ip *ip; | |
2404 | uint16_t c; | |
2405 | ||
2406 | eh = mtod(m, struct ether_header *); | |
2407 | ||
2408 | /* only deal with IPv4 TCP & UDP for now */ | |
2409 | if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) | |
2410 | return 1; | |
2411 | ip = (struct ip *)(eh + 1); | |
2412 | if (__predict_false(ip->ip_p != IPPROTO_TCP && | |
2413 | ip->ip_p != IPPROTO_UDP)) | |
2414 | return 1; | |
2415 | #ifdef INET | |
2416 | c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, | |
2417 | htonl(ntohs(csum) + ntohs(ip->ip_len) - | |
2418 | (ip->ip_hl << 2) + ip->ip_p)); | |
2419 | #else | |
2420 | c = 1; | |
2421 | #endif | |
2422 | c ^= 0xffff; | |
2423 | return (c); | |
2424 | } | |
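 | /* | |
 | * The caller treats a return of 0 as "checksum good": the | |
 | * pseudo-header terms folded in by in_pseudo() cancel against a | |
 | * correct embedded TCP/UDP checksum, so a valid frame sums to | |
 | * 0xffff before the final c ^= 0xffff. | |
 | */ | |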
2425 | ||
2426 | static void | |
2427 | mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) | |
2428 | { | |
2429 | struct ether_vlan_header *evl; | |
2430 | struct ether_header *eh; | |
2431 | uint32_t partial; | |
2432 | ||
2433 | evl = mtod(m, struct ether_vlan_header *); | |
2434 | eh = mtod(m, struct ether_header *); | |
2435 | ||
2436 | /* | |
2437 | * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes | |
2438 | * after what the firmware thought was the end of the ethernet | |
2439 | * header. | |
2440 | */ | |
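 | /* | |
 | * This is one's-complement subtraction: adding ~partial | |
 | * subtracts the 4 tag bytes, the (*csum < ~partial) term re-adds | |
 | * the end-around carry, and the two fold steps compress the | |
 | * 32-bit sum back into 16 bits. | |
 | */ | |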
2441 | ||
2442 | /* put checksum into host byte order */ | |
2443 | *csum = ntohs(*csum); | |
2444 | partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); | |
2445 | (*csum) += ~partial; | |
2446 | (*csum) += ((*csum) < ~partial); | |
2447 | (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); | |
2448 | (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); | |
2449 | ||
2450 | /* restore checksum to network byte order; | |
2451 | later consumers expect this */ | |
2452 | *csum = htons(*csum); | |
2453 | ||
2454 | /* save the tag */ | |
2455 | #ifdef MXGE_NEW_VLAN_API | |
2456 | m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); | |
2457 | #else | |
2458 | { | |
2459 | struct m_tag *mtag; | |
2460 | mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), | |
2461 | M_NOWAIT); | |
2462 | if (mtag == NULL) | |
2463 | return; | |
2464 | VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); | |
2465 | m_tag_prepend(m, mtag); | |
2466 | } | |
2467 | ||
2468 | #endif | |
2469 | m->m_flags |= M_VLANTAG; | |
2470 | ||
2471 | /* | |
2472 | * Remove the 802.1q header by copying the Ethernet | |
2473 | * addresses over it and adjusting the beginning of | |
2474 | * the data in the mbuf. The encapsulated Ethernet | |
2475 | * type field is already in place. | |
2476 | */ | |
2477 | bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, | |
2478 | ETHER_HDR_LEN - ETHER_TYPE_LEN); | |
2479 | m_adj(m, ETHER_VLAN_ENCAP_LEN); | |
2480 | } | |
2481 | ||
2482 | ||
2483 | static inline void | |
2484 | mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) | |
2485 | { | |
2486 | mxge_softc_t *sc; | |
2487 | struct ifnet *ifp; | |
2488 | struct mbuf *m; | |
2489 | struct ether_header *eh; | |
2490 | mxge_rx_ring_t *rx; | |
2491 | bus_dmamap_t old_map; | |
2492 | int idx; | |
2493 | uint16_t tcpudp_csum; | |
2494 | ||
2495 | sc = ss->sc; | |
2496 | ifp = sc->ifp; | |
2497 | rx = &ss->rx_big; | |
2498 | idx = rx->cnt & rx->mask; | |
2499 | rx->cnt += rx->nbufs; | |
2500 | /* save a pointer to the received mbuf */ | |
2501 | m = rx->info[idx].m; | |
2502 | /* try to replace the received mbuf */ | |
2503 | if (mxge_get_buf_big(ss, rx->extra_map, idx)) { | |
2504 | /* drop the frame -- the old mbuf is re-cycled */ | |
2505 | ifp->if_ierrors++; | |
2506 | return; | |
2507 | } | |
2508 | ||
2509 | /* unmap the received buffer */ | |
2510 | old_map = rx->info[idx].map; | |
2511 | bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); | |
2512 | bus_dmamap_unload(rx->dmat, old_map); | |
2513 | ||
2514 | /* swap the bus_dmamap_t's */ | |
2515 | rx->info[idx].map = rx->extra_map; | |
2516 | rx->extra_map = old_map; | |
2517 | ||
2518 | /* mcp implicitly skips 1st 2 bytes so that packet is properly | |
2519 | * aligned */ | |
2520 | m->m_data += MXGEFW_PAD; | |
2521 | ||
2522 | m->m_pkthdr.rcvif = ifp; | |
2523 | m->m_len = m->m_pkthdr.len = len; | |
2524 | ss->ipackets++; | |
2525 | eh = mtod(m, struct ether_header *); | |
2526 | if (eh->ether_type == htons(ETHERTYPE_VLAN)) { | |
2527 | mxge_vlan_tag_remove(m, &csum); | |
2528 | } | |
2529 | /* if the checksum is valid, mark it in the mbuf header */ | |
2530 | if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { | |
2531 | if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) | |
2532 | return; | |
2533 | /* otherwise, it was a UDP frame, or a TCP frame which | |
2534 | we could not do LRO on. Tell the stack that the | |
2535 | checksum is good */ | |
2536 | m->m_pkthdr.csum_data = 0xffff; | |
2537 | m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; | |
2538 | } | |
2539 | /* flowid only valid if RSS hashing is enabled */ | |
2540 | if (sc->num_slices > 1) { | |
2541 | m->m_pkthdr.flowid = (ss - sc->ss); | |
2542 | m->m_flags |= M_FLOWID; | |
2543 | } | |
2544 | /* pass the frame up the stack */ | |
2545 | (*ifp->if_input)(ifp, m); | |
2546 | } | |
2547 | ||
2548 | static inline void | |
2549 | mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) | |
2550 | { | |
2551 | mxge_softc_t *sc; | |
2552 | struct ifnet *ifp; | |
2553 | struct ether_header *eh; | |
2554 | struct mbuf *m; | |
2555 | mxge_rx_ring_t *rx; | |
2556 | bus_dmamap_t old_map; | |
2557 | int idx; | |
2558 | uint16_t tcpudp_csum; | |
2559 | ||
2560 | sc = ss->sc; | |
2561 | ifp = sc->ifp; | |
2562 | rx = &ss->rx_small; | |
2563 | idx = rx->cnt & rx->mask; | |
2564 | rx->cnt++; | |
2565 | /* save a pointer to the received mbuf */ | |
2566 | m = rx->info[idx].m; | |
2567 | /* try to replace the received mbuf */ | |
2568 | if (mxge_get_buf_small(ss, rx->extra_map, idx)) { | |
2569 | /* drop the frame -- the old mbuf is re-cycled */ | |
2570 | ifp->if_ierrors++; | |
2571 | return; | |
2572 | } | |
2573 | ||
2574 | /* unmap the received buffer */ | |
2575 | old_map = rx->info[idx].map; | |
2576 | bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); | |
2577 | bus_dmamap_unload(rx->dmat, old_map); | |
2578 | ||
2579 | /* swap the bus_dmamap_t's */ | |
2580 | rx->info[idx].map = rx->extra_map; | |
2581 | rx->extra_map = old_map; | |
2582 | ||
2583 | /* mcp implicitly skips 1st 2 bytes so that packet is properly | |
2584 | * aligned */ | |
2585 | m->m_data += MXGEFW_PAD; | |
2586 | ||
2587 | m->m_pkthdr.rcvif = ifp; | |
2588 | m->m_len = m->m_pkthdr.len = len; | |
2589 | ss->ipackets++; | |
2590 | eh = mtod(m, struct ether_header *); | |
2591 | if (eh->ether_type == htons(ETHERTYPE_VLAN)) { | |
2592 | mxge_vlan_tag_remove(m, &csum); | |
2593 | } | |
2594 | /* if the checksum is valid, mark it in the mbuf header */ | |
2595 | if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { | |
2596 | if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) | |
2597 | return; | |
2598 | /* otherwise, it was a UDP frame, or a TCP frame which | |
2599 | we could not do LRO on. Tell the stack that the | |
2600 | checksum is good */ | |
2601 | m->m_pkthdr.csum_data = 0xffff; | |
2602 | m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; | |
2603 | } | |
2604 | /* flowid only valid if RSS hashing is enabled */ | |
2605 | if (sc->num_slices > 1) { | |
2606 | m->m_pkthdr.flowid = (ss - sc->ss); | |
2607 | m->m_flags |= M_FLOWID; | |
2608 | } | |
2609 | /* pass the frame up the stack */ | |
2610 | (*ifp->if_input)(ifp, m); | |
2611 | } | |
2612 | ||
2613 | static inline void | |
2614 | mxge_clean_rx_done(struct mxge_slice_state *ss) | |
2615 | { | |
2616 | mxge_rx_done_t *rx_done = &ss->rx_done; | |
2617 | int limit = 0; | |
2618 | uint16_t length; | |
2619 | uint16_t checksum; | |
2620 | ||
2621 | ||
2622 | while (rx_done->entry[rx_done->idx].length != 0) { | |
2623 | length = ntohs(rx_done->entry[rx_done->idx].length); | |
2624 | rx_done->entry[rx_done->idx].length = 0; | |
2625 | checksum = rx_done->entry[rx_done->idx].checksum; | |
2626 | if (length <= (MHLEN - MXGEFW_PAD)) | |
2627 | mxge_rx_done_small(ss, length, checksum); | |
2628 | else | |
2629 | mxge_rx_done_big(ss, length, checksum); | |
2630 | rx_done->cnt++; | |
2631 | rx_done->idx = rx_done->cnt & rx_done->mask; | |
2632 | ||
2633 | /* limit potential for livelock */ | |
2634 | if (__predict_false(++limit > rx_done->mask / 2)) | |
2635 | break; | |
2636 | } | |
2637 | #ifdef INET | |
2638 | while (!SLIST_EMPTY(&ss->lro_active)) { | |
2639 | struct lro_entry *lro = SLIST_FIRST(&ss->lro_active); | |
2640 | SLIST_REMOVE_HEAD(&ss->lro_active, next); | |
2641 | mxge_lro_flush(ss, lro); | |
2642 | } | |
2643 | #endif | |
2644 | } | |
2645 | ||
2646 | ||
2647 | static inline void | |
2648 | mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) | |
2649 | { | |
2650 | struct ifnet *ifp; | |
2651 | mxge_tx_ring_t *tx; | |
2652 | struct mbuf *m; | |
2653 | bus_dmamap_t map; | |
2654 | int idx; | |
2655 | int *flags; | |
2656 | ||
2657 | tx = &ss->tx; | |
2658 | ifp = ss->sc->ifp; | |
2659 | while (tx->pkt_done != mcp_idx) { | |
2660 | idx = tx->done & tx->mask; | |
2661 | tx->done++; | |
2662 | m = tx->info[idx].m; | |
2663 | /* mbuf and DMA map only attached to the first | |
2664 | segment per-mbuf */ | |
2665 | if (m != NULL) { | |
2666 | ss->obytes += m->m_pkthdr.len; | |
2667 | if (m->m_flags & M_MCAST) | |
2668 | ss->omcasts++; | |
2669 | ss->opackets++; | |
2670 | tx->info[idx].m = NULL; | |
2671 | map = tx->info[idx].map; | |
2672 | bus_dmamap_unload(tx->dmat, map); | |
2673 | m_freem(m); | |
2674 | } | |
2675 | if (tx->info[idx].flag) { | |
2676 | tx->info[idx].flag = 0; | |
2677 | tx->pkt_done++; | |
2678 | } | |
2679 | } | |
2680 | ||
2681 | /* If we have space, clear IFF_OACTIVE to tell the stack that | |
2682 | it's OK to send packets */ | |
2683 | #ifdef IFNET_BUF_RING | |
2684 | flags = &ss->if_drv_flags; | |
2685 | #else | |
2686 | flags = &ifp->if_drv_flags; | |
2687 | #endif | |
2688 | mtx_lock(&ss->tx.mtx); | |
2689 | if ((*flags) & IFF_DRV_OACTIVE && | |
2690 | tx->req - tx->done < (tx->mask + 1)/4) { | |
2691 | *(flags) &= ~IFF_DRV_OACTIVE; | |
2692 | ss->tx.wake++; | |
2693 | mxge_start_locked(ss); | |
2694 | } | |
2695 | #ifdef IFNET_BUF_RING | |
2696 | if (ss->sc->num_slices > 1) { | |
2697 | /* let the NIC stop polling this queue, since there | |
2698 | * are no more transmits pending */ | |
2699 | if (tx->req == tx->done) { | |
2700 | *tx->send_stop = 1; | |
2701 | tx->queue_active = 0; | |
2702 | tx->deactivate++; | |
2703 | wmb(); | |
2704 | } | |
2705 | } | |
2706 | #endif | |
2707 | mtx_unlock(&ss->tx.mtx); | |
2708 | ||
2709 | } | |
2710 | ||
2711 | static struct mxge_media_type mxge_xfp_media_types[] = | |
2712 | { | |
2713 | {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, | |
2714 | {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, | |
2715 | {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, | |
2716 | {0, (1 << 5), "10GBASE-ER"}, | |
2717 | {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, | |
2718 | {0, (1 << 3), "10GBASE-SW"}, | |
2719 | {0, (1 << 2), "10GBASE-LW"}, | |
2720 | {0, (1 << 1), "10GBASE-EW"}, | |
2721 | {0, (1 << 0), "Reserved"} | |
2722 | }; | |
2723 | static struct mxge_media_type mxge_sfp_media_types[] = | |
2724 | { | |
2725 | {0, (1 << 7), "Reserved"}, | |
2726 | {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, | |
2727 | {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, | |
2728 | {IFM_10G_SR, (1 << 4), "10GBASE-SR"} | |
2729 | }; | |
2730 | ||
2731 | static void | |
2732 | mxge_set_media(mxge_softc_t *sc, int type) | |
2733 | { | |
2734 | sc->media_flags |= type; | |
2735 | ifmedia_add(&sc->media, sc->media_flags, 0, NULL); | |
2736 | ifmedia_set(&sc->media, sc->media_flags); | |
2737 | } | |
2738 | ||
2739 | ||
2740 | /* | |
2741 | * Determine the media type for a NIC. Some XFPs will identify | |
2742 | * themselves only when their link is up, so this is initiated via a | |
2743 | * link up interrupt. However, this can potentially take up to | |
2744 | * several milliseconds, so it is run via the watchdog routine, rather | |
2745 | * than in the interrupt handler itself. This need only be done | |
2746 | * once, not each time the link is up. | |
2747 | */ | |
2748 | static void | |
2749 | mxge_media_probe(mxge_softc_t *sc) | |
2750 | { | |
2751 | mxge_cmd_t cmd; | |
2752 | char *cage_type; | |
2753 | char *ptr; | |
2754 | struct mxge_media_type *mxge_media_types = NULL; | |
2755 | int i, err, ms, mxge_media_type_entries; | |
2756 | uint32_t byte; | |
2757 | ||
2758 | sc->need_media_probe = 0; | |
2759 | ||
2760 | /* if we've already set a media type, we're done */ | |
2761 | if (sc->media_flags != (IFM_ETHER | IFM_AUTO)) | |
2762 | return; | |
2763 | ||
2764 | /* | |
2765 | * parse the product code to determine the interface type | |
2766 | * (CX4, XFP, Quad Ribbon Fiber) by looking at the character | |
2767 | * after the 3rd dash in the driver's cached copy of the | |
2768 | * EEPROM's product code string. | |
2769 | */ | |
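 | /* | |
 | * For illustration, a (hypothetical) code of the form | |
 | * 10G-PCIE-8A-R would yield 'R' here, selecting the XFP table | |
 | * below; -C, -Q and -S codes are handled analogously. | |
 | */ | |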
2770 | ptr = sc->product_code_string; | |
2771 | if (ptr == NULL) { | |
2772 | device_printf(sc->dev, "Missing product code\n"); | |
 | return; | |
2773 | } | |
2774 | ||
2775 | for (i = 0; i < 3; i++, ptr++) { | |
2776 | ptr = index(ptr, '-'); | |
2777 | if (ptr == NULL) { | |
2778 | device_printf(sc->dev, | |
2779 | "only %d dashes in PC?!?\n", i); | |
2780 | return; | |
2781 | } | |
2782 | } | |
2783 | if (*ptr == 'C') { | |
2784 | /* -C is CX4 */ | |
2785 | mxge_set_media(sc, IFM_10G_CX4); | |
2786 | return; | |
2787 | } | |
2788 | else if (*ptr == 'Q') { | |
2789 | /* -Q is Quad Ribbon Fiber */ | |
2790 | device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); | |
2791 | /* FreeBSD has no media type for Quad ribbon fiber */ | |
2792 | return; | |
2793 | } | |
2794 | ||
2795 | if (*ptr == 'R') { | |
2796 | /* -R is XFP */ | |
2797 | mxge_media_types = mxge_xfp_media_types; | |
2798 | mxge_media_type_entries = | |
2799 | sizeof (mxge_xfp_media_types) / | |
2800 | sizeof (mxge_xfp_media_types[0]); | |
2801 | byte = MXGE_XFP_COMPLIANCE_BYTE; | |
2802 | cage_type = "XFP"; | |
2803 | } | |
2804 | ||
2805 | if (*ptr == 'S' || *(ptr + 1) == 'S') { | |
2806 | /* -S or -2S is SFP+ */ | |
2807 | mxge_media_types = mxge_sfp_media_types; | |
2808 | mxge_media_type_entries = | |
2809 | sizeof (mxge_sfp_media_types) / | |
2810 | sizeof (mxge_sfp_media_types[0]); | |
2811 | cage_type = "SFP+"; | |
2812 | byte = 3; | |
2813 | } | |
2814 | ||
2815 | if (mxge_media_types == NULL) { | |
2816 | device_printf(sc->dev, "Unknown media type: %c\n", *ptr); | |
2817 | return; | |
2818 | } | |
2819 | ||
2820 | /* | |
2821 | * At this point we know the NIC has an XFP or SFP+ cage, so now | |
2822 | * we try to determine what is in the cage by using the | |
2823 | * firmware's I2C commands to read the module's 10GbE compliance | |
2824 | * register. We read just one byte, which may take over | |
2825 | * a millisecond | |
2826 | */ | |
2827 | ||
2828 | cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ | |
2829 | cmd.data1 = byte; | |
2830 | err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); | |
2831 | if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { | |
2832 | device_printf(sc->dev, "failed to read XFP\n"); | |
2833 | } | |
2834 | if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { | |
2835 | device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); | |
2836 | } | |
2837 | if (err != MXGEFW_CMD_OK) { | |
2838 | return; | |
2839 | } | |
2840 | ||
2841 | /* now we wait for the data to be cached */ | |
2842 | cmd.data0 = byte; | |
2843 | err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); | |
2844 | for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { | |
2845 | DELAY(1000); | |
2846 | cmd.data0 = byte; | |
2847 | err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); | |
2848 | } | |
2849 | if (err != MXGEFW_CMD_OK) { | |
2850 | device_printf(sc->dev, "failed to read %s (%d, %dms)\n", | |
2851 | cage_type, err, ms); | |
2852 | return; | |
2853 | } | |
2854 | ||
2855 | if (cmd.data0 == mxge_media_types[0].bitmask) { | |
2856 | if (mxge_verbose) | |
2857 | device_printf(sc->dev, "%s:%s\n", cage_type, | |
2858 | mxge_media_types[0].name); | |
2859 | mxge_set_media(sc, IFM_10G_CX4); | |
2860 | return; | |
2861 | } | |
2862 | for (i = 1; i < mxge_media_type_entries; i++) { | |
2863 | if (cmd.data0 & mxge_media_types[i].bitmask) { | |
2864 | if (mxge_verbose) | |
2865 | device_printf(sc->dev, "%s:%s\n", | |
2866 | cage_type, | |
2867 | mxge_media_types[i].name); | |
2868 | ||
2869 | mxge_set_media(sc, mxge_media_types[i].flag); | |
2870 | return; | |
2871 | } | |
2872 | } | |
2873 | device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type, | |
2874 | cmd.data0); | |
2875 | ||
2876 | return; | |
2877 | } | |
2878 | ||
2879 | static void | |
2880 | mxge_intr(void *arg) | |
2881 | { | |
2882 | struct mxge_slice_state *ss = arg; | |
2883 | mxge_softc_t *sc = ss->sc; | |
2884 | mcp_irq_data_t *stats = ss->fw_stats; | |
2885 | mxge_tx_ring_t *tx = &ss->tx; | |
2886 | mxge_rx_done_t *rx_done = &ss->rx_done; | |
2887 | uint32_t send_done_count; | |
2888 | uint8_t valid; | |
2889 | ||
2890 | ||
2891 | #ifndef IFNET_BUF_RING | |
2892 | /* an interrupt on a non-zero slice is implicitly valid | |
2893 | since MSI-X irqs are not shared */ | |
2894 | if (ss != sc->ss) { | |
2895 | mxge_clean_rx_done(ss); | |
2896 | *ss->irq_claim = be32toh(3); | |
2897 | return; | |
2898 | } | |
2899 | #endif | |
2900 | ||
2901 | /* make sure the DMA has finished */ | |
2902 | if (!stats->valid) { | |
2903 | return; | |
2904 | } | |
2905 | valid = stats->valid; | |
2906 | ||
2907 | if (sc->legacy_irq) { | |
2908 | /* lower legacy IRQ */ | |
2909 | *sc->irq_deassert = 0; | |
2910 | if (!mxge_deassert_wait) | |
2911 | /* don't wait for conf. that irq is low */ | |
2912 | stats->valid = 0; | |
2913 | } else { | |
2914 | stats->valid = 0; | |
2915 | } | |
2916 | ||
2917 | /* loop while waiting for legacy irq deassertion */ | |
2918 | do { | |
2919 | /* check for transmit completes and receives */ | |
2920 | send_done_count = be32toh(stats->send_done_count); | |
2921 | while ((send_done_count != tx->pkt_done) || | |
2922 | (rx_done->entry[rx_done->idx].length != 0)) { | |
2923 | if (send_done_count != tx->pkt_done) | |
2924 | mxge_tx_done(ss, (int)send_done_count); | |
2925 | mxge_clean_rx_done(ss); | |
2926 | send_done_count = be32toh(stats->send_done_count); | |
2927 | } | |
2928 | if (sc->legacy_irq && mxge_deassert_wait) | |
2929 | wmb(); | |
2930 | } while (*((volatile uint8_t *) &stats->valid)); | |
2931 | ||
2932 | /* fw link & error stats meaningful only on the first slice */ | |
2933 | if (__predict_false((ss == sc->ss) && stats->stats_updated)) { | |
2934 | if (sc->link_state != stats->link_up) { | |
2935 | sc->link_state = stats->link_up; | |
2936 | if (sc->link_state) { | |
2937 | if_link_state_change(sc->ifp, LINK_STATE_UP); | |
2938 | if (mxge_verbose) | |
2939 | device_printf(sc->dev, "link up\n"); | |
2940 | } else { | |
2941 | if_link_state_change(sc->ifp, LINK_STATE_DOWN); | |
2942 | if (mxge_verbose) | |
2943 | device_printf(sc->dev, "link down\n"); | |
2944 | } | |
2945 | sc->need_media_probe = 1; | |
2946 | } | |
2947 | if (sc->rdma_tags_available != | |
2948 | be32toh(stats->rdma_tags_available)) { | |
2949 | sc->rdma_tags_available = | |
2950 | be32toh(stats->rdma_tags_available); | |
2951 | device_printf(sc->dev, "RDMA timed out! %d tags " | |
2952 | "left\n", sc->rdma_tags_available); | |
2953 | } | |
2954 | ||
2955 | if (stats->link_down) { | |
2956 | sc->down_cnt += stats->link_down; | |
2957 | sc->link_state = 0; | |
2958 | if_link_state_change(sc->ifp, LINK_STATE_DOWN); | |
2959 | } | |
2960 | } | |
2961 | ||
2962 | /* check to see if we have rx token to pass back */ | |
2963 | if (valid & 0x1) | |
2964 | *ss->irq_claim = be32toh(3); | |
2965 | *(ss->irq_claim + 1) = be32toh(3); | |
2966 | } | |
2967 | ||
2968 | static void | |
2969 | mxge_init(void *arg) | |
2970 | { | |
2971 | } | |
2972 | ||
2973 | ||
2974 | ||
2975 | static void | |
2976 | mxge_free_slice_mbufs(struct mxge_slice_state *ss) | |
2977 | { | |
2978 | struct lro_entry *lro_entry; | |
2979 | int i; | |
2980 | ||
2981 | while (!SLIST_EMPTY(&ss->lro_free)) { | |
2982 | lro_entry = SLIST_FIRST(&ss->lro_free); | |
2983 | SLIST_REMOVE_HEAD(&ss->lro_free, next); | |
2984 | free(lro_entry, M_DEVBUF); | |
2985 | } | |
2986 | ||
2987 | for (i = 0; i <= ss->rx_big.mask; i++) { | |
2988 | if (ss->rx_big.info[i].m == NULL) | |
2989 | continue; | |
2990 | bus_dmamap_unload(ss->rx_big.dmat, | |
2991 | ss->rx_big.info[i].map); | |
2992 | m_freem(ss->rx_big.info[i].m); | |
2993 | ss->rx_big.info[i].m = NULL; | |
2994 | } | |
2995 | ||
2996 | for (i = 0; i <= ss->rx_small.mask; i++) { | |
2997 | if (ss->rx_small.info[i].m == NULL) | |
2998 | continue; | |
2999 | bus_dmamap_unload(ss->rx_small.dmat, | |
3000 | ss->rx_small.info[i].map); | |
3001 | m_freem(ss->rx_small.info[i].m); | |
3002 | ss->rx_small.info[i].m = NULL; | |
3003 | } | |
3004 | ||
3005 | /* transmit ring used only on the first slice */ | |
3006 | if (ss->tx.info == NULL) | |
3007 | return; | |
3008 | ||
3009 | for (i = 0; i <= ss->tx.mask; i++) { | |
3010 | ss->tx.info[i].flag = 0; | |
3011 | if (ss->tx.info[i].m == NULL) | |
3012 | continue; | |
3013 | bus_dmamap_unload(ss->tx.dmat, | |
3014 | ss->tx.info[i].map); | |
3015 | m_freem(ss->tx.info[i].m); | |
3016 | ss->tx.info[i].m = NULL; | |
3017 | } | |
3018 | } | |
3019 | ||
3020 | static void | |
3021 | mxge_free_mbufs(mxge_softc_t *sc) | |
3022 | { | |
3023 | int slice; | |
3024 | ||
3025 | for (slice = 0; slice < sc->num_slices; slice++) | |
3026 | mxge_free_slice_mbufs(&sc->ss[slice]); | |
3027 | } | |
3028 | ||
3029 | static void | |
3030 | mxge_free_slice_rings(struct mxge_slice_state *ss) | |
3031 | { | |
3032 | int i; | |
3033 | ||
3034 | ||
3035 | if (ss->rx_done.entry != NULL) | |
3036 | mxge_dma_free(&ss->rx_done.dma); | |
3037 | ss->rx_done.entry = NULL; | |
3038 | ||
3039 | if (ss->tx.req_bytes != NULL) | |
3040 | free(ss->tx.req_bytes, M_DEVBUF); | |
3041 | ss->tx.req_bytes = NULL; | |
3042 | ||
3043 | if (ss->tx.seg_list != NULL) | |
3044 | free(ss->tx.seg_list, M_DEVBUF); | |
3045 | ss->tx.seg_list = NULL; | |
3046 | ||
3047 | if (ss->rx_small.shadow != NULL) | |
3048 | free(ss->rx_small.shadow, M_DEVBUF); | |
3049 | ss->rx_small.shadow = NULL; | |
3050 | ||
3051 | if (ss->rx_big.shadow != NULL) | |
3052 | free(ss->rx_big.shadow, M_DEVBUF); | |
3053 | ss->rx_big.shadow = NULL; | |
3054 | ||
3055 | if (ss->tx.info != NULL) { | |
3056 | if (ss->tx.dmat != NULL) { | |
3057 | for (i = 0; i <= ss->tx.mask; i++) { | |
3058 | bus_dmamap_destroy(ss->tx.dmat, | |
3059 | ss->tx.info[i].map); | |
3060 | } | |
3061 | bus_dma_tag_destroy(ss->tx.dmat); | |
3062 | } | |
3063 | free(ss->tx.info, M_DEVBUF); | |
3064 | } | |
3065 | ss->tx.info = NULL; | |
3066 | ||
3067 | if (ss->rx_small.info != NULL) { | |
3068 | if (ss->rx_small.dmat != NULL) { | |
3069 | for (i = 0; i <= ss->rx_small.mask; i++) { | |
3070 | bus_dmamap_destroy(ss->rx_small.dmat, | |
3071 | ss->rx_small.info[i].map); | |
3072 | } | |
3073 | bus_dmamap_destroy(ss->rx_small.dmat, | |
3074 | ss->rx_small.extra_map); | |
3075 | bus_dma_tag_destroy(ss->rx_small.dmat); | |
3076 | } | |
3077 | free(ss->rx_small.info, M_DEVBUF); | |
3078 | } | |
3079 | ss->rx_small.info = NULL; | |
3080 | ||
3081 | if (ss->rx_big.info != NULL) { | |
3082 | if (ss->rx_big.dmat != NULL) { | |
3083 | for (i = 0; i <= ss->rx_big.mask; i++) { | |
3084 | bus_dmamap_destroy(ss->rx_big.dmat, | |
3085 | ss->rx_big.info[i].map); | |
3086 | } | |
3087 | bus_dmamap_destroy(ss->rx_big.dmat, | |
3088 | ss->rx_big.extra_map); | |
3089 | bus_dma_tag_destroy(ss->rx_big.dmat); | |
3090 | } | |
3091 | free(ss->rx_big.info, M_DEVBUF); | |
3092 | } | |
3093 | ss->rx_big.info = NULL; | |
3094 | } | |
3095 | ||
3096 | static void | |
3097 | mxge_free_rings(mxge_softc_t *sc) | |
3098 | { | |
3099 | int slice; | |
3100 | ||
3101 | for (slice = 0; slice < sc->num_slices; slice++) | |
3102 | mxge_free_slice_rings(&sc->ss[slice]); | |
3103 | } | |
3104 | ||
3105 | static int | |
3106 | mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, | |
3107 | int tx_ring_entries) | |
3108 | { | |
3109 | mxge_softc_t *sc = ss->sc; | |
3110 | size_t bytes; | |
3111 | int err, i; | |
3112 | ||
3113 | err = ENOMEM; | |
3114 | ||
3115 | /* allocate per-slice receive resources */ | |
3116 | ||
3117 | ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; | |
3118 | ss->rx_done.mask = (2 * rx_ring_entries) - 1; | |
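 | /* the 2x is presumably because completions from both the small | |
 | and big rx rings share the single rx_done ring */ | |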
3119 | ||
3120 | /* allocate the rx shadow rings */ | |
3121 | bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); | |
3122 | ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); | |
3123 | if (ss->rx_small.shadow == NULL) | |
3124 | return err; | |
3125 | ||
3126 | bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); | |
3127 | ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); | |
3128 | if (ss->rx_big.shadow == NULL) | |
3129 | return err; | |
3130 | ||
3131 | /* allocate the rx host info rings */ | |
3132 | bytes = rx_ring_entries * sizeof (*ss->rx_small.info); | |
3133 | ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); | |
3134 | if (ss->rx_small.info == NULL) | |
3135 | return err; | |
3136 | ||
3137 | bytes = rx_ring_entries * sizeof (*ss->rx_big.info); | |
3138 | ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); | |
3139 | if (ss->rx_big.info == NULL) | |
3140 | return err; | |
3141 | ||
3142 | /* allocate the rx busdma resources */ | |
3143 | err = bus_dma_tag_create(sc->parent_dmat, /* parent */ | |
3144 | 1, /* alignment */ | |
3145 | 4096, /* boundary */ | |
3146 | BUS_SPACE_MAXADDR, /* low */ | |
3147 | BUS_SPACE_MAXADDR, /* high */ | |
3148 | NULL, NULL, /* filter */ | |
3149 | MHLEN, /* maxsize */ | |
3150 | 1, /* num segs */ | |
3151 | MHLEN, /* maxsegsize */ | |
3152 | BUS_DMA_ALLOCNOW, /* flags */ | |
3153 | NULL, NULL, /* lock */ | |
3154 | &ss->rx_small.dmat); /* tag */ | |
3155 | if (err != 0) { | |
3156 | device_printf(sc->dev, "Err %d allocating rx_small dmat\n", | |
3157 | err); | |
3158 | return err; | |
3159 | } | |
3160 | ||
3161 | err = bus_dma_tag_create(sc->parent_dmat, /* parent */ | |
3162 | 1, /* alignment */ | |
3163 | #if MXGE_VIRT_JUMBOS | |
3164 | 4096, /* boundary */ | |
3165 | #else | |
3166 | 0, /* boundary */ | |
3167 | #endif | |
3168 | BUS_SPACE_MAXADDR, /* low */ | |
3169 | BUS_SPACE_MAXADDR, /* high */ | |
3170 | NULL, NULL, /* filter */ | |
3171 | 3*4096, /* maxsize */ | |
3172 | #if MXGE_VIRT_JUMBOS | |
3173 | 3, /* num segs */ | |
3174 | 4096, /* maxsegsize*/ | |
3175 | #else | |
3176 | 1, /* num segs */ | |
3177 | MJUM9BYTES, /* maxsegsize*/ | |
3178 | #endif | |
3179 | BUS_DMA_ALLOCNOW, /* flags */ | |
3180 | NULL, NULL, /* lock */ | |
3181 | &ss->rx_big.dmat); /* tag */ | |
3182 | if (err != 0) { | |
3183 | device_printf(sc->dev, "Err %d allocating rx_big dmat\n", | |
3184 | err); | |
3185 | return err; | |
3186 | } | |
3187 | for (i = 0; i <= ss->rx_small.mask; i++) { | |
3188 | err = bus_dmamap_create(ss->rx_small.dmat, 0, | |
3189 | &ss->rx_small.info[i].map); | |
3190 | if (err != 0) { | |
3191 | device_printf(sc->dev, "Err %d rx_small dmamap\n", | |
3192 | err); | |
3193 | return err; | |
3194 | } | |
3195 | } | |
3196 | err = bus_dmamap_create(ss->rx_small.dmat, 0, | |
3197 | &ss->rx_small.extra_map); | |
3198 | if (err != 0) { | |
3199 | device_printf(sc->dev, "Err %d extra rx_small dmamap\n", | |
3200 | err); | |
3201 | return err; | |
3202 | } | |
3203 | ||
3204 | for (i = 0; i <= ss->rx_big.mask; i++) { | |
3205 | err = bus_dmamap_create(ss->rx_big.dmat, 0, | |
3206 | &ss->rx_big.info[i].map); | |
3207 | if (err != 0) { | |
3208 | device_printf(sc->dev, "Err %d rx_big dmamap\n", | |
3209 | err); | |
3210 | return err; | |
3211 | } | |
3212 | } | |
3213 | err = bus_dmamap_create(ss->rx_big.dmat, 0, | |
3214 | &ss->rx_big.extra_map); | |
3215 | if (err != 0) { | |
3216 | device_printf(sc->dev, "Err %d extra rx_big dmamap\n", | |
3217 | err); | |
3218 | return err; | |
3219 | } | |
3220 | ||
3221 | /* now allocate TX resources */ | |
3222 | ||
3223 | #ifndef IFNET_BUF_RING | |
3224 | /* only use a single TX ring for now */ | |
3225 | if (ss != ss->sc->ss) | |
3226 | return 0; | |
3227 | #endif | |
3228 | ||
3229 | ss->tx.mask = tx_ring_entries - 1; | |
3230 | ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); | |
3231 | ||
3232 | ||
3233 | /* allocate the tx request copy block */ | |
3234 | bytes = 8 + | |
3235 | sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); | |
3236 | ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); | |
3237 | if (ss->tx.req_bytes == NULL) | |
3238 | return err; | |
3239 | /* ensure req_list entries are aligned to 8 bytes */ | |
3240 | ss->tx.req_list = (mcp_kreq_ether_send_t *) | |
3241 | ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); | |
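 | /* (x + 7) & ~7 is the usual round-up-to-a-multiple-of-8 trick; | |
 | the 8 spare bytes allocated above leave room for it */ | |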
3242 | ||
3243 | /* allocate the tx busdma segment list */ | |
3244 | bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; | |
3245 | ss->tx.seg_list = (bus_dma_segment_t *) | |
3246 | malloc(bytes, M_DEVBUF, M_WAITOK); | |
3247 | if (ss->tx.seg_list == NULL) | |
3248 | return err; | |
3249 | ||
3250 | /* allocate the tx host info ring */ | |
3251 | bytes = tx_ring_entries * sizeof (*ss->tx.info); | |
3252 | ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); | |
3253 | if (ss->tx.info == NULL) | |
3254 | return err; | |
3255 | ||
3256 | /* allocate the tx busdma resources */ | |
3257 | err = bus_dma_tag_create(sc->parent_dmat, /* parent */ | |
3258 | 1, /* alignment */ | |
3259 | sc->tx_boundary, /* boundary */ | |
3260 | BUS_SPACE_MAXADDR, /* low */ | |
3261 | BUS_SPACE_MAXADDR, /* high */ | |
3262 | NULL, NULL, /* filter */ | |
3263 | 65536 + 256, /* maxsize */ | |
3264 | ss->tx.max_desc - 2, /* num segs */ | |
3265 | sc->tx_boundary, /* maxsegsz */ | |
3266 | BUS_DMA_ALLOCNOW, /* flags */ | |
3267 | NULL, NULL, /* lock */ | |
3268 | &ss->tx.dmat); /* tag */ | |
3269 | ||
3270 | if (err != 0) { | |
3271 | device_printf(sc->dev, "Err %d allocating tx dmat\n", | |
3272 | err); | |
3273 | return err; | |
3274 | } | |
3275 | ||
3276 | /* now use these tags to set up dmamaps for each slot | |
3277 | in the ring */ | |
3278 | for (i = 0; i <= ss->tx.mask; i++) { | |
3279 | err = bus_dmamap_create(ss->tx.dmat, 0, | |
3280 | &ss->tx.info[i].map); | |
3281 | if (err != 0) { | |
3282 | device_printf(sc->dev, "Err %d tx dmamap\n", | |
3283 | err); | |
3284 | return err; | |
3285 | } | |
3286 | } | |
3287 | return 0; | |
3288 | ||
3289 | } | |
3290 | ||
3291 | static int | |
3292 | mxge_alloc_rings(mxge_softc_t *sc) | |
3293 | { | |
3294 | mxge_cmd_t cmd; | |
3295 | int tx_ring_size; | |
3296 | int tx_ring_entries, rx_ring_entries; | |
3297 | int err, slice; | |
3298 | ||
3299 | /* get ring sizes */ | |
3300 | err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); | |
3301 | tx_ring_size = cmd.data0; | |
3302 | if (err != 0) { | |
3303 | device_printf(sc->dev, "Cannot determine tx ring sizes\n"); | |
3304 | goto abort; | |
3305 | } | |
3306 | ||
3307 | tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); | |
3308 | rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); | |
3309 | IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); | |
3310 | sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; | |
3311 | IFQ_SET_READY(&sc->ifp->if_snd); | |
3312 | ||
3313 | for (slice = 0; slice < sc->num_slices; slice++) { | |
3314 | err = mxge_alloc_slice_rings(&sc->ss[slice], | |
3315 | rx_ring_entries, | |
3316 | tx_ring_entries); | |
3317 | if (err != 0) | |
3318 | goto abort; | |
3319 | } | |
3320 | return 0; | |
3321 | ||
3322 | abort: | |
3323 | mxge_free_rings(sc); | |
3324 | return err; | |
3325 | ||
3326 | } | |
3327 | ||
3328 | ||
3329 | static void | |
3330 | mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) | |
3331 | { | |
3332 | int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; | |
3333 | ||
3334 | if (bufsize < MCLBYTES) { | |
3335 | /* easy, everything fits in a single buffer */ | |
3336 | *big_buf_size = MCLBYTES; | |
3337 | *cl_size = MCLBYTES; | |
3338 | *nbufs = 1; | |
3339 | return; | |
3340 | } | |
3341 | ||
3342 | if (bufsize < MJUMPAGESIZE) { | |
3343 | /* still easy, everything still fits in a single buffer */ | |
3344 | *big_buf_size = MJUMPAGESIZE; | |
3345 | *cl_size = MJUMPAGESIZE; | |
3346 | *nbufs = 1; | |
3347 | return; | |
3348 | } | |
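| /* | |
| * Worked example, assuming the usual 2-byte MXGEFW_PAD and a 4KB | |
| * MJUMPAGESIZE: a 1500-byte MTU needs 1500 + 14 + 4 + 2 = 1520 | |
| * bytes and fits the 2KB MCLBYTES cluster above, while a | |
| * 9000-byte jumbo MTU needs 9020 bytes and falls through to the | |
| * 9KB cluster cases below. | |
| */ | |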
3349 | #if MXGE_VIRT_JUMBOS | |
3350 | /* now we need to use virtually contiguous buffers */ | |
3351 | *cl_size = MJUM9BYTES; | |
3352 | *big_buf_size = 4096; | |
3353 | *nbufs = mtu / 4096 + 1; | |
3354 | /* needs to be a power of two, so round up */ | |
3355 | if (*nbufs == 3) | |
3356 | *nbufs = 4; | |
3357 | #else | |
3358 | *cl_size = MJUM9BYTES; | |
3359 | *big_buf_size = MJUM9BYTES; | |
3360 | *nbufs = 1; | |
3361 | #endif | |
3362 | } | |
3363 | ||
3364 | static int | |
3365 | mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) | |
3366 | { | |
3367 | mxge_softc_t *sc; | |
3368 | mxge_cmd_t cmd; | |
3369 | bus_dmamap_t map; | |
3370 | struct lro_entry *lro_entry; | |
3371 | int err, i, slice; | |
3372 | ||
3373 | ||
3374 | sc = ss->sc; | |
3375 | slice = ss - sc->ss; | |
3376 | ||
3377 | SLIST_INIT(&ss->lro_free); | |
3378 | SLIST_INIT(&ss->lro_active); | |
3379 | ||
3380 | for (i = 0; i < sc->lro_cnt; i++) { | |
3381 | lro_entry = (struct lro_entry *) | |
3382 | malloc(sizeof (*lro_entry), M_DEVBUF, | |
3383 | M_NOWAIT | M_ZERO); | |
3384 | if (lro_entry == NULL) { | |
3385 | sc->lro_cnt = i; | |
3386 | break; | |
3387 | } | |
3388 | SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); | |
3389 | } | |
3390 | /* get the lanai pointers to the send and receive rings */ | |
3391 | ||
3392 | err = 0; | |
3393 | #ifndef IFNET_BUF_RING | |
3394 | /* We currently only send from the first slice */ | |
3395 | if (slice == 0) { | |
3396 | #endif | |
3397 | cmd.data0 = slice; | |
3398 | err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); | |
3399 | ss->tx.lanai = | |
3400 | (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); | |
3401 | ss->tx.send_go = (volatile uint32_t *) | |
3402 | (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); | |
3403 | ss->tx.send_stop = (volatile uint32_t *) | |
3404 | (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); | |
3405 | #ifndef IFNET_BUF_RING | |
3406 | } | |
3407 | #endif | |
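| /* | |
| * send_go and send_stop are per-slice doorbells laid out at a | |
| * 64-byte stride in NIC SRAM; unlike the ring offsets, their | |
| * locations are fixed constants rather than firmware replies. | |
| */ | |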
3408 | cmd.data0 = slice; | |
3409 | err |= mxge_send_cmd(sc, | |
3410 | MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); | |
3411 | ss->rx_small.lanai = | |
3412 | (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); | |
3413 | cmd.data0 = slice; | |
3414 | err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); | |
3415 | ss->rx_big.lanai = | |
3416 | (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); | |
3417 | ||
3418 | if (err != 0) { | |
3419 | device_printf(sc->dev, | |
3420 | "failed to get ring sizes or locations\n"); | |
3421 | return EIO; | |
3422 | } | |
3423 | ||
3424 | /* stock receive rings */ | |
3425 | for (i = 0; i <= ss->rx_small.mask; i++) { | |
3426 | map = ss->rx_small.info[i].map; | |
3427 | err = mxge_get_buf_small(ss, map, i); | |
3428 | if (err) { | |
3429 | device_printf(sc->dev, "alloced %d/%d smalls\n", | |
3430 | i, ss->rx_small.mask + 1); | |
3431 | return ENOMEM; | |
3432 | } | |
3433 | } | |
3434 | for (i = 0; i <= ss->rx_big.mask; i++) { | |
3435 | ss->rx_big.shadow[i].addr_low = 0xffffffff; | |
3436 | ss->rx_big.shadow[i].addr_high = 0xffffffff; | |
3437 | } | |
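| /* | |
| * 0xffffffff is presumably never a valid DMA address for the | |
| * NIC; poisoning the shadow ring keeps the slots that are never | |
| * stocked (when nbufs > 1, only every nbufs-th slot gets a | |
| * buffer) from pointing at stale memory. | |
| */ | |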
3438 | ss->rx_big.nbufs = nbufs; | |
3439 | ss->rx_big.cl_size = cl_size; | |
3440 | ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + | |
3441 | ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; | |
3442 | for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { | |
3443 | map = ss->rx_big.info[i].map; | |
3444 | err = mxge_get_buf_big(ss, map, i); | |
3445 | if (err) { | |
3446 | device_printf(sc->dev, "alloced %d/%d bigs\n", | |
3447 | i, ss->rx_big.mask + 1); | |
3448 | return ENOMEM; | |
3449 | } | |
3450 | } | |
3451 | return 0; | |
3452 | } | |
3453 | ||
3454 | static int | |
3455 | mxge_open(mxge_softc_t *sc) | |
3456 | { | |
3457 | mxge_cmd_t cmd; | |
3458 | int err, big_bytes, nbufs, slice, cl_size, i; | |
3459 | bus_addr_t bus; | |
3460 | volatile uint8_t *itable; | |
3461 | struct mxge_slice_state *ss; | |
3462 | ||
3463 | /* Copy the MAC address in case it was overridden */ | |
3464 | bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); | |
3465 | ||
3466 | err = mxge_reset(sc, 1); | |
3467 | if (err != 0) { | |
3468 | device_printf(sc->dev, "failed to reset\n"); | |
3469 | return EIO; | |
3470 | } | |
3471 | ||
3472 | if (sc->num_slices > 1) { | |
3473 | /* setup the indirection table */ | |
3474 | cmd.data0 = sc->num_slices; | |
3475 | err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, | |
3476 | &cmd); | |
3477 | ||
3478 | err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, | |
3479 | &cmd); | |
3480 | if (err != 0) { | |
3481 | device_printf(sc->dev, | |
3482 | "failed to setup rss tables\n"); | |
3483 | return err; | |
3484 | } | |
3485 | ||
3486 | /* just enable an identity mapping */ | |
3487 | itable = sc->sram + cmd.data0; | |
3488 | for (i = 0; i < sc->num_slices; i++) | |
3489 | itable[i] = (uint8_t)i; | |
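| /* | |
| * The indirection table maps an RSS hash bucket to a slice, so | |
| * the identity mapping steers bucket i to slice i. A rough | |
| * sketch of the dispatch: slice = itable[hash % table_size]. | |
| */ | |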
3490 | ||
3491 | cmd.data0 = 1; | |
3492 | cmd.data1 = mxge_rss_hash_type; | |
3493 | err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); | |
3494 | if (err != 0) { | |
3495 | device_printf(sc->dev, "failed to enable slices\n"); | |
3496 | return err; | |
3497 | } | |
3498 | } | |
3499 | ||
3500 | ||
3501 | mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); | |
3502 | ||
3503 | cmd.data0 = nbufs; | |
3504 | err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, | |
3505 | &cmd); | |
3506 | /* error is only meaningful if we're trying to set | |
3507 | MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ | |
3508 | if (err && nbufs > 1) { | |
3509 | device_printf(sc->dev, | |
3510 | "Failed to set alway-use-n to %d\n", | |
3511 | nbufs); | |
3512 | return EIO; | |
3513 | } | |
3514 | /* Give the firmware the mtu and the big and small buffer | |
3515 | sizes. The firmware wants the big buf size to be a power | |
3516 | of two. Luckily, FreeBSD's clusters are powers of two */ | |
3517 | cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; | |
3518 | err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); | |
3519 | cmd.data0 = MHLEN - MXGEFW_PAD; | |
3520 | err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, | |
3521 | &cmd); | |
3522 | cmd.data0 = big_bytes; | |
3523 | err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); | |
3524 | ||
3525 | if (err != 0) { | |
3526 | device_printf(sc->dev, "failed to setup params\n"); | |
3527 | goto abort; | |
3528 | } | |
3529 | ||
3530 | /* Now give the firmware a pointer to the stats block */ | |
3531 | for (slice = 0; | |
3532 | #ifdef IFNET_BUF_RING | |
3533 | slice < sc->num_slices; | |
3534 | #else | |
3535 | slice < 1; | |
3536 | #endif | |
3537 | slice++) { | |
3538 | ss = &sc->ss[slice]; | |
3539 | cmd.data0 = | |
3540 | MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); | |
3541 | cmd.data1 = | |
3542 | MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); | |
3543 | cmd.data2 = sizeof(struct mcp_irq_data); | |
3544 | cmd.data2 |= (slice << 16); | |
3545 | err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); | |
3546 | } | |
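| /* | |
| * For STATS_DMA_V2, data0/data1 carry the low/high halves of the | |
| * stats block's 64-bit DMA address, and data2 packs the block | |
| * size in its low 16 bits with the slice number in the high 16. | |
| */ | |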
3547 | ||
3548 | if (err != 0) { | |
3549 | bus = sc->ss->fw_stats_dma.bus_addr; | |
3550 | bus += offsetof(struct mcp_irq_data, send_done_count); | |
3551 | cmd.data0 = MXGE_LOWPART_TO_U32(bus); | |
3552 | cmd.data1 = MXGE_HIGHPART_TO_U32(bus); | |
3553 | err = mxge_send_cmd(sc, | |
3554 | MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, | |
3555 | &cmd); | |
3556 | /* Firmware cannot support multicast without STATS_DMA_V2 */ | |
3557 | sc->fw_multicast_support = 0; | |
3558 | } else { | |
3559 | sc->fw_multicast_support = 1; | |
3560 | } | |
3561 | ||
3562 | if (err != 0) { | |
3563 | device_printf(sc->dev, "failed to setup params\n"); | |
3564 | goto abort; | |
3565 | } | |
3566 | ||
3567 | for (slice = 0; slice < sc->num_slices; slice++) { | |
3568 | err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); | |
3569 | if (err != 0) { | |
3570 | device_printf(sc->dev, "couldn't open slice %d\n", | |
3571 | slice); | |
3572 | goto abort; | |
3573 | } | |
3574 | } | |
3575 | ||
3576 | /* Finally, start the firmware running */ | |
3577 | err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); | |
3578 | if (err) { | |
3579 | device_printf(sc->dev, "Couldn't bring up link\n"); | |
3580 | goto abort; | |
3581 | } | |
3582 | #ifdef IFNET_BUF_RING | |
3583 | for (slice = 0; slice < sc->num_slices; slice++) { | |
3584 | ss = &sc->ss[slice]; | |
3585 | ss->if_drv_flags |= IFF_DRV_RUNNING; | |
3586 | ss->if_drv_flags &= ~IFF_DRV_OACTIVE; | |
3587 | } | |
3588 | #endif | |
3589 | sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; | |
3590 | sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; | |
3591 | callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); | |
3592 | ||
3593 | return 0; | |
3594 | ||
3595 | ||
3596 | abort: | |
3597 | mxge_free_mbufs(sc); | |
3598 | ||
3599 | return err; | |
3600 | } | |
3601 | ||
3602 | static int | |
3603 | mxge_close(mxge_softc_t *sc) | |
3604 | { | |
3605 | mxge_cmd_t cmd; | |
3606 | int err, old_down_cnt; | |
3607 | #ifdef IFNET_BUF_RING | |
3608 | struct mxge_slice_state *ss; | |
3609 | int slice; | |
3610 | #endif | |
3611 | ||
3612 | callout_stop(&sc->co_hdl); | |
3613 | #ifdef IFNET_BUF_RING | |
3614 | for (slice = 0; slice < sc->num_slices; slice++) { | |
3615 | ss = &sc->ss[slice]; | |
3616 | ss->if_drv_flags &= ~IFF_DRV_RUNNING; | |
3617 | } | |
3618 | #endif | |
3619 | sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; | |
3620 | old_down_cnt = sc->down_cnt; | |
3621 | wmb(); | |
3622 | err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); | |
3623 | if (err) { | |
3624 | device_printf(sc->dev, "Couldn't bring down link\n"); | |
3625 | } | |
3626 | if (old_down_cnt == sc->down_cnt) { | |
3627 | /* wait for down irq */ | |
3628 | DELAY(10 * sc->intr_coal_delay); | |
3629 | } | |
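| /* | |
| * down_cnt is presumably advanced from the interrupt path once | |
| * the firmware acknowledges ETHERNET_DOWN, so an unchanged count | |
| * after the delay means the down interrupt never arrived. | |
| */ | |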
3630 | wmb(); | |
3631 | if (old_down_cnt == sc->down_cnt) { | |
3632 | device_printf(sc->dev, "never got down irq\n"); | |
3633 | } | |
3634 | ||
3635 | mxge_free_mbufs(sc); | |
3636 | ||
3637 | return 0; | |
3638 | } | |
3639 | ||
3640 | static void | |
3641 | mxge_setup_cfg_space(mxge_softc_t *sc) | |
3642 | { | |
3643 | device_t dev = sc->dev; | |
3644 | int reg; | |
3645 | uint16_t cmd, lnk, pectl; | |
3646 | ||
3647 | /* find the PCIe link width and set max read request to 4KB */ | |
3648 | if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) { | |
3649 | lnk = pci_read_config(dev, reg + 0x12, 2); | |
3650 | sc->link_width = (lnk >> 4) & 0x3f; | |
3651 | ||
3652 | pectl = pci_read_config(dev, reg + 0x8, 2); | |
3653 | pectl = (pectl & ~0x7000) | (5 << 12); | |
3654 | pci_write_config(dev, reg + 0x8, pectl, 2); | |
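| /* | |
| * Bits 14:12 of the PCIe device control register (offset 0x8 in | |
| * the capability) encode the max read request size as 128 << n, | |
| * so writing 5 selects 128 << 5 = 4096 bytes. | |
| */ | |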
3655 | } | |
3656 | ||
3657 | /* Enable DMA and Memory space access */ | |
3658 | pci_enable_busmaster(dev); | |
3659 | cmd = pci_read_config(dev, PCIR_COMMAND, 2); | |
3660 | cmd |= PCIM_CMD_MEMEN; | |
3661 | pci_write_config(dev, PCIR_COMMAND, cmd, 2); | |
3662 | } | |
3663 | ||
3664 | static uint32_t | |
3665 | mxge_read_reboot(mxge_softc_t *sc) | |
3666 | { | |
3667 | device_t dev = sc->dev; | |
3668 | uint32_t vs; | |
3669 | ||
3670 | /* find the vendor specific offset */ | |
3671 | if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { | |
3672 | device_printf(sc->dev, | |
3673 | "could not find vendor specific offset\n"); | |
3674 | return (uint32_t)-1; | |
3675 | } | |
3676 | /* enable read32 mode */ | |
3677 | pci_write_config(dev, vs + 0x10, 0x3, 1); | |
3678 | /* tell NIC which register to read */ | |
3679 | pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); | |
3680 | return (pci_read_config(dev, vs + 0x14, 4)); | |
3681 | } | |
3682 | ||
3683 | static int | |
3684 | mxge_watchdog_reset(mxge_softc_t *sc, int slice) | |
3685 | { | |
3686 | struct pci_devinfo *dinfo; | |
3687 | mxge_tx_ring_t *tx; | |
3688 | int err; | |
3689 | uint32_t reboot; | |
3690 | uint16_t cmd; | |
3691 | ||
3692 | err = ENXIO; | |
3693 | ||
3694 | device_printf(sc->dev, "Watchdog reset!\n"); | |
3695 | ||
3696 | /* | |
3697 | * check to see if the NIC rebooted. If it did, then all of | |
3698 | * PCI config space has been reset, and things like the | |
3699 | * busmaster bit will be zero. If this is the case, then we | |
3700 | * must restore PCI config space before the NIC can be used | |
3701 | * again | |
3702 | */ | |
3703 | cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); | |
3704 | if (cmd == 0xffff) { | |
3705 | /* | |
3706 | * maybe the watchdog caught the NIC rebooting; wait | |
3707 | * up to 100ms for it to finish. If it does not come | |
3708 | * back, then give up | |
3709 | */ | |
3710 | DELAY(1000*100); | |
3711 | cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); | |
3712 | if (cmd == 0xffff) { | |
3713 | device_printf(sc->dev, "NIC disappeared!\n"); | |
3714 | return (err); | |
3715 | } | |
3716 | } | |
3717 | if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { | |
3718 | /* print the reboot status */ | |
3719 | reboot = mxge_read_reboot(sc); | |
3720 | device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", | |
3721 | reboot); | |
3722 | /* restore PCI configuration space */ | |
3723 | dinfo = device_get_ivars(sc->dev); | |
3724 | pci_cfg_restore(sc->dev, dinfo); | |
3725 | ||
3726 | /* and redo any changes we made to our config space */ | |
3727 | mxge_setup_cfg_space(sc); | |
3728 | ||
3729 | if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) { | |
3730 | mxge_close(sc); | |
3731 | err = mxge_open(sc); | |
3732 | } | |
3733 | } else { | |
3734 | tx = &sc->ss[slice].tx; | |
3735 | device_printf(sc->dev, | |
3736 | "NIC did not reboot, slice %d ring state:\n", | |
3737 | slice); | |
3738 | device_printf(sc->dev, | |
3739 | "tx.req=%d tx.done=%d, tx.queue_active=%d\n", | |
3740 | tx->req, tx->done, tx->queue_active); | |
3741 | device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", | |
3742 | tx->activate, tx->deactivate); | |
3743 | device_printf(sc->dev, "pkt_done=%d fw=%d\n", | |
3744 | tx->pkt_done, | |
3745 | be32toh(sc->ss->fw_stats->send_done_count)); | |
3746 | device_printf(sc->dev, "not resetting\n"); | |
3747 | } | |
3748 | return (err); | |
3749 | } | |
3750 | ||
3751 | static int | |
3752 | mxge_watchdog(mxge_softc_t *sc) | |
3753 | { | |
3754 | mxge_tx_ring_t *tx; | |
3755 | uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); | |
3756 | int i, err = 0; | |
3757 | ||
3758 | /* see if we have outstanding transmits that | |
3759 | have been pending for more than mxge_ticks */ | |
3760 | for (i = 0; | |
3761 | #ifdef IFNET_BUF_RING | |
3762 | (i < sc->num_slices) && (err == 0); | |
3763 | #else | |
3764 | (i < 1) && (err == 0); | |
3765 | #endif | |
3766 | i++) { | |
3767 | tx = &sc->ss[i].tx; | |
3768 | if (tx->req != tx->done && | |
3769 | tx->watchdog_req != tx->watchdog_done && | |
3770 | tx->done == tx->watchdog_done) { | |
3771 | /* check for pause blocking before resetting */ | |
3772 | if (tx->watchdog_rx_pause == rx_pause) | |
3773 | err = mxge_watchdog_reset(sc, i); | |
3774 | else | |
3775 | device_printf(sc->dev, "Flow control blocking " | |
3776 | "xmits, check link partner\n"); | |
3777 | } | |
3778 | ||
3779 | tx->watchdog_req = tx->req; | |
3780 | tx->watchdog_done = tx->done; | |
3781 | tx->watchdog_rx_pause = rx_pause; | |
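| /* | |
| * The snapshots above make this a two-tick detector: a reset | |
| * fires only when a ring had work outstanding on the previous | |
| * tick and its done counter has not moved since, and the pause | |
| * counter comparison rules out a link partner that is simply | |
| * asserting flow control. | |
| */ | |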
3782 | } | |
3783 | ||
3784 | if (sc->need_media_probe) | |
3785 | mxge_media_probe(sc); | |
3786 | return (err); | |
3787 | } | |
3788 | ||
3789 | static void | |
3790 | mxge_update_stats(mxge_softc_t *sc) | |
3791 | { | |
3792 | struct mxge_slice_state *ss; | |
3793 | u_long ipackets = 0; | |
3794 | u_long opackets = 0; | |
3795 | #ifdef IFNET_BUF_RING | |
3796 | u_long obytes = 0; | |
3797 | u_long omcasts = 0; | |
3798 | u_long odrops = 0; | |
3799 | #endif | |
3800 | u_long oerrors = 0; | |
3801 | int slice; | |
3802 | ||
3803 | for (slice = 0; slice < sc->num_slices; slice++) { | |
3804 | ss = &sc->ss[slice]; | |
3805 | ipackets += ss->ipackets; | |
3806 | opackets += ss->opackets; | |
3807 | #ifdef IFNET_BUF_RING | |
3808 | obytes += ss->obytes; | |
3809 | omcasts += ss->omcasts; | |
3810 | odrops += ss->tx.br->br_drops; | |
3811 | #endif | |
3812 | oerrors += ss->oerrors; | |
3813 | } | |
3814 | sc->ifp->if_ipackets = ipackets; | |
3815 | sc->ifp->if_opackets = opackets; | |
3816 | #ifdef IFNET_BUF_RING | |
3817 | sc->ifp->if_obytes = obytes; | |
3818 | sc->ifp->if_omcasts = omcasts; | |
3819 | sc->ifp->if_snd.ifq_drops = odrops; | |
3820 | #endif | |
3821 | sc->ifp->if_oerrors = oerrors; | |
3822 | } | |
3823 | ||
3824 | static void | |
3825 | mxge_tick(void *arg) | |
3826 | { | |
3827 | mxge_softc_t *sc = arg; | |
3828 | int err = 0; | |
3829 | ||
3830 | /* aggregate stats from different slices */ | |
3831 | mxge_update_stats(sc); | |
3832 | if (!sc->watchdog_countdown) { | |
3833 | err = mxge_watchdog(sc); | |
3834 | sc->watchdog_countdown = 4; | |
3835 | } | |
3836 | sc->watchdog_countdown--; | |
3837 | if (err == 0) | |
3838 | callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); | |
3839 | ||
3840 | } | |
3841 | ||
3842 | static int | |
3843 | mxge_media_change(struct ifnet *ifp) | |
3844 | { | |
3845 | return EINVAL; | |
3846 | } | |
3847 | ||
3848 | static int | |
3849 | mxge_change_mtu(mxge_softc_t *sc, int mtu) | |
3850 | { | |
3851 | struct ifnet *ifp = sc->ifp; | |
3852 | int real_mtu, old_mtu; | |
3853 | int err = 0; | |
3854 | ||
3855 | ||
3856 | real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; | |
3857 | if (real_mtu > sc->max_mtu || real_mtu < 60) | |
3858 | return EINVAL; | |
3859 | mtx_lock(&sc->driver_mtx); | |
3860 | old_mtu = ifp->if_mtu; | |
3861 | ifp->if_mtu = mtu; | |
3862 | if (ifp->if_drv_flags & IFF_DRV_RUNNING) { | |
3863 | mxge_close(sc); | |
3864 | err = mxge_open(sc); | |
3865 | if (err != 0) { | |
3866 | ifp->if_mtu = old_mtu; | |
3867 | mxge_close(sc); | |
3868 | (void) mxge_open(sc); | |
3869 | } | |
3870 | } | |
3871 | mtx_unlock(&sc->driver_mtx); | |
3872 | return err; | |
3873 | } | |
3874 | ||
3875 | static void | |
3876 | mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) | |
3877 | { | |
3878 | mxge_softc_t *sc = ifp->if_softc; | |
3879 | ||
3880 | ||
3881 | if (sc == NULL) | |
3882 | return; | |
3883 | ifmr->ifm_status = IFM_AVALID; | |
3884 | ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; | |
3885 | ifmr->ifm_active = IFM_AUTO | IFM_ETHER; | |
3886 | ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0; | |
3887 | } | |
3888 | ||
3889 | static int | |
3890 | mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) | |
3891 | { | |
3892 | mxge_softc_t *sc = ifp->if_softc; | |
3893 | struct ifreq *ifr = (struct ifreq *)data; | |
3894 | int err, mask; | |
3895 | ||
3896 | err = 0; | |
3897 | switch (command) { | |
3898 | case SIOCSIFADDR: | |
3899 | case SIOCGIFADDR: | |
3900 | err = ether_ioctl(ifp, command, data); | |
3901 | break; | |
3902 | ||
3903 | case SIOCSIFMTU: | |
3904 | err = mxge_change_mtu(sc, ifr->ifr_mtu); | |
3905 | break; | |
3906 | ||
3907 | case SIOCSIFFLAGS: | |
3908 | mtx_lock(&sc->driver_mtx); | |
3909 | if (sc->dying) { | |
3910 | mtx_unlock(&sc->driver_mtx); | |
3911 | return EINVAL; | |
3912 | } | |
3913 | if (ifp->if_flags & IFF_UP) { | |
3914 | if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { | |
3915 | err = mxge_open(sc); | |
3916 | } else { | |
3917 | /* take care of promisc and allmulti | |
3918 | flag changes */ | |
3919 | mxge_change_promisc(sc, | |
3920 | ifp->if_flags & IFF_PROMISC); | |
3921 | mxge_set_multicast_list(sc); | |
3922 | } | |
3923 | } else { | |
3924 | if (ifp->if_drv_flags & IFF_DRV_RUNNING) { | |
3925 | mxge_close(sc); | |
3926 | } | |
3927 | } | |
3928 | mtx_unlock(&sc->driver_mtx); | |
3929 | break; | |
3930 | ||
3931 | case SIOCADDMULTI: | |
3932 | case SIOCDELMULTI: | |
3933 | mtx_lock(&sc->driver_mtx); | |
3934 | mxge_set_multicast_list(sc); | |
3935 | mtx_unlock(&sc->driver_mtx); | |
3936 | break; | |
3937 | ||
3938 | case SIOCSIFCAP: | |
3939 | mtx_lock(&sc->driver_mtx); | |
3940 | mask = ifr->ifr_reqcap ^ ifp->if_capenable; | |
3941 | if (mask & IFCAP_TXCSUM) { | |
3942 | if (IFCAP_TXCSUM & ifp->if_capenable) { | |
3943 | ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); | |
3944 | ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP | |
3945 | | CSUM_TSO); | |
3946 | } else { | |
3947 | ifp->if_capenable |= IFCAP_TXCSUM; | |
3948 | ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); | |
3949 | } | |
3950 | } else if (mask & IFCAP_RXCSUM) { | |
3951 | if (IFCAP_RXCSUM & ifp->if_capenable) { | |
3952 | ifp->if_capenable &= ~IFCAP_RXCSUM; | |
3953 | sc->csum_flag = 0; | |
3954 | } else { | |
3955 | ifp->if_capenable |= IFCAP_RXCSUM; | |
3956 | sc->csum_flag = 1; | |
3957 | } | |
3958 | } | |
3959 | if (mask & IFCAP_TSO4) { | |
3960 | if (IFCAP_TSO4 & ifp->if_capenable) { | |
3961 | ifp->if_capenable &= ~IFCAP_TSO4; | |
3962 | ifp->if_hwassist &= ~CSUM_TSO; | |
3963 | } else if (IFCAP_TXCSUM & ifp->if_capenable) { | |
3964 | ifp->if_capenable |= IFCAP_TSO4; | |
3965 | ifp->if_hwassist |= CSUM_TSO; | |
3966 | } else { | |
3967 | printf("mxge requires tx checksum offload" | |
3968 | " be enabled to use TSO\n"); | |
3969 | err = EINVAL; | |
3970 | } | |
3971 | } | |
3972 | if (mask & IFCAP_LRO) { | |
3973 | if (IFCAP_LRO & ifp->if_capenable) | |
3974 | err = mxge_change_lro_locked(sc, 0); | |
3975 | else | |
3976 | err = mxge_change_lro_locked(sc, mxge_lro_cnt); | |
3977 | } | |
3978 | if (mask & IFCAP_VLAN_HWTAGGING) | |
3979 | ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; | |
3980 | mtx_unlock(&sc->driver_mtx); | |
3981 | VLAN_CAPABILITIES(ifp); | |
3982 | ||
3983 | break; | |
3984 | ||
3985 | case SIOCGIFMEDIA: | |
3986 | err = ifmedia_ioctl(ifp, (struct ifreq *)data, | |
3987 | &sc->media, command); | |
3988 | break; | |
3989 | ||
3990 | default: | |
3991 | err = ENOTTY; | |
3992 | } | |
3993 | return err; | |
3994 | } | |
3995 | ||
3996 | static void | |
3997 | mxge_fetch_tunables(mxge_softc_t *sc) | |
3998 | { | |
3999 | ||
4000 | TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); | |
4001 | TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", | |
4002 | &mxge_flow_control); | |
4003 | TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", | |
4004 | &mxge_intr_coal_delay); | |
4005 | TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", | |
4006 | &mxge_nvidia_ecrc_enable); | |
4007 | TUNABLE_INT_FETCH("hw.mxge.force_firmware", | |
4008 | &mxge_force_firmware); | |
4009 | TUNABLE_INT_FETCH("hw.mxge.deassert_wait", | |
4010 | &mxge_deassert_wait); | |
4011 | TUNABLE_INT_FETCH("hw.mxge.verbose", | |
4012 | &mxge_verbose); | |
4013 | TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); | |
4014 | TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); | |
4015 | TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); | |
4016 | TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); | |
4017 | TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); | |
4018 | if (sc->lro_cnt != 0) | |
4019 | mxge_lro_cnt = sc->lro_cnt; | |
4020 | ||
4021 | if (bootverbose) | |
4022 | mxge_verbose = 1; | |
4023 | if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) | |
4024 | mxge_intr_coal_delay = 30; | |
4025 | if (mxge_ticks == 0) | |
4026 | mxge_ticks = hz / 2; | |
4027 | sc->pause = mxge_flow_control; | |
4028 | if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 | |
4029 | || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { | |
4030 | mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; | |
4031 | } | |
4032 | if (mxge_initial_mtu > ETHERMTU_JUMBO || | |
4033 | mxge_initial_mtu < ETHER_MIN_LEN) | |
4034 | mxge_initial_mtu = ETHERMTU_JUMBO; | |
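| /* | |
| * All of the tunables above are read from the kernel environment | |
| * and can be set in /boot/loader.conf; illustrative values: | |
| * hw.mxge.intr_coal_delay="30" | |
| * hw.mxge.max_slices="4" | |
| */ | |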
4035 | } | |
4036 | ||
4037 | ||
4038 | static void | |
4039 | mxge_free_slices(mxge_softc_t *sc) | |
4040 | { | |
4041 | struct mxge_slice_state *ss; | |
4042 | int i; | |
4043 | ||
4044 | ||
4045 | if (sc->ss == NULL) | |
4046 | return; | |
4047 | ||
4048 | for (i = 0; i < sc->num_slices; i++) { | |
4049 | ss = &sc->ss[i]; | |
4050 | if (ss->fw_stats != NULL) { | |
4051 | mxge_dma_free(&ss->fw_stats_dma); | |
4052 | ss->fw_stats = NULL; | |
4053 | #ifdef IFNET_BUF_RING | |
4054 | if (ss->tx.br != NULL) { | |
4055 | drbr_free(ss->tx.br, M_DEVBUF); | |
4056 | ss->tx.br = NULL; | |
4057 | } | |
4058 | #endif | |
4059 | mtx_destroy(&ss->tx.mtx); | |
4060 | } | |
4061 | if (ss->rx_done.entry != NULL) { | |
4062 | mxge_dma_free(&ss->rx_done.dma); | |
4063 | ss->rx_done.entry = NULL; | |
4064 | } | |
4065 | } | |
4066 | free(sc->ss, M_DEVBUF); | |
4067 | sc->ss = NULL; | |
4068 | } | |
4069 | ||
4070 | static int | |
4071 | mxge_alloc_slices(mxge_softc_t *sc) | |
4072 | { | |
4073 | mxge_cmd_t cmd; | |
4074 | struct mxge_slice_state *ss; | |
4075 | size_t bytes; | |
4076 | int err, i, max_intr_slots; | |
4077 | ||
4078 | err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); | |
4079 | if (err != 0) { | |
4080 | device_printf(sc->dev, "Cannot determine rx ring size\n"); | |
4081 | return err; | |
4082 | } | |
4083 | sc->rx_ring_size = cmd.data0; | |
4084 | max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); | |
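| /* | |
| * Each receive ring entry can produce one completion and there | |
| * are two receive rings (small and big) per slice, hence the | |
| * factor of two when sizing the rx_done queue. | |
| */ | |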
4085 | ||
4086 | bytes = sizeof (*sc->ss) * sc->num_slices; | |
4087 | sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); | |
4088 | if (sc->ss == NULL) | |
4089 | return (ENOMEM); | |
4090 | for (i = 0; i < sc->num_slices; i++) { | |
4091 | ss = &sc->ss[i]; | |
4092 | ||
4093 | ss->sc = sc; | |
4094 | ||
4095 | /* allocate per-slice rx interrupt queues */ | |
4096 | ||
4097 | bytes = max_intr_slots * sizeof (*ss->rx_done.entry); | |
4098 | err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); | |
4099 | if (err != 0) | |
4100 | goto abort; | |
4101 | ss->rx_done.entry = ss->rx_done.dma.addr; | |
4102 | bzero(ss->rx_done.entry, bytes); | |
4103 | ||
4104 | /* | |
4105 | * allocate the per-slice firmware stats; stats | |
4106 | * (including tx) are used only on the first | |
4107 | * slice for now | |
4108 | */ | |
4109 | #ifndef IFNET_BUF_RING | |
4110 | if (i > 0) | |
4111 | continue; | |
4112 | #endif | |
4113 | ||
4114 | bytes = sizeof (*ss->fw_stats); | |
4115 | err = mxge_dma_alloc(sc, &ss->fw_stats_dma, | |
4116 | sizeof (*ss->fw_stats), 64); | |
4117 | if (err != 0) | |
4118 | goto abort; | |
4119 | ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; | |
4120 | snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), | |
4121 | "%s:tx(%d)", device_get_nameunit(sc->dev), i); | |
4122 | mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); | |
4123 | #ifdef IFNET_BUF_RING | |
4124 | ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, | |
4125 | &ss->tx.mtx); | |
4126 | #endif | |
4127 | } | |
4128 | ||
4129 | return (0); | |
4130 | ||
4131 | abort: | |
4132 | mxge_free_slices(sc); | |
4133 | return (ENOMEM); | |
4134 | } | |
4135 | ||
4136 | static void | |
4137 | mxge_slice_probe(mxge_softc_t *sc) | |
4138 | { | |
4139 | mxge_cmd_t cmd; | |
4140 | char *old_fw; | |
4141 | int msix_cnt, status, max_intr_slots; | |
4142 | ||
4143 | sc->num_slices = 1; | |
4144 | /* | |
4145 | * don't enable multiple slices if the tunable disables them, | |
4146 | * or if this is not an SMP system | |
4147 | */ | |
4148 | ||
4149 | if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) | |
4150 | return; | |
4151 | ||
4152 | /* see how many MSI-X interrupts are available */ | |
4153 | msix_cnt = pci_msix_count(sc->dev); | |
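| /* | |
| * Each slice needs its own MSI-X vector, so with fewer than two | |
| * usable vectors multiple slices cannot help. | |
| */ | |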
4154 | if (msix_cnt < 2) | |
4155 | return; | |
4156 | ||
4157 | /* now load the slice-aware firmware to see what it supports */ | |
4158 | old_fw = sc->fw_name; | |
4159 | if (old_fw == mxge_fw_aligned) | |
4160 | sc->fw_name = mxge_fw_rss_aligned; | |
4161 | else | |
4162 | sc->fw_name = mxge_fw_rss_unaligned; | |
4163 | status = mxge_load_firmware(sc, 0); | |
4164 | if (status != 0) { | |
4165 | device_printf(sc->dev, "Falling back to a single slice\n"); | |
4166 | return; | |
4167 | } | |
4168 | ||
4169 | /* try to send a reset command to the card to see if it | |
4170 | is alive */ | |
4171 | memset(&cmd, 0, sizeof (cmd)); | |
4172 | status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); | |
4173 | if (status != 0) { | |
4174 | device_printf(sc->dev, "failed reset\n"); | |
4175 | goto abort_with_fw; | |
4176 | } | |