/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
/*__FBSDID("$FreeBSD: src/sys/dev/mxge/if_mxge.c,v 1.63 2009/06/26 11:45:06 rwatson Exp $");*/

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

/* count xmits ourselves, rather than via drbr */
#define NO_SLOW_STATS
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/vlan/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <bus/pci/pcireg.h>
#include <bus/pci/pcivar.h>
#include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h> /* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/netif/mxge/mxge_mcp.h>
#include <dev/netif/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/netif/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"
/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
        /* Device interface */
        DEVMETHOD(device_probe, mxge_probe),
        DEVMETHOD(device_attach, mxge_attach),
        DEVMETHOD(device_detach, mxge_detach),
        DEVMETHOD(device_shutdown, mxge_shutdown),
        {0, 0}
};

static driver_t mxge_driver =
{
        "mxge",
        mxge_methods,
        sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

static int
mxge_probe(device_t dev)
{
        int rev;

        if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
            ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
             (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
                rev = pci_get_revid(dev);
                switch (rev) {
                case MXGE_PCI_REV_Z8E:
                        device_set_desc(dev, "Myri10G-PCIE-8A");
                        break;
                case MXGE_PCI_REV_Z8ES:
                        device_set_desc(dev, "Myri10G-PCIE-8B");
                        break;
                default:
                        device_set_desc(dev, "Myri10G-PCIE-8??");
                        device_printf(dev, "Unrecognized rev %d NIC\n", rev);
                        break;
                }
                return 0;
        }
        return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
        vm_offset_t len;
        int err;

        sc->wc = 1;
        len = rman_get_size(sc->mem_res);
        err = pmap_change_attr((vm_offset_t) sc->sram,
                               len, PAT_WRITE_COMBINING);
        if (err != 0) {
                device_printf(sc->dev, "pmap_change_attr failed, %d\n",
                              err);
                sc->wc = 0;
        }
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
                     int error)
{
        if (error == 0) {
                *(bus_addr_t *) arg = segs->ds_addr;
        }
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
               bus_size_t alignment)
{
        int err;
        device_t dev = sc->dev;
        bus_size_t boundary, maxsegsize;

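        /*
         * Choosing boundary/maxsegsize: a single segment larger than
         * 4KB cannot also honor a 4KB boundary, so the boundary
         * restriction is dropped for page-aligned buffers bigger than
         * a page; anything smaller is kept within one 4KB page.
         */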
        if (bytes > 4096 && alignment == 4096) {
                boundary = 0;
                maxsegsize = bytes;
        } else {
                boundary = 4096;
                maxsegsize = 4096;
        }

        /* allocate DMAable memory tags */
        err = bus_dma_tag_create(sc->parent_dmat,       /* parent */
                                 alignment,             /* alignment */
                                 boundary,              /* boundary */
                                 BUS_SPACE_MAXADDR,     /* low */
                                 BUS_SPACE_MAXADDR,     /* high */
                                 NULL, NULL,            /* filter */
                                 bytes,                 /* maxsize */
                                 1,                     /* num segs */
                                 maxsegsize,            /* maxsegsize */
                                 BUS_DMA_COHERENT,      /* flags */
                                 NULL, NULL,            /* lock */
                                 &dma->dmat);           /* tag */
        if (err != 0) {
                device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
                return err;
        }

        /* allocate DMAable memory & map */
        err = bus_dmamem_alloc(dma->dmat, &dma->addr,
                               (BUS_DMA_WAITOK | BUS_DMA_COHERENT
                                | BUS_DMA_ZERO), &dma->map);
        if (err != 0) {
                device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
                goto abort_with_dmat;
        }

        /* load the memory */
        err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
                              mxge_dmamap_callback,
                              (void *)&dma->bus_addr, 0);
        if (err != 0) {
                device_printf(dev, "couldn't load map (err = %d)\n", err);
                goto abort_with_mem;
        }
        return 0;

abort_with_mem:
        bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
        (void)bus_dma_tag_destroy(dma->dmat);
        return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
        bus_dmamap_unload(dma->dmat, dma->map);
        bus_dmamem_free(dma->dmat, dma->addr, dma->map);
        (void)bus_dma_tag_destroy(dma->dmat);
}
/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */
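/*
 * Illustrative walk-through of the parser below (the address is made
 * up): given "MAC=00:60:dd:47:ab:cd", ptr advances by 1 and then by 3
 * per loop pass, landing on "00", "60", "dd", ... in turn; each
 * strtoul() stops at the following ':' and fills one octet of
 * sc->mac_addr.
 */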

static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

        char *ptr, *limit;
        int i, found_mac;

        ptr = sc->eeprom_strings;
        limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
        found_mac = 0;
        while (ptr < limit && *ptr != '\0') {
                if (memcmp(ptr, "MAC=", 4) == 0) {
                        ptr += 1;
                        sc->mac_addr_string = ptr;
                        for (i = 0; i < 6; i++) {
                                ptr += 3;
                                if ((ptr + 2) > limit)
                                        goto abort;
                                sc->mac_addr[i] = strtoul(ptr, NULL, 16);
                                found_mac = 1;
                        }
                } else if (memcmp(ptr, "PC=", 3) == 0) {
                        ptr += 3;
                        strncpy(sc->product_code_string, ptr,
                                sizeof (sc->product_code_string) - 1);
                } else if (memcmp(ptr, "SN=", 3) == 0) {
                        ptr += 3;
                        strncpy(sc->serial_number_string, ptr,
                                sizeof (sc->serial_number_string) - 1);
                }
                MXGE_NEXT_STRING(ptr);
        }

        if (found_mac)
                return 0;

abort:
        device_printf(sc->dev, "failed to parse eeprom_strings\n");

        return ENXIO;
}

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
        uint32_t val;
        unsigned long base, off;
        char *va, *cfgptr;
        device_t pdev, mcp55;
        uint16_t vendor_id, device_id, word;
        uintptr_t bus, slot, func, ivend, idev;
        uint32_t *ptr32;

        if (!mxge_nvidia_ecrc_enable)
                return;

        pdev = device_get_parent(device_get_parent(sc->dev));
        if (pdev == NULL) {
                device_printf(sc->dev, "could not find parent?\n");
                return;
        }
        vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
        device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

        if (vendor_id != 0x10de)
                return;

        base = 0;

        if (device_id == 0x005d) {
                /* ck804, base address is magic */
                base = 0xe0000000UL;
        } else if (device_id >= 0x0374 && device_id <= 0x378) {
                /* mcp55, base address stored in chipset */
                mcp55 = pci_find_bsf(0, 0, 0);
                if (mcp55 &&
                    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
                    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
                        word = pci_read_config(mcp55, 0x90, 2);
                        base = ((unsigned long)word & 0x7ffeU) << 25;
                }
        }
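        /*
         * Worked example with a hypothetical register value: if config
         * word 0x90 reads back as 0x3c00, then
         * base = (0x3c00 & 0x7ffe) << 25 = 0x7800000000; i.e. bits
         * [14:1] of the word select where the chipset has placed its
         * extended config window.
         */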
        if (!base)
                return;

        /* XXXX
           Test below is commented out because it is believed that doing
           config read/write beyond 0xff will access the config space
           for the next larger function. Uncomment this and remove
           the hacky pmap_mapdev() way of accessing config space when
           FreeBSD grows support for extended pcie config space access.
        */
#if 0
        /* See if we can, by some miracle, access the extended
           config space */
        val = pci_read_config(pdev, 0x178, 4);
        if (val != 0xffffffff) {
                val |= 0x40;
                pci_write_config(pdev, 0x178, val, 4);
                return;
        }
#endif
        /* Rather than using normal pci config space writes, we must
         * map the Nvidia config space ourselves. This is because on
         * opteron/nvidia class machines the 0xe0000000 mapping is
         * handled by the nvidia chipset; that means the internal PCI
         * device (the on-chip northbridge), or the amd-8131 bridge
         * and things behind them are not visible by this method.
         */

        BUS_READ_IVAR(device_get_parent(pdev), pdev,
                      PCI_IVAR_BUS, &bus);
        BUS_READ_IVAR(device_get_parent(pdev), pdev,
                      PCI_IVAR_SLOT, &slot);
        BUS_READ_IVAR(device_get_parent(pdev), pdev,
                      PCI_IVAR_FUNCTION, &func);
        BUS_READ_IVAR(device_get_parent(pdev), pdev,
                      PCI_IVAR_VENDOR, &ivend);
        BUS_READ_IVAR(device_get_parent(pdev), pdev,
                      PCI_IVAR_DEVICE, &idev);

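        /*
         * The offset arithmetic below follows the standard PCIe ECAM
         * layout: a 1MB window per bus and a 4KB window per function,
         * 8 functions per slot; equivalently,
         * off = base + (bus << 20) + ((slot * 8 + func) << 12).
         */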
        off = base
                + 0x00100000UL * (unsigned long)bus
                + 0x00001000UL * (unsigned long)(func
                                                 + 8 * slot);

        /* map it into the kernel */
        va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

        if (va == NULL) {
                device_printf(sc->dev, "pmap_mapdev() failed\n");
                return;
        }
        /* get a pointer to the config space mapped into the kernel */
        cfgptr = va + (off & PAGE_MASK);

        /* make sure that we can really access it */
        vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
        device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
        if (! (vendor_id == ivend && device_id == idev)) {
                device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
                              vendor_id, device_id);
                pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
                return;
        }

        ptr32 = (uint32_t*)(cfgptr + 0x178);
        val = *ptr32;

        if (val == 0xffffffff) {
                device_printf(sc->dev, "extended mapping failed\n");
                pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
                return;
        }
        *ptr32 = val | 0x40;
        pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
        if (mxge_verbose)
                device_printf(sc->dev,
                              "Enabled ECRC on upstream Nvidia bridge "
                              "at %d:%d:%d\n",
                              (int)bus, (int)slot, (int)func);
        return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
        device_printf(sc->dev,
                      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
        return;
}
#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
        mxge_cmd_t cmd;
        bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
        int status;
        uint32_t len;
        char *test = " ";

        /* Run a small DMA test.
         * The magic multipliers to the length tell the firmware
         * to do DMA read, write, or read+write tests. The
         * results are returned in cmd.data0. The upper 16
         * bits of the return is the number of transfers completed.
         * The lower 16 bits is the time in 0.5us ticks that the
         * transfers took to complete.
         */
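        /*
         * Worked example with made-up numbers: if len = 2048 and the
         * firmware returns cmd.data0 = (1000 << 16) | 2000, then 1000
         * transfers of 2048 bytes completed in 2000 * 0.5us = 1ms, and
         * (1000 * 2048 * 2) / 2000 = 2048, i.e. 2048 MB/s, which is
         * what the read_dma/write_dma sysctls report.
         */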

        len = sc->tx_boundary;

        cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
        cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
        cmd.data2 = len * 0x10000;
        status = mxge_send_cmd(sc, test_type, &cmd);
        if (status != 0) {
                test = "read";
                goto abort;
        }
        sc->read_dma = ((cmd.data0>>16) * len * 2) /
                (cmd.data0 & 0xffff);
        cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
        cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
        cmd.data2 = len * 0x1;
        status = mxge_send_cmd(sc, test_type, &cmd);
        if (status != 0) {
                test = "write";
                goto abort;
        }
        sc->write_dma = ((cmd.data0>>16) * len * 2) /
                (cmd.data0 & 0xffff);

        cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
        cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
        cmd.data2 = len * 0x10001;
        status = mxge_send_cmd(sc, test_type, &cmd);
        if (status != 0) {
                test = "read/write";
                goto abort;
        }
        sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
                (cmd.data0 & 0xffff);

abort:
        if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
                device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
                              test, status);

        return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary. Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */
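/*
 * Summary of the resulting configurations (restating the comment
 * above):
 *
 *      completions aligned?    firmware image    tx_boundary
 *      yes (or forced)         mxge_eth_z8e      4096
 *      no / unknown            mxge_ethp_z8e     2048
 */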

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
        device_t dev = sc->dev;
        int reg, status;
        uint16_t pectl;

        sc->tx_boundary = 4096;
        /*
         * Verify the max read request size was set to 4KB
         * before trying the test with 4KB.
         */
        if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
                pectl = pci_read_config(dev, reg + 0x8, 2);
                if ((pectl & (5 << 12)) != (5 << 12)) {
                        device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
                                      pectl);
                        sc->tx_boundary = 2048;
                }
        }

        /*
         * load the optimized firmware (which assumes aligned PCIe
         * completions) in order to see if it works on this host.
         */
        sc->fw_name = mxge_fw_aligned;
        status = mxge_load_firmware(sc, 1);
        if (status != 0) {
                return status;
        }

        /*
         * Enable ECRC if possible
         */
        mxge_enable_nvidia_ecrc(sc);

        /*
         * Run a DMA test which watches for unaligned completions and
         * aborts on the first one seen.
         */

        status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
        if (status == 0)
                return 0; /* keep the aligned firmware */

        if (status != E2BIG)
                device_printf(dev, "DMA test failed: %d\n", status);
        if (status == ENOSYS)
                device_printf(dev, "Falling back to ethp! "
                              "Please install up-to-date firmware\n");
        return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
        int aligned = 0;

        if (mxge_force_firmware != 0) {
                if (mxge_force_firmware == 1)
                        aligned = 1;
                else
                        aligned = 0;
                if (mxge_verbose)
                        device_printf(sc->dev,
                                      "Assuming %s completions (forced)\n",
                                      aligned ? "aligned" : "unaligned");
                goto abort;
        }

        /* if the PCIe link width is 4 or less, we can use the aligned
           firmware and skip any checks */
        if (sc->link_width != 0 && sc->link_width <= 4) {
                device_printf(sc->dev,
                              "PCIe x%d Link, expect reduced performance\n",
                              sc->link_width);
                aligned = 1;
                goto abort;
        }

        if (0 == mxge_firmware_probe(sc))
                return 0;

abort:
        if (aligned) {
                sc->fw_name = mxge_fw_aligned;
                sc->tx_boundary = 4096;
        } else {
                sc->fw_name = mxge_fw_unaligned;
                sc->tx_boundary = 2048;
        }
        return (mxge_load_firmware(sc, 0));
}

union qualhack
{
        const char *ro_char;
        char *rw_char;
};
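/*
 * Presumably a const-laundering helper: storing a pointer via ro_char
 * and reading it back via rw_char sidesteps a const qualifier without
 * an explicit cast, the same job __DECONST() does elsewhere in this
 * file.
 */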

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

        if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
                device_printf(sc->dev, "Bad firmware type: 0x%x\n",
                              be32toh(hdr->mcp_type));
                return EIO;
        }

        /* save firmware version for sysctl */
        strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
        if (mxge_verbose)
                device_printf(sc->dev, "firmware id: %s\n", hdr->version);

        ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
                &sc->fw_ver_minor, &sc->fw_ver_tiny);

        if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
              && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
                device_printf(sc->dev, "Found firmware version %s\n",
                              sc->fw_version);
                device_printf(sc->dev, "Driver needs %d.%d\n",
                              MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
                return EINVAL;
        }
        return 0;
}

static void *
z_alloc(void *nil, u_int items, u_int size)
{
        void *ptr;

        ptr = kmalloc(items * size, M_TEMP, M_NOWAIT);
        return ptr;
}

static void
z_free(void *nil, void *ptr)
{
        kfree(ptr, M_TEMP);
}

static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
        z_stream zs;
        char *inflate_buffer;
        const struct firmware *fw;
        const mcp_gen_header_t *hdr;
        unsigned hdr_offset;
        int status;
        unsigned int i;
        char dummy;
        size_t fw_len;

        fw = firmware_get(sc->fw_name);
        if (fw == NULL) {
                device_printf(sc->dev, "Could not find firmware image %s\n",
                              sc->fw_name);
                return ENOENT;
        }

        /* setup zlib and decompress f/w */
        bzero(&zs, sizeof (zs));
        zs.zalloc = z_alloc;
        zs.zfree = z_free;
        status = inflateInit(&zs);
        if (status != Z_OK) {
                status = EIO;
                goto abort_with_fw;
        }

        /* the uncompressed size is stored as the firmware version,
           which would otherwise go unused */
        fw_len = (size_t) fw->version;
        inflate_buffer = kmalloc(fw_len, M_TEMP, M_NOWAIT);
        if (inflate_buffer == NULL) {
                status = ENOMEM;
                goto abort_with_zs;
        }
        zs.avail_in = fw->datasize;
        zs.next_in = __DECONST(char *, fw->data);
        zs.avail_out = fw_len;
        zs.next_out = inflate_buffer;
        status = inflate(&zs, Z_FINISH);
        if (status != Z_STREAM_END) {
                device_printf(sc->dev, "zlib %d\n", status);
                status = EIO;
                goto abort_with_buffer;
        }

        /* check id */
        hdr_offset = htobe32(*(const uint32_t *)
                             (inflate_buffer + MCP_HEADER_PTR_OFFSET));
        if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
                device_printf(sc->dev, "Bad firmware file\n");
                status = EIO;
                goto abort_with_buffer;
        }
        hdr = (const void*)(inflate_buffer + hdr_offset);

        status = mxge_validate_firmware(sc, hdr);
        if (status != 0)
                goto abort_with_buffer;

        /* Copy the inflated firmware to NIC SRAM. */
        for (i = 0; i < fw_len; i += 256) {
                mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
                              inflate_buffer + i,
                              min(256U, (unsigned)(fw_len - i)));
                wmb();
                dummy = *sc->sram;
                wmb();
        }
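        /*
         * Note on the loop above: the throwaway read of *sc->sram
         * between the write barriers is a PCI ordering trick; a read
         * from the device pushes the preceding posted PIO writes out
         * to the NIC before the next 256-byte chunk is copied.
         */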

        *limit = fw_len;
        status = 0;
abort_with_buffer:
        kfree(inflate_buffer, M_TEMP);
abort_with_zs:
        inflateEnd(&zs);
abort_with_fw:
        firmware_put(fw, FIRMWARE_UNLOAD);
        return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
        char buf_bytes[72];
        volatile uint32_t *confirm;
        volatile char *submit;
        uint32_t *buf, dma_low, dma_high;
        int i;

        buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
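        /*
         * The expression above rounds buf_bytes up to the next 8-byte
         * boundary: (addr + 7) & ~7.  E.g. a (made-up) stack address
         * of 0x1003 becomes 0x1008; the array is oversized so the
         * rounded-up pointer always leaves room for the request.
         */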

        /* clear confirmation addr */
        confirm = (volatile uint32_t *)sc->cmd;
        *confirm = 0;
        wmb();

        /* send an rdma command to the PCIe engine, and wait for the
           response in the confirmation address. The firmware should
           write a -1 there to indicate it is alive and well
        */

        dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
        dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
        buf[0] = htobe32(dma_high);     /* confirm addr MSW */
        buf[1] = htobe32(dma_low);      /* confirm addr LSW */
        buf[2] = htobe32(0xffffffff);   /* confirm data */
        dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
        dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
        buf[3] = htobe32(dma_high);     /* dummy addr MSW */
        buf[4] = htobe32(dma_low);      /* dummy addr LSW */
        buf[5] = htobe32(enable);       /* enable? */

        submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

        mxge_pio_copy(submit, buf, 64);
        wmb();
        DELAY(1000);
        wmb();
        i = 0;
        while (*confirm != 0xffffffff && i < 20) {
                DELAY(1000);
                i++;
        }
        if (*confirm != 0xffffffff) {
                device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
                              (enable ? "enable" : "disable"), confirm,
                              *confirm);
        }
        return;
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
        mcp_cmd_t *buf;
        char buf_bytes[sizeof(*buf) + 8];
        volatile mcp_cmd_response_t *response = sc->cmd;
        volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
        uint32_t dma_low, dma_high;
        int err, sleep_total = 0;

        /* ensure buf is aligned to 8 bytes */
        buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

        buf->data0 = htobe32(data->data0);
        buf->data1 = htobe32(data->data1);
        buf->data2 = htobe32(data->data2);
        buf->cmd = htobe32(cmd);
        dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
        dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

        buf->response_addr.low = htobe32(dma_low);
        buf->response_addr.high = htobe32(dma_high);
        lockmgr(&sc->cmd_lock, LK_EXCLUSIVE);
        response->result = 0xffffffff;
        wmb();
        mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

        /* wait up to 20ms */
        err = EAGAIN;
        for (sleep_total = 0; sleep_total < 20; sleep_total++) {
                bus_dmamap_sync(sc->cmd_dma.dmat,
                                sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
                wmb();
                switch (be32toh(response->result)) {
                case 0:
                        data->data0 = be32toh(response->data);
                        err = 0;
                        break;
                case 0xffffffff:
                        DELAY(1000);
                        break;
                case MXGEFW_CMD_UNKNOWN:
                        err = ENOSYS;
                        break;
                case MXGEFW_CMD_ERROR_UNALIGNED:
                        err = E2BIG;
                        break;
                case MXGEFW_CMD_ERROR_BUSY:
                        err = EBUSY;
                        break;
                default:
                        device_printf(sc->dev,
                                      "mxge: command %d "
                                      "failed, result = %d\n",
                                      cmd, be32toh(response->result));
                        err = ENXIO;
                        break;
                }
                if (err != EAGAIN)
                        break;
        }
        if (err == EAGAIN)
                device_printf(sc->dev, "mxge: command %d timed out, "
                              "result = %d\n",
                              cmd, be32toh(response->result));
        lockmgr(&sc->cmd_lock, LK_RELEASE);
        return err;
}

static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
        struct mcp_gen_header *hdr;
        const size_t bytes = sizeof (struct mcp_gen_header);
        size_t hdr_offset;
        int status;

        /* find running firmware header */
        hdr_offset = htobe32(*(volatile uint32_t *)
                             (sc->sram + MCP_HEADER_PTR_OFFSET));

        if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
                device_printf(sc->dev,
                              "Running firmware has bad header offset (%d)\n",
                              (int)hdr_offset);
                return EIO;
        }

        /* copy header of running firmware from SRAM to host memory to
         * validate firmware */
        hdr = kmalloc(bytes, M_DEVBUF, M_NOWAIT);
        if (hdr == NULL) {
                device_printf(sc->dev, "could not kmalloc firmware hdr\n");
                return ENOMEM;
        }
        bus_space_read_region_1(rman_get_bustag(sc->mem_res),
                                rman_get_bushandle(sc->mem_res),
                                hdr_offset, (char *)hdr, bytes);
        status = mxge_validate_firmware(sc, hdr);
        kfree(hdr, M_DEVBUF);

        /*
         * check to see if adopted firmware has bug where adopting
         * it will cause broadcasts to be filtered unless the NIC
         * is kept in ALLMULTI mode
         */
        if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
            sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
                sc->adopted_rx_filter_bug = 1;
                device_printf(sc->dev, "Adopting fw %d.%d.%d: "
                              "working around rx filter bug\n",
                              sc->fw_ver_major, sc->fw_ver_minor,
                              sc->fw_ver_tiny);
        }

        return status;
}

static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
        volatile uint32_t *confirm;
        volatile char *submit;
        char buf_bytes[72];
        uint32_t *buf, size, dma_low, dma_high;
        int status, i;

        buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

        size = sc->sram_size;
        status = mxge_load_firmware_helper(sc, &size);
        if (status) {
                if (!adopt)
                        return status;
                /* Try to use the currently running firmware, if
                   it is new enough */
                status = mxge_adopt_running_firmware(sc);
                if (status) {
                        device_printf(sc->dev,
                                      "failed to adopt running firmware\n");
                        return status;
                }
                device_printf(sc->dev,
                              "Successfully adopted running firmware\n");
                if (sc->tx_boundary == 4096) {
                        device_printf(sc->dev,
                                      "Using firmware currently running on NIC"
                                      ". For optimal\n");
                        device_printf(sc->dev,
                                      "performance consider loading optimized "
                                      "firmware\n");
                }
                sc->fw_name = mxge_fw_unaligned;
                sc->tx_boundary = 2048;
                return 0;
        }
        /* clear confirmation addr */
        confirm = (volatile uint32_t *)sc->cmd;
        *confirm = 0;
        wmb();
        /* send a reload command to the bootstrap MCP, and wait for the
           response in the confirmation address. The firmware should
           write a -1 there to indicate it is alive and well
        */

        dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
        dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

        buf[0] = htobe32(dma_high);     /* confirm addr MSW */
        buf[1] = htobe32(dma_low);      /* confirm addr LSW */
        buf[2] = htobe32(0xffffffff);   /* confirm data */

        /* FIX: All newest firmware should un-protect the bottom of
           the sram before handoff. However, the very first interfaces
           do not. Therefore the handoff copy must skip the first 8 bytes
        */
        /* where the code starts*/
        buf[3] = htobe32(MXGE_FW_OFFSET + 8);
        buf[4] = htobe32(size - 8);     /* length of code */
        buf[5] = htobe32(8);            /* where to copy to */
        buf[6] = htobe32(0);            /* where to jump to */

        submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
        mxge_pio_copy(submit, buf, 64);
        wmb();
        DELAY(1000);
        wmb();
        i = 0;
        while (*confirm != 0xffffffff && i < 20) {
                DELAY(1000*10);
                i++;
                bus_dmamap_sync(sc->cmd_dma.dmat,
                                sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
        }
        if (*confirm != 0xffffffff) {
                device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
                              confirm, *confirm);
                return ENXIO;
        }
        return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
        mxge_cmd_t cmd;
        uint8_t *addr = sc->mac_addr;
        int status;

        cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
                     | (addr[2] << 8) | addr[3]);

        cmd.data1 = ((addr[4] << 8) | (addr[5]));

        status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
        return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
        mxge_cmd_t cmd;
        int status;

        if (pause)
                status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
                                       &cmd);
        else
                status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
                                       &cmd);

        if (status) {
                device_printf(sc->dev, "Failed to set flow control mode\n");
                return ENXIO;
        }
        sc->pause = pause;
        return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
        mxge_cmd_t cmd;
        int status;

        if (mxge_always_promisc)
                promisc = 1;

        if (promisc)
                status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
                                       &cmd);
        else
                status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
                                       &cmd);

        if (status) {
                device_printf(sc->dev, "Failed to set promisc mode\n");
        }
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
        mxge_cmd_t cmd;
        struct ifmultiaddr *ifma;
        struct ifnet *ifp = sc->ifp;
        int err;

        /* This firmware is known to not support multicast */
        if (!sc->fw_multicast_support)
                return;

        /* Disable multicast filtering while we play with the lists*/
        err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
        if (err != 0) {
                device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
                              " error status: %d\n", err);
                return;
        }

        if (sc->adopted_rx_filter_bug)
                return;

        if (ifp->if_flags & IFF_ALLMULTI)
                /* request to disable multicast filtering, so quit here */
                return;

        /* Flush all the filters */

        err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
        if (err != 0) {
                device_printf(sc->dev,
                              "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
                              ", error status: %d\n", err);
                return;
        }

        /* Walk the multicast list, and add each address */

        if_maddr_rlock(ifp);
        TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
                if (ifma->ifma_addr->sa_family != AF_LINK)
                        continue;
                bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
                      &cmd.data0, 4);
                bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
                      &cmd.data1, 2);
                cmd.data0 = htonl(cmd.data0);
                cmd.data1 = htonl(cmd.data1);
                err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
                if (err != 0) {
                        device_printf(sc->dev, "Failed "
                                      "MXGEFW_JOIN_MULTICAST_GROUP, error "
                                      "status: %d\n", err);
                        /* abort, leaving multicast filtering off */
                        if_maddr_runlock(ifp);
                        return;
                }
        }
        if_maddr_runlock(ifp);
        /* Enable multicast filtering */
        err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
        if (err != 0) {
                device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
                              ", error status: %d\n", err);
        }
}

static int
mxge_max_mtu(mxge_softc_t *sc)
{
        mxge_cmd_t cmd;
        int status;

        if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
                return MXGEFW_MAX_MTU - MXGEFW_PAD;

        /* try to set nbufs to see if we can
           use virtually contiguous jumbos */
        cmd.data0 = 0;
        status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
                               &cmd);
        if (status == 0)
                return MXGEFW_MAX_MTU - MXGEFW_PAD;

        /* otherwise, we're limited to MJUMPAGESIZE */
        return MJUMPAGESIZE - MXGEFW_PAD;
}

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
        struct mxge_slice_state *ss;
        mxge_rx_done_t *rx_done;
        volatile uint32_t *irq_claim;
        mxge_cmd_t cmd;
        int slice, status;

        /* try to send a reset command to the card to see if it
           is alive */
        memset(&cmd, 0, sizeof (cmd));
        status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
        if (status != 0) {
                device_printf(sc->dev, "failed reset\n");
                return ENXIO;
        }

        mxge_dummy_rdma(sc, 1);

        /* set the intrq size */
        cmd.data0 = sc->rx_ring_size;
        status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

        /*
         * Even though we already know how many slices are supported
         * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
         * has magic side effects, and must be called after a reset.
         * It must be called prior to calling any RSS related cmds,
         * including assigning an interrupt queue for anything but
         * slice 0. It must also be called *after*
         * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
         * the firmware to compute offsets.
         */

        if (sc->num_slices > 1) {
                /* ask the maximum number of slices it supports */
                status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
                                       &cmd);
                if (status != 0) {
                        device_printf(sc->dev,
                                      "failed to get number of slices\n");
                        return status;
                }
                /*
                 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
                 * to setting up the interrupt queue DMA
                 */
                cmd.data0 = sc->num_slices;
                cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
                cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
                status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
                                       &cmd);
                if (status != 0) {
                        device_printf(sc->dev,
                                      "failed to set number of slices\n");
                        return status;
                }
        }

        if (interrupts_setup) {
                /* Now exchange information about interrupts  */
                for (slice = 0; slice < sc->num_slices; slice++) {
                        rx_done = &sc->ss[slice].rx_done;
                        memset(rx_done->entry, 0, sc->rx_ring_size);
                        cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
                        cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
                        cmd.data2 = slice;
                        status |= mxge_send_cmd(sc,
                                                MXGEFW_CMD_SET_INTRQ_DMA,
                                                &cmd);
                }
        }

        status |= mxge_send_cmd(sc,
                                MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

        sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

        status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
        irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

        status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
                                &cmd);
        sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
        if (status != 0) {
                device_printf(sc->dev, "failed to set interrupt parameters\n");
                return status;
        }

        *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

        /* run a DMA benchmark */
        (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

        for (slice = 0; slice < sc->num_slices; slice++) {
                ss = &sc->ss[slice];

                ss->irq_claim = irq_claim + (2 * slice);
                /* reset mcp/driver shared state back to 0 */
                ss->rx_done.idx = 0;
                ss->rx_done.cnt = 0;
                ss->tx.req = 0;
                ss->tx.done = 0;
                ss->tx.pkt_done = 0;
                ss->tx.queue_active = 0;
                ss->tx.activate = 0;
                ss->tx.deactivate = 0;
                ss->tx.wake = 0;
                ss->tx.defrag = 0;
                ss->tx.stall = 0;
                ss->rx_big.cnt = 0;
                ss->rx_small.cnt = 0;
                ss->lro_bad_csum = 0;
                ss->lro_queued = 0;
                ss->lro_flushed = 0;
                if (ss->fw_stats != NULL) {
                        ss->fw_stats->valid = 0;
                        ss->fw_stats->send_done_count = 0;
                }
        }
        sc->rdma_tags_available = 15;
        status = mxge_update_mac_address(sc);
        mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
        mxge_change_pause(sc, sc->pause);
        mxge_set_multicast_list(sc);
        return status;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
        mxge_softc_t *sc;
        unsigned int intr_coal_delay;
        int err;

        sc = arg1;
        intr_coal_delay = sc->intr_coal_delay;
        err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
        if (err != 0) {
                return err;
        }
        if (intr_coal_delay == sc->intr_coal_delay)
                return 0;

        if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
                return EINVAL;

        lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
        *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
        sc->intr_coal_delay = intr_coal_delay;

        lockmgr(&sc->driver_lock, LK_RELEASE);
        return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
        mxge_softc_t *sc;
        unsigned int enabled;
        int err;

        sc = arg1;
        enabled = sc->pause;
        err = sysctl_handle_int(oidp, &enabled, arg2, req);
        if (err != 0) {
                return err;
        }
        if (enabled == sc->pause)
                return 0;

        lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
        err = mxge_change_pause(sc, enabled);
        lockmgr(&sc->driver_lock, LK_RELEASE);
        return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
        struct ifnet *ifp;
        int err = 0;

        ifp = sc->ifp;
        if (lro_cnt == 0)
                ifp->if_capenable &= ~IFCAP_LRO;
        else
                ifp->if_capenable |= IFCAP_LRO;
        sc->lro_cnt = lro_cnt;
        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                mxge_close(sc);
                err = mxge_open(sc);
        }
        return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
        mxge_softc_t *sc;
        unsigned int lro_cnt;
        int err;

        sc = arg1;
        lro_cnt = sc->lro_cnt;
        err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
        if (err != 0)
                return err;

        if (lro_cnt == sc->lro_cnt)
                return 0;

        if (lro_cnt > 128)
                return EINVAL;

        lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
        err = mxge_change_lro_locked(sc, lro_cnt);
        lockmgr(&sc->driver_lock, LK_RELEASE);
        return err;
}

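/*
 * Read-only sysctl shim for the firmware's big-endian counters:
 * handing sysctl_handle_int() a NULL pointer with the byte-swapped
 * value in arg2 exports the value while leaving userland nothing to
 * write back to.
 */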
static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
        int err;

        if (arg1 == NULL)
                return EFAULT;
        arg2 = be32toh(*(int *)arg1);
        arg1 = NULL;
        err = sysctl_handle_int(oidp, arg1, arg2, req);

        return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
        struct mxge_slice_state *ss;
        int slice;

        if (sc->slice_sysctl_tree == NULL)
                return;

        for (slice = 0; slice < sc->num_slices; slice++) {
                ss = &sc->ss[slice];
                if (ss == NULL || ss->sysctl_tree == NULL)
                        continue;
                sysctl_ctx_free(&ss->sysctl_ctx);
                ss->sysctl_tree = NULL;
        }
        sysctl_ctx_free(&sc->slice_sysctl_ctx);
        sc->slice_sysctl_tree = NULL;
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
        struct sysctl_ctx_list *ctx;
        struct sysctl_oid_list *children;
        mcp_irq_data_t *fw;
        struct mxge_slice_state *ss;
        int slice;
        char slice_num[8];

        ctx = &sc->sysctl_ctx;
        sysctl_ctx_init(ctx);
        sc->sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw),
                                          OID_AUTO,
                                          device_get_nameunit(sc->dev),
                                          CTLFLAG_RD, 0, "");
        if (sc->sysctl_tree == NULL) {
                device_printf(sc->dev, "can't add sysctl node\n");
                return;
        }

        children = SYSCTL_CHILDREN(sc->sysctl_tree);
        fw = sc->ss[0].fw_stats;

        /* random information */
        SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
                          "firmware_version",
                          CTLFLAG_RD, &sc->fw_version,
                          0, "firmware version");
        SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
                          "serial_number",
                          CTLFLAG_RD, &sc->serial_number_string,
                          0, "serial number");
        SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
                          "product_code",
                          CTLFLAG_RD, &sc->product_code_string,
                          0, "product code");
        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                       "pcie_link_width",
                       CTLFLAG_RD, &sc->link_width,
                       0, "PCIe link width");
        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                       "tx_boundary",
                       CTLFLAG_RD, &sc->tx_boundary,
                       0, "tx_boundary");
        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                       "write_combine",
                       CTLFLAG_RD, &sc->wc,
                       0, "write combining PIO?");
        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                       "read_dma_MBs",
                       CTLFLAG_RD, &sc->read_dma,
                       0, "DMA Read speed in MB/s");
        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                       "write_dma_MBs",
                       CTLFLAG_RD, &sc->write_dma,
                       0, "DMA Write speed in MB/s");
        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                       "read_write_dma_MBs",
                       CTLFLAG_RD, &sc->read_write_dma,
                       0, "DMA concurrent Read/Write speed in MB/s");

        /* performance related tunables */
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                        "intr_coal_delay",
                        CTLTYPE_INT|CTLFLAG_RW, sc,
                        0, mxge_change_intr_coal,
                        "I", "interrupt coalescing delay in usecs");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                        "flow_control_enabled",
                        CTLTYPE_INT|CTLFLAG_RW, sc,
                        0, mxge_change_flow_control,
                        "I", "enable flow control (pause frames)");

        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                       "deassert_wait",
                       CTLFLAG_RW, &mxge_deassert_wait,
                       0, "Wait for IRQ line to go low in ihandler");

        /* stats block from firmware is in network byte order.
           Need to swap it */
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                        "link_up",
                        CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
                        0, mxge_handle_be32,
                        "I", "link up");
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                        "rdma_tags_available",
                        CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
                        0, mxge_handle_be32,
                        "I", "rdma_tags_available");
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                        "dropped_bad_crc32",
                        CTLTYPE_INT|CTLFLAG_RD,
                        &fw->dropped_bad_crc32,
                        0, mxge_handle_be32,
                        "I", "dropped_bad_crc32");
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                        "dropped_bad_phy",
                        CTLTYPE_INT|CTLFLAG_RD,
                        &fw->dropped_bad_phy,
                        0, mxge_handle_be32,
                        "I", "dropped_bad_phy");
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                        "dropped_link_error_or_filtered",
                        CTLTYPE_INT|CTLFLAG_RD,
                        &fw->dropped_link_error_or_filtered,
                        0, mxge_handle_be32,
                        "I", "dropped_link_error_or_filtered");
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                        "dropped_link_overflow",
                        CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
                        0, mxge_handle_be32,
                        "I", "dropped_link_overflow");
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                        "dropped_multicast_filtered",
                        CTLTYPE_INT|CTLFLAG_RD,
                        &fw->dropped_multicast_filtered,
                        0, mxge_handle_be32,
                        "I", "dropped_multicast_filtered");
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                        "dropped_no_big_buffer",
                        CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
                        0, mxge_handle_be32,
                        "I", "dropped_no_big_buffer");
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                        "dropped_no_small_buffer",
                        CTLTYPE_INT|CTLFLAG_RD,
                        &fw->dropped_no_small_buffer,
                        0, mxge_handle_be32,
                        "I", "dropped_no_small_buffer");
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                        "dropped_overrun",
                        CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
                        0, mxge_handle_be32,
                        "I", "dropped_overrun");
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                        "dropped_pause",
                        CTLTYPE_INT|CTLFLAG_RD,
                        &fw->dropped_pause,
                        0, mxge_handle_be32,
                        "I", "dropped_pause");
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                        "dropped_runt",
                        CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
                        0, mxge_handle_be32,
                        "I", "dropped_runt");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                        "dropped_unicast_filtered",
                        CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
                        0, mxge_handle_be32,
                        "I", "dropped_unicast_filtered");

        /* verbose printing? */
        SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                       "verbose",
                       CTLFLAG_RW, &mxge_verbose,
                       0, "verbose printing");

        /* lro */
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
                        "lro_cnt",
                        CTLTYPE_INT|CTLFLAG_RW, sc,
                        0, mxge_change_lro,
                        "I", "number of lro merge queues");

1611 | ||
1612 | /* add counters exported for debugging from all slices */ | |
1613 | sysctl_ctx_init(&sc->slice_sysctl_ctx); | |
1614 | sc->slice_sysctl_tree = | |
1615 | SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, | |
1616 | "slice", CTLFLAG_RD, 0, ""); | |
1617 | ||
1618 | for (slice = 0; slice < sc->num_slices; slice++) { | |
1619 | ss = &sc->ss[slice]; | |
1620 | sysctl_ctx_init(&ss->sysctl_ctx); | |
1621 | ctx = &ss->sysctl_ctx; | |
1622 | children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); | |
b6737651 | 1623 | ksprintf(slice_num, "%d", slice); |
8892ea20 AE |
1624 | ss->sysctl_tree = |
1625 | SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, | |
1626 | CTLFLAG_RD, 0, ""); | |
1627 | children = SYSCTL_CHILDREN(ss->sysctl_tree); | |
1628 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1629 | "rx_small_cnt", | |
1630 | CTLFLAG_RD, &ss->rx_small.cnt, | |
1631 | 0, "rx_small_cnt"); | |
1632 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1633 | "rx_big_cnt", | |
1634 | CTLFLAG_RD, &ss->rx_big.cnt, | |
1635 | 0, "rx_small_cnt"); | |
1636 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1637 | "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, | |
1638 | 0, "number of lro merge queues flushed"); | |
1639 | ||
1640 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1641 | "lro_queued", CTLFLAG_RD, &ss->lro_queued, | |
1642 | 0, "number of frames appended to lro merge" | |
1643 | "queues"); | |
1644 | ||
1645 | #ifndef IFNET_BUF_RING | |
1646 | /* only transmit from slice 0 for now */ | |
1647 | if (slice > 0) | |
1648 | continue; | |
1649 | #endif | |
1650 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1651 | "tx_req", | |
1652 | CTLFLAG_RD, &ss->tx.req, | |
1653 | 0, "tx_req"); | |
1654 | ||
1655 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1656 | "tx_done", | |
1657 | CTLFLAG_RD, &ss->tx.done, | |
1658 | 0, "tx_done"); | |
1659 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1660 | "tx_pkt_done", | |
1661 | CTLFLAG_RD, &ss->tx.pkt_done, | |
1662 | 0, "tx_done"); | |
1663 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1664 | "tx_stall", | |
1665 | CTLFLAG_RD, &ss->tx.stall, | |
1666 | 0, "tx_stall"); | |
1667 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1668 | "tx_wake", | |
1669 | CTLFLAG_RD, &ss->tx.wake, | |
1670 | 0, "tx_wake"); | |
1671 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1672 | "tx_defrag", | |
1673 | CTLFLAG_RD, &ss->tx.defrag, | |
1674 | 0, "tx_defrag"); | |
1675 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1676 | "tx_queue_active", | |
1677 | CTLFLAG_RD, &ss->tx.queue_active, | |
1678 | 0, "tx_queue_active"); | |
1679 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1680 | "tx_activate", | |
1681 | CTLFLAG_RD, &ss->tx.activate, | |
1682 | 0, "tx_activate"); | |
1683 | SYSCTL_ADD_INT(ctx, children, OID_AUTO, | |
1684 | "tx_deactivate", | |
1685 | CTLFLAG_RD, &ss->tx.deactivate, | |
1686 | 0, "tx_deactivate"); | |
1687 | } | |
1688 | } | |
1689 | ||
1690 | /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy | |
1691 | backwards one at a time and handle ring wraps */ | |
1692 | ||
1693 | static inline void | |
1694 | mxge_submit_req_backwards(mxge_tx_ring_t *tx, | |
1695 | mcp_kreq_ether_send_t *src, int cnt) | |
1696 | { | |
1697 | int idx, starting_slot; | |
1698 | starting_slot = tx->req; | |
1699 | while (cnt > 1) { | |
1700 | cnt--; | |
1701 | idx = (starting_slot + cnt) & tx->mask; | |
1702 | mxge_pio_copy(&tx->lanai[idx], | |
1703 | &src[cnt], sizeof(*src)); | |
1704 | wmb(); | |
1705 | } | |
1706 | } | |
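/*
 * Worked example of the masked-index arithmetic above (illustrative
 * reading aid only, not part of the driver): on a 4-slot ring
 * (tx->mask == 3) with tx->req == 3 and cnt == 3, the loop copies
 * slot (3 + 2) & 3 == 1, then slot (3 + 1) & 3 == 0, and leaves
 * slot 3 -- the descriptor the NIC will parse first -- for
 * mxge_submit_req() below to write last.
 */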
1707 | ||
1708 | /* | |
1709 | * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy | |
1710 | * at most 32 bytes at a time, so as to avoid involving the software | |
1711 | * pio handler in the nic. We re-write the first segment's flags | |
1712 | * to mark them valid only after writing the entire chain | |
1713 | */ | |
1714 | ||
1715 | static inline void | |
1716 | mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, | |
1717 | int cnt) | |
1718 | { | |
1719 | int idx, i; | |
1720 | uint32_t *src_ints; | |
1721 | volatile uint32_t *dst_ints; | |
1722 | mcp_kreq_ether_send_t *srcp; | |
1723 | volatile mcp_kreq_ether_send_t *dstp, *dst; | |
1724 | uint8_t last_flags; | |
1725 | ||
1726 | idx = tx->req & tx->mask; | |
1727 | ||
1728 | last_flags = src->flags; | |
1729 | src->flags = 0; | |
1730 | wmb(); | |
1731 | dst = dstp = &tx->lanai[idx]; | |
1732 | srcp = src; | |
1733 | ||
1734 | if ((idx + cnt) < tx->mask) { | |
1735 | for (i = 0; i < (cnt - 1); i += 2) { | |
1736 | mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); | |
1737 | wmb(); /* force write every 32 bytes */ | |
1738 | srcp += 2; | |
1739 | dstp += 2; | |
1740 | } | |
1741 | } else { | |
1742 | /* submit all but the first request, and ensure | |
1743 | that it is submitted below */ | |
1744 | mxge_submit_req_backwards(tx, src, cnt); | |
1745 | i = 0; | |
1746 | } | |
1747 | if (i < cnt) { | |
1748 | /* submit the first request */ | |
1749 | mxge_pio_copy(dstp, srcp, sizeof(*src)); | |
1750 | wmb(); /* barrier before setting valid flag */ | |
1751 | } | |
1752 | ||
1753 | /* re-write the last 32 bits with the valid flags */ | |
1754 | src->flags = last_flags; | |
1755 | src_ints = (uint32_t *)src; | |
1756 | src_ints += 3; | |
1757 | dst_ints = (volatile uint32_t *)dst; | |
1758 | dst_ints += 3; | |
1759 | *dst_ints = *src_ints; | |
1760 | tx->req += cnt; | |
1761 | wmb(); | |
1762 | } | |
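/*
 * Note on the valid-flag trick above (a reading aid, assuming the
 * flags byte sits in the last 32-bit word of the 16-byte
 * mcp_kreq_ether_send_t, per the "complete solid 16-byte block"
 * comments elsewhere in this file): descriptor 0 is first copied out
 * with flags == 0 so the NIC ignores the chain, the remaining
 * descriptors are made visible with wmb(), and only then is the saved
 * flags value stored through the src_ints/dst_ints pointers, which
 * index 3 x 4 bytes into the request.
 */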
1763 | ||
1764 | #if IFCAP_TSO4 | |
1765 | ||
1766 | static void | |
1767 | mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, | |
1768 | int busdma_seg_cnt, int ip_off) | |
1769 | { | |
1770 | mxge_tx_ring_t *tx; | |
1771 | mcp_kreq_ether_send_t *req; | |
1772 | bus_dma_segment_t *seg; | |
1773 | struct ip *ip; | |
1774 | struct tcphdr *tcp; | |
1775 | uint32_t low, high_swapped; | |
1776 | int len, seglen, cum_len, cum_len_next; | |
1777 | int next_is_first, chop, cnt, rdma_count, small; | |
1778 | uint16_t pseudo_hdr_offset, cksum_offset, mss; | |
1779 | uint8_t flags, flags_next; | |
1780 | static int once; | |
1781 | ||
1782 | mss = m->m_pkthdr.tso_segsz; | |
1783 | ||
1784 | /* negative cum_len signifies to the | |
1785 | * send loop that we are still in the | |
1786 | * header portion of the TSO packet. | |
1787 | */ | |
1788 | ||
1789 | /* ensure we have the ethernet, IP and TCP | |
1790 | header together in the first mbuf, copy | |
1791 | it to a scratch buffer if not */ | |
1792 | if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { | |
1793 | m_copydata(m, 0, ip_off + sizeof (*ip), | |
1794 | ss->scratch); | |
1795 | ip = (struct ip *)(ss->scratch + ip_off); | |
1796 | } else { | |
1797 | ip = (struct ip *)(mtod(m, char *) + ip_off); | |
1798 | } | |
1799 | if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) | |
1800 | + sizeof (*tcp))) { | |
1801 | m_copydata(m, 0, ip_off + (ip->ip_hl << 2) | |
1802 | + sizeof (*tcp), ss->scratch); | |
1803 | ip = (struct ip *)(ss->scratch + ip_off); | |
1804 | } | |
1805 | ||
1806 | tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); | |
1807 | cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); | |
1808 | ||
1809 | /* TSO implies checksum offload on this hardware */ | |
1810 | cksum_offset = ip_off + (ip->ip_hl << 2); | |
1811 | flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; | |
1812 | ||
1813 | ||
1814 | /* for TSO, pseudo_hdr_offset holds mss. | |
1815 | * The firmware figures out where to put | |
1816 | * the checksum by parsing the header. */ | |
1817 | pseudo_hdr_offset = htobe16(mss); | |
1818 | ||
1819 | tx = &ss->tx; | |
1820 | req = tx->req_list; | |
1821 | seg = tx->seg_list; | |
1822 | cnt = 0; | |
1823 | rdma_count = 0; | |
1824 | /* "rdma_count" is the number of RDMAs belonging to the | |
1825 | * current packet BEFORE the current send request. For | |
1826 | * non-TSO packets, this is equal to "count". | |
1827 | * For TSO packets, rdma_count needs to be reset | |
1828 | * to 0 after a segment cut. | |
1829 | * | |
1830 | * The rdma_count field of the send request is | |
1831 | * the number of RDMAs of the packet starting at | |
1832 | * that request. For TSO send requests with one or more cuts | |
1833 | * in the middle, this is the number of RDMAs starting | |
1834 | * after the last cut in the request. All previous | |
1835 | * segments before the last cut implicitly have 1 RDMA. | |
1836 | * | |
1837 | * Since the number of RDMAs is not known beforehand, | |
1838 | * it must be filled-in retroactively - after each | |
1839 | * segmentation cut or at the end of the entire packet. | |
1840 | */ | |
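/*
 * Reading aid for the backfill in the loop below (no new logic): each
 * descriptor is stamped with a provisional req->rdma_count of 1; the
 * statement "(req - rdma_count)->rdma_count = rdma_count + 1" later
 * reaches back to the descriptor that began the current RDMA run and
 * overwrites that provisional value with the true run length.
 */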
1841 | ||
1842 | while (busdma_seg_cnt) { | |
1843 | /* Break the busdma segment up into pieces*/ | |
1844 | low = MXGE_LOWPART_TO_U32(seg->ds_addr); | |
1845 | high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); | |
1846 | len = seg->ds_len; | |
1847 | ||
1848 | while (len) { | |
1849 | flags_next = flags & ~MXGEFW_FLAGS_FIRST; | |
1850 | seglen = len; | |
1851 | cum_len_next = cum_len + seglen; | |
1852 | (req-rdma_count)->rdma_count = rdma_count + 1; | |
1853 | if (__predict_true(cum_len >= 0)) { | |
1854 | /* payload */ | |
1855 | chop = (cum_len_next > mss); | |
1856 | cum_len_next = cum_len_next % mss; | |
1857 | next_is_first = (cum_len_next == 0); | |
1858 | flags |= chop * MXGEFW_FLAGS_TSO_CHOP; | |
1859 | flags_next |= next_is_first * | |
1860 | MXGEFW_FLAGS_FIRST; | |
1861 | rdma_count |= -(chop | next_is_first); | |
1862 | rdma_count += chop & !next_is_first; | |
1863 | } else if (cum_len_next >= 0) { | |
1864 | /* header ends */ | |
1865 | rdma_count = -1; | |
1866 | cum_len_next = 0; | |
1867 | seglen = -cum_len; | |
1868 | small = (mss <= MXGEFW_SEND_SMALL_SIZE); | |
1869 | flags_next = MXGEFW_FLAGS_TSO_PLD | | |
1870 | MXGEFW_FLAGS_FIRST | | |
1871 | (small * MXGEFW_FLAGS_SMALL); | |
1872 | } | |
1873 | ||
1874 | req->addr_high = high_swapped; | |
1875 | req->addr_low = htobe32(low); | |
1876 | req->pseudo_hdr_offset = pseudo_hdr_offset; | |
1877 | req->pad = 0; | |
1878 | req->rdma_count = 1; | |
1879 | req->length = htobe16(seglen); | |
1880 | req->cksum_offset = cksum_offset; | |
1881 | req->flags = flags | ((cum_len & 1) * | |
1882 | MXGEFW_FLAGS_ALIGN_ODD); | |
1883 | low += seglen; | |
1884 | len -= seglen; | |
1885 | cum_len = cum_len_next; | |
1886 | flags = flags_next; | |
1887 | req++; | |
1888 | cnt++; | |
1889 | rdma_count++; | |
1890 | if (__predict_false(cksum_offset > seglen)) | |
1891 | cksum_offset -= seglen; | |
1892 | else | |
1893 | cksum_offset = 0; | |
1894 | if (__predict_false(cnt > tx->max_desc)) | |
1895 | goto drop; | |
1896 | } | |
1897 | busdma_seg_cnt--; | |
1898 | seg++; | |
1899 | } | |
1900 | (req-rdma_count)->rdma_count = rdma_count; | |
1901 | ||
1902 | do { | |
1903 | req--; | |
1904 | req->flags |= MXGEFW_FLAGS_TSO_LAST; | |
1905 | } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); | |
1906 | ||
1907 | tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; | |
1908 | mxge_submit_req(tx, tx->req_list, cnt); | |
1909 | #ifdef IFNET_BUF_RING | |
1910 | if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { | |
1911 | /* tell the NIC to start polling this slice */ | |
1912 | *tx->send_go = 1; | |
1913 | tx->queue_active = 1; | |
1914 | tx->activate++; | |
1915 | wmb(); | |
1916 | } | |
1917 | #endif | |
1918 | return; | |
1919 | ||
1920 | drop: | |
1921 | bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); | |
1922 | m_freem(m); | |
1923 | ss->oerrors++; | |
1924 | if (!once) { | |
1925 | printf("tx->max_desc exceeded via TSO!\n"); | |
1926 | printf("mss = %d, %ld, %d!\n", mss, | |
1927 | (long)seg - (long)tx->seg_list, tx->max_desc); | |
1928 | once = 1; | |
1929 | } | |
1930 | return; | |
1931 | ||
1932 | } | |
1933 | ||
1934 | #endif /* IFCAP_TSO4 */ | |
1935 | ||
1936 | #ifdef MXGE_NEW_VLAN_API | |
1937 | /* | |
1938 | * We reproduce the software vlan tag insertion from | |
1939 | * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" | |
1940 | * vlan tag insertion. We need to advertise this in order to have the | |
1941 | * vlan interface respect our csum offload flags. | |
1942 | */ | |
1943 | static struct mbuf * | |
1944 | mxge_vlan_tag_insert(struct mbuf *m) | |
1945 | { | |
1946 | struct ether_vlan_header *evl; | |
1947 | ||
1948 | M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); | |
1949 | if (__predict_false(m == NULL)) | |
1950 | return NULL; | |
1951 | if (m->m_len < sizeof(*evl)) { | |
1952 | m = m_pullup(m, sizeof(*evl)); | |
1953 | if (__predict_false(m == NULL)) | |
1954 | return NULL; | |
1955 | } | |
1956 | /* | |
1957 | * Transform the Ethernet header into an Ethernet header | |
1958 | * with 802.1Q encapsulation. | |
1959 | */ | |
1960 | evl = mtod(m, struct ether_vlan_header *); | |
1961 | bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, | |
1962 | (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); | |
1963 | evl->evl_encap_proto = htons(ETHERTYPE_VLAN); | |
1964 | evl->evl_tag = htons(m->m_pkthdr.ether_vtag); | |
1965 | m->m_flags &= ~M_VLANTAG; | |
1966 | return m; | |
1967 | } | |
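/*
 * Header layout sketch for the transform above (illustrative): after
 * M_PREPEND the 12 address bytes are slid down over the fresh 4-byte
 * gap, so dst(6) src(6) type(2) becomes
 * dst(6) src(6) 0x8100(2) tag(2) type(2); the original ethertype is
 * already sitting at evl_proto, which is why only evl_encap_proto and
 * evl_tag need to be written.
 */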
1968 | #endif /* MXGE_NEW_VLAN_API */ | |
1969 | ||
1970 | static void | |
1971 | mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) | |
1972 | { | |
1973 | mxge_softc_t *sc; | |
1974 | mcp_kreq_ether_send_t *req; | |
1975 | bus_dma_segment_t *seg; | |
1976 | struct mbuf *m_tmp; | |
1977 | struct ifnet *ifp; | |
1978 | mxge_tx_ring_t *tx; | |
1979 | struct ip *ip; | |
1980 | int cnt, cum_len, err, i, idx, odd_flag, ip_off; | |
1981 | uint16_t pseudo_hdr_offset; | |
1982 | uint8_t flags, cksum_offset; | |
1983 | ||
1984 | ||
1985 | sc = ss->sc; | |
1986 | ifp = sc->ifp; | |
1987 | tx = &ss->tx; | |
1988 | ||
1989 | ip_off = sizeof (struct ether_header); | |
1990 | #ifdef MXGE_NEW_VLAN_API | |
1991 | if (m->m_flags & M_VLANTAG) { | |
1992 | m = mxge_vlan_tag_insert(m); | |
1993 | if (__predict_false(m == NULL)) | |
1994 | goto drop; | |
1995 | ip_off += ETHER_VLAN_ENCAP_LEN; | |
1996 | } | |
1997 | #endif | |
1998 | /* (try to) map the frame for DMA */ | |
1999 | idx = tx->req & tx->mask; | |
2000 | err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, | |
2001 | m, tx->seg_list, &cnt, | |
2002 | BUS_DMA_NOWAIT); | |
2003 | if (__predict_false(err == EFBIG)) { | |
2004 | /* Too many segments in the chain. Try | |
2005 | to defrag */ | |
2006 | m_tmp = m_defrag(m, M_NOWAIT); | |
2007 | if (m_tmp == NULL) { | |
2008 | goto drop; | |
2009 | } | |
2010 | ss->tx.defrag++; | |
2011 | m = m_tmp; | |
2012 | err = bus_dmamap_load_mbuf_sg(tx->dmat, | |
2013 | tx->info[idx].map, | |
2014 | m, tx->seg_list, &cnt, | |
2015 | BUS_DMA_NOWAIT); | |
2016 | } | |
2017 | if (__predict_false(err != 0)) { | |
2018 | device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" | |
2019 | " packet len = %d\n", err, m->m_pkthdr.len); | |
2020 | goto drop; | |
2021 | } | |
2022 | bus_dmamap_sync(tx->dmat, tx->info[idx].map, | |
2023 | BUS_DMASYNC_PREWRITE); | |
2024 | tx->info[idx].m = m; | |
2025 | ||
2026 | #if IFCAP_TSO4 | |
2027 | /* TSO is different enough, we handle it in another routine */ | |
2028 | if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { | |
2029 | mxge_encap_tso(ss, m, cnt, ip_off); | |
2030 | return; | |
2031 | } | |
2032 | #endif | |
2033 | ||
2034 | req = tx->req_list; | |
2035 | cksum_offset = 0; | |
2036 | pseudo_hdr_offset = 0; | |
2037 | flags = MXGEFW_FLAGS_NO_TSO; | |
2038 | ||
2039 | /* checksum offloading? */ | |
2040 | if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { | |
2041 | /* ensure ip header is in first mbuf, copy | |
2042 | it to a scratch buffer if not */ | |
2043 | if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { | |
2044 | m_copydata(m, 0, ip_off + sizeof (*ip), | |
2045 | ss->scratch); | |
2046 | ip = (struct ip *)(ss->scratch + ip_off); | |
2047 | } else { | |
2048 | ip = (struct ip *)(mtod(m, char *) + ip_off); | |
2049 | } | |
2050 | cksum_offset = ip_off + (ip->ip_hl << 2); | |
2051 | pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; | |
2052 | pseudo_hdr_offset = htobe16(pseudo_hdr_offset); | |
2053 | req->cksum_offset = cksum_offset; | |
2054 | flags |= MXGEFW_FLAGS_CKSUM; | |
2055 | odd_flag = MXGEFW_FLAGS_ALIGN_ODD; | |
2056 | } else { | |
2057 | odd_flag = 0; | |
2058 | } | |
2059 | if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) | |
2060 | flags |= MXGEFW_FLAGS_SMALL; | |
2061 | ||
2062 | /* convert segments into a request list */ | |
2063 | cum_len = 0; | |
2064 | seg = tx->seg_list; | |
2065 | req->flags = MXGEFW_FLAGS_FIRST; | |
2066 | for (i = 0; i < cnt; i++) { | |
2067 | req->addr_low = | |
2068 | htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); | |
2069 | req->addr_high = | |
2070 | htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); | |
2071 | req->length = htobe16(seg->ds_len); | |
2072 | req->cksum_offset = cksum_offset; | |
2073 | if (cksum_offset > seg->ds_len) | |
2074 | cksum_offset -= seg->ds_len; | |
2075 | else | |
2076 | cksum_offset = 0; | |
2077 | req->pseudo_hdr_offset = pseudo_hdr_offset; | |
2078 | req->pad = 0; /* complete solid 16-byte block */ | |
2079 | req->rdma_count = 1; | |
2080 | req->flags |= flags | ((cum_len & 1) * odd_flag); | |
2081 | cum_len += seg->ds_len; | |
2082 | seg++; | |
2083 | req++; | |
2084 | req->flags = 0; | |
2085 | } | |
2086 | req--; | |
2087 | /* pad runts to 60 bytes */ | |
2088 | if (cum_len < 60) { | |
2089 | req++; | |
2090 | req->addr_low = | |
2091 | htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); | |
2092 | req->addr_high = | |
2093 | htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); | |
2094 | req->length = htobe16(60 - cum_len); | |
2095 | req->cksum_offset = 0; | |
2096 | req->pseudo_hdr_offset = pseudo_hdr_offset; | |
2097 | req->pad = 0; /* complete solid 16-byte block */ | |
2098 | req->rdma_count = 1; | |
2099 | req->flags |= flags | ((cum_len & 1) * odd_flag); | |
2100 | cnt++; | |
2101 | } | |
2102 | ||
2103 | tx->req_list[0].rdma_count = cnt; | |
2104 | #if 0 | |
2105 | /* print what the firmware will see */ | |
2106 | for (i = 0; i < cnt; i++) { | |
2107 | printf("%d: addr: 0x%x 0x%x len:%d pso%d," | |
2108 | "cso:%d, flags:0x%x, rdma:%d\n", | |
2109 | i, (int)ntohl(tx->req_list[i].addr_high), | |
2110 | (int)ntohl(tx->req_list[i].addr_low), | |
2111 | (int)ntohs(tx->req_list[i].length), | |
2112 | (int)ntohs(tx->req_list[i].pseudo_hdr_offset), | |
2113 | tx->req_list[i].cksum_offset, tx->req_list[i].flags, | |
2114 | tx->req_list[i].rdma_count); | |
2115 | } | |
2116 | printf("--------------\n"); | |
2117 | #endif | |
2118 | tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; | |
2119 | mxge_submit_req(tx, tx->req_list, cnt); | |
2120 | #ifdef IFNET_BUF_RING | |
2121 | if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { | |
2122 | /* tell the NIC to start polling this slice */ | |
2123 | *tx->send_go = 1; | |
2124 | tx->queue_active = 1; | |
2125 | tx->activate++; | |
2126 | wmb(); | |
2127 | } | |
2128 | #endif | |
2129 | return; | |
2130 | ||
2131 | drop: | |
2132 | m_freem(m); | |
2133 | ss->oerrors++; | |
2134 | return; | |
2135 | } | |
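/*
 * Runt-padding note (a sketch of the intent, not new behavior): the
 * wire requires 60 bytes ahead of the FCS, so a short frame gains one
 * extra descriptor pointing at the shared zeropad_dma block and the
 * NIC DMAs the missing (60 - cum_len) zero bytes itself, sparing the
 * host from touching or reallocating the mbuf chain.
 */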
2136 | ||
2137 | #ifdef IFNET_BUF_RING | |
2138 | static void | |
2139 | mxge_qflush(struct ifnet *ifp) | |
2140 | { | |
2141 | mxge_softc_t *sc = ifp->if_softc; | |
2142 | mxge_tx_ring_t *tx; | |
2143 | struct mbuf *m; | |
2144 | int slice; | |
2145 | ||
2146 | for (slice = 0; slice < sc->num_slices; slice++) { | |
2147 | tx = &sc->ss[slice].tx; | |
e8a47a7f | 2148 | lockmgr(&tx->lock, LK_EXCLUSIVE); |
8892ea20 AE |
2149 | while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) |
2150 | m_freem(m); | |
e8a47a7f | 2151 | lockmgr(&tx->lock, LK_RELEASE); |
8892ea20 AE |
2152 | } |
2153 | if_qflush(ifp); | |
2154 | } | |
2155 | ||
2156 | static inline void | |
2157 | mxge_start_locked(struct mxge_slice_state *ss) | |
2158 | { | |
2159 | mxge_softc_t *sc; | |
2160 | struct mbuf *m; | |
2161 | struct ifnet *ifp; | |
2162 | mxge_tx_ring_t *tx; | |
2163 | ||
2164 | sc = ss->sc; | |
2165 | ifp = sc->ifp; | |
2166 | tx = &ss->tx; | |
2167 | ||
2168 | while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { | |
2169 | m = drbr_dequeue(ifp, tx->br); | |
2170 | if (m == NULL) { | |
2171 | return; | |
2172 | } | |
2173 | /* let BPF see it */ | |
2174 | BPF_MTAP(ifp, m); | |
2175 | ||
2176 | /* give it to the nic */ | |
2177 | mxge_encap(ss, m); | |
2178 | } | |
2179 | /* ran out of transmit slots */ | |
2180 | if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) | |
2181 | && (!drbr_empty(ifp, tx->br))) { | |
2182 | ss->if_drv_flags |= IFF_DRV_OACTIVE; | |
2183 | tx->stall++; | |
2184 | } | |
2185 | } | |
2186 | ||
2187 | static int | |
2188 | mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) | |
2189 | { | |
2190 | mxge_softc_t *sc; | |
2191 | struct ifnet *ifp; | |
2192 | mxge_tx_ring_t *tx; | |
2193 | int err; | |
2194 | ||
2195 | sc = ss->sc; | |
2196 | ifp = sc->ifp; | |
2197 | tx = &ss->tx; | |
2198 | ||
2199 | if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != | |
2200 | IFF_DRV_RUNNING) { | |
2201 | err = drbr_enqueue(ifp, tx->br, m); | |
2202 | return (err); | |
2203 | } | |
2204 | ||
2205 | if (drbr_empty(ifp, tx->br) && | |
2206 | ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { | |
2207 | /* let BPF see it */ | |
2208 | BPF_MTAP(ifp, m); | |
2209 | /* give it to the nic */ | |
2210 | mxge_encap(ss, m); | |
2211 | } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { | |
2212 | return (err); | |
2213 | } | |
2214 | if (!drbr_empty(ifp, tx->br)) | |
2215 | mxge_start_locked(ss); | |
2216 | return (0); | |
2217 | } | |
2218 | ||
2219 | static int | |
2220 | mxge_transmit(struct ifnet *ifp, struct mbuf *m) | |
2221 | { | |
2222 | mxge_softc_t *sc = ifp->if_softc; | |
2223 | struct mxge_slice_state *ss; | |
2224 | mxge_tx_ring_t *tx; | |
2225 | int err = 0; | |
2226 | int slice; | |
2227 | ||
2228 | slice = m->m_pkthdr.flowid; | |
2229 | slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ | |
2230 | ||
2231 | ss = &sc->ss[slice]; | |
2232 | tx = &ss->tx; | |
2233 | ||
e8a47a7f | 2234 | if (lockmgr(&tx->lock, LK_EXCLUSIVE|LK_NOWAIT) == 0) {
8892ea20 | 2235 | err = mxge_transmit_locked(ss, m); |
e8a47a7f | 2236 | lockmgr(&tx->lock, LK_RELEASE); |
8892ea20 AE |
2237 | } else { |
2238 | err = drbr_enqueue(ifp, tx->br, m); | |
2239 | } | |
2240 | ||
2241 | return (err); | |
2242 | } | |
2243 | ||
2244 | #else | |
2245 | ||
2246 | static inline void | |
2247 | mxge_start_locked(struct mxge_slice_state *ss) | |
2248 | { | |
2249 | mxge_softc_t *sc; | |
2250 | struct mbuf *m; | |
2251 | struct ifnet *ifp; | |
2252 | mxge_tx_ring_t *tx; | |
2253 | ||
2254 | sc = ss->sc; | |
2255 | ifp = sc->ifp; | |
2256 | tx = &ss->tx; | |
2257 | while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { | |
2258 | IFQ_DRV_DEQUEUE(&ifp->if_snd, m); | |
2259 | if (m == NULL) { | |
2260 | return; | |
2261 | } | |
2262 | /* let BPF see it */ | |
2263 | BPF_MTAP(ifp, m); | |
2264 | ||
2265 | /* give it to the nic */ | |
2266 | mxge_encap(ss, m); | |
2267 | } | |
2268 | /* ran out of transmit slots */ | |
2269 | if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { | |
2270 | sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; | |
2271 | tx->stall++; | |
2272 | } | |
2273 | } | |
2274 | #endif | |
2275 | static void | |
2276 | mxge_start(struct ifnet *ifp) | |
2277 | { | |
2278 | mxge_softc_t *sc = ifp->if_softc; | |
2279 | struct mxge_slice_state *ss; | |
2280 | ||
2281 | /* only use the first slice for now */ | |
2282 | ss = &sc->ss[0]; | |
e8a47a7f | 2283 | lockmgr(&ss->tx.lock, LK_EXCLUSIVE); |
8892ea20 | 2284 | mxge_start_locked(ss); |
e8a47a7f | 2285 | lockmgr(&ss->tx.lock, LK_RELEASE); |
8892ea20 AE |
2286 | } |
2287 | ||
2288 | /* | |
2289 | * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy | |
2290 | * at most 32 bytes at a time, so as to avoid involving the software | |
2291 | * pio handler in the nic. We re-write the first segment's low | |
2292 | * DMA address to mark it valid only after we write the entire chunk | |
2293 | * in a burst | |
2294 | */ | |
2295 | static inline void | |
2296 | mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, | |
2297 | mcp_kreq_ether_recv_t *src) | |
2298 | { | |
2299 | uint32_t low; | |
2300 | ||
2301 | low = src->addr_low; | |
2302 | src->addr_low = 0xffffffff; | |
2303 | mxge_pio_copy(dst, src, 4 * sizeof (*src)); | |
2304 | wmb(); | |
2305 | mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); | |
2306 | wmb(); | |
2307 | src->addr_low = low; | |
2308 | dst->addr_low = low; | |
2309 | wmb(); | |
2310 | } | |
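/*
 * Sizing note (assumes mcp_kreq_ether_recv_t is a pair of 32-bit
 * words, i.e. 8 bytes): 4 * sizeof(*src) is exactly 32 bytes, so each
 * of the two copies above is a single write-combining burst, and
 * re-arming addr_low of descriptor 0 last keeps the NIC from chasing
 * a half-written batch of 8 receive buffers.
 */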
2311 | ||
2312 | static int | |
2313 | mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) | |
2314 | { | |
2315 | bus_dma_segment_t seg; | |
2316 | struct mbuf *m; | |
2317 | mxge_rx_ring_t *rx = &ss->rx_small; | |
2318 | int cnt, err; | |
2319 | ||
2320 | m = m_gethdr(M_DONTWAIT, MT_DATA); | |
2321 | if (m == NULL) { | |
2322 | rx->alloc_fail++; | |
2323 | err = ENOBUFS; | |
2324 | goto done; | |
2325 | } | |
2326 | m->m_len = MHLEN; | |
2327 | err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, | |
2328 | &seg, &cnt, BUS_DMA_NOWAIT); | |
2329 | if (err != 0) { | |
2330 | m_free(m); | |
2331 | goto done; | |
2332 | } | |
2333 | rx->info[idx].m = m; | |
2334 | rx->shadow[idx].addr_low = | |
2335 | htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); | |
2336 | rx->shadow[idx].addr_high = | |
2337 | htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); | |
2338 | ||
2339 | done: | |
2340 | if ((idx & 7) == 7) | |
2341 | mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); | |
2342 | return err; | |
2343 | } | |
2344 | ||
2345 | static int | |
2346 | mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) | |
2347 | { | |
2348 | bus_dma_segment_t seg[3]; | |
2349 | struct mbuf *m; | |
2350 | mxge_rx_ring_t *rx = &ss->rx_big; | |
2351 | int cnt, err, i; | |
2352 | ||
2353 | if (rx->cl_size == MCLBYTES) | |
2354 | m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); | |
2355 | else | |
2356 | m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); | |
2357 | if (m == NULL) { | |
2358 | rx->alloc_fail++; | |
2359 | err = ENOBUFS; | |
2360 | goto done; | |
2361 | } | |
2362 | m->m_len = rx->mlen; | |
2363 | err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, | |
2364 | seg, &cnt, BUS_DMA_NOWAIT); | |
2365 | if (err != 0) { | |
2366 | m_free(m); | |
2367 | goto done; | |
2368 | } | |
2369 | rx->info[idx].m = m; | |
2370 | rx->shadow[idx].addr_low = | |
2371 | htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); | |
2372 | rx->shadow[idx].addr_high = | |
2373 | htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); | |
2374 | ||
2375 | #if MXGE_VIRT_JUMBOS | |
2376 | for (i = 1; i < cnt; i++) { | |
2377 | rx->shadow[idx + i].addr_low = | |
2378 | htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); | |
2379 | rx->shadow[idx + i].addr_high = | |
2380 | htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); | |
2381 | } | |
2382 | #endif | |
2383 | ||
2384 | done: | |
2385 | for (i = 0; i < rx->nbufs; i++) { | |
2386 | if ((idx & 7) == 7) { | |
2387 | mxge_submit_8rx(&rx->lanai[idx - 7], | |
2388 | &rx->shadow[idx - 7]); | |
2389 | } | |
2390 | idx++; | |
2391 | } | |
2392 | return err; | |
2393 | } | |
2394 | ||
2395 | /* | |
2396 | * Myri10GE hardware checksums are not valid if the sender | |
2397 | * padded the frame with non-zero padding. This is because | |
2398 | * the firmware just does a simple 16-bit 1s complement | |
2399 | * checksum across the entire frame, excluding the first 14 | |
2400 | * bytes. It is best to simply to check the checksum and | |
2401 | * tell the stack about it only if the checksum is good | |
2402 | */ | |
2403 | ||
2404 | static inline uint16_t | |
2405 | mxge_rx_csum(struct mbuf *m, int csum) | |
2406 | { | |
2407 | struct ether_header *eh; | |
2408 | struct ip *ip; | |
2409 | uint16_t c; | |
2410 | ||
2411 | eh = mtod(m, struct ether_header *); | |
2412 | ||
2413 | /* only deal with IPv4 TCP & UDP for now */ | |
2414 | if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) | |
2415 | return 1; | |
2416 | ip = (struct ip *)(eh + 1); | |
2417 | if (__predict_false(ip->ip_p != IPPROTO_TCP && | |
2418 | ip->ip_p != IPPROTO_UDP)) | |
2419 | return 1; | |
2420 | #ifdef INET | |
2421 | c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, | |
2422 | htonl(ntohs(csum) + ntohs(ip->ip_len) - | |
2423 | (ip->ip_hl << 2) + ip->ip_p)); | |
2424 | #else | |
2425 | c = 1; | |
2426 | #endif | |
2427 | c ^= 0xffff; | |
2428 | return (c); | |
2429 | } | |
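/*
 * Worked example of the check above (illustrative): for an intact
 * TCP/IPv4 frame, the NIC's ones-complement sum over everything past
 * the 14 Ethernet bytes, folded together with the pseudo-header via
 * in_pseudo(), comes out to 0xffff, so the final "c ^= 0xffff" yields
 * 0 -- hence callers treat a zero return from mxge_rx_csum() as
 * "checksum good".
 */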
2430 | ||
2431 | static void | |
2432 | mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) | |
2433 | { | |
2434 | struct ether_vlan_header *evl; | |
2435 | struct ether_header *eh; | |
2436 | uint32_t partial; | |
2437 | ||
2438 | evl = mtod(m, struct ether_vlan_header *); | |
2439 | eh = mtod(m, struct ether_header *); | |
2440 | ||
2441 | /* | |
2442 | * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes | |
2443 | * after what the firmware thought was the end of the ethernet | |
2444 | * header. | |
2445 | */ | |
2446 | ||
2447 | /* put checksum into host byte order */ | |
2448 | *csum = ntohs(*csum); | |
2449 | partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); | |
2450 | (*csum) += ~partial; | |
2451 | (*csum) += ((*csum) < ~partial); | |
2452 | (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); | |
2453 | (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); | |
2454 | ||
2455 | /* restore checksum to network byte order; | |
2456 | later consumers expect this */ | |
2457 | *csum = htons(*csum); | |
2458 | ||
2459 | /* save the tag */ | |
2460 | #ifdef MXGE_NEW_VLAN_API | |
2461 | m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); | |
2462 | #else | |
2463 | { | |
2464 | struct m_tag *mtag; | |
2465 | mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), | |
2466 | M_NOWAIT); | |
2467 | if (mtag == NULL) | |
2468 | return; | |
2469 | VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); | |
2470 | m_tag_prepend(m, mtag); | |
2471 | } | |
2472 | ||
2473 | #endif | |
2474 | m->m_flags |= M_VLANTAG; | |
2475 | ||
2476 | /* | |
2477 | * Remove the 802.1q header by copying the Ethernet | |
2478 | * addresses over it and adjusting the beginning of | |
2479 | * the data in the mbuf. The encapsulated Ethernet | |
2480 | * type field is already in place. | |
2481 | */ | |
2482 | bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, | |
2483 | ETHER_HDR_LEN - ETHER_TYPE_LEN); | |
2484 | m_adj(m, ETHER_VLAN_ENCAP_LEN); | |
2485 | } | |
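/*
 * The checksum surgery above is standard ones-complement arithmetic
 * (reading aid only): adding ~partial subtracts the 32-bit word that
 * held the VLAN encapsulation from the hardware sum, the
 * "(*csum) < ~partial" term re-injects the end-around carry, and the
 * two fold lines compress the running 32-bit value back into 16 bits.
 */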
2486 | ||
2487 | ||
2488 | static inline void | |
2489 | mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) | |
2490 | { | |
2491 | mxge_softc_t *sc; | |
2492 | struct ifnet *ifp; | |
2493 | struct mbuf *m; | |
2494 | struct ether_header *eh; | |
2495 | mxge_rx_ring_t *rx; | |
2496 | bus_dmamap_t old_map; | |
2497 | int idx; | |
2498 | uint16_t tcpudp_csum; | |
2499 | ||
2500 | sc = ss->sc; | |
2501 | ifp = sc->ifp; | |
2502 | rx = &ss->rx_big; | |
2503 | idx = rx->cnt & rx->mask; | |
2504 | rx->cnt += rx->nbufs; | |
2505 | /* save a pointer to the received mbuf */ | |
2506 | m = rx->info[idx].m; | |
2507 | /* try to replace the received mbuf */ | |
2508 | if (mxge_get_buf_big(ss, rx->extra_map, idx)) { | |
2509 | /* drop the frame -- the old mbuf is re-cycled */ | |
2510 | ifp->if_ierrors++; | |
2511 | return; | |
2512 | } | |
2513 | ||
2514 | /* unmap the received buffer */ | |
2515 | old_map = rx->info[idx].map; | |
2516 | bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); | |
2517 | bus_dmamap_unload(rx->dmat, old_map); | |
2518 | ||
2519 | /* swap the bus_dmamap_t's */ | |
2520 | rx->info[idx].map = rx->extra_map; | |
2521 | rx->extra_map = old_map; | |
2522 | ||
2523 | /* mcp implicitly skips 1st 2 bytes so that packet is properly | |
2524 | * aligned */ | |
2525 | m->m_data += MXGEFW_PAD; | |
2526 | ||
2527 | m->m_pkthdr.rcvif = ifp; | |
2528 | m->m_len = m->m_pkthdr.len = len; | |
2529 | ss->ipackets++; | |
2530 | eh = mtod(m, struct ether_header *); | |
2531 | if (eh->ether_type == htons(ETHERTYPE_VLAN)) { | |
2532 | mxge_vlan_tag_remove(m, &csum); | |
2533 | } | |
2534 | /* if the checksum is valid, mark it in the mbuf header */ | |
2535 | if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { | |
2536 | if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) | |
2537 | return; | |
2538 | /* otherwise, it was a UDP frame, or a TCP frame which | |
2539 | we could not do LRO on. Tell the stack that the | |
2540 | checksum is good */ | |
2541 | m->m_pkthdr.csum_data = 0xffff; | |
2542 | m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; | |
2543 | } | |
2544 | /* flowid only valid if RSS hashing is enabled */ | |
2545 | if (sc->num_slices > 1) { | |
2546 | m->m_pkthdr.flowid = (ss - sc->ss); | |
2547 | m->m_flags |= M_FLOWID; | |
2548 | } | |
2549 | /* pass the frame up the stack */ | |
2550 | (*ifp->if_input)(ifp, m); | |
2551 | } | |
2552 | ||
2553 | static inline void | |
2554 | mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) | |
2555 | { | |
2556 | mxge_softc_t *sc; | |
2557 | struct ifnet *ifp; | |
2558 | struct ether_header *eh; | |
2559 | struct mbuf *m; | |
2560 | mxge_rx_ring_t *rx; | |
2561 | bus_dmamap_t old_map; | |
2562 | int idx; | |
2563 | uint16_t tcpudp_csum; | |
2564 | ||
2565 | sc = ss->sc; | |
2566 | ifp = sc->ifp; | |
2567 | rx = &ss->rx_small; | |
2568 | idx = rx->cnt & rx->mask; | |
2569 | rx->cnt++; | |
2570 | /* save a pointer to the received mbuf */ | |
2571 | m = rx->info[idx].m; | |
2572 | /* try to replace the received mbuf */ | |
2573 | if (mxge_get_buf_small(ss, rx->extra_map, idx)) { | |
2574 | /* drop the frame -- the old mbuf is re-cycled */ | |
2575 | ifp->if_ierrors++; | |
2576 | return; | |
2577 | } | |
2578 | ||
2579 | /* unmap the received buffer */ | |
2580 | old_map = rx->info[idx].map; | |
2581 | bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); | |
2582 | bus_dmamap_unload(rx->dmat, old_map); | |
2583 | ||
2584 | /* swap the bus_dmamap_t's */ | |
2585 | rx->info[idx].map = rx->extra_map; | |
2586 | rx->extra_map = old_map; | |
2587 | ||
2588 | /* mcp implicitly skips 1st 2 bytes so that packet is properly | |
2589 | * aligned */ | |
2590 | m->m_data += MXGEFW_PAD; | |
2591 | ||
2592 | m->m_pkthdr.rcvif = ifp; | |
2593 | m->m_len = m->m_pkthdr.len = len; | |
2594 | ss->ipackets++; | |
2595 | eh = mtod(m, struct ether_header *); | |
2596 | if (eh->ether_type == htons(ETHERTYPE_VLAN)) { | |
2597 | mxge_vlan_tag_remove(m, &csum); | |
2598 | } | |
2599 | /* if the checksum is valid, mark it in the mbuf header */ | |
2600 | if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { | |
2601 | if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) | |
2602 | return; | |
2603 | /* otherwise, it was a UDP frame, or a TCP frame which | |
2604 | we could not do LRO on. Tell the stack that the | |
2605 | checksum is good */ | |
2606 | m->m_pkthdr.csum_data = 0xffff; | |
2607 | m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; | |
2608 | } | |
2609 | /* flowid only valid if RSS hashing is enabled */ | |
2610 | if (sc->num_slices > 1) { | |
2611 | m->m_pkthdr.flowid = (ss - sc->ss); | |
2612 | m->m_flags |= M_FLOWID; | |
2613 | } | |
2614 | /* pass the frame up the stack */ | |
2615 | (*ifp->if_input)(ifp, m); | |
2616 | } | |
2617 | ||
2618 | static inline void | |
2619 | mxge_clean_rx_done(struct mxge_slice_state *ss) | |
2620 | { | |
2621 | mxge_rx_done_t *rx_done = &ss->rx_done; | |
2622 | int limit = 0; | |
2623 | uint16_t length; | |
2624 | uint16_t checksum; | |
2625 | ||
2626 | ||
2627 | while (rx_done->entry[rx_done->idx].length != 0) { | |
2628 | length = ntohs(rx_done->entry[rx_done->idx].length); | |
2629 | rx_done->entry[rx_done->idx].length = 0; | |
2630 | checksum = rx_done->entry[rx_done->idx].checksum; | |
2631 | if (length <= (MHLEN - MXGEFW_PAD)) | |
2632 | mxge_rx_done_small(ss, length, checksum); | |
2633 | else | |
2634 | mxge_rx_done_big(ss, length, checksum); | |
2635 | rx_done->cnt++; | |
2636 | rx_done->idx = rx_done->cnt & rx_done->mask; | |
2637 | ||
2638 | /* limit potential for livelock */ | |
2639 | if (__predict_false(++limit > rx_done->mask / 2)) | |
2640 | break; | |
2641 | } | |
2642 | #ifdef INET | |
2643 | while (!SLIST_EMPTY(&ss->lro_active)) { | |
2644 | struct lro_entry *lro = SLIST_FIRST(&ss->lro_active); | |
2645 | SLIST_REMOVE_HEAD(&ss->lro_active, next); | |
2646 | mxge_lro_flush(ss, lro); | |
2647 | } | |
2648 | #endif | |
2649 | } | |
2650 | ||
2651 | ||
2652 | static inline void | |
2653 | mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) | |
2654 | { | |
2655 | struct ifnet *ifp; | |
2656 | mxge_tx_ring_t *tx; | |
2657 | struct mbuf *m; | |
2658 | bus_dmamap_t map; | |
2659 | int idx; | |
2660 | int *flags; | |
2661 | ||
2662 | tx = &ss->tx; | |
2663 | ifp = ss->sc->ifp; | |
2664 | while (tx->pkt_done != mcp_idx) { | |
2665 | idx = tx->done & tx->mask; | |
2666 | tx->done++; | |
2667 | m = tx->info[idx].m; | |
2668 | /* mbuf and DMA map only attached to the first | |
2669 | segment per-mbuf */ | |
2670 | if (m != NULL) { | |
2671 | ss->obytes += m->m_pkthdr.len; | |
2672 | if (m->m_flags & M_MCAST) | |
2673 | ss->omcasts++; | |
2674 | ss->opackets++; | |
2675 | tx->info[idx].m = NULL; | |
2676 | map = tx->info[idx].map; | |
2677 | bus_dmamap_unload(tx->dmat, map); | |
2678 | m_freem(m); | |
2679 | } | |
2680 | if (tx->info[idx].flag) { | |
2681 | tx->info[idx].flag = 0; | |
2682 | tx->pkt_done++; | |
2683 | } | |
2684 | } | |
2685 | ||
2686 | /* If we have space, clear IFF_OACTIVE to tell the stack that | |
2687 | it's OK to send packets */ | |
2688 | #ifdef IFNET_BUF_RING | |
2689 | flags = &ss->if_drv_flags; | |
2690 | #else | |
2691 | flags = &ifp->if_drv_flags; | |
2692 | #endif | |
e8a47a7f | 2693 | lockmgr(&ss->tx.lock, LK_EXCLUSIVE); |
8892ea20 AE |
2694 | if ((*flags) & IFF_DRV_OACTIVE && |
2695 | tx->req - tx->done < (tx->mask + 1)/4) { | |
2696 | *(flags) &= ~IFF_DRV_OACTIVE; | |
2697 | ss->tx.wake++; | |
2698 | mxge_start_locked(ss); | |
2699 | } | |
2700 | #ifdef IFNET_BUF_RING | |
2701 | if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { | |
2702 | /* let the NIC stop polling this queue, since there | |
2703 | * are no more transmits pending */ | |
2704 | if (tx->req == tx->done) { | |
2705 | *tx->send_stop = 1; | |
2706 | tx->queue_active = 0; | |
2707 | tx->deactivate++; | |
2708 | wmb(); | |
2709 | } | |
2710 | } | |
2711 | #endif | |
e8a47a7f | 2712 | lockmgr(&ss->tx.lock, LK_RELEASE); |
8892ea20 AE |
2713 | |
2714 | } | |
2715 | ||
2716 | static struct mxge_media_type mxge_xfp_media_types[] = | |
2717 | { | |
2718 | {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, | |
2719 | {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, | |
2720 | {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, | |
2721 | {0, (1 << 5), "10GBASE-ER"}, | |
2722 | {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, | |
2723 | {0, (1 << 3), "10GBASE-SW"}, | |
2724 | {0, (1 << 2), "10GBASE-LW"}, | |
2725 | {0, (1 << 1), "10GBASE-EW"}, | |
2726 | {0, (1 << 0), "Reserved"} | |
2727 | }; | |
2728 | static struct mxge_media_type mxge_sfp_media_types[] = | |
2729 | { | |
2730 | {0, (1 << 7), "Reserved"}, | |
2731 | {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, | |
2732 | {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, | |
2733 | {IFM_10G_SR, (1 << 4), "10GBASE-SR"} | |
2734 | }; | |
2735 | ||
2736 | static void | |
2737 | mxge_set_media(mxge_softc_t *sc, int type) | |
2738 | { | |
2739 | sc->media_flags |= type; | |
2740 | ifmedia_add(&sc->media, sc->media_flags, 0, NULL); | |
2741 | ifmedia_set(&sc->media, sc->media_flags); | |
2742 | } | |
2743 | ||
2744 | ||
2745 | /* | |
2746 | * Determine the media type for a NIC. Some XFPs will identify | |
2747 | * themselves only when their link is up, so this is initiated via a | |
2748 | * link up interrupt. However, this can potentially take up to | |
2749 | * several milliseconds, so it is run via the watchdog routine, rather | |
2750 | * than in the interrupt handler itself. This need only be done | |
2751 | * once, not each time the link is up. | |
2752 | */ | |
2753 | static void | |
2754 | mxge_media_probe(mxge_softc_t *sc) | |
2755 | { | |
2756 | mxge_cmd_t cmd; | |
2757 | char *cage_type; | |
2758 | char *ptr; | |
2759 | struct mxge_media_type *mxge_media_types = NULL; | |
2760 | int i, err, ms, mxge_media_type_entries; | |
2761 | uint32_t byte; | |
2762 | ||
2763 | sc->need_media_probe = 0; | |
2764 | ||
2765 | /* if we've already set a media type, we're done */ | |
2766 | if (sc->media_flags != (IFM_ETHER | IFM_AUTO)) | |
2767 | return; | |
2768 | ||
2769 | /* | |
2770 | * parse the product code to determine the interface type | |
2771 | * (CX4, XFP, Quad Ribbon Fiber) by looking at the character | |
2772 | * after the 3rd dash in the driver's cached copy of the | |
2773 | * EEPROM's product code string. | |
2774 | */ | |
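/*
 * Concrete illustration (hypothetical part number, just for the parse
 * below): in a code shaped like "10G-PCIE-8A-R", the loop skips three
 * dashes and lands on 'R', selecting the XFP table; 'C', 'Q' and
 * 'S'/"2S" pick CX4, Quad Ribbon Fiber and SFP+ respectively.
 */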
2775 | ptr = sc->product_code_string; | |
2776 | if (ptr == NULL) { | |
2777 | device_printf(sc->dev, "Missing product code\n"); | |
2778 | } | |
2779 | ||
2780 | for (i = 0; i < 3; i++, ptr++) { | |
2781 | ptr = index(ptr, '-'); | |
2782 | if (ptr == NULL) { | |
2783 | device_printf(sc->dev, | |
2784 | "only %d dashes in PC?!?\n", i); | |
2785 | return; | |
2786 | } | |
2787 | } | |
2788 | if (*ptr == 'C') { | |
2789 | /* -C is CX4 */ | |
2790 | mxge_set_media(sc, IFM_10G_CX4); | |
2791 | return; | |
2792 | } | |
2793 | else if (*ptr == 'Q') { | |
2794 | /* -Q is Quad Ribbon Fiber */ | |
2795 | device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); | |
2796 | /* FreeBSD has no media type for Quad ribbon fiber */ | |
2797 | return; | |
2798 | } | |
2799 | ||
2800 | if (*ptr == 'R') { | |
2801 | /* -R is XFP */ | |
2802 | mxge_media_types = mxge_xfp_media_types; | |
2803 | mxge_media_type_entries = | |
2804 | sizeof (mxge_xfp_media_types) / | |
2805 | sizeof (mxge_xfp_media_types[0]); | |
2806 | byte = MXGE_XFP_COMPLIANCE_BYTE; | |
2807 | cage_type = "XFP"; | |
2808 | } | |
2809 | ||
2810 | if (*ptr == 'S' || *(ptr + 1) == 'S') { | |
2811 | /* -S or -2S is SFP+ */ | |
2812 | mxge_media_types = mxge_sfp_media_types; | |
2813 | mxge_media_type_entries = | |
2814 | sizeof (mxge_sfp_media_types) / | |
2815 | sizeof (mxge_sfp_media_types[0]); | |
2816 | cage_type = "SFP+"; | |
2817 | byte = 3; | |
2818 | } | |
2819 | ||
2820 | if (mxge_media_types == NULL) { | |
2821 | device_printf(sc->dev, "Unknown media type: %c\n", *ptr); | |
2822 | return; | |
2823 | } | |
2824 | ||
2825 | /* | |
2826 | * At this point we know the NIC has an XFP or SFP+ cage, so now | |
2827 | * we try to determine what is in the cage by using the | |
2828 | * firmware's I2C commands to read the module's 10GbE compliance | |
2829 | * register. We read just one byte, which may take over | |
2830 | * a millisecond. | |
2831 | */ | |
2832 | ||
2833 | cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ | |
2834 | cmd.data1 = byte; | |
2835 | err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); | |
2836 | if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { | |
2837 | device_printf(sc->dev, "failed to read XFP\n"); | |
2838 | } | |
2839 | if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { | |
2840 | device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); | |
2841 | } | |
2842 | if (err != MXGEFW_CMD_OK) { | |
2843 | return; | |
2844 | } | |
2845 | ||
2846 | /* now we wait for the data to be cached */ | |
2847 | cmd.data0 = byte; | |
2848 | err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); | |
2849 | for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { | |
2850 | DELAY(1000); | |
2851 | cmd.data0 = byte; | |
2852 | err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); | |
2853 | } | |
2854 | if (err != MXGEFW_CMD_OK) { | |
2855 | device_printf(sc->dev, "failed to read %s (%d, %dms)\n", | |
2856 | cage_type, err, ms); | |
2857 | return; | |
2858 | } | |
2859 | ||
2860 | if (cmd.data0 == mxge_media_types[0].bitmask) { | |
2861 | if (mxge_verbose) | |
2862 | device_printf(sc->dev, "%s:%s\n", cage_type, | |
2863 | mxge_media_types[0].name); | |
2864 | mxge_set_media(sc, IFM_10G_CX4); | |
2865 | return; | |
2866 | } | |
2867 | for (i = 1; i < mxge_media_type_entries; i++) { | |
2868 | if (cmd.data0 & mxge_media_types[i].bitmask) { | |
2869 | if (mxge_verbose) | |
2870 | device_printf(sc->dev, "%s:%s\n", | |
2871 | cage_type, | |
2872 | mxge_media_types[i].name); | |
2873 | ||
2874 | mxge_set_media(sc, mxge_media_types[i].flag); | |
2875 | return; | |
2876 | } | |
2877 | } | |
2878 | device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type, | |
2879 | cmd.data0); | |
2880 | ||
2881 | return; | |
2882 | } | |
2883 | ||
2884 | static void | |
2885 | mxge_intr(void *arg) | |
2886 | { | |
2887 | struct mxge_slice_state *ss = arg; | |
2888 | mxge_softc_t *sc = ss->sc; | |
2889 | mcp_irq_data_t *stats = ss->fw_stats; | |
2890 | mxge_tx_ring_t *tx = &ss->tx; | |
2891 | mxge_rx_done_t *rx_done = &ss->rx_done; | |
2892 | uint32_t send_done_count; | |
2893 | uint8_t valid; | |
2894 | ||
2895 | ||
2896 | #ifndef IFNET_BUF_RING | |
2897 | /* an interrupt on a non-zero slice is implicitly valid | |
2898 | since MSI-X irqs are not shared */ | |
2899 | if (ss != sc->ss) { | |
2900 | mxge_clean_rx_done(ss); | |
2901 | *ss->irq_claim = be32toh(3); | |
2902 | return; | |
2903 | } | |
2904 | #endif | |
2905 | ||
2906 | /* make sure the DMA has finished */ | |
2907 | if (!stats->valid) { | |
2908 | return; | |
2909 | } | |
2910 | valid = stats->valid; | |
2911 | ||
2912 | if (sc->legacy_irq) { | |
2913 | /* lower legacy IRQ */ | |
2914 | *sc->irq_deassert = 0; | |
2915 | if (!mxge_deassert_wait) | |
2916 | /* don't wait for conf. that irq is low */ | |
2917 | stats->valid = 0; | |
2918 | } else { | |
2919 | stats->valid = 0; | |
2920 | } | |
2921 | ||
2922 | /* loop while waiting for legacy irq deassertion */ | |
2923 | do { | |
2924 | /* check for transmit completes and receives */ | |
2925 | send_done_count = be32toh(stats->send_done_count); | |
2926 | while ((send_done_count != tx->pkt_done) || | |
2927 | (rx_done->entry[rx_done->idx].length != 0)) { | |
2928 | if (send_done_count != tx->pkt_done) | |
2929 | mxge_tx_done(ss, (int)send_done_count); | |
2930 | mxge_clean_rx_done(ss); | |
2931 | send_done_count = be32toh(stats->send_done_count); | |
2932 | } | |
2933 | if (sc->legacy_irq && mxge_deassert_wait) | |
2934 | wmb(); | |
2935 | } while (*((volatile uint8_t *) &stats->valid)); | |
2936 | ||
2937 | /* fw link & error stats meaningful only on the first slice */ | |
2938 | if (__predict_false((ss == sc->ss) && stats->stats_updated)) { | |
2939 | if (sc->link_state != stats->link_up) { | |
2940 | sc->link_state = stats->link_up; | |
2941 | if (sc->link_state) { | |
73a22abe AE |
2942 | sc->ifp->if_link_state = LINK_STATE_UP; |
2943 | if_link_state_change(sc->ifp); | |
8892ea20 AE |
2944 | if (mxge_verbose) |
2945 | device_printf(sc->dev, "link up\n"); | |
2946 | } else { | |
73a22abe AE |
2947 | sc->ifp->if_link_state = LINK_STATE_DOWN; |
2948 | if_link_state_change(sc->ifp); | |
8892ea20 AE |
2949 | if (mxge_verbose) |
2950 | device_printf(sc->dev, "link down\n"); | |
2951 | } | |
2952 | sc->need_media_probe = 1; | |
2953 | } | |
2954 | if (sc->rdma_tags_available != | |
2955 | be32toh(stats->rdma_tags_available)) { | |
2956 | sc->rdma_tags_available = | |
2957 | be32toh(stats->rdma_tags_available); | |
2958 | device_printf(sc->dev, "RDMA timed out! %d tags " | |
2959 | "left\n", sc->rdma_tags_available); | |
2960 | } | |
2961 | ||
2962 | if (stats->link_down) { | |
2963 | sc->down_cnt += stats->link_down; | |
2964 | sc->link_state = 0; | |
2965 | sc->ifp->if_link_state = LINK_STATE_DOWN; | |
     | if_link_state_change(sc->ifp); | |
2966 | } | |
2967 | } | |
2968 | ||
2969 | /* check to see if we have rx token to pass back */ | |
2970 | if (valid & 0x1) | |
2971 | *ss->irq_claim = be32toh(3); | |
2972 | *(ss->irq_claim + 1) = be32toh(3); | |
2973 | } | |
2974 | ||
2975 | static void | |
2976 | mxge_init(void *arg) | |
2977 | { | |
2978 | } | |
2979 | ||
2980 | ||
2981 | ||
2982 | static void | |
2983 | mxge_free_slice_mbufs(struct mxge_slice_state *ss) | |
2984 | { | |
2985 | struct lro_entry *lro_entry; | |
2986 | int i; | |
2987 | ||
2988 | while (!SLIST_EMPTY(&ss->lro_free)) { | |
2989 | lro_entry = SLIST_FIRST(&ss->lro_free); | |
2990 | SLIST_REMOVE_HEAD(&ss->lro_free, next); | |
d777b84f | 2991 | kfree(lro_entry, M_DEVBUF); |
8892ea20 AE |
2992 | } |
2993 | ||
2994 | for (i = 0; i <= ss->rx_big.mask; i++) { | |
2995 | if (ss->rx_big.info[i].m == NULL) | |
2996 | continue; | |
2997 | bus_dmamap_unload(ss->rx_big.dmat, | |
2998 | ss->rx_big.info[i].map); | |
2999 | m_freem(ss->rx_big.info[i].m); | |
3000 | ss->rx_big.info[i].m = NULL; | |
3001 | } | |
3002 | ||
3003 | for (i = 0; i <= ss->rx_small.mask; i++) { | |
3004 | if (ss->rx_small.info[i].m == NULL) | |
3005 | continue; | |
3006 | bus_dmamap_unload(ss->rx_small.dmat, | |
3007 | ss->rx_small.info[i].map); | |
3008 | m_freem(ss->rx_small.info[i].m); | |
3009 | ss->rx_small.info[i].m = NULL; | |
3010 | } | |
3011 | ||
3012 | /* transmit ring used only on the first slice */ | |
3013 | if (ss->tx.info == NULL) | |
3014 | return; | |
3015 | ||
3016 | for (i = 0; i <= ss->tx.mask; i++) { | |
3017 | ss->tx.info[i].flag = 0; | |
3018 | if (ss->tx.info[i].m == NULL) | |
3019 | continue; | |
3020 | bus_dmamap_unload(ss->tx.dmat, | |
3021 | ss->tx.info[i].map); | |
3022 | m_freem(ss->tx.info[i].m); | |
3023 | ss->tx.info[i].m = NULL; | |
3024 | } | |
3025 | } | |
3026 | ||
3027 | static void | |
3028 | mxge_free_mbufs(mxge_softc_t *sc) | |
3029 | { | |
3030 | int slice; | |
3031 | ||
3032 | for (slice = 0; slice < sc->num_slices; slice++) | |
3033 | mxge_free_slice_mbufs(&sc->ss[slice]); | |
3034 | } | |
3035 | ||
3036 | static void | |
3037 | mxge_free_slice_rings(struct mxge_slice_state *ss) | |
3038 | { | |
3039 | int i; | |
3040 | ||
3041 | ||
3042 | if (ss->rx_done.entry != NULL) | |
3043 | mxge_dma_free(&ss->rx_done.dma); | |
3044 | ss->rx_done.entry = NULL; | |
3045 | ||
3046 | if (ss->tx.req_bytes != NULL) | |
d777b84f | 3047 | kfree(ss->tx.req_bytes, M_DEVBUF); |
8892ea20 AE |
3048 | ss->tx.req_bytes = NULL; |
3049 | ||
3050 | if (ss->tx.seg_list != NULL) | |
d777b84f | 3051 | kfree(ss->tx.seg_list, M_DEVBUF); |
8892ea20 AE |
3052 | ss->tx.seg_list = NULL; |
3053 | ||
3054 | if (ss->rx_small.shadow != NULL) | |
d777b84f | 3055 | kfree(ss->rx_small.shadow, M_DEVBUF); |
8892ea20 AE |
3056 | ss->rx_small.shadow = NULL; |
3057 | ||
3058 | if (ss->rx_big.shadow != NULL) | |
d777b84f | 3059 | kfree(ss->rx_big.shadow, M_DEVBUF); |
8892ea20 AE |
3060 | ss->rx_big.shadow = NULL; |
3061 | ||
3062 | if (ss->tx.info != NULL) { | |
3063 | if (ss->tx.dmat != NULL) { | |
3064 | for (i = 0; i <= ss->tx.mask; i++) { | |
3065 | bus_dmamap_destroy(ss->tx.dmat, | |
3066 | ss->tx.info[i].map); | |
3067 | } | |
3068 | bus_dma_tag_destroy(ss->tx.dmat); | |
3069 | } | |
d777b84f | 3070 | kfree(ss->tx.info, M_DEVBUF); |
8892ea20 AE |
3071 | } |
3072 | ss->tx.info = NULL; | |
3073 | ||
3074 | if (ss->rx_small.info != NULL) { | |
3075 | if (ss->rx_small.dmat != NULL) { | |
3076 | for (i = 0; i <= ss->rx_small.mask; i++) { | |
3077 | bus_dmamap_destroy(ss->rx_small.dmat, | |
3078 | ss->rx_small.info[i].map); | |
3079 | } | |
3080 | bus_dmamap_destroy(ss->rx_small.dmat, | |
3081 | ss->rx_small.extra_map); | |
3082 | bus_dma_tag_destroy(ss->rx_small.dmat); | |
3083 | } | |
d777b84f | 3084 | kfree(ss->rx_small.info, M_DEVBUF); |
8892ea20 AE |
3085 | } |
3086 | ss->rx_small.info = NULL; | |
3087 | ||
3088 | if (ss->rx_big.info != NULL) { | |
3089 | if (ss->rx_big.dmat != NULL) { | |
3090 | for (i = 0; i <= ss->rx_big.mask; i++) { | |
3091 | bus_dmamap_destroy(ss->rx_big.dmat, | |
3092 | ss->rx_big.info[i].map); | |
3093 | } | |
3094 | bus_dmamap_destroy(ss->rx_big.dmat, | |
3095 | ss->rx_big.extra_map); | |
3096 | bus_dma_tag_destroy(ss->rx_big.dmat); | |
3097 | } | |
d777b84f | 3098 | kfree(ss->rx_big.info, M_DEVBUF); |
8892ea20 AE |
3099 | } |
3100 | ss->rx_big.info = NULL; | |
3101 | } | |
3102 | ||
3103 | static void | |
3104 | mxge_free_rings(mxge_softc_t *sc) | |
3105 | { | |
3106 | int slice; | |
3107 | ||
3108 | for (slice = 0; slice < sc->num_slices; slice++) | |
3109 | mxge_free_slice_rings(&sc->ss[slice]); | |
3110 | } | |
3111 | ||
3112 | static int | |
3113 | mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, | |
3114 | int tx_ring_entries) | |
3115 | { | |
3116 | mxge_softc_t *sc = ss->sc; | |
3117 | size_t bytes; | |
3118 | int err, i; | |
3119 | ||
3120 | err = ENOMEM; | |
3121 | ||
3122 | /* allocate per-slice receive resources */ | |
3123 | ||
3124 | ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; | |
3125 | ss->rx_done.mask = (2 * rx_ring_entries) - 1; | |
3126 | ||
3127 | /* allocate the rx shadow rings */ | |
3128 | bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); | |
d777b84f | 3129 | ss->rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); |
8892ea20 AE |
3130 | if (ss->rx_small.shadow == NULL) |
3131 | return err; | |
3132 | ||
3133 | bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); | |
d777b84f | 3134 | ss->rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); |
8892ea20 AE |
3135 | if (ss->rx_big.shadow == NULL) |
3136 | return err; | |
3137 | ||
3138 | /* allocate the rx host info rings */ | |
3139 | bytes = rx_ring_entries * sizeof (*ss->rx_small.info); | |
d777b84f | 3140 | ss->rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); |
8892ea20 AE |
3141 | if (ss->rx_small.info == NULL) |
3142 | return err; | |
3143 | ||
3144 | bytes = rx_ring_entries * sizeof (*ss->rx_big.info); | |
d777b84f | 3145 | ss->rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); |
8892ea20 AE |
3146 | if (ss->rx_big.info == NULL) |
3147 | return err; | |
3148 | ||
3149 | /* allocate the rx busdma resources */ | |
3150 | err = bus_dma_tag_create(sc->parent_dmat, /* parent */ | |
3151 | 1, /* alignment */ | |
3152 | 4096, /* boundary */ | |
3153 | BUS_SPACE_MAXADDR, /* low */ | |
3154 | BUS_SPACE_MAXADDR, /* high */ | |
3155 | NULL, NULL, /* filter */ | |
3156 | MHLEN, /* maxsize */ | |
3157 | 1, /* num segs */ | |
3158 | MHLEN, /* maxsegsize */ | |
3159 | BUS_DMA_ALLOCNOW, /* flags */ | |
3160 | NULL, NULL, /* lock */ | |
3161 | &ss->rx_small.dmat); /* tag */ | |
3162 | if (err != 0) { | |
3163 | device_printf(sc->dev, "Err %d allocating rx_small dmat\n", | |
3164 | err); | |
3165 | return err; | |
3166 | } | |
3167 | ||
3168 | err = bus_dma_tag_create(sc->parent_dmat, /* parent */ | |
3169 | 1, /* alignment */ | |
3170 | #if MXGE_VIRT_JUMBOS | |
3171 | 4096, /* boundary */ | |
3172 | #else | |
3173 | 0, /* boundary */ | |
3174 | #endif | |
3175 | BUS_SPACE_MAXADDR, /* low */ | |
3176 | BUS_SPACE_MAXADDR, /* high */ | |
3177 | NULL, NULL, /* filter */ | |
3178 | 3*4096, /* maxsize */ | |
3179 | #if MXGE_VIRT_JUMBOS | |
3180 | 3, /* num segs */ | |
3181 | 4096, /* maxsegsize*/ | |
3182 | #else | |
3183 | 1, /* num segs */ | |
3184 | MJUM9BYTES, /* maxsegsize*/ | |
3185 | #endif | |
3186 | BUS_DMA_ALLOCNOW, /* flags */ | |
3187 | NULL, NULL, /* lock */ | |
3188 | &ss->rx_big.dmat); /* tag */ | |
3189 | if (err != 0) { | |
3190 | device_printf(sc->dev, "Err %d allocating rx_big dmat\n", | |
3191 | err); | |
3192 | return err; | |
3193 | } | |
3194 | for (i = 0; i <= ss->rx_small.mask; i++) { | |
3195 | err = bus_dmamap_create(ss->rx_small.dmat, 0, | |
3196 | &ss->rx_small.info[i].map); | |
3197 | if (err != 0) { | |
3198 | device_printf(sc->dev, "Err %d rx_small dmamap\n", | |
3199 | err); | |
3200 | return err; | |
3201 | } | |
3202 | } | |
3203 | err = bus_dmamap_create(ss->rx_small.dmat, 0, | |
3204 | &ss->rx_small.extra_map); | |
3205 | if (err != 0) { | |
3206 | device_printf(sc->dev, "Err %d extra rx_small dmamap\n", | |
3207 | err); | |
3208 | return err; | |
3209 | } | |
3210 | ||
3211 | for (i = 0; i <= ss->rx_big.mask; i++) { | |
3212 | err = bus_dmamap_create(ss->rx_big.dmat, 0, | |
3213 | &ss->rx_big.info[i].map); | |
3214 | if (err != 0) { | |
3215 | device_printf(sc->dev, "Err %d rx_big dmamap\n", | |
3216 | err); | |
3217 | return err; | |
3218 | } | |
3219 | } | |
3220 | err = bus_dmamap_create(ss->rx_big.dmat, 0, | |
3221 | &ss->rx_big.extra_map); | |
3222 | if (err != 0) { | |
3223 | device_printf(sc->dev, "Err %d extra rx_big dmamap\n", | |
3224 | err); | |
3225 | return err; | |
3226 | } | |
3227 | ||
3228 | /* now allocate TX resouces */ | |
3229 | ||
3230 | #ifndef IFNET_BUF_RING | |
3231 | /* only use a single TX ring for now */ | |
3232 | if (ss != ss->sc->ss) | |
3233 | return 0; | |
3234 | #endif | |
3235 | ||
3236 | ss->tx.mask = tx_ring_entries - 1; | |
3237 | ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); | |
3238 | ||
3239 | ||
3240 | /* allocate the tx request copy block */ | |
3241 | bytes = 8 + | |
3242 | sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); | |
d777b84f | 3243 | ss->tx.req_bytes = kmalloc(bytes, M_DEVBUF, M_WAITOK); |
8892ea20 AE |
3244 | if (ss->tx.req_bytes == NULL) |
3245 | return err; | |
3246 | /* ensure req_list entries are aligned to 8 bytes */ | |
3247 | ss->tx.req_list = (mcp_kreq_ether_send_t *) | |
3248 | ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); | |
3249 | ||
3250 | /* allocate the tx busdma segment list */ | |
3251 | bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; | |
3252 | ss->tx.seg_list = (bus_dma_segment_t *) | |
d777b84f | 3253 | kmalloc(bytes, M_DEVBUF, M_WAITOK); |
8892ea20 AE |
3254 | if (ss->tx.seg_list == NULL) |
3255 | return err; | |
3256 | ||
3257 | /* allocate the tx host info ring */ | |
3258 | bytes = tx_ring_entries * sizeof (*ss->tx.info); | |
d777b84f | 3259 | ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); |
8892ea20 AE |
3260 | if (ss->tx.info == NULL) |
3261 | return err; | |
3262 | ||
3263 | /* allocate the tx busdma resources */ | |
3264 | err = bus_dma_tag_create(sc->parent_dmat, /* parent */ | |
3265 | 1, /* alignment */ | |
3266 | sc->tx_boundary, /* boundary */ | |
3267 | BUS_SPACE_MAXADDR, /* low */ | |
3268 | BUS_SPACE_MAXADDR, /* high */ | |
3269 | NULL, NULL, /* filter */ | |
3270 | 65536 + 256, /* maxsize */ | |
3271 | ss->tx.max_desc - 2, /* num segs */ | |
3272 | sc->tx_boundary, /* maxsegsz */ | |
3273 | BUS_DMA_ALLOCNOW, /* flags */ | |
3274 | NULL, NULL, /* lock */ | |
3275 | &ss->tx.dmat); /* tag */ | |
3276 | ||
3277 | if (err != 0) { | |
3278 | device_printf(sc->dev, "Err %d allocating tx dmat\n", | |
3279 | err); | |
3280 | return err; | |
3281 | } | |
3282 | ||
3283 | /* now use these tags to set up DMA maps for each slot | |
3284 | in the ring */ | |
3285 | for (i = 0; i <= ss->tx.mask; i++) { | |
3286 | err = bus_dmamap_create(ss->tx.dmat, 0, | |
3287 | &ss->tx.info[i].map); | |
3288 | if (err != 0) { | |
3289 | device_printf(sc->dev, "Err %d tx dmamap\n", | |
3290 | err); | |
3291 | return err; | |
3292 | } | |
3293 | } | |
3294 | return 0; | |
3295 | ||
3296 | } | |
3297 | ||
3298 | static int | |
3299 | mxge_alloc_rings(mxge_softc_t *sc) | |
3300 | { | |
3301 | mxge_cmd_t cmd; | |
3302 | int tx_ring_size; | |
3303 | int tx_ring_entries, rx_ring_entries; | |
3304 | int err, slice; | |
3305 | ||
3306 | /* get ring sizes */ | |
3307 | err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); | |
3308 | tx_ring_size = cmd.data0; | |
3309 | if (err != 0) { | |
3310 | device_printf(sc->dev, "Cannot determine tx ring sizes\n"); | |
3311 | goto abort; | |
3312 | } | |
3313 | ||
3314 | tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); | |
3315 | rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); | |
3316 | IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); | |
3317 | sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; | |
3318 | IFQ_SET_READY(&sc->ifp->if_snd); | |
3319 | ||
3320 | for (slice = 0; slice < sc->num_slices; slice++) { | |
3321 | err = mxge_alloc_slice_rings(&sc->ss[slice], | |
3322 | rx_ring_entries, | |
3323 | tx_ring_entries); | |
3324 | if (err != 0) | |
3325 | goto abort; | |
3326 | } | |
3327 | return 0; | |
3328 | ||
3329 | abort: | |
3330 | mxge_free_rings(sc); | |
3331 | return err; | |
3332 | ||
3333 | } | |
3334 | ||
3335 | ||
3336 | static void | |
3337 | mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) | |
3338 | { | |
3339 | int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; | |
3340 | ||
3341 | if (bufsize < MCLBYTES) { | |
3342 | /* easy, everything fits in a single buffer */ | |
3343 | *big_buf_size = MCLBYTES; | |
3344 | *cl_size = MCLBYTES; | |
3345 | *nbufs = 1; | |
3346 | return; | |
3347 | } | |
3348 | ||
3349 | if (bufsize < MJUMPAGESIZE) { | |
3350 | /* still easy, everything still fits in a single buffer */ | |
3351 | *big_buf_size = MJUMPAGESIZE; | |
3352 | *cl_size = MJUMPAGESIZE; | |
3353 | *nbufs = 1; | |
3354 | return; | |
3355 | } | |
3356 | #if MXGE_VIRT_JUMBOS | |
3357 | /* now we need to use virtually contiguous buffers */ | |
3358 | *cl_size = MJUM9BYTES; | |
3359 | *big_buf_size = 4096; | |
3360 | *nbufs = mtu / 4096 + 1; | |
3361 | /* needs to be a power of two, so round up */ | |
3362 | if (*nbufs == 3) | |
3363 | *nbufs = 4; | |
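/* for the jumbo MTUs this NIC supports, mtu / 4096 + 1 is at most 3,
   so rounding 3 up to 4 is the only case that needs fixing */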
3364 | #else | |
3365 | *cl_size = MJUM9BYTES; | |
3366 | *big_buf_size = MJUM9BYTES; | |
3367 | *nbufs = 1; | |
3368 | #endif | |
3369 | } | |
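/* Example outcomes of mxge_choose_params() above, assuming the usual
 * 2KB MCLBYTES and 4KB MJUMPAGESIZE:
 *   mtu 1500 -> bufsize 1520 -> one MCLBYTES cluster
 *   mtu 4000 -> bufsize 4020 -> one MJUMPAGESIZE cluster
 *   mtu 9000 -> bufsize 9020 -> one MJUM9BYTES cluster, or with
 *     MXGE_VIRT_JUMBOS four 4KB buffers (3 rounded up to a power
 *     of two) carved from MJUM9BYTES clusters
 */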
3370 | ||
3371 | static int | |
3372 | mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) | |
3373 | { | |
3374 | mxge_softc_t *sc; | |
3375 | mxge_cmd_t cmd; | |
3376 | bus_dmamap_t map; | |
3377 | struct lro_entry *lro_entry; | |
3378 | int err, i, slice; | |
3379 | ||
3380 | ||
3381 | sc = ss->sc; | |
3382 | slice = ss - sc->ss; | |
3383 | ||
3384 | SLIST_INIT(&ss->lro_free); | |
3385 | SLIST_INIT(&ss->lro_active); | |
3386 | ||
3387 | for (i = 0; i < sc->lro_cnt; i++) { | |
3388 | lro_entry = (struct lro_entry *) | |
d777b84f | 3389 | kmalloc(sizeof (*lro_entry), M_DEVBUF, |
8892ea20 AE |
3390 | M_NOWAIT | M_ZERO); |
3391 | if (lro_entry == NULL) { | |
3392 | sc->lro_cnt = i; | |
3393 | break; | |
3394 | } | |
3395 | SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); | |
3396 | } | |
3397 | /* get the lanai pointers to the send and receive rings */ | |
3398 | ||
3399 | err = 0; | |
3400 | #ifndef IFNET_BUF_RING | |
3401 | /* We currently only send from the first slice */ | |
3402 | if (slice == 0) { | |
3403 | #endif | |
3404 | cmd.data0 = slice; | |
3405 | err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); | |
3406 | ss->tx.lanai = | |
3407 | (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); | |
3408 | ss->tx.send_go = (volatile uint32_t *) | |
3409 | (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); | |
3410 | ss->tx.send_stop = (volatile uint32_t *) | |
3411 | (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); | |
3412 | #ifndef IFNET_BUF_RING | |
3413 | } | |
3414 | #endif | |
3415 | cmd.data0 = slice; | |
3416 | err |= mxge_send_cmd(sc, | |
3417 | MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); | |
3418 | ss->rx_small.lanai = | |
3419 | (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); | |
3420 | cmd.data0 = slice; | |
3421 | err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); | |
3422 | ss->rx_big.lanai = | |
3423 | (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); | |
3424 | ||
3425 | if (err != 0) { | |
3426 | device_printf(sc->dev, | |
3427 | "failed to get ring sizes or locations\n"); | |
3428 | return EIO; | |
3429 | } | |
3430 | ||
3431 | /* stock receive rings */ | |
3432 | for (i = 0; i <= ss->rx_small.mask; i++) { | |
3433 | map = ss->rx_small.info[i].map; | |
3434 | err = mxge_get_buf_small(ss, map, i); | |
3435 | if (err) { | |
3436 | device_printf(sc->dev, "alloced %d/%d smalls\n", | |
3437 | i, ss->rx_small.mask + 1); | |
3438 | return ENOMEM; | |
3439 | } | |
3440 | } | |
3441 | for (i = 0; i <= ss->rx_big.mask; i++) { | |
3442 | ss->rx_big.shadow[i].addr_low = 0xffffffff; | |
3443 | ss->rx_big.shadow[i].addr_high = 0xffffffff; | |
3444 | } | |
3445 | ss->rx_big.nbufs = nbufs; | |
3446 | ss->rx_big.cl_size = cl_size; | |
3447 | ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + | |
3448 | ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; | |
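/* mlen mirrors the bufsize computed in mxge_choose_params(): the
   largest frame the big ring must be able to hold */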
3449 | for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { | |
3450 | map = ss->rx_big.info[i].map; | |
3451 | err = mxge_get_buf_big(ss, map, i); | |
3452 | if (err) { | |
3453 | device_printf(sc->dev, "alloced %d/%d bigs\n", | |
3454 | i, ss->rx_big.mask + 1); | |
3455 | return ENOMEM; | |
3456 | } | |
3457 | } | |
3458 | return 0; | |
3459 | } | |
3460 | ||
3461 | static int | |
3462 | mxge_open(mxge_softc_t *sc) | |
3463 | { | |
3464 | mxge_cmd_t cmd; | |
3465 | int err, big_bytes, nbufs, slice, cl_size, i; | |
3466 | bus_addr_t bus; | |
3467 | volatile uint8_t *itable; | |
3468 | struct mxge_slice_state *ss; | |
3469 | ||
3470 | /* Copy the MAC address in case it was overridden */ | |
3471 | bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); | |
3472 | ||
3473 | err = mxge_reset(sc, 1); | |
3474 | if (err != 0) { | |
3475 | device_printf(sc->dev, "failed to reset\n"); | |
3476 | return EIO; | |
3477 | } | |
3478 | ||
3479 | if (sc->num_slices > 1) { | |
3480 | /* set up the indirection table */ | |
3481 | cmd.data0 = sc->num_slices; | |
3482 | err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, | |
3483 | &cmd); | |
3484 | ||
3485 | err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, | |
3486 | &cmd); | |
3487 | if (err != 0) { | |
3488 | device_printf(sc->dev, | |
3489 | "failed to setup rss tables\n"); | |
3490 | return err; | |
3491 | } | |
3492 | ||
3493 | /* just enable an identity mapping */ | |
3494 | itable = sc->sram + cmd.data0; | |
3495 | for (i = 0; i < sc->num_slices; i++) | |
3496 | itable[i] = (uint8_t)i; | |
3497 | ||
3498 | cmd.data0 = 1; | |
3499 | cmd.data1 = mxge_rss_hash_type; | |
3500 | err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); | |
3501 | if (err != 0) { | |
3502 | device_printf(sc->dev, "failed to enable slices\n"); | |
3503 | return err; | |
3504 | } | |
3505 | } | |
3506 | ||
3507 | ||
3508 | mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); | |
3509 | ||
3510 | cmd.data0 = nbufs; | |
3511 | err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, | |
3512 | &cmd); | |
3513 | /* error is only meaningful if we're trying to set | |
3514 | MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ | |
3515 | if (err && nbufs > 1) { | |
3516 | device_printf(sc->dev, | |
3517 | "Failed to set alway-use-n to %d\n", | |
3518 | nbufs); | |
3519 | return EIO; | |
3520 | } | |
3521 | /* Give the firmware the mtu and the big and small buffer | |
3522 | sizes. The firmware wants the big buf size to be a power | |
3523 | of two. Luckily, FreeBSD's clusters are powers of two */ | |
3524 | cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; | |
3525 | err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); | |
3526 | cmd.data0 = MHLEN - MXGEFW_PAD; | |
3527 | err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, | |
3528 | &cmd); | |
3529 | cmd.data0 = big_bytes; | |
3530 | err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); | |
3531 | ||
3532 | if (err != 0) { | |
3533 | device_printf(sc->dev, "failed to setup params\n"); | |
3534 | goto abort; | |
3535 | } | |
3536 | ||
3537 | /* Now give the firmware the pointer to the stats block */ | |
3538 | for (slice = 0; | |
3539 | #ifdef IFNET_BUF_RING | |
3540 | slice < sc->num_slices; | |
3541 | #else | |
3542 | slice < 1; | |
3543 | #endif | |
3544 | slice++) { | |
3545 | ss = &sc->ss[slice]; | |
3546 | cmd.data0 = | |
3547 | MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); | |
3548 | cmd.data1 = | |
3549 | MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); | |
3550 | cmd.data2 = sizeof(struct mcp_irq_data); | |
3551 | cmd.data2 |= (slice << 16); | |
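/* data2 packs the stats block size into the low 16 bits and the
   slice index into the high 16 */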
3552 | err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); | |
3553 | } | |
3554 | ||
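/* older firmware lacks STATS_DMA_V2; fall back to the obsolete
   command, pointed at send_done_count so send completions still
   get DMAed to the host */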
3555 | if (err != 0) { | |
3556 | bus = sc->ss->fw_stats_dma.bus_addr; | |
3557 | bus += offsetof(struct mcp_irq_data, send_done_count); | |
3558 | cmd.data0 = MXGE_LOWPART_TO_U32(bus); | |
3559 | cmd.data1 = MXGE_HIGHPART_TO_U32(bus); | |
3560 | err = mxge_send_cmd(sc, | |
3561 | MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, | |
3562 | &cmd); | |
3563 | /* Firmware cannot support multicast without STATS_DMA_V2 */ | |
3564 | sc->fw_multicast_support = 0; | |
3565 | } else { | |
3566 | sc->fw_multicast_support = 1; | |
3567 | } | |
3568 | ||
3569 | if (err != 0) { | |
3570 | device_printf(sc->dev, "failed to setup params\n"); | |
3571 | goto abort; | |
3572 | } | |
3573 | ||
3574 | for (slice = 0; slice < sc->num_slices; slice++) { | |
3575 | err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); | |
3576 | if (err != 0) { | |
3577 | device_printf(sc->dev, "couldn't open slice %d\n", | |
3578 | slice); | |
3579 | goto abort; | |
3580 | } | |
3581 | } | |
3582 | ||
3583 | /* Finally, start the firmware running */ | |
3584 | err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); | |
3585 | if (err) { | |
3586 | device_printf(sc->dev, "Couldn't bring up link\n"); | |
3587 | goto abort; | |
3588 | } | |
3589 | #ifdef IFNET_BUF_RING | |
3590 | for (slice = 0; slice < sc->num_slices; slice++) { | |
3591 | ss = &sc->ss[slice]; | |
3592 | ss->if_drv_flags |= IFF_DRV_RUNNING; | |
3593 | ss->if_drv_flags &= ~IFF_DRV_OACTIVE; | |
3594 | } | |
3595 | #endif | |
3596 | sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; | |
3597 | sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; | |
3598 | callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); | |
3599 | ||
3600 | return 0; | |
3601 | ||
3602 | ||
3603 | abort: | |
3604 | mxge_free_mbufs(sc); | |
3605 | ||
3606 | return err; | |
3607 | } | |
3608 | ||
3609 | static int | |
3610 | mxge_close(mxge_softc_t *sc) | |
3611 | { | |
3612 | mxge_cmd_t cmd; | |
3613 | int err, old_down_cnt; | |
3614 | #ifdef IFNET_BUF_RING | |
3615 | struct mxge_slice_state *ss; | |
3616 | int slice; | |
3617 | #endif | |
3618 | ||
3619 | callout_stop(&sc->co_hdl); | |
3620 | #ifdef IFNET_BUF_RING | |
3621 | for (slice = 0; slice < sc->num_slices; slice++) { | |
3622 | ss = &sc->ss[slice]; | |
3623 | ss->if_drv_flags &= ~IFF_DRV_RUNNING; | |
3624 | } | |
3625 | #endif | |
3626 | sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; | |
3627 | old_down_cnt = sc->down_cnt; | |
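/* snapshot down_cnt so we can tell below whether the "down"
   interrupt arrived after the command completes */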
3628 | wmb(); | |
3629 | err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); | |
3630 | if (err) { | |
3631 | device_printf(sc->dev, "Couldn't bring down link\n"); | |
3632 | } | |
3633 | if (old_down_cnt == sc->down_cnt) { | |
3634 | /* wait for down irq */ | |
3635 | DELAY(10 * sc->intr_coal_delay); | |
3636 | } | |
3637 | wmb(); | |
3638 | if (old_down_cnt == sc->down_cnt) { | |
3639 | device_printf(sc->dev, "never got down irq\n"); | |
3640 | } | |
3641 | ||
3642 | mxge_free_mbufs(sc); | |
3643 | ||
3644 | return 0; | |
3645 | } | |
3646 | ||
3647 | static void | |
3648 | mxge_setup_cfg_space(mxge_softc_t *sc) | |
3649 | { | |
3650 | device_t dev = sc->dev; | |
3651 | int reg; | |
3652 | uint16_t cmd, lnk, pectl; | |
3653 | ||
3654 | /* find the PCIe link width and set max read request to 4KB */ | |
3655 | if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { | |
3656 | lnk = pci_read_config(dev, reg + 0x12, 2); | |
3657 | sc->link_width = (lnk >> 4) & 0x3f; | |
3658 | ||
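/* cap + 0x12 is the PCIe Link Status register (bits 9:4 hold the
   negotiated link width); cap + 0x8 is Device Control, where bits
   14:12 encode Max_Read_Request_Size as 128 << value, so the 5
   written below selects 4KB reads */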
3659 | pectl = pci_read_config(dev, reg + 0x8, 2); | |
3660 | pectl = (pectl & ~0x7000) | (5 << 12); | |
3661 | pci_write_config(dev, reg + 0x8, pectl, 2); | |
3662 | } | |
3663 | ||
3664 | /* Enable DMA and Memory space access */ | |
3665 | pci_enable_busmaster(dev); | |
3666 | cmd = pci_read_config(dev, PCIR_COMMAND, 2); | |
3667 | cmd |= PCIM_CMD_MEMEN; | |
3668 | pci_write_config(dev, PCIR_COMMAND, cmd, 2); | |
3669 | } | |
3670 | ||
3671 | static uint32_t | |
3672 | mxge_read_reboot(mxge_softc_t *sc) | |
3673 | { | |
3674 | device_t dev = sc->dev; | |
3675 | uint32_t vs; | |
3676 | ||
3677 | /* find the vendor specific offset */ | |
3678 | if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { | |
3679 | device_printf(sc->dev, | |
3680 | "could not find vendor specific offset\n"); | |
3681 | return (uint32_t)-1; | |
3682 | } | |
3683 | /* enable read32 mode */ | |
3684 | pci_write_config(dev, vs + 0x10, 0x3, 1); | |
3685 | /* tell NIC which register to read */ | |
3686 | pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); | |
3687 | return (pci_read_config(dev, vs + 0x14, 4)); | |
3688 | } | |
3689 | ||
3690 | static int | |
3691 | mxge_watchdog_reset(mxge_softc_t *sc, int slice) | |
3692 | { | |
3693 | struct pci_devinfo *dinfo; | |
3694 | mxge_tx_ring_t *tx; | |
3695 | int err; | |
3696 | uint32_t reboot; | |
3697 | uint16_t cmd; | |
3698 | ||
3699 | err = ENXIO; | |
3700 | ||
3701 | device_printf(sc->dev, "Watchdog reset!\n"); | |
3702 | ||
3703 | /* | |
3704 | * check to see if the NIC rebooted. If it did, then all of | |
3705 | * PCI config space has been reset, and things like the | |
3706 | * busmaster bit will be zero. If this is the case, then we | |
3707 | * must restore PCI config space before the NIC can be used | |
3708 | * again | |
3709 | */ | |
3710 | cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); | |
3711 | if (cmd == 0xffff) { | |
3712 | /* | |
3713 | * maybe the watchdog caught the NIC rebooting; wait | |
3714 | * up to 100ms for it to finish. If it does not come | |
3715 | * back, then give up | |
3716 | */ | |
3717 | DELAY(1000*100); | |
3718 | cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); | |
3719 | if (cmd == 0xffff) { | |
3720 | device_printf(sc->dev, "NIC disappeared!\n"); | |
3721 | return (err); | |
3722 | } | |
3723 | } | |
3724 | if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { | |
3725 | /* print the reboot status */ | |
3726 | reboot = mxge_read_reboot(sc); | |
3727 | device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", | |
3728 | reboot); | |
3729 | /* restore PCI configuration space */ | |
3730 | dinfo = device_get_ivars(sc->dev); | |
3731 | pci_cfg_restore(sc->dev, dinfo); | |
3732 | ||
3733 | /* and redo any changes we made to our config space */ | |
3734 | mxge_setup_cfg_space(sc); | |
3735 | ||
3736 | if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) { | |
3737 | mxge_close(sc); | |
3738 | err = mxge_open(sc); | |
3739 | } | |
3740 | } else { | |
3741 | tx = &sc->ss[slice].tx; | |
3742 | device_printf(sc->dev, | |
3743 | "NIC did not reboot, slice %d ring state:\n", | |
3744 | slice); | |
3745 | device_printf(sc->dev, | |
3746 | "tx.req=%d tx.done=%d, tx.queue_active=%d\n", | |
3747 | tx->req, tx->done, tx->queue_active); | |
3748 | device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", | |
3749 | tx->activate, tx->deactivate); | |
3750 | device_printf(sc->dev, "pkt_done=%d fw=%d\n", | |
3751 | tx->pkt_done, | |
3752 | be32toh(sc->ss->fw_stats->send_done_count)); | |
3753 | device_printf(sc->dev, "not resetting\n"); | |
3754 | } | |
3755 | return (err); | |
3756 | } | |
3757 | ||
3758 | static int | |
3759 | mxge_watchdog(mxge_softc_t *sc) | |
3760 | { | |
3761 | mxge_tx_ring_t *tx; | |
3762 | uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); | |
3763 | int i, err = 0; | |
3764 | ||
3765 | /* see if we have outstanding transmits that | |
3766 | have been pending for more than mxge_ticks */ | |
3767 | for (i = 0; | |
3768 | #ifdef IFNET_BUF_RING | |
3769 | (i < sc->num_slices) && (err == 0); | |
3770 | #else | |
3771 | (i < 1) && (err == 0); | |
3772 | #endif | |
3773 | i++) { | |
3774 | tx = &sc->ss[i].tx; | |
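/* a slice is deemed hung when it has requests outstanding now
   (req != done), already had a backlog at the previous tick
   (watchdog_req != watchdog_done), and has completed nothing
   since (done == watchdog_done) */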
3775 | if (tx->req != tx->done && | |
3776 | tx->watchdog_req != tx->watchdog_done && | |
3777 | tx->done == tx->watchdog_done) { | |
3778 | /* check for pause blocking before resetting */ | |
3779 | if (tx->watchdog_rx_pause == rx_pause) | |
3780 | err = mxge_watchdog_reset(sc, i); | |
3781 | else | |
3782 | device_printf(sc->dev, "Flow control blocking " | |
3783 | "xmits, check link partner\n"); | |
3784 | } | |
3785 | ||
3786 | tx->watchdog_req = tx->req; | |
3787 | tx->watchdog_done = tx->done; | |
3788 | tx->watchdog_rx_pause = rx_pause; | |
3789 | } | |
3790 | ||
3791 | if (sc->need_media_probe) | |
3792 | mxge_media_probe(sc); | |
3793 | return (err); | |
3794 | } | |
3795 | ||
3796 | static void | |
3797 | mxge_update_stats(mxge_softc_t *sc) | |
3798 | { | |
3799 | struct mxge_slice_state *ss; | |
3800 | u_long ipackets = 0; | |
3801 | u_long opackets = 0; | |
3802 | #ifdef IFNET_BUF_RING | |
3803 | u_long obytes = 0; | |
3804 | u_long omcasts = 0; | |
3805 | u_long odrops = 0; | |
3806 | #endif | |
3807 | u_long oerrors = 0; | |
3808 | int slice; | |
3809 | ||
3810 | for (slice = 0; slice < sc->num_slices; slice++) { | |
3811 | ss = &sc->ss[slice]; | |
3812 | ipackets += ss->ipackets; | |
3813 | opackets += ss->opackets; | |
3814 | #ifdef IFNET_BUF_RING | |
3815 | obytes += ss->obytes; | |
3816 | omcasts += ss->omcasts; | |
3817 | odrops += ss->tx.br->br_drops; | |
3818 | #endif | |
3819 | oerrors += ss->oerrors; | |
3820 | } | |
3821 | sc->ifp->if_ipackets = ipackets; | |
3822 | sc->ifp->if_opackets = opackets; | |
3823 | #ifdef IFNET_BUF_RING | |
3824 | sc->ifp->if_obytes = obytes; | |
3825 | sc->ifp->if_omcasts = omcasts; | |
3826 | sc->ifp->if_snd.ifq_drops = odrops; | |
3827 | #endif | |
3828 | sc->ifp->if_oerrors = oerrors; | |
3829 | } | |
3830 | ||
3831 | static void | |
3832 | mxge_tick(void *arg) | |
3833 | { | |
3834 | mxge_softc_t *sc = arg; | |
3835 | int err = 0; | |
3836 | ||
6a6f4694 | 3837 | lockmgr(&sc->driver_lock, LK_EXCLUSIVE); |
8892ea20 AE |
3838 | /* aggregate stats from different slices */ |
3839 | mxge_update_stats(sc); | |
3840 | if (!sc->watchdog_countdown) { | |
3841 | err = mxge_watchdog(sc); | |
3842 | sc->watchdog_countdown = 4; | |
3843 | } | |
3844 | sc->watchdog_countdown--; | |
3845 | if (err == 0) | |
3846 | callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); | |
6a6f4694 | 3847 | lockmgr(&sc->driver_lock, LK_RELEASE); |
8892ea20 AE |
3848 | } |
3849 | ||
3850 | static int | |
3851 | mxge_media_change(struct ifnet *ifp) | |
3852 | { | |
3853 | return EINVAL; | |
3854 | } | |
3855 | ||
3856 | static int | |
3857 | mxge_change_mtu(mxge_softc_t *sc, int mtu) | |
3858 | { | |
3859 | struct ifnet *ifp = sc->ifp; | |
3860 | int real_mtu, old_mtu; | |
3861 | int err = 0; | |
3862 | ||
3863 | ||
3864 | real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; | |
3865 | if ((real_mtu > sc->max_mtu) || real_mtu < 60) | |
3866 | return EINVAL; | |
e8a47a7f | 3867 | lockmgr(&sc->driver_lock, LK_EXCLUSIVE); |
8892ea20 AE |
3868 | old_mtu = ifp->if_mtu; |
3869 | ifp->if_mtu = mtu; | |
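/* the rings are sized from the MTU, so reopen; if that fails,
   fall back to the old MTU and try to bring the NIC back up */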
3870 | if (ifp->if_drv_flags & IFF_DRV_RUNNING) { | |
3871 | mxge_close(sc); | |
3872 | err = mxge_open(sc); | |
3873 | if (err != 0) { | |
3874 | ifp->if_mtu = old_mtu; | |
3875 | mxge_close(sc); | |
3876 | (void) mxge_open(sc); | |
3877 | } | |
3878 | } | |
e8a47a7f | 3879 | lockmgr(&sc->driver_lock, LK_RELEASE); |
8892ea20 AE |
3880 | return err; |
3881 | } | |
3882 | ||
3883 | static void | |
3884 | mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) | |
3885 | { | |
3886 | mxge_softc_t *sc = ifp->if_softc; | |
3887 | ||
3888 | ||
3889 | if (sc == NULL) | |
3890 | return; | |
3891 | ifmr->ifm_status = IFM_AVALID; | |
3892 | ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; | |
3893 | ifmr->ifm_active = IFM_AUTO | IFM_ETHER; | |
3894 | ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0; | |
3895 | } | |
3896 | ||
3897 | static int | |
3898 | mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) | |
3899 | { | |
3900 | mxge_softc_t *sc = ifp->if_softc; | |
3901 | struct ifreq *ifr = (struct ifreq *)data; | |
3902 | int err, mask; | |
3903 | ||
3904 | err = 0; | |
3905 | switch (command) { | |
3906 | case SIOCSIFADDR: | |
3907 | case SIOCGIFADDR: | |
3908 | err = ether_ioctl(ifp, command, data); | |
3909 | break; | |
3910 | ||
3911 | case SIOCSIFMTU: | |
3912 | err = mxge_change_mtu(sc, ifr->ifr_mtu); | |
3913 | break; | |
3914 | ||
3915 | case SIOCSIFFLAGS: | |
e8a47a7f | 3916 | lockmgr(&sc->driver_lock, LK_EXCLUSIVE); |
8892ea20 | 3917 | if (sc->dying) { |
e8a47a7f | 3918 | lockmgr(&sc->driver_lock, LK_RELEASE); |
8892ea20 AE |
3919 | return EINVAL; |
3920 | } | |
3921 | if (ifp->if_flags & IFF_UP) { | |
3922 | if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { | |
3923 | err = mxge_open(sc); | |
3924 | } else { | |
3925 | /* take care of promisc and allmulti | |
3926 | flag changes */ | |
3927 | mxge_change_promisc(sc, | |
3928 | ifp->if_flags & IFF_PROMISC); | |
3929 | mxge_set_multicast_list(sc); | |
3930 | } | |
3931 | } else { | |
3932 | if (ifp->if_drv_flags & IFF_DRV_RUNNING) { | |
3933 | mxge_close(sc); | |
3934 | } | |
3935 | } | |
e8a47a7f | 3936 | lockmgr(&sc->driver_lock, LK_RELEASE); |
8892ea20 AE |
3937 | break; |
3938 | ||
3939 | case SIOCADDMULTI: | |
3940 | case SIOCDELMULTI: | |
e8a47a7f | 3941 | lockmgr(&sc->driver_lock, LK_EXCLUSIVE); |
8892ea20 | 3942 | mxge_set_multicast_list(sc); |
e8a47a7f | 3943 | lockmgr(&sc->driver_lock, LK_RELEASE); |
8892ea20 AE |
3944 | break; |
3945 | ||
3946 | case SIOCSIFCAP: | |
e8a47a7f | 3947 | lockmgr(&sc->driver_lock, LK_EXCLUSIVE); |
8892ea20 AE |
3948 | mask = ifr->ifr_reqcap ^ ifp->if_capenable; |
3949 | if (mask & IFCAP_TXCSUM) { | |
3950 | if (IFCAP_TXCSUM & ifp->if_capenable) { | |
3951 | ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); | |
3952 | ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP | |
3953 | | CSUM_TSO); | |
3954 | } else { | |
3955 | ifp->if_capenable |= IFCAP_TXCSUM; | |
3956 | ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); | |
3957 | } | |
3958 | } else if (mask & IFCAP_RXCSUM) { | |
3959 | if (IFCAP_RXCSUM & ifp->if_capenable) { | |
3960 | ifp->if_capenable &= ~IFCAP_RXCSUM; | |
3961 | sc->csum_flag = 0; | |
3962 | } else { | |
3963 | ifp->if_capenable |= IFCAP_RXCSUM; | |
3964 | sc->csum_flag = 1; | |
3965 | } | |
3966 | } | |
3967 | if (mask & IFCAP_TSO4) { | |
3968 | if (IFCAP_TSO4 & ifp->if_capenable) { | |
3969 | ifp->if_capenable &= ~IFCAP_TSO4; | |
3970 | ifp->if_hwassist &= ~CSUM_TSO; | |
3971 | } else if (IFCAP_TXCSUM & ifp->if_capenable) { | |
3972 | ifp->if_capenable |= IFCAP_TSO4; | |
3973 | ifp->if_hwassist |= CSUM_TSO; | |
3974 | } else { | |
3975 | printf("mxge requires tx checksum offload" | |
3976 | " be enabled to use TSO\n"); | |
3977 | err = EINVAL; | |
3978 | } | |
3979 | } | |
3980 | if (mask & IFCAP_LRO) { | |
3981 | if (IFCAP_LRO & ifp->if_capenable) | |
3982 | err = mxge_change_lro_locked(sc, 0); | |
3983 | else | |
3984 | err = mxge_change_lro_locked(sc, mxge_lro_cnt); | |
3985 | } | |
3986 | if (mask & IFCAP_VLAN_HWTAGGING) | |
3987 | ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; | |
e8a47a7f | 3988 | lockmgr(&sc->driver_lock, LK_RELEASE); |
8892ea20 AE |
3989 | VLAN_CAPABILITIES(ifp); |
3990 | ||
3991 | break; | |
3992 | ||
3993 | case SIOCGIFMEDIA: | |
3994 | err = ifmedia_ioctl(ifp, (struct ifreq *)data, | |
3995 | &sc->media, command); | |
3996 | break; | |
3997 | ||
3998 | default: | |
3999 | err = ENOTTY; | |
4000 | } | |
4001 | return err; | |
4002 | } | |
4003 | ||
4004 | static void | |
4005 | mxge_fetch_tunables(mxge_softc_t *sc) | |
4006 | { | |
4007 | ||
4008 | TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); | |
4009 | TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", | |
4010 | &mxge_flow_control); | |
4011 | TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", | |
4012 | &mxge_intr_coal_delay); | |
4013 | TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", | |
4014 | &mxge_nvidia_ecrc_enable); | |
4015 | TUNABLE_INT_FETCH("hw.mxge.force_firmware", | |
4016 | &mxge_force_firmware); | |
4017 | TUNABLE_INT_FETCH("hw.mxge.deassert_wait", | |
4018 | &mxge_deassert_wait); | |
4019 | TUNABLE_INT_FETCH("hw.mxge.verbose", | |
4020 | &mxge_verbose); | |
4021 | TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); | |
4022 | TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); | |
4023 | TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); | |
4024 | TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); | |
4025 | TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); | |
4026 | if (sc->lro_cnt != 0) | |
4027 | mxge_lro_cnt = sc->lro_cnt; | |
4028 | ||
4029 | if (bootverbose) | |
4030 | mxge_verbose = 1; | |
4031 | if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) | |
4032 | mxge_intr_coal_delay = 30; | |
4033 | if (mxge_ticks == 0) | |
4034 | mxge_ticks = hz / 2; | |
4035 | sc->pause = mxge_flow_control; | |
4036 | if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 | |
4037 | || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { | |
4038 | mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; | |
4039 | } | |
4040 | if (mxge_initial_mtu > ETHERMTU_JUMBO || | |
4041 | mxge_initial_mtu < ETHER_MIN_LEN) | |
4042 | mxge_initial_mtu = ETHERMTU_JUMBO; | |
4043 | } | |
4044 | ||
4045 | ||
4046 | static void | |
4047 | mxge_free_slices(mxge_softc_t *sc) | |
4048 | { | |
4049 | struct mxge_slice_state *ss; | |
4050 | int i; | |
4051 | ||
4052 | ||
4053 | if (sc->ss == NULL) | |
4054 | return; | |
4055 | ||
4056 | for (i = 0; i < sc->num_slices; i++) { | |
4057 | ss = &sc->ss[i]; | |
4058 | if (ss->fw_stats != NULL) { | |
4059 | mxge_dma_free(&ss->fw_stats_dma); | |
4060 | ss->fw_stats = NULL; | |
4061 | #ifdef IFNET_BUF_RING | |
4062 | if (ss->tx.br != NULL) { | |
4063 | drbr_free(ss->tx.br, M_DEVBUF); | |
4064 | ss->tx.br = NULL; | |
4065 | } | |
4066 | #endif | |
e8a47a7f | 4067 | lockuninit(&ss->tx.lock); |
8892ea20 AE |
4068 | } |
4069 | if (ss->rx_done.entry != NULL) { | |
4070 | mxge_dma_free(&ss->rx_done.dma); | |
4071 | ss->rx_done.entry = NULL; | |
4072 | } | |
4073 | } | |