em/emx/igb/ix: Increase opackets stats in if_start method
[dragonfly.git] / sys / dev / netif / ix / if_ix.c
1/*
2 * Copyright (c) 2001-2013, Intel Corporation
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the Intel Corporation nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include "opt_ifpoll.h"
33#include "opt_ix.h"
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/endian.h>
38#include <sys/interrupt.h>
39#include <sys/kernel.h>
40#include <sys/malloc.h>
41#include <sys/mbuf.h>
42#include <sys/proc.h>
43#include <sys/rman.h>
44#include <sys/serialize.h>
45#include <sys/serialize2.h>
46#include <sys/socket.h>
47#include <sys/sockio.h>
48#include <sys/sysctl.h>
49#include <sys/systm.h>
50
51#include <net/bpf.h>
52#include <net/ethernet.h>
53#include <net/if.h>
54#include <net/if_arp.h>
55#include <net/if_dl.h>
56#include <net/if_media.h>
57#include <net/ifq_var.h>
58#include <net/toeplitz.h>
59#include <net/toeplitz2.h>
60#include <net/vlan/if_vlan_var.h>
61#include <net/vlan/if_vlan_ether.h>
62#include <net/if_poll.h>
63
64#include <netinet/in_systm.h>
65#include <netinet/in.h>
66#include <netinet/ip.h>
67
68#include <bus/pci/pcivar.h>
69#include <bus/pci/pcireg.h>
70
71#include <dev/netif/ix/ixgbe_api.h>
72#include <dev/netif/ix/if_ix.h>
73
74#ifdef IX_RSS_DEBUG
75#define IX_RSS_DPRINTF(sc, lvl, fmt, ...) \
76do { \
77 if (sc->rss_debug >= lvl) \
78 if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
79} while (0)
80#else /* !IX_RSS_DEBUG */
81#define IX_RSS_DPRINTF(sc, lvl, fmt, ...) ((void)0)
82#endif /* IX_RSS_DEBUG */
83
84#define IX_NAME "Intel(R) PRO/10GbE "
85#define IX_DEVICE(id) \
86 { IXGBE_VENDOR_ID, IXGBE_DEV_ID_##id, IX_NAME #id }
87#define IX_DEVICE_NULL { 0, 0, NULL }
88
89static struct ix_device {
90 uint16_t vid;
91 uint16_t did;
92 const char *desc;
93} ix_devices[] = {
94 IX_DEVICE(82598AF_DUAL_PORT),
95 IX_DEVICE(82598AF_SINGLE_PORT),
96 IX_DEVICE(82598EB_CX4),
97 IX_DEVICE(82598AT),
98 IX_DEVICE(82598AT2),
99 IX_DEVICE(82598),
100 IX_DEVICE(82598_DA_DUAL_PORT),
101 IX_DEVICE(82598_CX4_DUAL_PORT),
102 IX_DEVICE(82598EB_XF_LR),
103 IX_DEVICE(82598_SR_DUAL_PORT_EM),
104 IX_DEVICE(82598EB_SFP_LOM),
105 IX_DEVICE(82599_KX4),
106 IX_DEVICE(82599_KX4_MEZZ),
107 IX_DEVICE(82599_SFP),
108 IX_DEVICE(82599_XAUI_LOM),
109 IX_DEVICE(82599_CX4),
110 IX_DEVICE(82599_T3_LOM),
111 IX_DEVICE(82599_COMBO_BACKPLANE),
112 IX_DEVICE(82599_BACKPLANE_FCOE),
113 IX_DEVICE(82599_SFP_SF2),
114 IX_DEVICE(82599_SFP_FCOE),
115 IX_DEVICE(82599EN_SFP),
116 IX_DEVICE(82599_SFP_SF_QP),
117 IX_DEVICE(X540T),
118
119 /* required last entry */
120 IX_DEVICE_NULL
121};
122
123static int ix_probe(device_t);
124static int ix_attach(device_t);
125static int ix_detach(device_t);
126static int ix_shutdown(device_t);
127
128static void ix_serialize(struct ifnet *, enum ifnet_serialize);
129static void ix_deserialize(struct ifnet *, enum ifnet_serialize);
130static int ix_tryserialize(struct ifnet *, enum ifnet_serialize);
131#ifdef INVARIANTS
132static void ix_serialize_assert(struct ifnet *, enum ifnet_serialize,
133 boolean_t);
134#endif
135static void ix_start(struct ifnet *, struct ifaltq_subque *);
136static void ix_watchdog(struct ifaltq_subque *);
137static int ix_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
138static void ix_init(void *);
139static void ix_stop(struct ix_softc *);
140static void ix_media_status(struct ifnet *, struct ifmediareq *);
141static int ix_media_change(struct ifnet *);
142static void ix_timer(void *);
143#ifdef IFPOLL_ENABLE
144static void ix_npoll(struct ifnet *, struct ifpoll_info *);
145static void ix_npoll_rx(struct ifnet *, void *, int);
146static void ix_npoll_tx(struct ifnet *, void *, int);
147static void ix_npoll_status(struct ifnet *);
148#endif
149
150static void ix_add_sysctl(struct ix_softc *);
151static void ix_add_intr_rate_sysctl(struct ix_softc *, int,
152 const char *, int (*)(SYSCTL_HANDLER_ARGS), const char *);
153static int ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
154static int ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
155static int ix_sysctl_txd(SYSCTL_HANDLER_ARGS);
156static int ix_sysctl_rxd(SYSCTL_HANDLER_ARGS);
157static int ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
158static int ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int);
159static int ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS);
160static int ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS);
161static int ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS);
162static int ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS);
163static int ix_sysctl_flowctrl(SYSCTL_HANDLER_ARGS);
164#ifdef foo
165static int ix_sysctl_advspeed(SYSCTL_HANDLER_ARGS);
166#endif
167#if 0
168static void ix_add_hw_stats(struct ix_softc *);
169#endif
170#ifdef IFPOLL_ENABLE
171static int ix_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS);
172static int ix_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS);
173#endif
174
175static void ix_slot_info(struct ix_softc *);
176static int ix_alloc_rings(struct ix_softc *);
177static void ix_free_rings(struct ix_softc *);
178static void ix_setup_ifp(struct ix_softc *);
179static void ix_setup_serialize(struct ix_softc *);
180static void ix_set_ring_inuse(struct ix_softc *, boolean_t);
181static void ix_set_timer_cpuid(struct ix_softc *, boolean_t);
182static void ix_update_stats(struct ix_softc *);
183
184static void ix_set_promisc(struct ix_softc *);
185static void ix_set_multi(struct ix_softc *);
186static void ix_set_vlan(struct ix_softc *);
187static uint8_t *ix_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
188
189static int ix_get_txring_inuse(const struct ix_softc *, boolean_t);
190static void ix_init_tx_ring(struct ix_tx_ring *);
191static void ix_free_tx_ring(struct ix_tx_ring *);
192static int ix_create_tx_ring(struct ix_tx_ring *);
193static void ix_destroy_tx_ring(struct ix_tx_ring *, int);
194static void ix_init_tx_unit(struct ix_softc *);
195static int ix_encap(struct ix_tx_ring *, struct mbuf **,
196 uint16_t *, int *);
197static int ix_tx_ctx_setup(struct ix_tx_ring *,
198 const struct mbuf *, uint32_t *, uint32_t *);
199static int ix_tso_ctx_setup(struct ix_tx_ring *,
200 const struct mbuf *, uint32_t *, uint32_t *);
201static void ix_txeof(struct ix_tx_ring *, int);
202
203static int ix_get_rxring_inuse(const struct ix_softc *, boolean_t);
204static int ix_init_rx_ring(struct ix_rx_ring *);
205static void ix_free_rx_ring(struct ix_rx_ring *);
206static int ix_create_rx_ring(struct ix_rx_ring *);
207static void ix_destroy_rx_ring(struct ix_rx_ring *, int);
208static void ix_init_rx_unit(struct ix_softc *);
209#if 0
210static void ix_setup_hw_rsc(struct ix_rx_ring *);
211#endif
212static int ix_newbuf(struct ix_rx_ring *, int, boolean_t);
213static void ix_rxeof(struct ix_rx_ring *, int);
214static void ix_rx_discard(struct ix_rx_ring *, int, boolean_t);
215static void ix_enable_rx_drop(struct ix_softc *);
216static void ix_disable_rx_drop(struct ix_softc *);
217
218static void ix_alloc_msix(struct ix_softc *);
219static void ix_free_msix(struct ix_softc *, boolean_t);
220static void ix_conf_rx_msix(struct ix_softc *, int, int *, int);
221static void ix_conf_tx_msix(struct ix_softc *, int, int *, int);
222static void ix_setup_msix_eims(const struct ix_softc *, int,
223 uint32_t *, uint32_t *);
224static int ix_alloc_intr(struct ix_softc *);
225static void ix_free_intr(struct ix_softc *);
226static int ix_setup_intr(struct ix_softc *);
227static void ix_teardown_intr(struct ix_softc *, int);
228static void ix_enable_intr(struct ix_softc *);
229static void ix_disable_intr(struct ix_softc *);
230static void ix_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t);
231static void ix_set_eitr(struct ix_softc *, int, int);
232static void ix_intr_status(struct ix_softc *, uint32_t);
233static void ix_intr(void *);
234static void ix_msix_rxtx(void *);
235static void ix_msix_rx(void *);
236static void ix_msix_tx(void *);
237static void ix_msix_status(void *);
238
239static void ix_config_link(struct ix_softc *);
240static boolean_t ix_sfp_probe(struct ix_softc *);
241static boolean_t ix_is_sfp(const struct ixgbe_hw *);
242static void ix_setup_optics(struct ix_softc *);
243static void ix_update_link_status(struct ix_softc *);
244static void ix_handle_link(struct ix_softc *);
245static void ix_handle_mod(struct ix_softc *);
246static void ix_handle_msf(struct ix_softc *);
247
248/* XXX Shared code structure requires this for the moment */
249extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *);
250
251static device_method_t ix_methods[] = {
252 /* Device interface */
253 DEVMETHOD(device_probe, ix_probe),
254 DEVMETHOD(device_attach, ix_attach),
255 DEVMETHOD(device_detach, ix_detach),
256 DEVMETHOD(device_shutdown, ix_shutdown),
257 DEVMETHOD_END
258};
259
260static driver_t ix_driver = {
261 "ix",
262 ix_methods,
263 sizeof(struct ix_softc)
264};
265
266static devclass_t ix_devclass;
267
268DECLARE_DUMMY_MODULE(if_ix);
269DRIVER_MODULE(if_ix, pci, ix_driver, ix_devclass, NULL, NULL);
270
271static int ix_msi_enable = 1;
272static int ix_msix_enable = 1;
273static int ix_msix_agg_rxtx = 1;
274static int ix_rxr = 0;
275static int ix_txr = 0;
276static int ix_txd = IX_PERF_TXD;
277static int ix_rxd = IX_PERF_RXD;
278static int ix_unsupported_sfp = 0;
279
280TUNABLE_INT("hw.ix.msi.enable", &ix_msi_enable);
281TUNABLE_INT("hw.ix.msix.enable", &ix_msix_enable);
282TUNABLE_INT("hw.ix.msix.agg_rxtx", &ix_msix_agg_rxtx);
283TUNABLE_INT("hw.ix.rxr", &ix_rxr);
284TUNABLE_INT("hw.ix.txr", &ix_txr);
285TUNABLE_INT("hw.ix.txd", &ix_txd);
286TUNABLE_INT("hw.ix.rxd", &ix_rxd);
287TUNABLE_INT("hw.ix.unsupported_sfp", &ix_unsupported_sfp);
288
289/*
290 * Smart speed setting, default to on. This only works
291 * as a compile option right now as it is set during attach;
292 * set this to 'ixgbe_smart_speed_off' to disable.
293 */
294static const enum ixgbe_smart_speed ix_smart_speed =
295 ixgbe_smart_speed_on;
296
297static int
298ix_probe(device_t dev)
299{
300 const struct ix_device *d;
301 uint16_t vid, did;
302
303 vid = pci_get_vendor(dev);
304 did = pci_get_device(dev);
305
306 for (d = ix_devices; d->desc != NULL; ++d) {
307 if (vid == d->vid && did == d->did) {
308 device_set_desc(dev, d->desc);
309 return 0;
310 }
311 }
312 return ENXIO;
313}
314
315static int
316ix_attach(device_t dev)
317{
318 struct ix_softc *sc = device_get_softc(dev);
319 struct ixgbe_hw *hw;
320 int error, ring_cnt_max;
321 uint16_t csum;
322 uint32_t ctrl_ext;
323#ifdef IFPOLL_ENABLE
324 int offset, offset_def;
325#endif
326
327 sc->dev = sc->osdep.dev = dev;
328 hw = &sc->hw;
329
330 if_initname(&sc->arpcom.ac_if, device_get_name(dev),
331 device_get_unit(dev));
332 ifmedia_init(&sc->media, IFM_IMASK,
333 ix_media_change, ix_media_status);
334
335 /* Save frame size */
336 sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
337
338 callout_init_mp(&sc->timer);
339 lwkt_serialize_init(&sc->main_serialize);
340
341 /*
342 * Save off the information about this board
343 */
344 hw->vendor_id = pci_get_vendor(dev);
345 hw->device_id = pci_get_device(dev);
346 hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
347 hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
348 hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
349
350 ixgbe_set_mac_type(hw);
351
352 /* Pick up the 82599 and VF settings */
353 if (hw->mac.type != ixgbe_mac_82598EB)
354 hw->phy.smart_speed = ix_smart_speed;
355
356 /* Enable bus mastering */
357 pci_enable_busmaster(dev);
358
359 /*
360 * Allocate IO memory
361 */
362 sc->mem_rid = PCIR_BAR(0);
363 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
364 &sc->mem_rid, RF_ACTIVE);
365 if (sc->mem_res == NULL) {
366 device_printf(dev, "Unable to allocate bus resource: memory\n");
367 error = ENXIO;
368 goto failed;
369 }
370
371 sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
372 sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);
373
374 sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;
375 sc->hw.back = &sc->osdep;
376
377 /*
378 * Configure total supported RX/TX ring count
379 */
380 sc->rx_ring_cnt = device_getenv_int(dev, "rxr", ix_rxr);
381 sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, IX_MAX_RXRING);
382 sc->rx_ring_inuse = sc->rx_ring_cnt;
383
384 switch (hw->mac.type) {
385 case ixgbe_mac_82598EB:
386 ring_cnt_max = IX_MAX_TXRING_82598;
387 break;
388
389 case ixgbe_mac_82599EB:
390 ring_cnt_max = IX_MAX_TXRING_82599;
391 break;
392
393 case ixgbe_mac_X540:
394 ring_cnt_max = IX_MAX_TXRING_X540;
395 break;
396
397 default:
398 ring_cnt_max = 1;
399 break;
400 }
401 sc->tx_ring_cnt = device_getenv_int(dev, "txr", ix_txr);
402 sc->tx_ring_cnt = if_ring_count2(sc->tx_ring_cnt, ring_cnt_max);
403 sc->tx_ring_inuse = sc->tx_ring_cnt;
404
405 /* Allocate TX/RX rings */
406 error = ix_alloc_rings(sc);
407 if (error)
408 goto failed;
409
410#ifdef IFPOLL_ENABLE
411 /*
412 * NPOLLING RX CPU offset
413 */
414 if (sc->rx_ring_cnt == ncpus2) {
415 offset = 0;
416 } else {
417 offset_def = (sc->rx_ring_cnt * device_get_unit(dev)) % ncpus2;
418 offset = device_getenv_int(dev, "npoll.rxoff", offset_def);
419 if (offset >= ncpus2 ||
420 offset % sc->rx_ring_cnt != 0) {
421 device_printf(dev, "invalid npoll.rxoff %d, use %d\n",
422 offset, offset_def);
423 offset = offset_def;
424 }
425 }
426 sc->rx_npoll_off = offset;
427
428 /*
429 * NPOLLING TX CPU offset
430 */
431 if (sc->tx_ring_cnt == ncpus2) {
432 offset = 0;
433 } else {
434 offset_def = (sc->tx_ring_cnt * device_get_unit(dev)) % ncpus2;
435 offset = device_getenv_int(dev, "npoll.txoff", offset_def);
436 if (offset >= ncpus2 ||
437 offset % sc->tx_ring_cnt != 0) {
438 device_printf(dev, "invalid npoll.txoff %d, use %d\n",
439 offset, offset_def);
440 offset = offset_def;
441 }
442 }
443 sc->tx_npoll_off = offset;
444#endif
445
446 /* Allocate interrupt */
447 error = ix_alloc_intr(sc);
448 if (error)
449 goto failed;
450
451 /* Setup serializes */
452 ix_setup_serialize(sc);
453
454 /* Allocate multicast array memory. */
455 sc->mta = kmalloc(IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR,
456 M_DEVBUF, M_WAITOK);
457
458 /* Initialize the shared code */
459 hw->allow_unsupported_sfp = ix_unsupported_sfp;
460 error = ixgbe_init_shared_code(hw);
461 if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
462 /*
463 * No optics in this port; ask timer routine
464 * to probe for later insertion.
465 */
466 sc->sfp_probe = TRUE;
467 error = 0;
468 } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
469 device_printf(dev, "Unsupported SFP+ module detected!\n");
470 error = EIO;
471 goto failed;
472 } else if (error) {
473 device_printf(dev, "Unable to initialize the shared code\n");
474 error = EIO;
475 goto failed;
476 }
477
478 /* Make sure we have a good EEPROM before we read from it */
479 if (ixgbe_validate_eeprom_checksum(&sc->hw, &csum) < 0) {
480 device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
481 error = EIO;
482 goto failed;
483 }
484
485 error = ixgbe_init_hw(hw);
486 if (error == IXGBE_ERR_EEPROM_VERSION) {
487 device_printf(dev, "Pre-production device detected\n");
488 } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
489 device_printf(dev, "Unsupported SFP+ Module\n");
490 error = EIO;
491 goto failed;
492 } else if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
493 device_printf(dev, "No SFP+ Module found\n");
494 }
495
496 /* Detect and set physical type */
497 ix_setup_optics(sc);
498
499 /* Setup OS specific network interface */
500 ix_setup_ifp(sc);
501
502 /* Add sysctl tree */
503 ix_add_sysctl(sc);
504
505 error = ix_setup_intr(sc);
506 if (error) {
507 ether_ifdetach(&sc->arpcom.ac_if);
508 goto failed;
509 }
510
511 /* Initialize statistics */
512 ix_update_stats(sc);
513
514 /*
515 * Check PCIE slot type/speed/width
516 */
517 ix_slot_info(sc);
518
519 /* Set an initial default flow control value */
520 sc->fc = ixgbe_fc_full;
521
522 /* Let hardware know driver is loaded */
523 ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
524 ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
525 IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
526
527 return 0;
528failed:
529 ix_detach(dev);
530 return error;
531}
532
533static int
534ix_detach(device_t dev)
535{
536 struct ix_softc *sc = device_get_softc(dev);
537
538 if (device_is_attached(dev)) {
539 struct ifnet *ifp = &sc->arpcom.ac_if;
540 uint32_t ctrl_ext;
541
542 ifnet_serialize_all(ifp);
543
544 ix_stop(sc);
545 ix_teardown_intr(sc, sc->intr_cnt);
546
547 ifnet_deserialize_all(ifp);
548
549 callout_terminate(&sc->timer);
550 ether_ifdetach(ifp);
551
552 /* Let hardware know driver is unloading */
553 ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
554 ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
555 IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
556 }
557
558 ifmedia_removeall(&sc->media);
559 bus_generic_detach(dev);
560
561 ix_free_intr(sc);
562
563 if (sc->msix_mem_res != NULL) {
564 bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
565 sc->msix_mem_res);
566 }
567 if (sc->mem_res != NULL) {
568 bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
569 sc->mem_res);
570 }
571
572 ix_free_rings(sc);
573
574 if (sc->mta != NULL)
575 kfree(sc->mta, M_DEVBUF);
576 if (sc->serializes != NULL)
577 kfree(sc->serializes, M_DEVBUF);
578
579 return 0;
580}
581
582static int
583ix_shutdown(device_t dev)
584{
585 struct ix_softc *sc = device_get_softc(dev);
586 struct ifnet *ifp = &sc->arpcom.ac_if;
587
588 ifnet_serialize_all(ifp);
589 ix_stop(sc);
590 ifnet_deserialize_all(ifp);
591
592 return 0;
593}
594
595static void
596ix_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
597{
598 struct ix_softc *sc = ifp->if_softc;
599 struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
600 int idx = -1;
601 uint16_t nsegs;
602
603 KKASSERT(txr->tx_ifsq == ifsq);
604 ASSERT_SERIALIZED(&txr->tx_serialize);
605
606 if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
607 return;
608
609 if (!sc->link_active || (txr->tx_flags & IX_TXFLAG_ENABLED) == 0) {
610 ifsq_purge(ifsq);
611 return;
612 }
613
614 while (!ifsq_is_empty(ifsq)) {
615 struct mbuf *m_head;
616
617 if (txr->tx_avail <= IX_MAX_SCATTER + IX_TX_RESERVED) {
618 ifsq_set_oactive(ifsq);
619 txr->tx_watchdog.wd_timer = 5;
620 break;
621 }
622
623 m_head = ifsq_dequeue(ifsq);
624 if (m_head == NULL)
625 break;
626
627 if (ix_encap(txr, &m_head, &nsegs, &idx)) {
628 IFNET_STAT_INC(ifp, oerrors, 1);
629 continue;
630 }
631
632 /*
633 * TX interrupts are aggressively aggregated, so increasing
634 * opackets at TX interrupt time will make the opackets
635 * statistics vastly inaccurate; we do the opackets increment
636 * now.
637 */
638 IFNET_STAT_INC(ifp, opackets, 1);
639
640 if (nsegs >= txr->tx_wreg_nsegs) {
641 IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
642 nsegs = 0;
643 idx = -1;
644 }
645
646 ETHER_BPF_MTAP(ifp, m_head);
647 }
648 if (idx >= 0)
649 IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
650}
651
652static int
653ix_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
654{
655 struct ix_softc *sc = ifp->if_softc;
656 struct ifreq *ifr = (struct ifreq *) data;
657 int error = 0, mask, reinit;
658
659 ASSERT_IFNET_SERIALIZED_ALL(ifp);
660
661 switch (command) {
662 case SIOCSIFMTU:
663 if (ifr->ifr_mtu > IX_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
664 error = EINVAL;
665 } else {
666 ifp->if_mtu = ifr->ifr_mtu;
667 sc->max_frame_size =
668 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
669 ix_init(sc);
670 }
671 break;
672
673 case SIOCSIFFLAGS:
674 if (ifp->if_flags & IFF_UP) {
675 if (ifp->if_flags & IFF_RUNNING) {
676 if ((ifp->if_flags ^ sc->if_flags) &
677 (IFF_PROMISC | IFF_ALLMULTI))
678 ix_set_promisc(sc);
679 } else {
680 ix_init(sc);
681 }
682 } else if (ifp->if_flags & IFF_RUNNING) {
683 ix_stop(sc);
684 }
685 sc->if_flags = ifp->if_flags;
686 break;
687
688 case SIOCADDMULTI:
689 case SIOCDELMULTI:
690 if (ifp->if_flags & IFF_RUNNING) {
691 ix_disable_intr(sc);
692 ix_set_multi(sc);
693#ifdef IFPOLL_ENABLE
694 if ((ifp->if_flags & IFF_NPOLLING) == 0)
695#endif
696 ix_enable_intr(sc);
697 }
698 break;
699
700 case SIOCSIFMEDIA:
701 case SIOCGIFMEDIA:
702 error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
703 break;
704
705 case SIOCSIFCAP:
706 reinit = 0;
707 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
708 if (mask & IFCAP_RXCSUM) {
709 ifp->if_capenable ^= IFCAP_RXCSUM;
710 reinit = 1;
711 }
712 if (mask & IFCAP_VLAN_HWTAGGING) {
713 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
714 reinit = 1;
715 }
716 if (mask & IFCAP_TXCSUM) {
717 ifp->if_capenable ^= IFCAP_TXCSUM;
718 if (ifp->if_capenable & IFCAP_TXCSUM)
719 ifp->if_hwassist |= CSUM_OFFLOAD;
720 else
721 ifp->if_hwassist &= ~CSUM_OFFLOAD;
722 }
723 if (mask & IFCAP_TSO) {
724 ifp->if_capenable ^= IFCAP_TSO;
725 if (ifp->if_capenable & IFCAP_TSO)
726 ifp->if_hwassist |= CSUM_TSO;
727 else
728 ifp->if_hwassist &= ~CSUM_TSO;
729 }
730 if (mask & IFCAP_RSS)
731 ifp->if_capenable ^= IFCAP_RSS;
732 if (reinit && (ifp->if_flags & IFF_RUNNING))
733 ix_init(sc);
734 break;
735
736#if 0
737 case SIOCGI2C:
738 {
739 struct ixgbe_i2c_req i2c;
740 error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
741 if (error)
742 break;
743 if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
744 error = EINVAL;
745 break;
746 }
747 hw->phy.ops.read_i2c_byte(hw, i2c.offset,
748 i2c.dev_addr, i2c.data);
749 error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
750 break;
751 }
752#endif
753
754 default:
755 error = ether_ioctl(ifp, command, data);
756 break;
757 }
758 return error;
759}
760
761#define IXGBE_MHADD_MFS_SHIFT 16
762
763static void
764ix_init(void *xsc)
765{
766 struct ix_softc *sc = xsc;
767 struct ifnet *ifp = &sc->arpcom.ac_if;
768 struct ixgbe_hw *hw = &sc->hw;
769 uint32_t rxpb, frame, size, tmp;
770 uint32_t gpie, rxctrl;
771 int i, error;
772 boolean_t polling;
773
774 ASSERT_IFNET_SERIALIZED_ALL(ifp);
775
776 ix_stop(sc);
777
778 polling = FALSE;
779#ifdef IFPOLL_ENABLE
780 if (ifp->if_flags & IFF_NPOLLING)
781 polling = TRUE;
782#endif
783
784 /* Configure # of used RX/TX rings */
785 ix_set_ring_inuse(sc, polling);
786 ifq_set_subq_mask(&ifp->if_snd, sc->tx_ring_inuse - 1);
787
788 /* Get the latest MAC address; the user may have set a LAA */
789 bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
790 ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
791 hw->addr_ctrl.rar_used_count = 1;
792
793 /* Prepare transmit descriptors and buffers */
794 for (i = 0; i < sc->tx_ring_inuse; ++i)
795 ix_init_tx_ring(&sc->tx_rings[i]);
796
797 ixgbe_init_hw(hw);
798 ix_init_tx_unit(sc);
799
800 /* Setup Multicast table */
801 ix_set_multi(sc);
802
803 /* Prepare receive descriptors and buffers */
804 for (i = 0; i < sc->rx_ring_inuse; ++i) {
805 error = ix_init_rx_ring(&sc->rx_rings[i]);
806 if (error) {
807 if_printf(ifp, "Could not initialize RX ring%d\n", i);
808 ix_stop(sc);
809 return;
810 }
811 }
812
813 /* Configure RX settings */
814 ix_init_rx_unit(sc);
815
816 gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
817
818 /* Enable Fan Failure Interrupt */
819 gpie |= IXGBE_SDP1_GPIEN;
820
821 /* Add for Module detection */
822 if (hw->mac.type == ixgbe_mac_82599EB)
823 gpie |= IXGBE_SDP2_GPIEN;
824
825 /* Thermal Failure Detection */
826 if (hw->mac.type == ixgbe_mac_X540)
827 gpie |= IXGBE_SDP0_GPIEN;
828
829 if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
830 /* Enable Enhanced MSIX mode */
831 gpie |= IXGBE_GPIE_MSIX_MODE;
832 gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
833 IXGBE_GPIE_OCD;
834 }
835 IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
836
837 /* Set MTU size */
838 if (ifp->if_mtu > ETHERMTU) {
839 uint32_t mhadd;
840
841 mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
842 mhadd &= ~IXGBE_MHADD_MFS_MASK;
843 mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
844 IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
845 }
846
847 /*
848 * Enable TX rings
849 */
850 for (i = 0; i < sc->tx_ring_inuse; ++i) {
851 uint32_t txdctl;
852
853 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
854 txdctl |= IXGBE_TXDCTL_ENABLE;
855
856 /*
857 * Set WTHRESH to 0, since TX head write-back is used
858 */
859 txdctl &= ~(0x7f << 16);
860
861 /*
862 * When the internal queue falls below PTHRESH (32),
863 * start prefetching as long as there are at least
864 * HTHRESH (1) buffers ready. The values are taken
865 * from the Intel linux driver 3.8.21.
866 * Prefetching enables tx line rate even with 1 queue.
867 */
868 txdctl |= (32 << 0) | (1 << 8);
869 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
870 }
871
872 /*
873 * Enable RX rings
874 */
875 for (i = 0; i < sc->rx_ring_inuse; ++i) {
876 uint32_t rxdctl;
877 int k;
878
879 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
880 if (hw->mac.type == ixgbe_mac_82598EB) {
881 /*
882 * PTHRESH = 21
883 * HTHRESH = 4
884 * WTHRESH = 8
885 */
886 rxdctl &= ~0x3FFFFF;
887 rxdctl |= 0x080420;
888 }
889 rxdctl |= IXGBE_RXDCTL_ENABLE;
890 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
891 for (k = 0; k < 10; ++k) {
892 if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
893 IXGBE_RXDCTL_ENABLE)
894 break;
895 else
896 msec_delay(1);
897 }
898 wmb();
899 IXGBE_WRITE_REG(hw, IXGBE_RDT(i),
900 sc->rx_rings[0].rx_ndesc - 1);
901 }
902
903 /* Set up VLAN support and filter */
904 ix_set_vlan(sc);
905
906 /* Enable Receive engine */
907 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
908 if (hw->mac.type == ixgbe_mac_82598EB)
909 rxctrl |= IXGBE_RXCTRL_DMBYPS;
910 rxctrl |= IXGBE_RXCTRL_RXEN;
911 ixgbe_enable_rx_dma(hw, rxctrl);
912
913 for (i = 0; i < sc->tx_ring_inuse; ++i) {
914 const struct ix_tx_ring *txr = &sc->tx_rings[i];
915
916 if (txr->tx_intr_vec >= 0) {
917 ix_set_ivar(sc, i, txr->tx_intr_vec, 1);
918 } else {
919 /*
920 * Unconfigured TX interrupt vector could only
921 * happen for MSI-X.
922 */
923 KASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX,
924 ("TX intr vector is not set"));
925 KASSERT(i < sc->rx_ring_inuse,
926 ("invalid TX ring %d, no piggyback RX ring", i));
927 KASSERT(sc->rx_rings[i].rx_txr == txr,
928 ("RX ring %d piggybacked TX ring mismatch", i));
929 if (bootverbose)
930 if_printf(ifp, "IVAR skips TX ring %d\n", i);
931 }
932 }
933 for (i = 0; i < sc->rx_ring_inuse; ++i) {
934 const struct ix_rx_ring *rxr = &sc->rx_rings[i];
935
936 KKASSERT(rxr->rx_intr_vec >= 0);
937 ix_set_ivar(sc, i, rxr->rx_intr_vec, 0);
938 if (rxr->rx_txr != NULL) {
939 /*
940 * Piggyback the TX ring interrupt onto the RX
941 * ring interrupt vector.
942 */
943 KASSERT(rxr->rx_txr->tx_intr_vec < 0,
944 ("piggybacked TX ring configured intr vector"));
945 KASSERT(rxr->rx_txr->tx_idx == i,
946 ("RX ring %d piggybacked TX ring %u",
947 i, rxr->rx_txr->tx_idx));
948 ix_set_ivar(sc, i, rxr->rx_intr_vec, 1);
949 if (bootverbose) {
950 if_printf(ifp, "IVAR RX ring %d piggybacks "
951 "TX ring %u\n", i, rxr->rx_txr->tx_idx);
952 }
953 }
954 }
955 if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
956 /* Set up status MSI-X vector; it is using fixed entry 1 */
957 ix_set_ivar(sc, 1, sc->sts_msix_vec, -1);
958
959 /* Set up auto-mask for TX and RX rings */
960 if (hw->mac.type == ixgbe_mac_82598EB) {
961 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
962 } else {
963 IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
964 IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
965 }
966 } else {
967 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
968 }
969 for (i = 0; i < sc->intr_cnt; ++i)
970 ix_set_eitr(sc, i, sc->intr_data[i].intr_rate);
971
972 /*
973 * Check on any SFP devices that need to be kick-started
974 */
975 if (hw->phy.type == ixgbe_phy_none) {
976 error = hw->phy.ops.identify(hw);
977 if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
978 if_printf(ifp,
979 "Unsupported SFP+ module type was detected.\n");
980 /* XXX stop */
981 return;
982 }
983 }
984
985 /* Config/Enable Link */
986 ix_config_link(sc);
987
988 /*
989 * Hardware Packet Buffer & Flow Control setup
990 */
991 frame = sc->max_frame_size;
992
993 /* Calculate High Water */
994 if (hw->mac.type == ixgbe_mac_X540)
995 tmp = IXGBE_DV_X540(frame, frame);
996 else
997 tmp = IXGBE_DV(frame, frame);
998 size = IXGBE_BT2KB(tmp);
999 rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
1000 hw->fc.high_water[0] = rxpb - size;
1001
1002 /* Now calculate Low Water */
1003 if (hw->mac.type == ixgbe_mac_X540)
1004 tmp = IXGBE_LOW_DV_X540(frame);
1005 else
1006 tmp = IXGBE_LOW_DV(frame);
1007 hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1008
1009 hw->fc.requested_mode = sc->fc;
1010 hw->fc.pause_time = IX_FC_PAUSE;
1011 hw->fc.send_xon = TRUE;
1012
1013 /* Initialize the FC settings */
1014 ixgbe_start_hw(hw);
1015
1016 /*
1017 * Only enable interrupts if we are not polling; make sure
1018 * they are off otherwise.
1019 */
1020 if (polling)
1021 ix_disable_intr(sc);
1022 else
1023 ix_enable_intr(sc);
1024
1025 ifp->if_flags |= IFF_RUNNING;
1026 for (i = 0; i < sc->tx_ring_inuse; ++i) {
1027 ifsq_clr_oactive(sc->tx_rings[i].tx_ifsq);
1028 ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog);
1029 }
1030
1031 ix_set_timer_cpuid(sc, polling);
1032 callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1033}
1034
1035static void
1036ix_intr(void *xsc)
1037{
1038 struct ix_softc *sc = xsc;
1039 struct ixgbe_hw *hw = &sc->hw;
1040 uint32_t eicr;
1041
1042 ASSERT_SERIALIZED(&sc->main_serialize);
1043
1044 eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1045 if (eicr == 0) {
1046 IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1047 return;
1048 }
1049
1050 if (eicr & IX_RX0_INTR_MASK) {
1051 struct ix_rx_ring *rxr = &sc->rx_rings[0];
1052
1053 lwkt_serialize_enter(&rxr->rx_serialize);
1054 ix_rxeof(rxr, -1);
1055 lwkt_serialize_exit(&rxr->rx_serialize);
1056 }
1057 if (eicr & IX_RX1_INTR_MASK) {
1058 struct ix_rx_ring *rxr;
1059
1060 KKASSERT(sc->rx_ring_inuse == IX_MIN_RXRING_RSS);
1061 rxr = &sc->rx_rings[1];
1062
1063 lwkt_serialize_enter(&rxr->rx_serialize);
1064 ix_rxeof(rxr, -1);
1065 lwkt_serialize_exit(&rxr->rx_serialize);
1066 }
1067
1068 if (eicr & IX_TX_INTR_MASK) {
1069 struct ix_tx_ring *txr = &sc->tx_rings[0];
1070
1071 lwkt_serialize_enter(&txr->tx_serialize);
1072 ix_txeof(txr, *(txr->tx_hdr));
1073 if (!ifsq_is_empty(txr->tx_ifsq))
1074 ifsq_devstart(txr->tx_ifsq);
1075 lwkt_serialize_exit(&txr->tx_serialize);
1076 }
1077
1078 if (__predict_false(eicr & IX_EICR_STATUS))
1079 ix_intr_status(sc, eicr);
1080
1081 IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1082}
1083
1084static void
1085ix_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1086{
1087 struct ix_softc *sc = ifp->if_softc;
1088
1089 ix_update_link_status(sc);
1090
1091 ifmr->ifm_status = IFM_AVALID;
1092 ifmr->ifm_active = IFM_ETHER;
1093
1094 if (!sc->link_active)
1095 return;
1096
1097 ifmr->ifm_status |= IFM_ACTIVE;
1098
1099 switch (sc->link_speed) {
1100 case IXGBE_LINK_SPEED_100_FULL:
1101 ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1102 break;
1103 case IXGBE_LINK_SPEED_1GB_FULL:
1104 ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1105 break;
1106 case IXGBE_LINK_SPEED_10GB_FULL:
1107 ifmr->ifm_active |= sc->optics | IFM_FDX;
1108 break;
1109 }
1110}
1111
1112static int
1113ix_media_change(struct ifnet *ifp)
1114{
1115 struct ix_softc *sc = ifp->if_softc;
1116 struct ifmedia *ifm = &sc->media;
1117
1118 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1119 return EINVAL;
1120
1121 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1122 case IFM_AUTO:
1123 sc->hw.phy.autoneg_advertised =
1124 IXGBE_LINK_SPEED_100_FULL |
1125 IXGBE_LINK_SPEED_1GB_FULL |
1126 IXGBE_LINK_SPEED_10GB_FULL;
1127 break;
1128 default:
1129 if_printf(ifp, "Only auto media type\n");
1130 return EINVAL;
1131 }
1132 return 0;
1133}
1134
1135static __inline int
1136ix_tso_pullup(struct mbuf **mp)
1137{
1138 int hoff, iphlen, thoff;
1139 struct mbuf *m;
1140
1141 m = *mp;
1142 KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1143
1144 iphlen = m->m_pkthdr.csum_iphlen;
1145 thoff = m->m_pkthdr.csum_thlen;
1146 hoff = m->m_pkthdr.csum_lhlen;
1147
1148 KASSERT(iphlen > 0, ("invalid ip hlen"));
1149 KASSERT(thoff > 0, ("invalid tcp hlen"));
1150 KASSERT(hoff > 0, ("invalid ether hlen"));
1151
1152 if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1153 m = m_pullup(m, hoff + iphlen + thoff);
1154 if (m == NULL) {
1155 *mp = NULL;
1156 return ENOBUFS;
1157 }
1158 *mp = m;
1159 }
1160 return 0;
1161}
1162
1163static int
1164ix_encap(struct ix_tx_ring *txr, struct mbuf **m_headp,
1165 uint16_t *segs_used, int *idx)
1166{
1167 uint32_t olinfo_status = 0, cmd_type_len, cmd_rs = 0;
1168 int i, j, error, nsegs, first, maxsegs;
1169 struct mbuf *m_head = *m_headp;
1170 bus_dma_segment_t segs[IX_MAX_SCATTER];
1171 bus_dmamap_t map;
1172 struct ix_tx_buf *txbuf;
1173 union ixgbe_adv_tx_desc *txd = NULL;
1174
1175 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1176 error = ix_tso_pullup(m_headp);
1177 if (__predict_false(error))
1178 return error;
1179 m_head = *m_headp;
1180 }
1181
1182 /* Basic descriptor defines */
1183 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1184 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1185
1186 if (m_head->m_flags & M_VLANTAG)
1187 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1188
1189 /*
1190 * Important to capture the first descriptor
1191 * used because it will contain the index of
1192 * the one we tell the hardware to report back
1193 */
1194 first = txr->tx_next_avail;
1195 txbuf = &txr->tx_buf[first];
1196 map = txbuf->map;
1197
1198 /*
1199 * Map the packet for DMA.
1200 */
1201 maxsegs = txr->tx_avail - IX_TX_RESERVED;
1202 if (maxsegs > IX_MAX_SCATTER)
1203 maxsegs = IX_MAX_SCATTER;
1204
1205 error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
1206 segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1207 if (__predict_false(error)) {
1208 m_freem(*m_headp);
1209 *m_headp = NULL;
1210 return error;
1211 }
1212 bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);
1213
1214 m_head = *m_headp;
1215
1216 /*
1217 * Set up the appropriate offload context if requested;
1218 * this may consume one TX descriptor.
1219 */
1220 if (ix_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status)) {
1221 (*segs_used)++;
1222 txr->tx_nsegs++;
1223 }
1224
1225 *segs_used += nsegs;
1226 txr->tx_nsegs += nsegs;
1227 if (txr->tx_nsegs >= txr->tx_intr_nsegs) {
1228 /*
1229 * Report Status (RS) is turned on every intr_nsegs
1230 * descriptors (roughly).
1231 */
1232 txr->tx_nsegs = 0;
1233 cmd_rs = IXGBE_TXD_CMD_RS;
1234 }
1235
1236 i = txr->tx_next_avail;
1237 for (j = 0; j < nsegs; j++) {
1238 bus_size_t seglen;
1239 bus_addr_t segaddr;
1240
1241 txbuf = &txr->tx_buf[i];
1242 txd = &txr->tx_base[i];
1243 seglen = segs[j].ds_len;
1244 segaddr = htole64(segs[j].ds_addr);
1245
1246 txd->read.buffer_addr = segaddr;
1247 txd->read.cmd_type_len = htole32(IXGBE_TXD_CMD_IFCS |
1248 cmd_type_len |seglen);
1249 txd->read.olinfo_status = htole32(olinfo_status);
1250
1251 if (++i == txr->tx_ndesc)
1252 i = 0;
1253 }
1254 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | cmd_rs);
1255
1256 txr->tx_avail -= nsegs;
1257 txr->tx_next_avail = i;
1258
1259 txbuf->m_head = m_head;
1260 txr->tx_buf[first].map = txbuf->map;
1261 txbuf->map = map;
1262
1263 /*
1264 * Defer TDT updating until enough descriptors are set up
1265 */
1266 *idx = i;
1267
1268 return 0;
1269}
1270
1271static void
1272ix_set_promisc(struct ix_softc *sc)
1273{
1274 struct ifnet *ifp = &sc->arpcom.ac_if;
1275 uint32_t reg_rctl;
1276 int mcnt = 0;
1277
1278 reg_rctl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1279 reg_rctl &= ~IXGBE_FCTRL_UPE;
1280 if (ifp->if_flags & IFF_ALLMULTI) {
1281 mcnt = IX_MAX_MCASTADDR;
1282 } else {
1283 struct ifmultiaddr *ifma;
1284
1285 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1286 if (ifma->ifma_addr->sa_family != AF_LINK)
1287 continue;
1288 if (mcnt == IX_MAX_MCASTADDR)
1289 break;
1290 mcnt++;
1291 }
1292 }
1293 if (mcnt < IX_MAX_MCASTADDR)
1294 reg_rctl &= ~IXGBE_FCTRL_MPE;
1295 IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1296
1297 if (ifp->if_flags & IFF_PROMISC) {
1298 reg_rctl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1299 IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1300 } else if (ifp->if_flags & IFF_ALLMULTI) {
1301 reg_rctl |= IXGBE_FCTRL_MPE;
1302 reg_rctl &= ~IXGBE_FCTRL_UPE;
1303 IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1304 }
1305}
1306
1307static void
1308ix_set_multi(struct ix_softc *sc)
1309{
1310 struct ifnet *ifp = &sc->arpcom.ac_if;
1311 struct ifmultiaddr *ifma;
1312 uint32_t fctrl;
1313 uint8_t *mta;
1314 int mcnt = 0;
1315
1316 mta = sc->mta;
1317 bzero(mta, IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR);
1318
1319 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1320 if (ifma->ifma_addr->sa_family != AF_LINK)
1321 continue;
1322 if (mcnt == IX_MAX_MCASTADDR)
1323 break;
1324 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1325 &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1326 IXGBE_ETH_LENGTH_OF_ADDRESS);
1327 mcnt++;
1328 }
1329
1330 fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1331 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1332 if (ifp->if_flags & IFF_PROMISC) {
1333 fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1334 } else if (mcnt >= IX_MAX_MCASTADDR || (ifp->if_flags & IFF_ALLMULTI)) {
1335 fctrl |= IXGBE_FCTRL_MPE;
1336 fctrl &= ~IXGBE_FCTRL_UPE;
1337 } else {
1338 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1339 }
1340 IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);
1341
1342 if (mcnt < IX_MAX_MCASTADDR) {
1343 ixgbe_update_mc_addr_list(&sc->hw,
1344 mta, mcnt, ix_mc_array_itr, TRUE);
1345 }
1346}
1347
1348/*
1349 * This is an iterator function now needed by the multicast
1350 * shared code. It simply feeds the shared code routine the
1351 * addresses in the array of ix_set_multi() one by one.
1352 */
1353static uint8_t *
1354ix_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq)
1355{
1356 uint8_t *addr = *update_ptr;
1357 uint8_t *newptr;
1358 *vmdq = 0;
1359
1360 newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1361 *update_ptr = newptr;
1362 return addr;
1363}
1364
1365static void
1366ix_timer(void *arg)
1367{
1368 struct ix_softc *sc = arg;
1369
1370 lwkt_serialize_enter(&sc->main_serialize);
1371
1372 if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0) {
1373 lwkt_serialize_exit(&sc->main_serialize);
1374 return;
1375 }
1376
1377 /* Check for pluggable optics */
1378 if (sc->sfp_probe) {
1379 if (!ix_sfp_probe(sc))
1380 goto done; /* Nothing to do */
1381 }
1382
1383 ix_update_link_status(sc);
1384 ix_update_stats(sc);
1385
1386done:
1387 callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1388 lwkt_serialize_exit(&sc->main_serialize);
1389}
1390
1391static void
1392ix_update_link_status(struct ix_softc *sc)
1393{
1394 struct ifnet *ifp = &sc->arpcom.ac_if;
1395
1396 if (sc->link_up) {
1397 if (sc->link_active == FALSE) {
1398 if (bootverbose) {
1399 if_printf(ifp, "Link is up %d Gbps %s\n",
1400 sc->link_speed == 128 ? 10 : 1,
1401 "Full Duplex");
1402 }
1403 sc->link_active = TRUE;
1404
1405 /* Update any Flow Control changes */
1406 ixgbe_fc_enable(&sc->hw);
1407
1408 ifp->if_link_state = LINK_STATE_UP;
1409 if_link_state_change(ifp);
1410 }
1411 } else { /* Link down */
1412 if (sc->link_active == TRUE) {
1413 if (bootverbose)
1414 if_printf(ifp, "Link is Down\n");
1415 ifp->if_link_state = LINK_STATE_DOWN;
1416 if_link_state_change(ifp);
1417
1418 sc->link_active = FALSE;
1419 }
1420 }
1421}
1422
1423static void
1424ix_stop(struct ix_softc *sc)
1425{
1426 struct ixgbe_hw *hw = &sc->hw;
1427 struct ifnet *ifp = &sc->arpcom.ac_if;
1428 int i;
1429
1430 ASSERT_IFNET_SERIALIZED_ALL(ifp);
1431
1432 ix_disable_intr(sc);
1433 callout_stop(&sc->timer);
1434
1435 ifp->if_flags &= ~IFF_RUNNING;
1436 for (i = 0; i < sc->tx_ring_cnt; ++i) {
1437 struct ix_tx_ring *txr = &sc->tx_rings[i];
1438
1439 ifsq_clr_oactive(txr->tx_ifsq);
1440 ifsq_watchdog_stop(&txr->tx_watchdog);
1441 txr->tx_flags &= ~IX_TXFLAG_ENABLED;
1442 }
1443
1444 ixgbe_reset_hw(hw);
1445 hw->adapter_stopped = FALSE;
1446 ixgbe_stop_adapter(hw);
1447 if (hw->mac.type == ixgbe_mac_82599EB)
1448 ixgbe_stop_mac_link_on_d3_82599(hw);
1449 /* Turn off the laser - noop with no optics */
1450 ixgbe_disable_tx_laser(hw);
1451
1452 /* Update the stack */
1453 sc->link_up = FALSE;
1454 ix_update_link_status(sc);
1455
1456 /* Reprogram the RAR[0] in case user changed it. */
1457 ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
1458
1459 for (i = 0; i < sc->tx_ring_cnt; ++i)
1460 ix_free_tx_ring(&sc->tx_rings[i]);
1461
1462 for (i = 0; i < sc->rx_ring_cnt; ++i)
1463 ix_free_rx_ring(&sc->rx_rings[i]);
1464}
1465
1466static void
1467ix_setup_optics(struct ix_softc *sc)
1468{
1469 struct ixgbe_hw *hw = &sc->hw;
1470 int layer;
1471
1472 layer = ixgbe_get_supported_physical_layer(hw);
1473
1474 if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
1475 sc->optics = IFM_10G_T;
1476 return;
1477 }
1478
1479 if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
1480 sc->optics = IFM_1000_T;
1481 return;
1482 }
1483
1484 if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
1485 sc->optics = IFM_1000_SX;
1486 return;
1487 }
1488
1489 if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
1490 IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
1491 sc->optics = IFM_10G_LR;
1492 return;
1493 }
1494
1495 if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
1496 sc->optics = IFM_10G_SR;
1497 return;
1498 }
1499
1500 if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
1501 sc->optics = IFM_10G_TWINAX;
1502 return;
1503 }
1504
1505 if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
1506 IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
1507 sc->optics = IFM_10G_CX4;
1508 return;
1509 }
1510
1511 /* If we get here just set the default */
1512 sc->optics = IFM_ETHER | IFM_AUTO;
1513}
1514
1515static void
1516ix_setup_ifp(struct ix_softc *sc)
1517{
1518 struct ixgbe_hw *hw = &sc->hw;
1519 struct ifnet *ifp = &sc->arpcom.ac_if;
1520 int i;
1521
1522 ifp->if_baudrate = IF_Gbps(10UL);
1523
1524 ifp->if_softc = sc;
1525 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1526 ifp->if_init = ix_init;
1527 ifp->if_ioctl = ix_ioctl;
1528 ifp->if_start = ix_start;
1529 ifp->if_serialize = ix_serialize;
1530 ifp->if_deserialize = ix_deserialize;
1531 ifp->if_tryserialize = ix_tryserialize;
1532#ifdef INVARIANTS
1533 ifp->if_serialize_assert = ix_serialize_assert;
1534#endif
1535#ifdef IFPOLL_ENABLE
1536 ifp->if_npoll = ix_npoll;
1537#endif
1538
1539 /* Increase TSO burst length */
1540 ifp->if_tsolen = (8 * ETHERMTU);
1541
1542 ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].tx_ndesc - 2);
1543 ifq_set_ready(&ifp->if_snd);
1544 ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);
1545
1546 ifp->if_mapsubq = ifq_mapsubq_mask;
1547 ifq_set_subq_mask(&ifp->if_snd, 0);
1548
1549 ether_ifattach(ifp, hw->mac.addr, NULL);
1550
1551 ifp->if_capabilities =
1552 IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
1553 if (IX_ENABLE_HWRSS(sc))
1554 ifp->if_capabilities |= IFCAP_RSS;
1555 ifp->if_capenable = ifp->if_capabilities;
1556 ifp->if_hwassist = CSUM_OFFLOAD | CSUM_TSO;
1557
1558 /*
1559 * Tell the upper layer(s) we support long frames.
1560 */
1561 ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1562
1563 /* Setup TX rings and subqueues */
1564 for (i = 0; i < sc->tx_ring_cnt; ++i) {
1565 struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
1566 struct ix_tx_ring *txr = &sc->tx_rings[i];
1567
1568 ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
1569 ifsq_set_priv(ifsq, txr);
1570 ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
1571 txr->tx_ifsq = ifsq;
1572
1573 ifsq_watchdog_init(&txr->tx_watchdog, ifsq, ix_watchdog);
1574 }
1575
1576 /*
1577 * Specify the media types supported by this adapter and register
1578 * callbacks to update media and link information
1579 */
1580 ifmedia_add(&sc->media, IFM_ETHER | sc->optics, 0, NULL);
1581 ifmedia_set(&sc->media, IFM_ETHER | sc->optics);
1582 if (hw->device_id == IXGBE_DEV_ID_82598AT) {
1583 ifmedia_add(&sc->media,
1584 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
1585 ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_T, 0, NULL);
1586 }
1587 ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1588 ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
1589}
1590
1591static boolean_t
1592ix_is_sfp(const struct ixgbe_hw *hw)
1593{
1594 switch (hw->phy.type) {
1595 case ixgbe_phy_sfp_avago:
1596 case ixgbe_phy_sfp_ftl:
1597 case ixgbe_phy_sfp_intel:
1598 case ixgbe_phy_sfp_unknown:
1599 case ixgbe_phy_sfp_passive_tyco:
1600 case ixgbe_phy_sfp_passive_unknown:
1601 return TRUE;
1602 default:
1603 return FALSE;
1604 }
1605}
1606
1607static void
1608ix_config_link(struct ix_softc *sc)
1609{
1610 struct ixgbe_hw *hw = &sc->hw;
1611 boolean_t sfp;
1612
1613 sfp = ix_is_sfp(hw);
1614 if (sfp) {
1615 if (hw->phy.multispeed_fiber) {
1616 hw->mac.ops.setup_sfp(hw);
1617 ixgbe_enable_tx_laser(hw);
1618 ix_handle_msf(sc);
1619 } else {
1620 ix_handle_mod(sc);
1621 }
1622 } else {
1623 uint32_t autoneg, err = 0;
1624
1625 if (hw->mac.ops.check_link != NULL) {
1626 err = ixgbe_check_link(hw, &sc->link_speed,
1627 &sc->link_up, FALSE);
1628 if (err)
1629 return;
1630 }
1631
1632 autoneg = hw->phy.autoneg_advertised;
1633 if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
1634 bool negotiate;
1635
1636 err = hw->mac.ops.get_link_capabilities(hw,
1637 &autoneg, &negotiate);
1638 if (err)
1639 return;
1640 }
1641
1642 if (hw->mac.ops.setup_link != NULL) {
1643 err = hw->mac.ops.setup_link(hw,
1644 autoneg, sc->link_up);
1645 if (err)
1646 return;
1647 }
1648 }
1649}
1650
1651static int
1652ix_alloc_rings(struct ix_softc *sc)
1653{
1654 int error, i;
1655
1656 /*
1657 * Create top level busdma tag
1658 */
1659 error = bus_dma_tag_create(NULL, 1, 0,
1660 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1661 BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
1662 &sc->parent_tag);
1663 if (error) {
1664 device_printf(sc->dev, "could not create top level DMA tag\n");
1665 return error;
1666 }
1667
1668 /*
1669 * Allocate TX descriptor rings and buffers
1670 */
1671 sc->tx_rings = kmalloc_cachealign(
1672 sizeof(struct ix_tx_ring) * sc->tx_ring_cnt,
1673 M_DEVBUF, M_WAITOK | M_ZERO);
1674 for (i = 0; i < sc->tx_ring_cnt; ++i) {
1675 struct ix_tx_ring *txr = &sc->tx_rings[i];
1676
1677 txr->tx_sc = sc;
1678 txr->tx_idx = i;
1679 txr->tx_intr_vec = -1;
1680 lwkt_serialize_init(&txr->tx_serialize);
1681
1682 error = ix_create_tx_ring(txr);
1683 if (error)
1684 return error;
1685 }
1686
1687 /*
1688 * Allocate RX descriptor rings and buffers
1689 */
1690 sc->rx_rings = kmalloc_cachealign(
1691 sizeof(struct ix_rx_ring) * sc->rx_ring_cnt,
1692 M_DEVBUF, M_WAITOK | M_ZERO);
1693 for (i = 0; i < sc->rx_ring_cnt; ++i) {
1694 struct ix_rx_ring *rxr = &sc->rx_rings[i];
1695
1696 rxr->rx_sc = sc;
1697 rxr->rx_idx = i;
1698 rxr->rx_intr_vec = -1;
1699 lwkt_serialize_init(&rxr->rx_serialize);
1700
1701 error = ix_create_rx_ring(rxr);
1702 if (error)
1703 return error;
1704 }
1705
1706 return 0;
1707}
1708
1709static int
1710ix_create_tx_ring(struct ix_tx_ring *txr)
1711{
1712 int error, i, tsize, ntxd;
1713
1714 /*
1715 * Validate number of transmit descriptors. It must not exceed
1716 * hardware maximum, and must be multiple of IX_DBA_ALIGN.
1717 */
1718 ntxd = device_getenv_int(txr->tx_sc->dev, "txd", ix_txd);
1719 if (((ntxd * sizeof(union ixgbe_adv_tx_desc)) % IX_DBA_ALIGN) != 0 ||
1720 ntxd < IX_MIN_TXD || ntxd > IX_MAX_TXD) {
1721 device_printf(txr->tx_sc->dev,
1722 "Using %d TX descriptors instead of %d!\n",
1723 IX_DEF_TXD, ntxd);
1724 txr->tx_ndesc = IX_DEF_TXD;
1725 } else {
1726 txr->tx_ndesc = ntxd;
1727 }
1728
1729 /*
1730 * Allocate TX head write-back buffer
1731 */
1732 txr->tx_hdr = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1733 __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
1734 &txr->tx_hdr_dtag, &txr->tx_hdr_map, &txr->tx_hdr_paddr);
1735 if (txr->tx_hdr == NULL) {
1736 device_printf(txr->tx_sc->dev,
1737 "Unable to allocate TX head write-back buffer\n");
1738 return ENOMEM;
1739 }
1740
1741 /*
1742 * Allocate TX descriptor ring
1743 */
1744 tsize = roundup2(txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc),
1745 IX_DBA_ALIGN);
1746 txr->tx_base = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1747 IX_DBA_ALIGN, tsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
1748 &txr->tx_base_dtag, &txr->tx_base_map, &txr->tx_base_paddr);
1749 if (txr->tx_base == NULL) {
1750 device_printf(txr->tx_sc->dev,
1751 "Unable to allocate TX Descriptor memory\n");
1752 return ENOMEM;
1753 }
1754
1755 tsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_tx_buf) * txr->tx_ndesc);
1756 txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);
1757
1758 /*
1759 * Create DMA tag for TX buffers
1760 */
1761 error = bus_dma_tag_create(txr->tx_sc->parent_tag,
1762 1, 0, /* alignment, bounds */
1763 BUS_SPACE_MAXADDR, /* lowaddr */
1764 BUS_SPACE_MAXADDR, /* highaddr */
1765 NULL, NULL, /* filter, filterarg */
1766 IX_TSO_SIZE, /* maxsize */
1767 IX_MAX_SCATTER, /* nsegments */
1768 PAGE_SIZE, /* maxsegsize */
1769 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
1770 BUS_DMA_ONEBPAGE, /* flags */
1771 &txr->tx_tag);
1772 if (error) {
1773 device_printf(txr->tx_sc->dev,
1774 "Unable to allocate TX DMA tag\n");
1775 kfree(txr->tx_buf, M_DEVBUF);
1776 txr->tx_buf = NULL;
1777 return error;
1778 }
1779
1780 /*
1781 * Create DMA maps for TX buffers
1782 */
1783 for (i = 0; i < txr->tx_ndesc; ++i) {
1784 struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1785
1786 error = bus_dmamap_create(txr->tx_tag,
1787 BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
1788 if (error) {
1789 device_printf(txr->tx_sc->dev,
1790 "Unable to create TX DMA map\n");
1791 ix_destroy_tx_ring(txr, i);
1792 return error;
1793 }
1794 }
1795
1796 /*
1797 * Initialize various watermarks
1798 */
1799 txr->tx_wreg_nsegs = IX_DEF_TXWREG_NSEGS;
1800 txr->tx_intr_nsegs = txr->tx_ndesc / 16;
1801
1802 return 0;
1803}
1804
1805static void
1806ix_destroy_tx_ring(struct ix_tx_ring *txr, int ndesc)
1807{
1808 int i;
1809
1810 if (txr->tx_hdr != NULL) {
1811 bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_map);
1812 bus_dmamem_free(txr->tx_hdr_dtag,
1813 __DEVOLATILE(void *, txr->tx_hdr), txr->tx_hdr_map);
1814 bus_dma_tag_destroy(txr->tx_hdr_dtag);
1815 txr->tx_hdr = NULL;
1816 }
1817
1818 if (txr->tx_base != NULL) {
1819 bus_dmamap_unload(txr->tx_base_dtag, txr->tx_base_map);
1820 bus_dmamem_free(txr->tx_base_dtag, txr->tx_base,
1821 txr->tx_base_map);
1822 bus_dma_tag_destroy(txr->tx_base_dtag);
1823 txr->tx_base = NULL;
1824 }
1825
1826 if (txr->tx_buf == NULL)
1827 return;
1828
1829 for (i = 0; i < ndesc; ++i) {
1830 struct ix_tx_buf *txbuf = &txr->tx_buf[i];
1831
1832 KKASSERT(txbuf->m_head == NULL);
1833 bus_dmamap_destroy(txr->tx_tag, txbuf->map);
1834 }
1835 bus_dma_tag_destroy(txr->tx_tag);
1836
1837 kfree(txr->tx_buf, M_DEVBUF);
1838 txr->tx_buf = NULL;
1839}
1840
1841static void
1842ix_init_tx_ring(struct ix_tx_ring *txr)
1843{
1844 /* Clear the old ring contents */
1845 bzero(txr->tx_base, sizeof(union ixgbe_adv_tx_desc) * txr->tx_ndesc);
1846
1847 /* Clear TX head write-back buffer */
1848 *(txr->tx_hdr) = 0;
1849
1850 /* Reset indices */
1851 txr->tx_next_avail = 0;
1852 txr->tx_next_clean = 0;
1853 txr->tx_nsegs = 0;
1854
1855 /* Set number of descriptors available */
1856 txr->tx_avail = txr->tx_ndesc;
1857
1858 /* Enable this TX ring */
1859 txr->tx_flags |= IX_TXFLAG_ENABLED;
1860}
1861
1862static void
1863ix_init_tx_unit(struct ix_softc *sc)
1864{
1865 struct ixgbe_hw *hw = &sc->hw;
1866 int i;
1867
1868 /*
1869 * Setup the Base and Length of the Tx Descriptor Ring
1870 */
1871 for (i = 0; i < sc->tx_ring_inuse; ++i) {
1872 struct ix_tx_ring *txr = &sc->tx_rings[i];
1873 uint64_t tdba = txr->tx_base_paddr;
1874 uint64_t hdr_paddr = txr->tx_hdr_paddr;
1875 uint32_t txctrl;
1876
1877 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (uint32_t)tdba);
1878 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (uint32_t)(tdba >> 32));
1879 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
1880 txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc));
1881
1882 /* Setup the HW Tx Head and Tail descriptor pointers */
1883 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
1884 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
1885
1886 /* Disable TX head write-back relax ordering */
1887 switch (hw->mac.type) {
1888 case ixgbe_mac_82598EB:
1889 txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
1890 break;
1891 case ixgbe_mac_82599EB:
1892 case ixgbe_mac_X540:
1893 default:
1894 txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
1895 break;
1896 }
1897 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
1898 switch (hw->mac.type) {
1899 case ixgbe_mac_82598EB:
1900 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
1901 break;
1902 case ixgbe_mac_82599EB:
1903 case ixgbe_mac_X540:
1904 default:
1905 IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
1906 break;
1907 }
1908
1909 /* Enable TX head write-back */
1910 IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(i),
1911 (uint32_t)(hdr_paddr >> 32));
1912 IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(i),
1913 ((uint32_t)hdr_paddr) | IXGBE_TDWBAL_HEAD_WB_ENABLE);
1914 }
1915
1916 if (hw->mac.type != ixgbe_mac_82598EB) {
1917 uint32_t dmatxctl, rttdcs;
1918
1919 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
1920 dmatxctl |= IXGBE_DMATXCTL_TE;
1921 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
1922
1923 /* Disable arbiter to set MTQC */
1924 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
1925 rttdcs |= IXGBE_RTTDCS_ARBDIS;
1926 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
1927
1928 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
1929
1930 /* Re-enable arbiter */
1931 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
1932 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
1933 }
1934}
1935
1936static int
1937ix_tx_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
1938 uint32_t *cmd_type_len, uint32_t *olinfo_status)
1939{
1940 struct ixgbe_adv_tx_context_desc *TXD;
1941 uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
1942 int ehdrlen, ip_hlen = 0, ctxd;
1943 boolean_t offload = TRUE;
1944
1945 /* First check if TSO is to be used */
1946 if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
1947 return ix_tso_ctx_setup(txr, mp,
1948 cmd_type_len, olinfo_status);
1949 }
1950
1951 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
1952 offload = FALSE;
1953
1954 /* Indicate the whole packet as payload when not doing TSO */
1955 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
1956
1957 /*
1958 * In advanced descriptors the vlan tag must be placed into the
1959 * context descriptor. Hence we need to make one even if not
1960 * doing checksum offloads.
1961 */
1962 if (mp->m_flags & M_VLANTAG) {
1963 vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
1964 IXGBE_ADVTXD_VLAN_SHIFT;
1965 } else if (!offload) {
1966 /* No TX descriptor is consumed */
1967 return 0;
1968 }
1969
1970 /* Set the ether header length */
1971 ehdrlen = mp->m_pkthdr.csum_lhlen;
1972 KASSERT(ehdrlen > 0, ("invalid ether hlen"));
1973 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1974
1975 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
1976 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1977 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1978 ip_hlen = mp->m_pkthdr.csum_iphlen;
1979 KASSERT(ip_hlen > 0, ("invalid ip hlen"));
1980 }
1981 vlan_macip_lens |= ip_hlen;
1982
1983 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1984 if (mp->m_pkthdr.csum_flags & CSUM_TCP)
1985 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1986 else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
1987 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
1988
1989 if (mp->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
1990 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1991
1992 /* Now ready a context descriptor */
1993 ctxd = txr->tx_next_avail;
1994 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1995
1996 /* Now copy bits into descriptor */
1997 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1998 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1999 TXD->seqnum_seed = htole32(0);
2000 TXD->mss_l4len_idx = htole32(0);
2001
2002 /* We've consumed the first desc, adjust counters */
2003 if (++ctxd == txr->tx_ndesc)
2004 ctxd = 0;
2005 txr->tx_next_avail = ctxd;
2006 --txr->tx_avail;
2007
2008 /* One TX descriptor is consumed */
2009 return 1;
2010}
2011
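/*
 * Build the TSO context descriptor.  The link/IP/TCP header lengths are
 * taken from the csum_*len fields the stack filled in, and the MSS comes
 * from tso_segsz; the payload length reported back to the caller excludes
 * all headers, since the hardware replicates them for every segment.
 */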
2012static int
2013ix_tso_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2014 uint32_t *cmd_type_len, uint32_t *olinfo_status)
2015{
2016 struct ixgbe_adv_tx_context_desc *TXD;
2017 uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2018 uint32_t mss_l4len_idx = 0, paylen;
2019 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2020
2021 ehdrlen = mp->m_pkthdr.csum_lhlen;
2022 KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2023
2024 ip_hlen = mp->m_pkthdr.csum_iphlen;
2025 KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2026
2027 tcp_hlen = mp->m_pkthdr.csum_thlen;
2028 KASSERT(tcp_hlen > 0, ("invalid tcp hlen"));
2029
2030 ctxd = txr->tx_next_avail;
2031 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
2032
2033 if (mp->m_flags & M_VLANTAG) {
2034 vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2035 IXGBE_ADVTXD_VLAN_SHIFT;
2036 }
2037 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2038 vlan_macip_lens |= ip_hlen;
2039 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2040
2041 /* ADV DTYPE TUCMD */
2042 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2043 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2044 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2045 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2046
2047 /* MSS L4LEN IDX */
2048 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
2049 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
2050 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2051
2052 TXD->seqnum_seed = htole32(0);
2053
2054 if (++ctxd == txr->tx_ndesc)
2055 ctxd = 0;
2056
2057 txr->tx_avail--;
2058 txr->tx_next_avail = ctxd;
2059
2060 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
2061
2062 /* This is used in the transmit desc in encap */
2063 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
2064
2065 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2066 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2067 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
2068
2069 /* One TX descriptor is consumed */
2070 return 1;
2071}
2072
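/*
 * Reclaim completed TX descriptors.  'hdr' is the consumed-descriptor
 * index taken from the head write-back buffer; everything between
 * tx_next_clean and hdr has been transmitted and can be unloaded/freed.
 */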
2073static void
2074ix_txeof(struct ix_tx_ring *txr, int hdr)
2075{
2076	int first, avail;
2077
2078 if (txr->tx_avail == txr->tx_ndesc)
2079 return;
2080
2081 first = txr->tx_next_clean;
2082 if (first == hdr)
2083 return;
2084
2085 avail = txr->tx_avail;
2086 while (first != hdr) {
2087 struct ix_tx_buf *txbuf = &txr->tx_buf[first];
2088
2089 ++avail;
2090 if (txbuf->m_head) {
2091 bus_dmamap_unload(txr->tx_tag, txbuf->map);
2092 m_freem(txbuf->m_head);
2093 txbuf->m_head = NULL;
2094 }
2095 if (++first == txr->tx_ndesc)
2096 first = 0;
2097 }
2098 txr->tx_next_clean = first;
2099 txr->tx_avail = avail;
2100
2101 if (txr->tx_avail > IX_MAX_SCATTER + IX_TX_RESERVED) {
2102 ifsq_clr_oactive(txr->tx_ifsq);
2103 txr->tx_watchdog.wd_timer = 0;
2104 }
2105}
2106
2107static int
2108ix_create_rx_ring(struct ix_rx_ring *rxr)
2109{
2110 int i, rsize, error, nrxd;
2111
2112 /*
2113 * Validate number of receive descriptors. It must not exceed
2114 * hardware maximum, and must be multiple of IX_DBA_ALIGN.
2115 */
2116 nrxd = device_getenv_int(rxr->rx_sc->dev, "rxd", ix_rxd);
2117 if (((nrxd * sizeof(union ixgbe_adv_rx_desc)) % IX_DBA_ALIGN) != 0 ||
2118 nrxd < IX_MIN_RXD || nrxd > IX_MAX_RXD) {
2119 device_printf(rxr->rx_sc->dev,
2120 "Using %d RX descriptors instead of %d!\n",
2121 IX_DEF_RXD, nrxd);
2122 rxr->rx_ndesc = IX_DEF_RXD;
2123 } else {
2124 rxr->rx_ndesc = nrxd;
2125 }
2126
2127 /*
2128 * Allocate RX descriptor ring
2129 */
2130 rsize = roundup2(rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc),
2131 IX_DBA_ALIGN);
2132 rxr->rx_base = bus_dmamem_coherent_any(rxr->rx_sc->parent_tag,
2133 IX_DBA_ALIGN, rsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
2134 &rxr->rx_base_dtag, &rxr->rx_base_map, &rxr->rx_base_paddr);
2135 if (rxr->rx_base == NULL) {
2136 device_printf(rxr->rx_sc->dev,
2137		    "Unable to allocate RX Descriptor memory\n");
2138 return ENOMEM;
2139 }
2140
2141 rsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_rx_buf) * rxr->rx_ndesc);
2142 rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO);
2143
2144 /*
2145 * Create DMA tag for RX buffers
2146 */
2147 error = bus_dma_tag_create(rxr->rx_sc->parent_tag,
2148 1, 0, /* alignment, bounds */
2149 BUS_SPACE_MAXADDR, /* lowaddr */
2150 BUS_SPACE_MAXADDR, /* highaddr */
2151 NULL, NULL, /* filter, filterarg */
2152 PAGE_SIZE, /* maxsize */
2153 1, /* nsegments */
2154 PAGE_SIZE, /* maxsegsize */
2155 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
2156 &rxr->rx_tag);
2157 if (error) {
2158 device_printf(rxr->rx_sc->dev,
2159 "Unable to create RX DMA tag\n");
2160 kfree(rxr->rx_buf, M_DEVBUF);
2161 rxr->rx_buf = NULL;
2162 return error;
2163 }
2164
2165 /*
2166 * Create spare DMA map for RX buffers
2167 */
2168 error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
2169 &rxr->rx_sparemap);
2170 if (error) {
2171 device_printf(rxr->rx_sc->dev,
2172 "Unable to create spare RX DMA map\n");
2173 bus_dma_tag_destroy(rxr->rx_tag);
2174 kfree(rxr->rx_buf, M_DEVBUF);
2175 rxr->rx_buf = NULL;
2176 return error;
2177 }
2178
2179 /*
2180 * Create DMA maps for RX buffers
2181 */
2182 for (i = 0; i < rxr->rx_ndesc; ++i) {
2183 struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2184
2185 error = bus_dmamap_create(rxr->rx_tag,
2186 BUS_DMA_WAITOK, &rxbuf->map);
2187 if (error) {
2188 device_printf(rxr->rx_sc->dev,
2189 "Unable to create RX dma map\n");
2190 ix_destroy_rx_ring(rxr, i);
2191 return error;
2192 }
2193 }
2194
2195 /*
2196	 * Initialize various watermarks
2197 */
2198 rxr->rx_wreg_nsegs = IX_DEF_RXWREG_NSEGS;
2199
2200 return 0;
2201}
2202
2203static void
2204ix_destroy_rx_ring(struct ix_rx_ring *rxr, int ndesc)
2205{
2206 int i;
2207
2208 if (rxr->rx_base != NULL) {
2209 bus_dmamap_unload(rxr->rx_base_dtag, rxr->rx_base_map);
2210 bus_dmamem_free(rxr->rx_base_dtag, rxr->rx_base,
2211 rxr->rx_base_map);
2212 bus_dma_tag_destroy(rxr->rx_base_dtag);
2213 rxr->rx_base = NULL;
2214 }
2215
2216 if (rxr->rx_buf == NULL)
2217 return;
2218
2219 for (i = 0; i < ndesc; ++i) {
2220 struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2221
2222 KKASSERT(rxbuf->m_head == NULL);
2223 bus_dmamap_destroy(rxr->rx_tag, rxbuf->map);
2224 }
2225 bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap);
2226 bus_dma_tag_destroy(rxr->rx_tag);
2227
2228 kfree(rxr->rx_buf, M_DEVBUF);
2229 rxr->rx_buf = NULL;
2230}
2231
2232/*
2233** Used to detect a descriptor that has
2234** been merged by Hardware RSC.
2235*/
2236static __inline uint32_t
2237ix_rsc_count(union ixgbe_adv_rx_desc *rx)
2238{
2239 return (le32toh(rx->wb.lower.lo_dword.data) &
2240 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
2241}
2242
2243#if 0
2244/*********************************************************************
2245 *
2246 * Initialize Hardware RSC (LRO) feature on 82599
2247 * for an RX ring, this is toggled by the LRO capability
2248 * even though it is transparent to the stack.
2249 *
2250 * NOTE: since this HW feature only works with IPv4, and
2251 * our testing has shown soft LRO to be as effective,
2252 * I have decided to disable this by default.
2253 *
2254 **********************************************************************/
2255static void
2256ix_setup_hw_rsc(struct ix_rx_ring *rxr)
2257{
2258 struct ix_softc *sc = rxr->rx_sc;
2259 struct ixgbe_hw *hw = &sc->hw;
2260 uint32_t rscctrl, rdrxctl;
2261
2262#if 0
2263 /* If turning LRO/RSC off we need to disable it */
2264 if ((sc->arpcom.ac_if.if_capenable & IFCAP_LRO) == 0) {
2265 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2266 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
2267 return;
2268 }
2269#endif
2270
2271 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2272 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2273 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2274 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
2275 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2276
2277 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2278 rscctrl |= IXGBE_RSCCTL_RSCEN;
2279 /*
2280 ** Limit the total number of descriptors that
2281 ** can be combined, so it does not exceed 64K
2282 */
2283 if (rxr->mbuf_sz == MCLBYTES)
2284 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
2285 else if (rxr->mbuf_sz == MJUMPAGESIZE)
2286 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
2287 else if (rxr->mbuf_sz == MJUM9BYTES)
2288 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
2289 else /* Using 16K cluster */
2290 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
2291
2292 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
2293
2294 /* Enable TCP header recognition */
2295 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
2296 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
2297 IXGBE_PSRTYPE_TCPHDR));
2298
2299 /* Disable RSC for ACK packets */
2300 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
2301 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
2302
2303 rxr->hw_rsc = TRUE;
2304}
2305#endif
2306
2307static int
2308ix_init_rx_ring(struct ix_rx_ring *rxr)
2309{
2310 int i;
2311
2312 /* Clear the ring contents */
2313 bzero(rxr->rx_base, rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2314
2315 /* XXX we need JUMPAGESIZE for RSC too */
2316 if (rxr->rx_sc->max_frame_size <= MCLBYTES)
2317 rxr->rx_mbuf_sz = MCLBYTES;
2318 else
2319 rxr->rx_mbuf_sz = MJUMPAGESIZE;
2320
2321 /* Now replenish the mbufs */
2322 for (i = 0; i < rxr->rx_ndesc; ++i) {
2323 int error;
2324
2325 error = ix_newbuf(rxr, i, TRUE);
2326 if (error)
2327 return error;
2328 }
2329
2330 /* Setup our descriptor indices */
2331 rxr->rx_next_check = 0;
2332 rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2333
2334#if 0
2335 /*
2336 ** Now set up the LRO interface:
2337 */
2338 if (ixgbe_rsc_enable)
2339 ix_setup_hw_rsc(rxr);
2340#endif
2341
2342 return 0;
2343}
2344
2345#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
2346
2347#define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
2348
2349static void
2350ix_init_rx_unit(struct ix_softc *sc)
2351{
2352 struct ixgbe_hw *hw = &sc->hw;
2353 struct ifnet *ifp = &sc->arpcom.ac_if;
2354 uint32_t bufsz, rxctrl, fctrl, rxcsum, hlreg;
2355 int i;
2356
2357 /*
2358 * Make sure receives are disabled while setting up the descriptor ring
2359 */
2360 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
2361 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
2362
2363 /* Enable broadcasts */
2364 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2365 fctrl |= IXGBE_FCTRL_BAM;
2366 fctrl |= IXGBE_FCTRL_DPF;
2367 fctrl |= IXGBE_FCTRL_PMCF;
2368 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2369
2370 /* Set for Jumbo Frames? */
2371 hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2372 if (ifp->if_mtu > ETHERMTU)
2373 hlreg |= IXGBE_HLREG0_JUMBOEN;
2374 else
2375 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
2376 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2377
2378 KKASSERT(sc->rx_rings[0].rx_mbuf_sz >= MCLBYTES);
2379 bufsz = (sc->rx_rings[0].rx_mbuf_sz + BSIZEPKT_ROUNDUP) >>
2380 IXGBE_SRRCTL_BSIZEPKT_SHIFT;
2381
2382 for (i = 0; i < sc->rx_ring_inuse; ++i) {
2383 struct ix_rx_ring *rxr = &sc->rx_rings[i];
2384 uint64_t rdba = rxr->rx_base_paddr;
2385 uint32_t srrctl;
2386
2387 /* Setup the Base and Length of the Rx Descriptor Ring */
2388 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (uint32_t)rdba);
2389 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (uint32_t)(rdba >> 32));
2390 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2391 rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2392
2393 /*
2394 * Set up the SRRCTL register
2395 */
2396 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
2397
2398 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
2399 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
2400 srrctl |= bufsz;
2401 srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2402 if (sc->rx_ring_inuse > 1) {
2403			/* See the comment near ix_enable_rx_drop() */
2404 switch (sc->fc) {
2405 case ixgbe_fc_rx_pause:
2406 case ixgbe_fc_tx_pause:
2407 case ixgbe_fc_full:
2408 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
2409 if (i == 0 && bootverbose) {
2410 if_printf(ifp, "flow control %d, "
2411 "disable RX drop\n", sc->fc);
2412 }
2413 break;
2414
2415 case ixgbe_fc_none:
2416 srrctl |= IXGBE_SRRCTL_DROP_EN;
2417 if (i == 0 && bootverbose) {
2418 if_printf(ifp, "flow control %d, "
2419 "enable RX drop\n", sc->fc);
2420 }
2421 break;
2422
2423 default:
2424 break;
2425 }
2426 }
2427 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2428
2429 /* Setup the HW Rx Head and Tail Descriptor Pointers */
2430 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2431 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2432 }
2433
2434 if (sc->hw.mac.type != ixgbe_mac_82598EB)
2435 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 0);
2436
2437 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2438
2439 /*
2440 * Setup RSS
2441 */
2442 if (IX_ENABLE_HWRSS(sc)) {
2443 uint8_t key[IX_NRSSRK * IX_RSSRK_SIZE];
2444 int j, r;
2445
2446 /*
2447 * NOTE:
2448 * When we reach here, RSS has already been disabled
2449		 * in ix_stop(), so we can safely configure the RSS key
2450 * and redirect table.
2451 */
2452
2453 /*
2454 * Configure RSS key
2455 */
2456 toeplitz_get_key(key, sizeof(key));
2457 for (i = 0; i < IX_NRSSRK; ++i) {
2458 uint32_t rssrk;
2459
2460 rssrk = IX_RSSRK_VAL(key, i);
2461 IX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n",
2462 i, rssrk);
2463
2464 IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rssrk);
2465 }
2466
2467 /*
2468 * Configure RSS redirect table in following fashion:
2469 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
2470 */
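		/*
		 * For example, with rx_ring_inuse == 2 the redirect
		 * table is filled 0,1,0,1,... below, so packets
		 * alternate between ring 0 and ring 1 according to the
		 * low bits of their RSS hash.
		 */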
2471 r = 0;
2472 for (j = 0; j < IX_NRETA; ++j) {
2473 uint32_t reta = 0;
2474
2475 for (i = 0; i < IX_RETA_SIZE; ++i) {
2476 uint32_t q;
2477
2478 q = r % sc->rx_ring_inuse;
2479 reta |= q << (8 * i);
2480 ++r;
2481 }
2482 IX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2483 IXGBE_WRITE_REG(hw, IXGBE_RETA(j), reta);
2484 }
2485
2486 /*
2487 * Enable multiple receive queues.
2488 * Enable IPv4 RSS standard hash functions.
2489 */
2490 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
2491 IXGBE_MRQC_RSSEN |
2492 IXGBE_MRQC_RSS_FIELD_IPV4 |
2493 IXGBE_MRQC_RSS_FIELD_IPV4_TCP);
2494
2495 /*
2496 * NOTE:
2497 * PCSD must be enabled to enable multiple
2498 * receive queues.
2499 */
2500 rxcsum |= IXGBE_RXCSUM_PCSD;
2501 }
2502
2503 if (ifp->if_capenable & IFCAP_RXCSUM)
2504 rxcsum |= IXGBE_RXCSUM_PCSD;
2505
2506 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2507}
2508
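/*
 * Hand refilled RX descriptors back to the hardware.  RDT is set to the
 * entry just before the next one the driver will examine, which marks the
 * last descriptor the hardware may fill.
 */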
2509static __inline void
2510ix_rx_refresh(struct ix_rx_ring *rxr, int i)
2511{
2512 if (--i < 0)
2513 i = rxr->rx_ndesc - 1;
2514 IXGBE_WRITE_REG(&rxr->rx_sc->hw, IXGBE_RDT(rxr->rx_idx), i);
2515}
2516
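/*
 * Translate the RX descriptor status bits into mbuf checksum flags.
 * IPCS/L4CS indicate that the hardware verified the IP/L4 checksum,
 * while IPE/TCPE flag an error; csum_data is set to 0xffff so the stack
 * treats the pseudo-header checksum as already complete.
 */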
2517static __inline void
2518ix_rxcsum(uint32_t staterr, struct mbuf *mp, uint32_t ptype)
2519{
2520 if ((ptype &
2521 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_IPV4_EX)) == 0) {
2522 /* Not IPv4 */
2523 return;
2524 }
2525
2526 if ((staterr & (IXGBE_RXD_STAT_IPCS | IXGBE_RXDADV_ERR_IPE)) ==
2527 IXGBE_RXD_STAT_IPCS)
2528 mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
2529
2530 if ((ptype &
2531 (IXGBE_RXDADV_PKTTYPE_TCP | IXGBE_RXDADV_PKTTYPE_UDP)) == 0) {
2532 /*
2533 * - Neither TCP nor UDP
2534 * - IPv4 fragment
2535 */
2536 return;
2537 }
2538
2539 if ((staterr & (IXGBE_RXD_STAT_L4CS | IXGBE_RXDADV_ERR_TCPE)) ==
2540 IXGBE_RXD_STAT_L4CS) {
2541 mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2542 CSUM_FRAG_NOT_CHECKED;
2543 mp->m_pkthdr.csum_data = htons(0xffff);
2544 }
2545}
2546
2547static __inline struct pktinfo *
2548ix_rssinfo(struct mbuf *m, struct pktinfo *pi,
2549 uint32_t hash, uint32_t hashtype, uint32_t ptype)
2550{
2551 switch (hashtype) {
2552 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2553 pi->pi_netisr = NETISR_IP;
2554 pi->pi_flags = 0;
2555 pi->pi_l3proto = IPPROTO_TCP;
2556 break;
2557
2558 case IXGBE_RXDADV_RSSTYPE_IPV4:
2559 if ((ptype & IXGBE_RXDADV_PKTTYPE_UDP) == 0) {
2560 /* Not UDP or is fragment */
2561 return NULL;
2562 }
2563 pi->pi_netisr = NETISR_IP;
2564 pi->pi_flags = 0;
2565 pi->pi_l3proto = IPPROTO_UDP;
2566 break;
2567
2568 default:
2569 return NULL;
2570 }
2571
2572 m->m_flags |= M_HASH;
2573 m->m_pkthdr.hash = toeplitz_hash(hash);
2574 return pi;
2575}
2576
2577static __inline void
2578ix_setup_rxdesc(union ixgbe_adv_rx_desc *rxd, const struct ix_rx_buf *rxbuf)
2579{
2580 rxd->read.pkt_addr = htole64(rxbuf->paddr);
2581 rxd->wb.upper.status_error = 0;
2582}
2583
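/*
 * Drop the descriptor at index 'i'.  For a multi-descriptor frame the
 * IX_RXRING_FLAG_DISC flag stays set until EOP, so the remaining
 * fragments of the bad frame are discarded as well.
 */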
2584static void
2585ix_rx_discard(struct ix_rx_ring *rxr, int i, boolean_t eop)
2586{
2587 struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2588
2589 /*
2590 * XXX discard may not be correct
2591 */
2592 if (eop) {
2593 IFNET_STAT_INC(&rxr->rx_sc->arpcom.ac_if, ierrors, 1);
2594 rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2595 } else {
2596 rxr->rx_flags |= IX_RXRING_FLAG_DISC;
2597 }
2598 if (rxbuf->fmp != NULL) {
2599 m_freem(rxbuf->fmp);
2600 rxbuf->fmp = NULL;
2601 rxbuf->lmp = NULL;
2602 }
2603 ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
2604}
2605
2606static void
2607ix_rxeof(struct ix_rx_ring *rxr, int count)
2608{
2609 struct ifnet *ifp = &rxr->rx_sc->arpcom.ac_if;
2610	int i, nsegs = 0, cpuid = mycpuid;
2611
2612 i = rxr->rx_next_check;
2613	while (count != 0) {
2614 struct ix_rx_buf *rxbuf, *nbuf = NULL;
2615 union ixgbe_adv_rx_desc *cur;
2616 struct mbuf *sendmp = NULL, *mp;
2617 struct pktinfo *pi = NULL, pi0;
2618 uint32_t rsc = 0, ptype, staterr, hash, hashtype;
2619 uint16_t len;
2620 boolean_t eop;
2621
2622 cur = &rxr->rx_base[i];
2623 staterr = le32toh(cur->wb.upper.status_error);
2624
2625 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
2626 break;
2627 ++nsegs;
2628
2629 rxbuf = &rxr->rx_buf[i];
2630 mp = rxbuf->m_head;
2631
2632 len = le16toh(cur->wb.upper.length);
2633 ptype = le32toh(cur->wb.lower.lo_dword.data) &
2634 IXGBE_RXDADV_PKTTYPE_MASK;
2635 hash = le32toh(cur->wb.lower.hi_dword.rss);
2636 hashtype = le32toh(cur->wb.lower.lo_dword.data) &
2637 IXGBE_RXDADV_RSSTYPE_MASK;
2638
2639		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
2640		if (eop)
2641			--count;
2642
2643 /*
2644 * Make sure bad packets are discarded
2645 */
2646 if ((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
2647 (rxr->rx_flags & IX_RXRING_FLAG_DISC)) {
2648 ix_rx_discard(rxr, i, eop);
2649 goto next_desc;
2650 }
2651
2652 bus_dmamap_sync(rxr->rx_tag, rxbuf->map, BUS_DMASYNC_POSTREAD);
2653 if (ix_newbuf(rxr, i, FALSE) != 0) {
2654 ix_rx_discard(rxr, i, eop);
2655 goto next_desc;
2656 }
2657
2658 /*
2659		 * On the 82599, which supports hardware LRO, packets
2660		 * need not be fragmented across sequential descriptors;
2661		 * rather, the next descriptor is indicated in bits
2662		 * of the descriptor.  This also means that we might
2663		 * process more than one packet at a time, something
2664		 * that has never been true before; it required
2665 * eliminating global chain pointers in favor of what
2666 * we are doing here.
2667 */
2668 if (!eop) {
2669 int nextp;
2670
2671 /*
2672 * Figure out the next descriptor
2673 * of this frame.
2674 */
2675 if (rxr->rx_flags & IX_RXRING_FLAG_LRO)
2676 rsc = ix_rsc_count(cur);
2677 if (rsc) { /* Get hardware index */
2678 nextp = ((staterr &
2679 IXGBE_RXDADV_NEXTP_MASK) >>
2680 IXGBE_RXDADV_NEXTP_SHIFT);
2681 } else { /* Just sequential */
2682 nextp = i + 1;
2683 if (nextp == rxr->rx_ndesc)
2684 nextp = 0;
2685 }
2686 nbuf = &rxr->rx_buf[nextp];
2687 prefetch(nbuf);
2688 }
2689 mp->m_len = len;
2690
2691 /*
2692 * Rather than using the fmp/lmp global pointers
2693 * we now keep the head of a packet chain in the
2694 * buffer struct and pass this along from one
2695 * descriptor to the next, until we get EOP.
2696 */
2697 if (rxbuf->fmp == NULL) {
2698 mp->m_pkthdr.len = len;
2699 rxbuf->fmp = mp;
2700 rxbuf->lmp = mp;
2701 } else {
2702 rxbuf->fmp->m_pkthdr.len += len;
2703 rxbuf->lmp->m_next = mp;
2704 rxbuf->lmp = mp;
2705 }
2706
2707 if (nbuf != NULL) {
2708 /*
2709 * Not the last fragment of this frame,
2710 * pass this fragment list on
2711 */
2712 nbuf->fmp = rxbuf->fmp;
2713 nbuf->lmp = rxbuf->lmp;
2714 } else {
2715 /*
2716 * Send this frame
2717 */
2718 sendmp = rxbuf->fmp;
2719
2720 sendmp->m_pkthdr.rcvif = ifp;
2721 IFNET_STAT_INC(ifp, ipackets, 1);
2722#ifdef IX_RSS_DEBUG
2723 rxr->rx_pkts++;
2724#endif
2725
2726 /* Process vlan info */
2727 if (staterr & IXGBE_RXD_STAT_VP) {
2728 sendmp->m_pkthdr.ether_vlantag =
2729 le16toh(cur->wb.upper.vlan);
2730 sendmp->m_flags |= M_VLANTAG;
2731 }
2732 if (ifp->if_capenable & IFCAP_RXCSUM)
2733 ix_rxcsum(staterr, sendmp, ptype);
2734 if (ifp->if_capenable & IFCAP_RSS) {
2735 pi = ix_rssinfo(sendmp, &pi0,
2736 hash, hashtype, ptype);
2737 }
2738 }
2739 rxbuf->fmp = NULL;
2740 rxbuf->lmp = NULL;
2741next_desc:
2742 /* Advance our pointers to the next descriptor. */
2743 if (++i == rxr->rx_ndesc)
2744 i = 0;
2745
2746 if (sendmp != NULL)
2747			ifp->if_input(ifp, sendmp, pi, cpuid);
2748
2749 if (nsegs >= rxr->rx_wreg_nsegs) {
2750 ix_rx_refresh(rxr, i);
2751 nsegs = 0;
2752 }
2753 }
2754 rxr->rx_next_check = i;
2755
2756 if (nsegs > 0)
2757 ix_rx_refresh(rxr, i);
2758}
2759
2760static void
2761ix_set_vlan(struct ix_softc *sc)
2762{
2763 struct ixgbe_hw *hw = &sc->hw;
2764 uint32_t ctrl;
2765
2766 if (hw->mac.type == ixgbe_mac_82598EB) {
2767 ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
2768 ctrl |= IXGBE_VLNCTRL_VME;
2769 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
2770 } else {
2771 int i;
2772
2773 /*
2774 * On 82599 and later chips the VLAN enable is
2775 * per queue in RXDCTL
2776 */
2777 for (i = 0; i < sc->rx_ring_inuse; ++i) {
2778 ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
2779 ctrl |= IXGBE_RXDCTL_VME;
2780 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
2781 }
2782 }
2783}
2784
2785static void
2786ix_enable_intr(struct ix_softc *sc)
2787{
2788 struct ixgbe_hw *hw = &sc->hw;
2789	uint32_t fwsm;
2790 int i;
2791
2792 for (i = 0; i < sc->intr_cnt; ++i)
2793 lwkt_serialize_handler_enable(sc->intr_data[i].intr_serialize);
2794
2795	sc->intr_mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
2796
2797 /* Enable Fan Failure detection */
2798 if (hw->device_id == IXGBE_DEV_ID_82598AT)
2799		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2800
2801 switch (sc->hw.mac.type) {
2802 case ixgbe_mac_82599EB:
2803 sc->intr_mask |= IXGBE_EIMS_ECC;
2804 sc->intr_mask |= IXGBE_EIMS_GPI_SDP0;
2805 sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
2806 sc->intr_mask |= IXGBE_EIMS_GPI_SDP2;
2807		break;
2808
2809	case ixgbe_mac_X540:
2810		sc->intr_mask |= IXGBE_EIMS_ECC;
2811 /* Detect if Thermal Sensor is enabled */
2812 fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
2813 if (fwsm & IXGBE_FWSM_TS_ENABLED)
2814			sc->intr_mask |= IXGBE_EIMS_TS;
2815 /* FALL THROUGH */
2816 default:
2817 break;
2818 }
2819
2820	/* With MSI-X we use auto clear for RX and TX rings */
2821	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2822 /*
2823 * There are no EIAC1/EIAC2 for newer chips; the related
2824 * bits for TX and RX rings > 16 are always auto clear.
2825 *
2826 * XXX which bits? There are _no_ documented EICR1 and
2827 * EICR2 at all; only EICR.
2828 */
2829 IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE);
79251f5e 2830 } else {
189a0ff3 2831 sc->intr_mask |= IX_TX_INTR_MASK | IX_RX0_INTR_MASK;
79251f5e
SZ
2832
2833 KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
2834 if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
2835 sc->intr_mask |= IX_RX1_INTR_MASK;
2836 }
2837
2838	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
2839
2840 /*
2841 * Enable RX and TX rings for MSI-X
2842 */
2843 if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
2844 for (i = 0; i < sc->tx_ring_inuse; ++i) {
2845 const struct ix_tx_ring *txr = &sc->tx_rings[i];
2846
2847 if (txr->tx_intr_vec >= 0) {
2848 IXGBE_WRITE_REG(hw, txr->tx_eims,
2849 txr->tx_eims_val);
2850 }
2851 }
2852 for (i = 0; i < sc->rx_ring_inuse; ++i) {
2853 const struct ix_rx_ring *rxr = &sc->rx_rings[i];
2854
2855 KKASSERT(rxr->rx_intr_vec >= 0);
2856 IXGBE_WRITE_REG(hw, rxr->rx_eims, rxr->rx_eims_val);
2857 }
2858 }
2859
2860 IXGBE_WRITE_FLUSH(hw);
2861}
2862
2863static void
2864ix_disable_intr(struct ix_softc *sc)
2865{
2866 int i;
2867
2868	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
2869		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0);
2870
2871 if (sc->hw.mac.type == ixgbe_mac_82598EB) {
2872 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0);
2873 } else {
2874 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000);
2875 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0);
2876 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0);
2877 }
2878 IXGBE_WRITE_FLUSH(&sc->hw);
2879
2880 for (i = 0; i < sc->intr_cnt; ++i)
2881 lwkt_serialize_handler_disable(sc->intr_data[i].intr_serialize);
2882}
2883
2884uint16_t
2885ixgbe_read_pci_cfg(struct ixgbe_hw *hw, uint32_t reg)
2886{
2887 return pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
2888 reg, 2);
2889}
2890
2891void
2892ixgbe_write_pci_cfg(struct ixgbe_hw *hw, uint32_t reg, uint16_t value)
2893{
2894 pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
2895 reg, value, 2);
2896}
2897
2898static void
2899ix_slot_info(struct ix_softc *sc)
2900{
2901 struct ixgbe_hw *hw = &sc->hw;
2902 device_t dev = sc->dev;
2903 struct ixgbe_mac_info *mac = &hw->mac;
2904 uint16_t link;
2905 uint32_t offset;
2906
2907 /* For most devices simply call the shared code routine */
2908 if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
2909 ixgbe_get_bus_info(hw);
2910 goto display;
2911 }
2912
2913 /*
2914 * For the Quad port adapter we need to parse back
2915 * up the PCI tree to find the speed of the expansion
2916 * slot into which this adapter is plugged. A bit more work.
2917 */
2918 dev = device_get_parent(device_get_parent(dev));
2919#ifdef IXGBE_DEBUG
2920 device_printf(dev, "parent pcib = %x,%x,%x\n",
2921 pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
2922#endif
2923 dev = device_get_parent(device_get_parent(dev));
2924#ifdef IXGBE_DEBUG
2925 device_printf(dev, "slot pcib = %x,%x,%x\n",
2926 pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
2927#endif
2928 /* Now get the PCI Express Capabilities offset */
2929 offset = pci_get_pciecap_ptr(dev);
2930 /* ...and read the Link Status Register */
2931 link = pci_read_config(dev, offset + PCIER_LINKSTAT, 2);
2932 switch (link & IXGBE_PCI_LINK_WIDTH) {
2933 case IXGBE_PCI_LINK_WIDTH_1:
2934 hw->bus.width = ixgbe_bus_width_pcie_x1;
2935 break;
2936 case IXGBE_PCI_LINK_WIDTH_2:
2937 hw->bus.width = ixgbe_bus_width_pcie_x2;
2938 break;
2939 case IXGBE_PCI_LINK_WIDTH_4:
2940 hw->bus.width = ixgbe_bus_width_pcie_x4;
2941 break;
2942 case IXGBE_PCI_LINK_WIDTH_8:
2943 hw->bus.width = ixgbe_bus_width_pcie_x8;
2944 break;
2945 default:
2946 hw->bus.width = ixgbe_bus_width_unknown;
2947 break;
2948 }
2949
2950 switch (link & IXGBE_PCI_LINK_SPEED) {
2951 case IXGBE_PCI_LINK_SPEED_2500:
2952 hw->bus.speed = ixgbe_bus_speed_2500;
2953 break;
2954 case IXGBE_PCI_LINK_SPEED_5000:
2955 hw->bus.speed = ixgbe_bus_speed_5000;
2956 break;
2957 case IXGBE_PCI_LINK_SPEED_8000:
2958 hw->bus.speed = ixgbe_bus_speed_8000;
2959 break;
2960 default:
2961 hw->bus.speed = ixgbe_bus_speed_unknown;
2962 break;
2963 }
2964
2965 mac->ops.set_lan_id(hw);
2966
2967display:
2968 device_printf(dev, "PCI Express Bus: Speed %s %s\n",
2969 hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" :
2970 hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
2971 hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown",
2972 hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
2973 hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
2974 hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : "Unknown");
2975
2976 if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP &&
2977 hw->bus.width <= ixgbe_bus_width_pcie_x4 &&
2978 hw->bus.speed == ixgbe_bus_speed_2500) {
2979 device_printf(dev, "For optimal performance a x8 "
2980 "PCIE, or x4 PCIE Gen2 slot is required.\n");
2981 } else if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP &&
2982 hw->bus.width <= ixgbe_bus_width_pcie_x8 &&
2983 hw->bus.speed < ixgbe_bus_speed_8000) {
2984 device_printf(dev, "For optimal performance a x8 "
2985 "PCIE Gen3 slot is required.\n");
2986 }
2987}
2988
2989/*
2990 * TODO comment is incorrect
2991 *
2992 * Setup the correct IVAR register for a particular MSIX interrupt
2993 * - entry is the register array entry
2994 * - vector is the MSIX vector for this queue
2995 * - type is RX/TX/MISC
2996 */
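/*
 * As the code below actually behaves: on 82598 each 32-bit IVAR register
 * holds four 8-bit entries, with the entry index offset by (type * 64);
 * on 82599/X540 each register carries the RX and TX entries of one queue
 * pair, and type -1 selects the misc IVAR register.
 */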
2997static void
2998ix_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector,
2999 int8_t type)
3000{
3001 struct ixgbe_hw *hw = &sc->hw;
3002 uint32_t ivar, index;
3003
3004 vector |= IXGBE_IVAR_ALLOC_VAL;
3005
3006 switch (hw->mac.type) {
3007 case ixgbe_mac_82598EB:
3008 if (type == -1)
3009 entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3010 else
3011 entry += (type * 64);
3012 index = (entry >> 2) & 0x1F;
3013 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3014 ivar &= ~(0xFF << (8 * (entry & 0x3)));
3015 ivar |= (vector << (8 * (entry & 0x3)));
3016 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3017 break;
3018
3019 case ixgbe_mac_82599EB:
3020 case ixgbe_mac_X540:
3021 if (type == -1) { /* MISC IVAR */
3022 index = (entry & 1) * 8;
3023 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3024 ivar &= ~(0xFF << index);
3025 ivar |= (vector << index);
3026 IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3027 } else { /* RX/TX IVARS */
3028 index = (16 * (entry & 1)) + (8 * type);
3029 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3030 ivar &= ~(0xFF << index);
3031 ivar |= (vector << index);
3032 IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3033 }
3034
3035 default:
3036 break;
3037 }
3038}
3039
3040static boolean_t
3041ix_sfp_probe(struct ix_softc *sc)
3042{
3043 struct ixgbe_hw *hw = &sc->hw;
3044
3045 if (hw->phy.type == ixgbe_phy_nl &&
3046 hw->phy.sfp_type == ixgbe_sfp_type_not_present) {
3047 int32_t ret;
3048
3049 ret = hw->phy.ops.identify_sfp(hw);
3050 if (ret)
3051 return FALSE;
3052
3053 ret = hw->phy.ops.reset(hw);
3054 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3055 if_printf(&sc->arpcom.ac_if,
3056 "Unsupported SFP+ module detected! "
3057 "Reload driver with supported module.\n");
3058 sc->sfp_probe = FALSE;
3059 return FALSE;
3060 }
3061 if_printf(&sc->arpcom.ac_if, "SFP+ module detected!\n");
3062
3063 /* We now have supported optics */
3064 sc->sfp_probe = FALSE;
3065 /* Set the optics type so system reports correctly */
3066 ix_setup_optics(sc);
3067
3068 return TRUE;
3069 }
3070 return FALSE;
3071}
3072
3073static void
3074ix_handle_link(struct ix_softc *sc)
3075{
3076 ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
3077 ix_update_link_status(sc);
3078}
3079
3080/*
3081 * Handling SFP module
3082 */
3083static void
3084ix_handle_mod(struct ix_softc *sc)
3085{
3086 struct ixgbe_hw *hw = &sc->hw;
3087 uint32_t err;
3088
3089 err = hw->phy.ops.identify_sfp(hw);
3090 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3091 if_printf(&sc->arpcom.ac_if,
3092 "Unsupported SFP+ module type was detected.\n");
3093 return;
3094 }
3095 err = hw->mac.ops.setup_sfp(hw);
3096 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3097 if_printf(&sc->arpcom.ac_if,
3098 "Setup failure - unsupported SFP+ module type.\n");
3099 return;
3100 }
3101 ix_handle_msf(sc);
3102}
3103
3104/*
3105 * Handling MSF (multispeed fiber)
3106 */
3107static void
3108ix_handle_msf(struct ix_softc *sc)
3109{
3110 struct ixgbe_hw *hw = &sc->hw;
3111 uint32_t autoneg;
3112
3113 autoneg = hw->phy.autoneg_advertised;
3114 if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
3115 bool negotiate;
3116
3117 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
3118 }
3119 if (hw->mac.ops.setup_link != NULL)
3120 hw->mac.ops.setup_link(hw, autoneg, TRUE);
3121}
3122
3123static void
3124ix_update_stats(struct ix_softc *sc)
3125{
3126 struct ifnet *ifp = &sc->arpcom.ac_if;
3127 struct ixgbe_hw *hw = &sc->hw;
3128 uint32_t missed_rx = 0, bprc, lxon, lxoff, total;
3129 uint64_t total_missed_rx = 0;
3130 int i;
3131
3132 sc->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
3133 sc->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
3134 sc->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
3135 sc->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
3136
3137 /*
3138	 * Note: These are for the 8 possible traffic classes, which
3139	 * are unused in the current implementation, so only index 0
3140	 * should read real data.
3141 */
3142 for (i = 0; i < 8; i++) {
3143 uint32_t mp;
3144
3145 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
3146 /* missed_rx tallies misses for the gprc workaround */
3147 missed_rx += mp;
3148 /* global total per queue */
3149 sc->stats.mpc[i] += mp;
3150
3151 /* Running comprehensive total for stats display */
3152 total_missed_rx += sc->stats.mpc[i];
3153
3154 if (hw->mac.type == ixgbe_mac_82598EB) {
3155 sc->stats.rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i));
3156 sc->stats.qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i));
3157 sc->stats.qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i));
3158 sc->stats.pxonrxc[i] +=
3159 IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
3160 } else {
3161 sc->stats.pxonrxc[i] +=
3162 IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
3163 }
3164 sc->stats.pxontxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
3165 sc->stats.pxofftxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
3166 sc->stats.pxoffrxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
3167 sc->stats.pxon2offc[i] +=
3168 IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
3169 }
3170 for (i = 0; i < 16; i++) {
3171 sc->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
3172 sc->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
3173 sc->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
3174 }
3175 sc->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
3176 sc->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
3177 sc->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
3178
3179 /* Hardware workaround, gprc counts missed packets */
3180 sc->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
3181 sc->stats.gprc -= missed_rx;
3182
3183 if (hw->mac.type != ixgbe_mac_82598EB) {
3184 sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
3185 ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
3186 sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
3187 ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
3188 sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
3189 ((uint64_t)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
3190 sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
3191 sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
3192 } else {
3193 sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
3194 sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
3195 /* 82598 only has a counter in the high register */
3196 sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
3197 sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
3198 sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
3199 }
3200
3201 /*
3202 * Workaround: mprc hardware is incorrectly counting
3203 * broadcasts, so for now we subtract those.
3204 */
3205 bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
3206 sc->stats.bprc += bprc;
3207 sc->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
3208 if (hw->mac.type == ixgbe_mac_82598EB)
3209 sc->stats.mprc -= bprc;
3210
3211 sc->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
3212 sc->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
3213 sc->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
3214 sc->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
3215 sc->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
3216 sc->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
3217
3218 lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
3219 sc->stats.lxontxc += lxon;
3220 lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
3221 sc->stats.lxofftxc += lxoff;
3222 total = lxon + lxoff;
3223
3224 sc->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
3225 sc->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
3226 sc->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
3227 sc->stats.gptc -= total;
3228 sc->stats.mptc -= total;
3229 sc->stats.ptc64 -= total;
3230 sc->stats.gotc -= total * ETHER_MIN_LEN;
3231
3232 sc->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
3233 sc->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
3234 sc->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
3235 sc->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
3236 sc->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
3237 sc->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
3238 sc->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
3239 sc->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
3240 sc->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
3241 sc->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
3242 sc->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
3243 sc->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
3244 sc->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
3245 sc->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
3246 sc->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
3247 sc->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
3248 sc->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
3249 sc->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
3250 /* Only read FCOE on 82599 */
3251 if (hw->mac.type != ixgbe_mac_82598EB) {
3252 sc->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
3253 sc->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
3254 sc->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
3255 sc->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
3256 sc->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
3257 }
3258
3259 /* Rx Errors */
3260 IFNET_STAT_SET(ifp, iqdrops, total_missed_rx);
3261 IFNET_STAT_SET(ifp, ierrors, sc->stats.crcerrs + sc->stats.rlec);
3262}
3263
3264#if 0
3265/*
3266 * Add sysctl variables, one per statistic, to the system.
3267 */
3268static void
3269ix_add_hw_stats(struct ix_softc *sc)
3270{
3271
3272 device_t dev = sc->dev;
3273
3274 struct ix_tx_ring *txr = sc->tx_rings;
3275 struct ix_rx_ring *rxr = sc->rx_rings;
3276
3277 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
3278 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
3279 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
3280 struct ixgbe_hw_stats *stats = &sc->stats;
3281
3282 struct sysctl_oid *stat_node, *queue_node;
3283 struct sysctl_oid_list *stat_list, *queue_list;
3284
3285#define QUEUE_NAME_LEN 32
3286 char namebuf[QUEUE_NAME_LEN];
3287
3288	/* MAC stats get their own sub node */
3289
3290 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
3291 CTLFLAG_RD, NULL, "MAC Statistics");
3292 stat_list = SYSCTL_CHILDREN(stat_node);
3293
3294 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
3295 CTLFLAG_RD, &stats->crcerrs,
3296 "CRC Errors");
3297 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
3298 CTLFLAG_RD, &stats->illerrc,
3299 "Illegal Byte Errors");
3300 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
3301 CTLFLAG_RD, &stats->errbc,
3302 "Byte Errors");
3303 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
3304 CTLFLAG_RD, &stats->mspdc,
3305 "MAC Short Packets Discarded");
3306 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
3307 CTLFLAG_RD, &stats->mlfc,
3308 "MAC Local Faults");
3309 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
3310 CTLFLAG_RD, &stats->mrfc,
3311 "MAC Remote Faults");
3312 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
3313 CTLFLAG_RD, &stats->rlec,
3314 "Receive Length Errors");
3315
3316 /* Flow Control stats */
3317 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
3318 CTLFLAG_RD, &stats->lxontxc,
3319 "Link XON Transmitted");
3320 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
3321 CTLFLAG_RD, &stats->lxonrxc,
3322 "Link XON Received");
3323 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
3324 CTLFLAG_RD, &stats->lxofftxc,
3325 "Link XOFF Transmitted");
3326 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
3327 CTLFLAG_RD, &stats->lxoffrxc,
3328 "Link XOFF Received");
3329
3330 /* Packet Reception Stats */
3331 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
3332 CTLFLAG_RD, &stats->tor,
3333 "Total Octets Received");
3334 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
3335 CTLFLAG_RD, &stats->gorc,
3336 "Good Octets Received");
3337 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
3338 CTLFLAG_RD, &stats->tpr,
3339 "Total Packets Received");
3340 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
3341 CTLFLAG_RD, &stats->gprc,
3342 "Good Packets Received");
3343 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
3344 CTLFLAG_RD, &stats->mprc,
3345 "Multicast Packets Received");
3346 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
3347 CTLFLAG_RD, &stats->bprc,
3348 "Broadcast Packets Received");
3349 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
3350 CTLFLAG_RD, &stats->prc64,
3351 "64 byte frames received ");
3352 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
3353 CTLFLAG_RD, &stats->prc127,
3354 "65-127 byte frames received");
3355 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
3356 CTLFLAG_RD, &stats->prc255,
3357 "128-255 byte frames received");
3358 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
3359 CTLFLAG_RD, &stats->prc511,
3360 "256-511 byte frames received");
3361 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
3362 CTLFLAG_RD, &stats->prc1023,
3363 "512-1023 byte frames received");
3364 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
3365 CTLFLAG_RD, &stats->prc1522,
3366	    "1024-1522 byte frames received");
3367 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
3368 CTLFLAG_RD, &stats->ruc,
3369 "Receive Undersized");
3370 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
3371 CTLFLAG_RD, &stats->rfc,
3372 "Fragmented Packets Received ");
3373 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
3374 CTLFLAG_RD, &stats->roc,
3375 "Oversized Packets Received");
3376 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
3377 CTLFLAG_RD, &stats->rjc,
3378 "Received Jabber");
3379 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
3380 CTLFLAG_RD, &stats->mngprc,
3381 "Management Packets Received");
3382 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
3383 CTLFLAG_RD, &stats->mngptc,
3384 "Management Packets Dropped");
3385 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
3386 CTLFLAG_RD, &stats->xec,
3387 "Checksum Errors");
3388
3389 /* Packet Transmission Stats */
3390 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
3391 CTLFLAG_RD, &stats->gotc,
3392 "Good Octets Transmitted");
3393 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
3394 CTLFLAG_RD, &stats->tpt,
3395 "Total Packets Transmitted");
3396 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
3397 CTLFLAG_RD, &stats->gptc,
3398 "Good Packets Transmitted");
3399 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
3400 CTLFLAG_RD, &stats->bptc,
3401 "Broadcast Packets Transmitted");
3402 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
3403 CTLFLAG_RD, &stats->mptc,
3404 "Multicast Packets Transmitted");
3405 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
3406 CTLFLAG_RD, &stats->mngptc,
3407 "Management Packets Transmitted");
3408 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
3409 CTLFLAG_RD, &stats->ptc64,
3410 "64 byte frames transmitted ");
3411 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
3412 CTLFLAG_RD, &stats->ptc127,
3413 "65-127 byte frames transmitted");
3414 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
3415 CTLFLAG_RD, &stats->ptc255,
3416 "128-255 byte frames transmitted");
3417 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
3418 CTLFLAG_RD, &stats->ptc511,
3419 "256-511 byte frames transmitted");
3420 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
3421 CTLFLAG_RD, &stats->ptc1023,
3422 "512-1023 byte frames transmitted");
3423 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
3424 CTLFLAG_RD, &stats->ptc1522,
3425 "1024-1522 byte frames transmitted");
3426}
3427#endif
3428
3429/*
3430 * Enable the hardware to drop packets when the buffer is full.
3431 * This is useful when multiple RX rings are used, so that no
3432 * single RX ring being full stalls the entire RX engine. We
3433 * only enable this when multiple RX rings are used and when
3434 * flow control is disabled.
3435 */
3436static void
3437ix_enable_rx_drop(struct ix_softc *sc)
3438{
3439 struct ixgbe_hw *hw = &sc->hw;
3440 int i;
3441
3442 if (bootverbose) {
3443 if_printf(&sc->arpcom.ac_if,
3444 "flow control %d, enable RX drop\n", sc->fc);
3445 }
3446
3447 for (i = 0; i < sc->rx_ring_inuse; ++i) {
3448 uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3449
3450 srrctl |= IXGBE_SRRCTL_DROP_EN;
3451 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3452 }
3453}
3454
3455static void
3456ix_disable_rx_drop(struct ix_softc *sc)
3457{
3458 struct ixgbe_hw *hw = &sc->hw;
3459 int i;
3460
3461 if (bootverbose) {
3462 if_printf(&sc->arpcom.ac_if,
3463 "flow control %d, disable RX drop\n", sc->fc);
3464 }
3465
3466 for (i = 0; i < sc->rx_ring_inuse; ++i) {
3467 uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3468
3469 srrctl &= ~IXGBE_SRRCTL_DROP_EN;
3470 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3471 }
3472}
3473
3474static int
3475ix_sysctl_flowctrl(SYSCTL_HANDLER_ARGS)
3476{
3477 struct ix_softc *sc = (struct ix_softc *)arg1;
3478 struct ifnet *ifp = &sc->arpcom.ac_if;
3479 int error, fc;
3480
3481 fc = sc->fc;
3482 error = sysctl_handle_int(oidp, &fc, 0, req);
3483 if (error || req->newptr == NULL)
3484 return error;
3485
3486 switch (fc) {
3487 case ixgbe_fc_rx_pause:
3488 case ixgbe_fc_tx_pause:
3489 case ixgbe_fc_full:
3490 case ixgbe_fc_none:
3491 break;
3492 default:
3493 return EINVAL;
3494 }
3495
3496 ifnet_serialize_all(ifp);
3497
3498 /* Don't bother if it's not changed */
3499 if (sc->fc == fc)
3500 goto done;
3501 sc->fc = fc;
3502
3503 /* Don't do anything, if the interface is not up yet */
3504 if ((ifp->if_flags & IFF_RUNNING) == 0)
3505 goto done;
3506
3507 if (sc->rx_ring_inuse > 1) {
3508 switch (sc->fc) {
3509 case ixgbe_fc_rx_pause:
3510 case ixgbe_fc_tx_pause:
3511 case ixgbe_fc_full:
3512 ix_disable_rx_drop(sc);
3513 break;
3514
3515 case ixgbe_fc_none:
3516 ix_enable_rx_drop(sc);
3517 break;
3518
3519 default:
3520 panic("leading fc check mismatch");
3521 }
3522 }
3523
3524 sc->hw.fc.requested_mode = sc->fc;
3525 /* Don't autoneg if forcing a value */
3526 sc->hw.fc.disable_fc_autoneg = TRUE;
3527 ixgbe_fc_enable(&sc->hw);
3528
3529done:
3530 ifnet_deserialize_all(ifp);
3531 return error;
3532}
3533
3534#ifdef foo
3535/* XXX not working properly w/ 82599 connected w/ DAC */
3536/* XXX only work after the interface is up */
3537static int
3538ix_sysctl_advspeed(SYSCTL_HANDLER_ARGS)
3539{
3540 struct ix_softc *sc = (struct ix_softc *)arg1;
3541 struct ifnet *ifp = &sc->arpcom.ac_if;
3542 struct ixgbe_hw *hw = &sc->hw;
3543 ixgbe_link_speed speed;
3544 int error, advspeed;
3545
3546 advspeed = sc->advspeed;
3547 error = sysctl_handle_int(oidp, &advspeed, 0, req);
3548 if (error || req->newptr == NULL)
3549 return error;
3550
3551 if (!(hw->phy.media_type == ixgbe_media_type_copper ||
3552 hw->phy.multispeed_fiber))
3553 return EOPNOTSUPP;
3554 if (hw->mac.ops.setup_link == NULL)
3555 return EOPNOTSUPP;
3556
3557 switch (advspeed) {
3558 case 0: /* auto */
3559 speed = IXGBE_LINK_SPEED_UNKNOWN;
3560 break;
3561
3562 case 1: /* 1Gb */
3563 speed = IXGBE_LINK_SPEED_1GB_FULL;
3564 break;
3565
3566 case 2: /* 100Mb */
3567 speed = IXGBE_LINK_SPEED_100_FULL;
3568 break;
3569
3570 case 3: /* 1Gb/10Gb */
3571 speed = IXGBE_LINK_SPEED_1GB_FULL |
3572 IXGBE_LINK_SPEED_10GB_FULL;
3573 break;
3574
3575 default:
3576 return EINVAL;
3577 }
3578
3579 ifnet_serialize_all(ifp);
3580
3581 if (sc->advspeed == advspeed) /* no change */
3582 goto done;
3583
3584 if ((speed & IXGBE_LINK_SPEED_100_FULL) &&
3585 hw->mac.type != ixgbe_mac_X540) {
3586 error = EOPNOTSUPP;
3587 goto done;
3588 }
3589
3590 sc->advspeed = advspeed;
3591
3592 if ((ifp->if_flags & IFF_RUNNING) == 0)
3593 goto done;
3594
3595 if (speed == IXGBE_LINK_SPEED_UNKNOWN) {
3596 ix_config_link(sc);
3597 } else {
3598 hw->mac.autotry_restart = TRUE;
3599 hw->mac.ops.setup_link(hw, speed, sc->link_up);
3600 }
3601
3602done:
3603 ifnet_deserialize_all(ifp);
3604 return error;
3605}
3606#endif
3607
3608static void
3609ix_setup_serialize(struct ix_softc *sc)
3610{
3611 int i = 0, j;
3612
3613 /* Main + RX + TX */
3614 sc->nserialize = 1 + sc->rx_ring_cnt + sc->tx_ring_cnt;
3615 sc->serializes =
3616 kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
3617 M_DEVBUF, M_WAITOK | M_ZERO);
3618
3619 /*
3620 * Setup serializes
3621 *
3622 * NOTE: Order is critical
3623 */
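	/*
	 * The array ends up as: [0] main_serialize, followed by the RX
	 * ring serializers in ring order, then the TX ring serializers.
	 */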
3624
3625 KKASSERT(i < sc->nserialize);
3626 sc->serializes[i++] = &sc->main_serialize;
3627
3628 for (j = 0; j < sc->rx_ring_cnt; ++j) {
3629 KKASSERT(i < sc->nserialize);
3630 sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
3631 }
3632
3633 for (j = 0; j < sc->tx_ring_cnt; ++j) {
3634 KKASSERT(i < sc->nserialize);
3635 sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
3636 }
3637
3638 KKASSERT(i == sc->nserialize);
3639}
3640
3641static int
3642ix_alloc_intr(struct ix_softc *sc)
3643{
3644 struct ix_intr_data *intr;
3645 u_int intr_flags;
3646
3647 ix_alloc_msix(sc);
3648 if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3649 ix_set_ring_inuse(sc, FALSE);
3650 return 0;
3651 }
3652
3653 if (sc->intr_data != NULL)
3654 kfree(sc->intr_data, M_DEVBUF);
3655
3656 sc->intr_cnt = 1;
3657 sc->intr_data = kmalloc(sizeof(struct ix_intr_data), M_DEVBUF,
3658 M_WAITOK | M_ZERO);
3659 intr = &sc->intr_data[0];
3660
3661 /*
3662 * Allocate MSI/legacy interrupt resource
3663 */
3664 sc->intr_type = pci_alloc_1intr(sc->dev, ix_msi_enable,
3665 &intr->intr_rid, &intr_flags);
3666
3667 intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
3668 &intr->intr_rid, intr_flags);
3669 if (intr->intr_res == NULL) {
3670 device_printf(sc->dev, "Unable to allocate bus resource: "
3671 "interrupt\n");
3672 return ENXIO;
3673 }
3674
3675 intr->intr_serialize = &sc->main_serialize;
3676 intr->intr_cpuid = rman_get_cpuid(intr->intr_res);
3677 intr->intr_func = ix_intr;
3678 intr->intr_funcarg = sc;
3679 intr->intr_rate = IX_INTR_RATE;
3680 intr->intr_use = IX_INTR_USE_RXTX;
3681
3682	sc->tx_rings[0].tx_intr_cpuid = intr->intr_cpuid;
3683	sc->tx_rings[0].tx_intr_vec = IX_TX_INTR_VEC;
3684
3685	sc->rx_rings[0].rx_intr_vec = IX_RX0_INTR_VEC;
3686
3687 ix_set_ring_inuse(sc, FALSE);
3688
3689 KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3690 if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
3691 sc->rx_rings[1].rx_intr_vec = IX_RX1_INTR_VEC;
3692
3693 return 0;
3694}
3695
3696static void
3697ix_free_intr(struct ix_softc *sc)
3698{
3699 if (sc->intr_data == NULL)
3700 return;
3701
3702 if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
3703 struct ix_intr_data *intr = &sc->intr_data[0];
3704
3705 KKASSERT(sc->intr_cnt == 1);
3706 if (intr->intr_res != NULL) {
3707 bus_release_resource(sc->dev, SYS_RES_IRQ,
3708 intr->intr_rid, intr->intr_res);
3709 }
3710 if (sc->intr_type == PCI_INTR_TYPE_MSI)
3711 pci_release_msi(sc->dev);
3712
3713		kfree(sc->intr_data, M_DEVBUF);
3714	} else {
3715		ix_free_msix(sc, TRUE);
3716	}
3717}
3718
3719static void
3720ix_set_ring_inuse(struct ix_softc *sc, boolean_t polling)
3721{
3722 sc->rx_ring_inuse = ix_get_rxring_inuse(sc, polling);
3723 sc->tx_ring_inuse = ix_get_txring_inuse(sc, polling);
3724 if (bootverbose) {
3725 if_printf(&sc->arpcom.ac_if,
3726 "RX rings %d/%d, TX rings %d/%d\n",
3727 sc->rx_ring_inuse, sc->rx_ring_cnt,
3728 sc->tx_ring_inuse, sc->tx_ring_cnt);
3729 }
3730}
3731
3732static int
3733ix_get_rxring_inuse(const struct ix_softc *sc, boolean_t polling)
3734{
3735 if (!IX_ENABLE_HWRSS(sc))
3736 return 1;
3737
3738 if (polling)
3739 return sc->rx_ring_cnt;
3740 else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3741 return IX_MIN_RXRING_RSS;
3742 else
3743		return sc->rx_ring_msix;
3744}
3745
3746static int
3747ix_get_txring_inuse(const struct ix_softc *sc, boolean_t polling)
3748{
3749 if (!IX_ENABLE_HWTSS(sc))
3750 return 1;
3751
3752 if (polling)
3753 return sc->tx_ring_cnt;
3754 else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
3755 return 1;
3756 else
3757		return sc->tx_ring_msix;
3758}
3759
3760static int
3761ix_setup_intr(struct ix_softc *sc)
3762{
3763 int i;
3764
3765 for (i = 0; i < sc->intr_cnt; ++i) {
3766 struct ix_intr_data *intr = &sc->intr_data[i];
3767 int error;
3768
3769 error = bus_setup_intr_descr(sc->dev, intr->intr_res,
3770 INTR_MPSAFE, intr->intr_func, intr->intr_funcarg,
3771 &intr->intr_hand, intr->intr_serialize, intr->intr_desc);
3772 if (error) {
3773 device_printf(sc->dev, "can't setup %dth intr\n", i);
3774 ix_teardown_intr(sc, i);
3775 return error;
3776 }
3777 }
3778 return 0;
3779}
3780
3781static void
3782ix_teardown_intr(struct ix_softc *sc, int intr_cnt)
3783{
3784 int i;
3785
3786 if (sc->intr_data == NULL)
3787 return;
3788
3789 for (i = 0; i < intr_cnt; ++i) {
3790 struct ix_intr_data *intr = &sc->intr_data[i];
3791
3792 bus_teardown_intr(sc->dev, intr->intr_res, intr->intr_hand);
3793 }
3794}
3795
3796static void
3797ix_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
3798{
3799 struct ix_softc *sc = ifp->if_softc;
3800
3801 ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
3802}
3803
3804static void
3805ix_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3806{
3807 struct ix_softc *sc = ifp->if_softc;
3808
3809 ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
3810}
3811
3812static int
3813ix_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
3814{
3815 struct ix_softc *sc = ifp->if_softc;
3816
3817 return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
3818}
3819
3820#ifdef INVARIANTS
3821
3822static void
3823ix_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
3824 boolean_t serialized)
3825{
3826 struct ix_softc *sc = ifp->if_softc;
3827
3828 ifnet_serialize_array_assert(sc->serializes, sc->nserialize, slz,
3829 serialized);
3830}
3831
3832#endif /* INVARIANTS */
3833
3834static void
3835ix_free_rings(struct ix_softc *sc)
3836{
3837 int i;
3838
3839 if (sc->tx_rings != NULL) {
3840 for (i = 0; i < sc->tx_ring_cnt; ++i) {
3841 struct ix_tx_ring *txr = &sc->tx_rings[i];
3842
3843 ix_destroy_tx_ring(txr, txr->tx_ndesc);
3844 }
3845 kfree(sc->tx_rings, M_DEVBUF);
3846 }
3847
3848 if (sc->rx_rings != NULL) {
3849 for (i =0; i < sc->rx_ring_cnt; ++i) {
3850 struct ix_rx_ring *rxr = &sc->rx_rings[i];
3851
3852 ix_destroy_rx_ring(rxr, rxr->rx_ndesc);
3853 }
3854 kfree(sc->rx_rings, M_DEVBUF);
3855 }
3856
3857 if (sc->parent_tag != NULL)
3858 bus_dma_tag_destroy(sc->parent_tag);
3859}
3860
3861static void
3862ix_watchdog(struct ifaltq_subque *ifsq)
3863{
3864 struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
3865 struct ifnet *ifp = ifsq_get_ifp(ifsq);
3866 struct ix_softc *sc = ifp->if_softc;
3867 int i;
3868
3869 KKASSERT(txr->tx_ifsq == ifsq);
3870 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3871
3872 /*
3873 * If the interface has been paused then don't do the watchdog check
3874 */
3875 if (IXGBE_READ_REG(&sc->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) {
3876 txr->tx_watchdog.wd_timer = 5;
3877 return;
3878 }
3879
3880 if_printf(ifp, "Watchdog timeout -- resetting\n");
3881 if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->tx_idx,
3882 IXGBE_READ_REG(&sc->hw, IXGBE_TDH(txr->tx_idx)),
3883 IXGBE_READ_REG(&sc->hw, IXGBE_TDT(txr->tx_idx)));
3884 if_printf(ifp, "TX(%d) desc avail = %d, next TX to Clean = %d\n",
3885 txr->tx_idx, txr->tx_avail, txr->tx_next_clean);
3886
3887 ix_init(sc);
3888 for (i = 0; i < sc->tx_ring_inuse; ++i)
3889 ifsq_devstart_sched(sc->tx_rings[i].tx_ifsq);
3890}
3891
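/* Unload and free any mbufs still held by TX descriptors. */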
3892static void
3893ix_free_tx_ring(struct ix_tx_ring *txr)
3894{
3895 int i;
3896
3897 for (i = 0; i < txr->tx_ndesc; ++i) {
3898 struct ix_tx_buf *txbuf = &txr->tx_buf[i];
3899
3900 if (txbuf->m_head != NULL) {
3901 bus_dmamap_unload(txr->tx_tag, txbuf->map);
3902 m_freem(txbuf->m_head);
3903 txbuf->m_head = NULL;
3904 }
3905 }
3906}
3907
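/*
 * Free RX buffers, including any partially assembled packet chain
 * (fmp/lmp) left over from a previous receive.
 */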
3908static void
3909ix_free_rx_ring(struct ix_rx_ring *rxr)
3910{
3911 int i;
3912
3913 for (i = 0; i < rxr->rx_ndesc; ++i) {
3914 struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
3915
3916 if (rxbuf->fmp != NULL) {
3917 m_freem(rxbuf->fmp);
3918 rxbuf->fmp = NULL;
3919 rxbuf->lmp = NULL;
3920 } else {
3921 KKASSERT(rxbuf->lmp == NULL);
3922 }
3923 if (rxbuf->m_head != NULL) {
3924 bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3925 m_freem(rxbuf->m_head);
3926 rxbuf->m_head = NULL;
3927 }
3928 }
3929}
3930
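/*
 * Allocate and DMA-map a fresh RX mbuf for slot i.  The mbuf is loaded
 * into the spare map and the maps are swapped only after a successful
 * load, so a failed allocation leaves the old buffer usable.
 */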
3931static int
3932ix_newbuf(struct ix_rx_ring *rxr, int i, boolean_t wait)
3933{
3934 struct mbuf *m;
3935 bus_dma_segment_t seg;
3936 bus_dmamap_t map;
3937 struct ix_rx_buf *rxbuf;
3938 int flags, error, nseg;
3939
3940 	flags = M_NOWAIT;
3941 	if (__predict_false(wait))
3942 		flags = M_WAITOK;
3943
3944 m = m_getjcl(flags, MT_DATA, M_PKTHDR, rxr->rx_mbuf_sz);
3945 if (m == NULL) {
3946 if (wait) {
3947 if_printf(&rxr->rx_sc->arpcom.ac_if,
3948 "Unable to allocate RX mbuf\n");
3949 }
3950 return ENOBUFS;
3951 }
3952 m->m_len = m->m_pkthdr.len = rxr->rx_mbuf_sz;
3953
3954 error = bus_dmamap_load_mbuf_segment(rxr->rx_tag,
3955 rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT);
3956 if (error) {
3957 m_freem(m);
3958 if (wait) {
3959 if_printf(&rxr->rx_sc->arpcom.ac_if,
3960 "Unable to load RX mbuf\n");
3961 }
3962 return error;
3963 }
3964
3965 rxbuf = &rxr->rx_buf[i];
3966 if (rxbuf->m_head != NULL)
3967 bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
3968
3969 map = rxbuf->map;
3970 rxbuf->map = rxr->rx_sparemap;
3971 rxr->rx_sparemap = map;
3972
3973 rxbuf->m_head = m;
3974 rxbuf->paddr = seg.ds_addr;
3975
3976 ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
3977 return 0;
3978}
3979
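/*
 * Register the per-device sysctl nodes: ring and descriptor counts,
 * TX/RX write/interrupt thresholds, interrupt rates, optional polling
 * CPU offsets, RSS debug counters and flow control.
 */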
3980static void
3981ix_add_sysctl(struct ix_softc *sc)
3982{
3983 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
3984 	struct sysctl_oid *tree = device_get_sysctl_tree(sc->dev);
3985#ifdef IX_RSS_DEBUG
3986 char node[32];
3987 	int i;
3988 #endif
3989 
3990 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
3991 	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
3992 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
3993 OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
3994 "# of RX rings used");
3995 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
3996 	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
3997 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
3998 OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
3999 "# of TX rings used");
4000 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4001 OID_AUTO, "rxd", CTLTYPE_INT | CTLFLAG_RD,
4002 sc, 0, ix_sysctl_rxd, "I",
4003 "# of RX descs");
4004 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4005 OID_AUTO, "txd", CTLTYPE_INT | CTLFLAG_RD,
4006 sc, 0, ix_sysctl_txd, "I",
4007 "# of TX descs");
4008 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4009 OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4010 sc, 0, ix_sysctl_tx_wreg_nsegs, "I",
4011 "# of segments sent before write to hardware register");
4012 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4013 OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4014 sc, 0, ix_sysctl_rx_wreg_nsegs, "I",
4015 "# of received segments sent before write to hardware register");
4016 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4017 OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4018 sc, 0, ix_sysctl_tx_intr_nsegs, "I",
4019 "# of segments per TX interrupt");
4020
4021 #ifdef IFPOLL_ENABLE
4022 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4023 OID_AUTO, "npoll_rxoff", CTLTYPE_INT|CTLFLAG_RW,
4024 sc, 0, ix_sysctl_npoll_rxoff, "I", "NPOLLING RX cpu offset");
4025 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4026 OID_AUTO, "npoll_txoff", CTLTYPE_INT|CTLFLAG_RW,
4027 sc, 0, ix_sysctl_npoll_txoff, "I", "NPOLLING TX cpu offset");
4028#endif
4029
4030#define IX_ADD_INTR_RATE_SYSCTL(sc, use, name) \
4031do { \
4032 ix_add_intr_rate_sysctl(sc, IX_INTR_USE_##use, #name, \
4033 ix_sysctl_##name, #use " interrupt rate"); \
4034} while (0)
4035
4036 IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate);
4037 IX_ADD_INTR_RATE_SYSCTL(sc, RX, rx_intr_rate);
4038 IX_ADD_INTR_RATE_SYSCTL(sc, TX, tx_intr_rate);
4039 IX_ADD_INTR_RATE_SYSCTL(sc, STATUS, sts_intr_rate);
4040
4041#undef IX_ADD_INTR_RATE_SYSCTL
4042
4043#ifdef IX_RSS_DEBUG
4044 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4045 OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
4046 "RSS debug level");
4047 for (i = 0; i < sc->rx_ring_cnt; ++i) {
4048 ksnprintf(node, sizeof(node), "rx%d_pkt", i);
4049 SYSCTL_ADD_ULONG(ctx,
4050 SYSCTL_CHILDREN(tree), OID_AUTO, node,
4051 CTLFLAG_RW, &sc->rx_rings[i].rx_pkts, "RXed packets");
4052 }
4053#endif
4054
4055 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4056 OID_AUTO, "flowctrl", CTLTYPE_INT | CTLFLAG_RW,
4057 sc, 0, ix_sysctl_flowctrl, "I",
4058 "flow control, 0 - off, 1 - rx pause, 2 - tx pause, 3 - full");
4059
4060#ifdef foo
4061 /*
4062 * Allow a kind of speed control by forcing the autoneg
4063 	 * advertised speed list to only a certain value; this
4064 	 * supports 1G on 82599 devices, and 100Mb on the X540.
4065 */
4066 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4067 OID_AUTO, "advspeed", CTLTYPE_INT | CTLFLAG_RW,
4068 sc, 0, ix_sysctl_advspeed, "I",
4069 "advertised link speed, "
4070 "0 - auto, 1 - 1Gb, 2 - 100Mb, 3 - 1Gb/10Gb");
4071#endif
4072
4073#if 0
4074 ix_add_hw_stats(sc);
4075#endif
4076
4077}
4078
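/*
 * Sysctl handler for tx_wreg_nsegs; the current value is taken from TX
 * ring 0, and the declared index i suggests any newly written value is
 * applied to every TX ring further below.
 */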
4079static int
4080ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4081{
4082 struct ix_softc *sc = (void *)arg1;
4083 struct ifnet *ifp = &sc->arpcom.ac_if;
4084 int error, nsegs, i;
4085
4086 nsegs = sc->tx_rings[0].tx_wreg_nsegs;
4087 error = sysctl_handle_int(oidp, &nsegs, 0, req);
4088 if (error || req->newptr == NULL)
4089 return error;