Revert "kernel: Disable attributes that take no effect."
[dragonfly.git] sys/dev/virtual/virtio/net/if_vtnet.c
1 /*-
2  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 /* Driver for VirtIO network devices. */
28
29 #include <sys/cdefs.h>
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/sockio.h>
35 #include <sys/mbuf.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/socket.h>
39 #include <sys/sysctl.h>
40 #include <sys/taskqueue.h>
41 #include <sys/random.h>
42 #include <sys/sglist.h>
43 #include <sys/serialize.h>
44 #include <sys/bus.h>
45 #include <sys/rman.h>
46
47 #include <machine/limits.h>
48
49 #include <net/ethernet.h>
50 #include <net/if.h>
51 #include <net/if_arp.h>
52 #include <net/if_dl.h>
53 #include <net/if_types.h>
54 #include <net/if_media.h>
55 #include <net/vlan/if_vlan_var.h>
56 #include <net/vlan/if_vlan_ether.h>
57 #include <net/ifq_var.h>
58
59 #include <net/bpf.h>
60
61 #include <netinet/in_systm.h>
62 #include <netinet/in.h>
63 #include <netinet/ip.h>
64 #include <netinet/ip6.h>
65 #include <netinet/udp.h>
66 #include <netinet/tcp.h>
67
68 #include <dev/virtual/virtio/virtio/virtio.h>
69 #include <dev/virtual/virtio/virtio/virtqueue.h>
70 #include <dev/virtual/virtio/net/virtio_net.h>
71 #include <dev/virtual/virtio/net/if_vtnetvar.h>
72
73 #include "virtio_if.h"
74
75 MALLOC_DEFINE(M_VTNET, "VTNET_TX", "Outgoing VTNET TX frame header");
76
77 static int      vtnet_probe(device_t);
78 static int      vtnet_attach(device_t);
79 static int      vtnet_detach(device_t);
80 static int      vtnet_suspend(device_t);
81 static int      vtnet_resume(device_t);
82 static int      vtnet_shutdown(device_t);
83 static int      vtnet_config_change(device_t);
84
85 static void     vtnet_negotiate_features(struct vtnet_softc *);
86 static int      vtnet_alloc_virtqueues(struct vtnet_softc *);
87 static void     vtnet_get_hwaddr(struct vtnet_softc *);
88 static void     vtnet_set_hwaddr(struct vtnet_softc *);
89 static int      vtnet_is_link_up(struct vtnet_softc *);
90 static void     vtnet_update_link_status(struct vtnet_softc *);
91 #if 0
92 static void     vtnet_watchdog(struct vtnet_softc *);
93 #endif
94 static void     vtnet_config_change_task(void *, int);
95 static int      vtnet_setup_interface(struct vtnet_softc *);
96 static int      vtnet_change_mtu(struct vtnet_softc *, int);
97 static int      vtnet_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
98
99 static int      vtnet_init_rx_vq(struct vtnet_softc *);
100 static void     vtnet_free_rx_mbufs(struct vtnet_softc *);
101 static void     vtnet_free_tx_mbufs(struct vtnet_softc *);
102 static void     vtnet_free_ctrl_vq(struct vtnet_softc *);
103
104 static struct mbuf * vtnet_alloc_rxbuf(struct vtnet_softc *, int,
105                     struct mbuf **);
106 static int      vtnet_replace_rxbuf(struct vtnet_softc *,
107                     struct mbuf *, int);
108 static int      vtnet_newbuf(struct vtnet_softc *);
109 static void     vtnet_discard_merged_rxbuf(struct vtnet_softc *, int);
110 static void     vtnet_discard_rxbuf(struct vtnet_softc *, struct mbuf *);
111 static int      vtnet_enqueue_rxbuf(struct vtnet_softc *, struct mbuf *);
112 static void     vtnet_vlan_tag_remove(struct mbuf *);
113 static int      vtnet_rx_csum(struct vtnet_softc *, struct mbuf *,
114                     struct virtio_net_hdr *);
115 static int      vtnet_rxeof_merged(struct vtnet_softc *, struct mbuf *, int);
116 static int      vtnet_rxeof(struct vtnet_softc *, int, int *);
117 static void     vtnet_rx_intr_task(void *);
118 static int      vtnet_rx_vq_intr(void *);
119
120 static void     vtnet_enqueue_txhdr(struct vtnet_softc *,
121                     struct vtnet_tx_header *);
122 static void     vtnet_txeof(struct vtnet_softc *);
123 static struct mbuf * vtnet_tx_offload(struct vtnet_softc *, struct mbuf *,
124                     struct virtio_net_hdr *);
125 static int      vtnet_enqueue_txbuf(struct vtnet_softc *, struct mbuf **,
126                     struct vtnet_tx_header *);
127 static int      vtnet_encap(struct vtnet_softc *, struct mbuf **);
128 static void     vtnet_start_locked(struct ifnet *, struct ifaltq_subque *);
129 static void     vtnet_start(struct ifnet *, struct ifaltq_subque *);
130 static void     vtnet_tick(void *);
131 static void     vtnet_tx_intr_task(void *);
132 static int      vtnet_tx_vq_intr(void *);
133
134 static void     vtnet_stop(struct vtnet_softc *);
135 static int      vtnet_virtio_reinit(struct vtnet_softc *);
136 static void     vtnet_init_locked(struct vtnet_softc *);
137 static void     vtnet_init(void *);
138
139 static void     vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
140                     struct sglist *, int, int);
141
142 static int      vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
143 static int      vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
144 static int      vtnet_set_promisc(struct vtnet_softc *, int);
145 static int      vtnet_set_allmulti(struct vtnet_softc *, int);
146 static void     vtnet_rx_filter(struct vtnet_softc *sc);
147 static void     vtnet_rx_filter_mac(struct vtnet_softc *);
148
149 static int      vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
150 static void     vtnet_rx_filter_vlan(struct vtnet_softc *);
151 static void     vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
152 static void     vtnet_register_vlan(void *, struct ifnet *, uint16_t);
153 static void     vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
154
155 static int      vtnet_ifmedia_upd(struct ifnet *);
156 static void     vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
157
158 static void     vtnet_add_statistics(struct vtnet_softc *);
159
160 static int      vtnet_enable_rx_intr(struct vtnet_softc *);
161 static int      vtnet_enable_tx_intr(struct vtnet_softc *);
162 static void     vtnet_disable_rx_intr(struct vtnet_softc *);
163 static void     vtnet_disable_tx_intr(struct vtnet_softc *);
164
165 /* Tunables. */
166 static int vtnet_csum_disable = 0;
167 TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
168 static int vtnet_tso_disable = 1;
169 TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
170 static int vtnet_lro_disable = 0;
171 TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
172
173 /*
174  * Reducing the number of transmit completed interrupts can
175  * improve performance. To do so, the define below keeps the
176  * Tx vq interrupt disabled and adds calls to vtnet_txeof()
177  * in the start and watchdog paths. The price to pay for this
178  * is that the m_free'ing of transmitted mbufs may be delayed until
179  * the watchdog fires.
180  */
181 #define VTNET_TX_INTR_MODERATION
182
183 static struct virtio_feature_desc vtnet_feature_desc[] = {
184         { VIRTIO_NET_F_CSUM,            "TxChecksum"    },
185         { VIRTIO_NET_F_GUEST_CSUM,      "RxChecksum"    },
186         { VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, "DynOffload"        },
187         { VIRTIO_NET_F_MAC,             "MacAddress"    },
188         { VIRTIO_NET_F_GSO,             "TxAllGSO"      },
189         { VIRTIO_NET_F_GUEST_TSO4,      "RxTSOv4"       },
190         { VIRTIO_NET_F_GUEST_TSO6,      "RxTSOv6"       },
191         { VIRTIO_NET_F_GUEST_ECN,       "RxECN"         },
192         { VIRTIO_NET_F_GUEST_UFO,       "RxUFO"         },
193         { VIRTIO_NET_F_HOST_TSO4,       "TxTSOv4"       },
194         { VIRTIO_NET_F_HOST_TSO6,       "TxTSOv6"       },
195         { VIRTIO_NET_F_HOST_ECN,        "TxTSOECN"      },
196         { VIRTIO_NET_F_HOST_UFO,        "TxUFO"         },
197         { VIRTIO_NET_F_MRG_RXBUF,       "MrgRxBuf"      },
198         { VIRTIO_NET_F_STATUS,          "Status"        },
199         { VIRTIO_NET_F_CTRL_VQ,         "ControlVq"     },
200         { VIRTIO_NET_F_CTRL_RX,         "RxMode"        },
201         { VIRTIO_NET_F_CTRL_VLAN,       "VLanFilter"    },
202         { VIRTIO_NET_F_CTRL_RX_EXTRA,   "RxModeExtra"   },
203         { VIRTIO_NET_F_GUEST_ANNOUNCE,  "GuestAnnounce" },
204         { VIRTIO_NET_F_MQ,              "RFS"           },
205         { VIRTIO_NET_F_CTRL_MAC_ADDR,   "SetMacAddress" },
206         { 0, NULL }
207 };
208
209 static device_method_t vtnet_methods[] = {
210         /* Device methods. */
211         DEVMETHOD(device_probe,         vtnet_probe),
212         DEVMETHOD(device_attach,        vtnet_attach),
213         DEVMETHOD(device_detach,        vtnet_detach),
214         DEVMETHOD(device_suspend,       vtnet_suspend),
215         DEVMETHOD(device_resume,        vtnet_resume),
216         DEVMETHOD(device_shutdown,      vtnet_shutdown),
217
218         /* VirtIO methods. */
219         DEVMETHOD(virtio_config_change, vtnet_config_change),
220
221         DEVMETHOD_END
222 };
223
224 static driver_t vtnet_driver = {
225         "vtnet",
226         vtnet_methods,
227         sizeof(struct vtnet_softc)
228 };
229
230 static devclass_t vtnet_devclass;
231
232 DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass, NULL, NULL);
233 MODULE_VERSION(vtnet, 1);
234 MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
235
236 static int
237 vtnet_probe(device_t dev)
238 {
239         if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK)
240                 return (ENXIO);
241
242         device_set_desc(dev, "VirtIO Networking Adapter");
243
244         return (BUS_PROBE_DEFAULT);
245 }
246
247 static int
248 vtnet_attach(device_t dev)
249 {
250         struct vtnet_softc *sc;
251         int error;
252
253         sc = device_get_softc(dev);
254         sc->vtnet_dev = dev;
255
256         lwkt_serialize_init(&sc->vtnet_slz);
257         callout_init(&sc->vtnet_tick_ch);
258
259         ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
260                      vtnet_ifmedia_sts);
261         ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
262         ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
263
264         vtnet_add_statistics(sc);
265         SLIST_INIT(&sc->vtnet_txhdr_free);
266
267         /* Register our feature descriptions. */
268         virtio_set_feature_desc(dev, vtnet_feature_desc);
269         vtnet_negotiate_features(sc);
270
271         if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
272                 sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
273
274         if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
275                 /* This feature should always be negotiated. */
276                 sc->vtnet_flags |= VTNET_FLAG_MAC;
277         }
278
279         if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
280                 sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
281                 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
282         } else {
283                 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
284         }
285
286         sc->vtnet_rx_mbuf_size = MCLBYTES;
287         sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc);
288
289         if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
290                 sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
291
292                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
293                         sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
294                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
295                         sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
296                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
297                     virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
298                         sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
299         }
300
301         /* Read (or generate) the MAC address for the adapter. */
302         vtnet_get_hwaddr(sc);
303
304         error = vtnet_alloc_virtqueues(sc);
305         if (error) {
306                 device_printf(dev, "cannot allocate virtqueues\n");
307                 goto fail;
308         }
309
310         error = vtnet_setup_interface(sc);
311         if (error) {
312                 device_printf(dev, "cannot setup interface\n");
313                 goto fail;
314         }
315
316         TASK_INIT(&sc->vtnet_cfgchg_task, 0, vtnet_config_change_task, sc);
317
318         error = virtio_setup_intr(dev, &sc->vtnet_slz);
319         if (error) {
320                 device_printf(dev, "cannot setup virtqueue interrupts\n");
321                 ether_ifdetach(sc->vtnet_ifp);
322                 goto fail;
323         }
324
325         if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
326                 lwkt_serialize_enter(&sc->vtnet_slz);
327                 vtnet_set_hwaddr(sc);
328                 lwkt_serialize_exit(&sc->vtnet_slz);
329         }
330
331         /*
332          * Device defaults to promiscuous mode for backwards
333          * compatibility. Turn it off if possible.
334          */
335         if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
336                 lwkt_serialize_enter(&sc->vtnet_slz);
337                 if (vtnet_set_promisc(sc, 0) != 0) {
338                         sc->vtnet_ifp->if_flags |= IFF_PROMISC;
339                         device_printf(dev,
340                             "cannot disable promiscuous mode\n");
341                 }
342                 lwkt_serialize_exit(&sc->vtnet_slz);
343         } else
344                 sc->vtnet_ifp->if_flags |= IFF_PROMISC;
345
346 fail:
347         if (error)
348                 vtnet_detach(dev);
349
350         return (error);
351 }
352
353 static int
354 vtnet_detach(device_t dev)
355 {
356         struct vtnet_softc *sc;
357         struct ifnet *ifp;
358
359         sc = device_get_softc(dev);
360         ifp = sc->vtnet_ifp;
361
362         if (device_is_attached(dev)) {
363                 lwkt_serialize_enter(&sc->vtnet_slz);
364                 vtnet_stop(sc);
365                 lwkt_serialize_exit(&sc->vtnet_slz);
366
367                 callout_stop(&sc->vtnet_tick_ch);
368                 taskqueue_drain(taskqueue_swi, &sc->vtnet_cfgchg_task);
369
370                 ether_ifdetach(ifp);
371         }
372
373         if (sc->vtnet_vlan_attach != NULL) {
374                 EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
375                 sc->vtnet_vlan_attach = NULL;
376         }
377         if (sc->vtnet_vlan_detach != NULL) {
378                 EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
379                 sc->vtnet_vlan_detach = NULL;
380         }
381
382         if (ifp) {
383                 if_free(ifp);
384                 sc->vtnet_ifp = NULL;
385         }
386
387         if (sc->vtnet_rx_vq != NULL)
388                 vtnet_free_rx_mbufs(sc);
389         if (sc->vtnet_tx_vq != NULL)
390                 vtnet_free_tx_mbufs(sc);
391         if (sc->vtnet_ctrl_vq != NULL)
392                 vtnet_free_ctrl_vq(sc);
393
394         if (sc->vtnet_txhdrarea != NULL) {
395                 contigfree(sc->vtnet_txhdrarea,
396                     sc->vtnet_txhdrcount * sizeof(struct vtnet_tx_header),
397                     M_VTNET);
398                 sc->vtnet_txhdrarea = NULL;
399         }
400         SLIST_INIT(&sc->vtnet_txhdr_free);
401         if (sc->vtnet_macfilter != NULL) {
402                 contigfree(sc->vtnet_macfilter,
403                     sizeof(struct vtnet_mac_filter), M_DEVBUF);
404                 sc->vtnet_macfilter = NULL;
405         }
406
407         ifmedia_removeall(&sc->vtnet_media);
408
409         return (0);
410 }
411
412 static int
413 vtnet_suspend(device_t dev)
414 {
415         struct vtnet_softc *sc;
416
417         sc = device_get_softc(dev);
418
419         lwkt_serialize_enter(&sc->vtnet_slz);
420         vtnet_stop(sc);
421         sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
422         lwkt_serialize_exit(&sc->vtnet_slz);
423
424         return (0);
425 }
426
427 static int
428 vtnet_resume(device_t dev)
429 {
430         struct vtnet_softc *sc;
431         struct ifnet *ifp;
432
433         sc = device_get_softc(dev);
434         ifp = sc->vtnet_ifp;
435
436         lwkt_serialize_enter(&sc->vtnet_slz);
437         if (ifp->if_flags & IFF_UP)
438                 vtnet_init_locked(sc);
439         sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
440         lwkt_serialize_exit(&sc->vtnet_slz);
441
442         return (0);
443 }
444
445 static int
446 vtnet_shutdown(device_t dev)
447 {
448
449         /*
450          * Suspend already does all of what we need to
451          * do here; we just never expect to be resumed.
452          */
453         return (vtnet_suspend(dev));
454 }
455
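/*
 * The host signalled a configuration change. Defer the handling to a
 * taskqueue; the task takes the serializer and refreshes the link state.
 */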
456 static int
457 vtnet_config_change(device_t dev)
458 {
459         struct vtnet_softc *sc;
460
461         sc = device_get_softc(dev);
462
463         taskqueue_enqueue(taskqueue_thread[mycpuid], &sc->vtnet_cfgchg_task);
464
465         return (1);
466 }
467
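/*
 * Build the mask of features to leave unrequested, based on the tunables
 * and current stack limitations, and negotiate the remainder with the host.
 */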
468 static void
469 vtnet_negotiate_features(struct vtnet_softc *sc)
470 {
471         device_t dev;
472         uint64_t mask, features;
473
474         dev = sc->vtnet_dev;
475         mask = 0;
476
477         if (vtnet_csum_disable)
478                 mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
479
480         /*
481          * XXX DragonFly doesn't support receive checksum offload for ipv6 yet,
482          *     hence always disable the virtio feature for now.
483          * XXX We need to support the DynOffload feature, in order to
484          *     dynamically enable/disable this feature.
485          */
486         mask |= VIRTIO_NET_F_GUEST_CSUM;
487
488         /*
489          * TSO is only available when the tx checksum offload feature is also
490          * negotiated.
491          */
492         if (vtnet_csum_disable || vtnet_tso_disable)
493                 mask |= VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 |
494                     VIRTIO_NET_F_HOST_ECN;
495
496         if (vtnet_lro_disable)
497                 mask |= VTNET_LRO_FEATURES;
498
499         features = VTNET_FEATURES & ~mask;
500         features |= VIRTIO_F_NOTIFY_ON_EMPTY;
501         features |= VIRTIO_F_ANY_LAYOUT;
502         sc->vtnet_features = virtio_negotiate_features(dev, features);
503
504         if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
505             virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
506                 /*
507                  * LRO without mergeable buffers requires special care. This
508                  * is not ideal because every receive buffer must be large
509                  * enough to hold the maximum TCP packet, the Ethernet header,
510                  * and the header. This requires up to 34 descriptors with
511                  * and the VirtIO net header. This requires up to 34 descriptors with
512                  * LRO is disabled since the virtqueue will not contain very
513                  * many receive buffers.
514                  */
515                 if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
516                         device_printf(dev,
517                             "LRO disabled due to both mergeable buffers and "
518                             "indirect descriptors not negotiated\n");
519
520                         features &= ~VTNET_LRO_FEATURES;
521                         sc->vtnet_features =
522                             virtio_negotiate_features(dev, features);
523                 } else
524                         sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
525         }
526 }
527
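/*
 * Size the Rx and Tx segment counts from the negotiated features, then
 * allocate the receive, transmit, and (optionally) control virtqueues.
 */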
528 static int
529 vtnet_alloc_virtqueues(struct vtnet_softc *sc)
530 {
531         device_t dev;
532         struct vq_alloc_info vq_info[3];
533         int nvqs;
534
535         dev = sc->vtnet_dev;
536         nvqs = 2;
537
538         /*
539          * Indirect descriptors are not needed for the Rx
540          * virtqueue when mergeable buffers are negotiated.
541          * The header is placed inline with the data, not
542          * in a separate descriptor, and mbuf clusters are
543          * always physically contiguous.
544          */
545         if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
546                 sc->vtnet_rx_nsegs = (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) ?
547                     VTNET_MAX_RX_SEGS : VTNET_MIN_RX_SEGS;
548         } else
549                 sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS;
550
551         if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
552             virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
553                 sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS;
554         else
555                 sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS;
556
557         VQ_ALLOC_INFO_INIT(&vq_info[0], sc->vtnet_rx_nsegs,
558             vtnet_rx_vq_intr, sc, &sc->vtnet_rx_vq,
559             "%s receive", device_get_nameunit(dev));
560
561         VQ_ALLOC_INFO_INIT(&vq_info[1], sc->vtnet_tx_nsegs,
562             vtnet_tx_vq_intr, sc, &sc->vtnet_tx_vq,
563             "%s transmit", device_get_nameunit(dev));
564
565         if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
566                 nvqs++;
567
568                 VQ_ALLOC_INFO_INIT(&vq_info[2], 0, NULL, NULL,
569                     &sc->vtnet_ctrl_vq, "%s control",
570                     device_get_nameunit(dev));
571         }
572
573         return (virtio_alloc_virtqueues(dev, 0, nvqs, vq_info));
574 }
575
576 static int
577 vtnet_setup_interface(struct vtnet_softc *sc)
578 {
579         device_t dev;
580         struct ifnet *ifp;
581         int i;
582
583         dev = sc->vtnet_dev;
584
585         ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
586         if (ifp == NULL) {
587                 device_printf(dev, "cannot allocate ifnet structure\n");
588                 return (ENOSPC);
589         }
590
591         ifp->if_softc = sc;
592         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
593         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
594         ifp->if_init = vtnet_init;
595         ifp->if_start = vtnet_start;
596         ifp->if_ioctl = vtnet_ioctl;
597
598         sc->vtnet_rx_process_limit = virtqueue_size(sc->vtnet_rx_vq);
599         sc->vtnet_tx_size = virtqueue_size(sc->vtnet_tx_vq);
600         if (sc->vtnet_flags & VTNET_FLAG_INDIRECT)
601                 sc->vtnet_txhdrcount = sc->vtnet_tx_size;
602         else
603                 sc->vtnet_txhdrcount = (sc->vtnet_tx_size / 2) + 1;
604         sc->vtnet_txhdrarea = contigmalloc(
605             sc->vtnet_txhdrcount * sizeof(struct vtnet_tx_header),
606             M_VTNET, M_WAITOK, 0, BUS_SPACE_MAXADDR, 4, 0);
607         if (sc->vtnet_txhdrarea == NULL) {
608                 device_printf(dev, "cannot contigmalloc the tx headers\n");
609                 return (ENOMEM);
610         }
611         for (i = 0; i < sc->vtnet_txhdrcount; i++)
612                 vtnet_enqueue_txhdr(sc, &sc->vtnet_txhdrarea[i]);
613         sc->vtnet_macfilter = contigmalloc(
614             sizeof(struct vtnet_mac_filter),
615             M_DEVBUF, M_WAITOK, 0, BUS_SPACE_MAXADDR, 4, 0);
616         if (sc->vtnet_macfilter == NULL) {
617                 device_printf(dev,
618                     "cannot contigmalloc the mac filter table\n");
619                 return (ENOMEM);
620         }
621         ifq_set_maxlen(&ifp->if_snd, sc->vtnet_tx_size - 1);
622         ifq_set_ready(&ifp->if_snd);
623
624         ether_ifattach(ifp, sc->vtnet_hwaddr, NULL);
625
626         /* Tell the upper layer(s) we support long frames. */
627         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
628         ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
629
630         if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
631                 ifp->if_capabilities |= IFCAP_TXCSUM;
632
633                 if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
634                         ifp->if_capabilities |= IFCAP_TSO4;
635                 if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
636                         ifp->if_capabilities |= IFCAP_TSO6;
637                 if (ifp->if_capabilities & IFCAP_TSO)
638                         ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
639
640                 if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
641                         sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
642         }
643
644         if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM))
645                 ifp->if_capabilities |= IFCAP_RXCSUM;
646
647 #if 0   /* IFCAP_LRO doesn't exist in DragonFly. */
648         if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
649             virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
650                 ifp->if_capabilities |= IFCAP_LRO;
651 #endif
652
653         if ((ifp->if_capabilities & IFCAP_HWCSUM) == IFCAP_HWCSUM) {
654                 /*
655                  * VirtIO does not support VLAN tagging, but we can fake
656                  * it by inserting and removing the 802.1Q header during
657                  * transmit and receive. We are then able to do checksum
658                  * offloading of VLAN frames.
659                  */
660                 ifp->if_capabilities |=
661                         IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
662         }
663
664         ifp->if_capenable = ifp->if_capabilities;
665
666         /*
667          * Capabilities after here are not enabled by default.
668          */
669
670         if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
671                 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
672
673                 sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
674                     vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
675                 sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
676                     vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
677         }
678
679         return (0);
680 }
681
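/*
 * Program the MAC address into the device: use the control virtqueue
 * command when available, otherwise write the config space directly if
 * the MAC feature was negotiated.
 */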
682 static void
683 vtnet_set_hwaddr(struct vtnet_softc *sc)
684 {
685         device_t dev;
686
687         dev = sc->vtnet_dev;
688
689         if ((sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) &&
690             (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)) {
691                 if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0)
692                         device_printf(dev, "unable to set MAC address\n");
693         } else if (sc->vtnet_flags & VTNET_FLAG_MAC) {
694                 virtio_write_device_config(dev,
695                     offsetof(struct virtio_net_config, mac),
696                     sc->vtnet_hwaddr, ETHER_ADDR_LEN);
697         }
698 }
699
700 static void
701 vtnet_get_hwaddr(struct vtnet_softc *sc)
702 {
703         device_t dev;
704
705         dev = sc->vtnet_dev;
706
707         if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
708                 /*
709                  * Generate a random locally administered unicast address.
710                  *
711                  * It would be nice to generate the same MAC address across
712                  * reboots, but it seems all the hosts currently available
713                  * support the MAC feature, so this isn't too important.
714                  */
715                 sc->vtnet_hwaddr[0] = 0xB2;
716                 karc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1);
717                 return;
718         }
719
720         virtio_read_device_config(dev,
721             offsetof(struct virtio_net_config, mac),
722             sc->vtnet_hwaddr, ETHER_ADDR_LEN);
723 }
724
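/* Without the STATUS feature, the link is always reported as up. */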
725 static int
726 vtnet_is_link_up(struct vtnet_softc *sc)
727 {
728         device_t dev;
729         struct ifnet *ifp;
730         uint16_t status;
731
732         dev = sc->vtnet_dev;
733         ifp = sc->vtnet_ifp;
734
735         ASSERT_SERIALIZED(&sc->vtnet_slz);
736
737         if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) {
738                 status = virtio_read_dev_config_2(dev,
739                                 offsetof(struct virtio_net_config, status));
740         } else {
741                 status = VIRTIO_NET_S_LINK_UP;
742         }
743
744         return ((status & VIRTIO_NET_S_LINK_UP) != 0);
745 }
746
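/*
 * Reconcile the cached link flag with the device status and restart
 * transmission when the link comes back up.
 */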
747 static void
748 vtnet_update_link_status(struct vtnet_softc *sc)
749 {
750         device_t dev;
751         struct ifnet *ifp;
752         struct ifaltq_subque *ifsq;
753         int link;
754
755         dev = sc->vtnet_dev;
756         ifp = sc->vtnet_ifp;
757         ifsq = ifq_get_subq_default(&ifp->if_snd);
758
759         link = vtnet_is_link_up(sc);
760
761         if (link && ((sc->vtnet_flags & VTNET_FLAG_LINK) == 0)) {
762                 sc->vtnet_flags |= VTNET_FLAG_LINK;
763                 if (bootverbose)
764                         device_printf(dev, "Link is up\n");
765                 ifp->if_link_state = LINK_STATE_UP;
766                 if_link_state_change(ifp);
767                 if (!ifsq_is_empty(ifsq))
768                         vtnet_start_locked(ifp, ifsq);
769         } else if (!link && (sc->vtnet_flags & VTNET_FLAG_LINK)) {
770                 sc->vtnet_flags &= ~VTNET_FLAG_LINK;
771                 if (bootverbose)
772                         device_printf(dev, "Link is down\n");
773
774                 ifp->if_link_state = LINK_STATE_DOWN;
775                 if_link_state_change(ifp);
776         }
777 }
778
779 #if 0
780 static void
781 vtnet_watchdog(struct vtnet_softc *sc)
782 {
783         struct ifnet *ifp;
784
785         ifp = sc->vtnet_ifp;
786
787 #ifdef VTNET_TX_INTR_MODERATION
788         vtnet_txeof(sc);
789 #endif
790
791         if (sc->vtnet_watchdog_timer == 0 || --sc->vtnet_watchdog_timer)
792                 return;
793
794         if_printf(ifp, "watchdog timeout -- resetting\n");
795 #ifdef VTNET_DEBUG
796         virtqueue_dump(sc->vtnet_tx_vq);
797 #endif
798         ifp->if_oerrors++;
799         ifp->if_flags &= ~IFF_RUNNING;
800         vtnet_init_locked(sc);
801 }
802 #endif
803
804 static void
805 vtnet_config_change_task(void *arg, int pending)
806 {
807         struct vtnet_softc *sc;
808
809         sc = arg;
810
811         lwkt_serialize_enter(&sc->vtnet_slz);
812         vtnet_update_link_status(sc);
813         lwkt_serialize_exit(&sc->vtnet_slz);
814 }
815
816 static int
817 vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
818 {
819         struct vtnet_softc *sc;
820         struct ifreq *ifr;
821         int reinit, mask, error;
822
823         sc = ifp->if_softc;
824         ifr = (struct ifreq *) data;
825         reinit = 0;
826         error = 0;
827
828         switch (cmd) {
829         case SIOCSIFMTU:
830                 if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VTNET_MAX_MTU)
831                         error = EINVAL;
832                 else if (ifp->if_mtu != ifr->ifr_mtu) {
833                         lwkt_serialize_enter(&sc->vtnet_slz);
834                         error = vtnet_change_mtu(sc, ifr->ifr_mtu);
835                         lwkt_serialize_exit(&sc->vtnet_slz);
836                 }
837                 break;
838
839         case SIOCSIFFLAGS:
840                 lwkt_serialize_enter(&sc->vtnet_slz);
841                 if ((ifp->if_flags & IFF_UP) == 0) {
842                         if (ifp->if_flags & IFF_RUNNING)
843                                 vtnet_stop(sc);
844                 } else if (ifp->if_flags & IFF_RUNNING) {
845                         if ((ifp->if_flags ^ sc->vtnet_if_flags) &
846                             (IFF_PROMISC | IFF_ALLMULTI)) {
847                                 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
848                                         vtnet_rx_filter(sc);
849                                 else
850                                         error = ENOTSUP;
851                         }
852                 } else
853                         vtnet_init_locked(sc);
854
855                 if (error == 0)
856                         sc->vtnet_if_flags = ifp->if_flags;
857                 lwkt_serialize_exit(&sc->vtnet_slz);
858                 break;
859
860         case SIOCADDMULTI:
861         case SIOCDELMULTI:
862                 lwkt_serialize_enter(&sc->vtnet_slz);
863                 if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) &&
864                     (ifp->if_flags & IFF_RUNNING))
865                         vtnet_rx_filter_mac(sc);
866                 lwkt_serialize_exit(&sc->vtnet_slz);
867                 break;
868
869         case SIOCSIFMEDIA:
870         case SIOCGIFMEDIA:
871                 error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
872                 break;
873
874         case SIOCSIFCAP:
875                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
876
877                 lwkt_serialize_enter(&sc->vtnet_slz);
878
879                 if (mask & IFCAP_TXCSUM) {
880                         ifp->if_capenable ^= IFCAP_TXCSUM;
881                         if (ifp->if_capenable & IFCAP_TXCSUM)
882                                 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
883                         else
884                                 ifp->if_hwassist &= ~VTNET_CSUM_OFFLOAD;
885                 }
886
887                 if (mask & IFCAP_TSO4) {
888                         ifp->if_capenable ^= IFCAP_TSO4;
889                         if (ifp->if_capenable & IFCAP_TSO4)
890                                 ifp->if_hwassist |= CSUM_TSO;
891                         else
892                                 ifp->if_hwassist &= ~CSUM_TSO;
893                 }
894
895                 if (mask & IFCAP_RXCSUM) {
896                         ifp->if_capenable ^= IFCAP_RXCSUM;
897                         reinit = 1;
898                 }
899
900 #if 0   /* IFCAP_LRO doesn't exist in DragonFly. */
901                 if (mask & IFCAP_LRO) {
902                         ifp->if_capenable ^= IFCAP_LRO;
903                         reinit = 1;
904                 }
905 #endif
906
907                 if (mask & IFCAP_VLAN_HWFILTER) {
908                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
909                         reinit = 1;
910                 }
911
912                 if (mask & IFCAP_VLAN_HWTSO)
913                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
914
915                 if (mask & IFCAP_VLAN_HWTAGGING)
916                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
917
918                 if (reinit && (ifp->if_flags & IFF_RUNNING)) {
919                         ifp->if_flags &= ~IFF_RUNNING;
920                         vtnet_init_locked(sc);
921                 }
922                 //VLAN_CAPABILITIES(ifp);
923
924                 lwkt_serialize_exit(&sc->vtnet_slz);
925                 break;
926
927         default:
928                 error = ether_ioctl(ifp, cmd, data);
929                 break;
930         }
931
932         return (error);
933 }
934
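/*
 * Select a receive cluster size large enough for the new MTU and
 * reinitialize the interface if it is currently running.
 */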
935 static int
936 vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
937 {
938         struct ifnet *ifp;
939         int new_frame_size, clsize;
940
941         ifp = sc->vtnet_ifp;
942
943         if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
944                 new_frame_size = sizeof(struct vtnet_rx_header) +
945                     sizeof(struct ether_vlan_header) + new_mtu;
946
947                 if (new_frame_size > MJUM9BYTES)
948                         return (EINVAL);
949
950                 if (new_frame_size <= MCLBYTES)
951                         clsize = MCLBYTES;
952                 else
953                         clsize = MJUM9BYTES;
954         } else {
955                 new_frame_size = sizeof(struct virtio_net_hdr_mrg_rxbuf) +
956                     sizeof(struct ether_vlan_header) + new_mtu;
957
958                 if (new_frame_size <= MCLBYTES)
959                         clsize = MCLBYTES;
960                 else
961                         clsize = MJUMPAGESIZE;
962         }
963
964         sc->vtnet_rx_mbuf_size = clsize;
965         sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc);
966         KASSERT(sc->vtnet_rx_mbuf_count < VTNET_MAX_RX_SEGS,
967             ("too many rx mbufs: %d", sc->vtnet_rx_mbuf_count));
968
969         ifp->if_mtu = new_mtu;
970
971         if (ifp->if_flags & IFF_RUNNING) {
972                 ifp->if_flags &= ~IFF_RUNNING;
973                 vtnet_init_locked(sc);
974         }
975
976         return (0);
977 }
978
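/* Populate the Rx virtqueue with as many buffers as it will hold. */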
979 static int
980 vtnet_init_rx_vq(struct vtnet_softc *sc)
981 {
982         struct virtqueue *vq;
983         int nbufs, error;
984
985         vq = sc->vtnet_rx_vq;
986         nbufs = 0;
987         error = ENOSPC;
988
989         while (!virtqueue_full(vq)) {
990                 if ((error = vtnet_newbuf(sc)) != 0)
991                         break;
992                 nbufs++;
993         }
994
995         if (nbufs > 0) {
996                 virtqueue_notify(vq, &sc->vtnet_slz);
997
998                 /*
999                  * EMSGSIZE signifies the virtqueue did not have enough
1000                  * entries available to hold the last mbuf. This is not
1001                  * an error. We should not get ENOSPC since we check if
1002                  * the virtqueue is full before attempting to add a
1003                  * buffer.
1004                  */
1005                 if (error == EMSGSIZE)
1006                         error = 0;
1007         }
1008
1009         return (error);
1010 }
1011
1012 static void
1013 vtnet_free_rx_mbufs(struct vtnet_softc *sc)
1014 {
1015         struct virtqueue *vq;
1016         struct mbuf *m;
1017         int last;
1018
1019         vq = sc->vtnet_rx_vq;
1020         last = 0;
1021
1022         while ((m = virtqueue_drain(vq, &last)) != NULL)
1023                 m_freem(m);
1024
1025         KASSERT(virtqueue_empty(vq), ("mbufs remaining in Rx Vq"));
1026 }
1027
1028 static void
1029 vtnet_free_tx_mbufs(struct vtnet_softc *sc)
1030 {
1031         struct virtqueue *vq;
1032         struct vtnet_tx_header *txhdr;
1033         int last;
1034
1035         vq = sc->vtnet_tx_vq;
1036         last = 0;
1037
1038         while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
1039                 m_freem(txhdr->vth_mbuf);
1040                 vtnet_enqueue_txhdr(sc, txhdr);
1041         }
1042
1043         KASSERT(virtqueue_empty(vq), ("mbufs remaining in Tx Vq"));
1044 }
1045
1046 static void
1047 vtnet_free_ctrl_vq(struct vtnet_softc *sc)
1048 {
1049         /*
1050          * The control virtqueue is only polled, therefore
1051          * it should already be empty.
1052          */
1053         KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq),
1054                 ("Ctrl Vq not empty"));
1055 }
1056
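/*
 * Allocate a receive mbuf cluster, or a chain of nbufs clusters when
 * LRO without mergeable buffers is in use. The tail of the chain is
 * optionally returned through m_tailp.
 */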
1057 static struct mbuf *
1058 vtnet_alloc_rxbuf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1059 {
1060         struct mbuf *m_head, *m_tail, *m;
1061         int i, clsize;
1062
1063         clsize = sc->vtnet_rx_mbuf_size;
1064
1065         /* Use m_getcl() instead of m_getjcl(); see the if_mxge.c comment near line 2398. */
1066         //m_head = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, clsize);
1067         m_head = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1068         if (m_head == NULL)
1069                 goto fail;
1070
1071         m_head->m_len = clsize;
1072         m_tail = m_head;
1073
1074         if (nbufs > 1) {
1075                 KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1076                         ("chained Rx mbuf requested without LRO_NOMRG"));
1077
1078                 for (i = 0; i < nbufs - 1; i++) {
1079                         //m = m_getjcl(M_DONTWAIT, MT_DATA, 0, clsize);
1080                         m = m_getcl(M_NOWAIT, MT_DATA, 0);
1081                         if (m == NULL)
1082                                 goto fail;
1083
1084                         m->m_len = clsize;
1085                         m_tail->m_next = m;
1086                         m_tail = m;
1087                 }
1088         }
1089
1090         if (m_tailp != NULL)
1091                 *m_tailp = m_tail;
1092
1093         return (m_head);
1094
1095 fail:
1096         sc->vtnet_stats.mbuf_alloc_failed++;
1097         m_freem(m_head);
1098
1099         return (NULL);
1100 }
1101
1102 static int
1103 vtnet_replace_rxbuf(struct vtnet_softc *sc, struct mbuf *m0, int len0)
1104 {
1105         struct mbuf *m, *m_prev;
1106         struct mbuf *m_new, *m_tail;
1107         int len, clsize, nreplace, error;
1108
1109         m = m0;
1110         m_prev = NULL;
1111         len = len0;
1112
1113         m_tail = NULL;
1114         clsize = sc->vtnet_rx_mbuf_size;
1115         nreplace = 0;
1116
1117         if (m->m_next != NULL)
1118                 KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1119                     ("chained Rx mbuf without LRO_NOMRG"));
1120
1121         /*
1122          * Since LRO_NOMRG mbuf chains are so large, we want to avoid
1123          * allocating an entire chain for each received frame. When
1124          * the received frame's length is less than that of the chain,
1125          * the unused mbufs are reassigned to the new chain.
1126          */
1127         while (len > 0) {
1128                 /*
1129                  * Something is seriously wrong if we received
1130                  * a frame larger than the mbuf chain. Drop it.
1131                  */
1132                 if (m == NULL) {
1133                         sc->vtnet_stats.rx_frame_too_large++;
1134                         return (EMSGSIZE);
1135                 }
1136
1137                 KASSERT(m->m_len == clsize,
1138                     ("mbuf length not expected cluster size: %d",
1139                     m->m_len));
1140
1141                 m->m_len = MIN(m->m_len, len);
1142                 len -= m->m_len;
1143
1144                 m_prev = m;
1145                 m = m->m_next;
1146                 nreplace++;
1147         }
1148
1149         KASSERT(m_prev != NULL, ("m_prev == NULL"));
1150         KASSERT(nreplace <= sc->vtnet_rx_mbuf_count,
1151                 ("too many replacement mbufs: %d/%d", nreplace,
1152                 sc->vtnet_rx_mbuf_count));
1153
1154         m_new = vtnet_alloc_rxbuf(sc, nreplace, &m_tail);
1155         if (m_new == NULL) {
1156                 m_prev->m_len = clsize;
1157                 return (ENOBUFS);
1158         }
1159
1160         /*
1161          * Move unused mbufs, if any, from the original chain
1162          * onto the end of the new chain.
1163          */
1164         if (m_prev->m_next != NULL) {
1165                 m_tail->m_next = m_prev->m_next;
1166                 m_prev->m_next = NULL;
1167         }
1168
1169         error = vtnet_enqueue_rxbuf(sc, m_new);
1170         if (error) {
1171                 /*
1172                  * BAD! We could not enqueue the replacement mbuf chain. We
1173                  * must restore the m0 chain to the original state if it was
1174                  * modified so we can subsequently discard it.
1175                  *
1176                  * NOTE: The replacement is supposed to be an identical copy
1177                  * to the one just dequeued so this is an unexpected error.
1178                  */
1179                 sc->vtnet_stats.rx_enq_replacement_failed++;
1180
1181                 if (m_tail->m_next != NULL) {
1182                         m_prev->m_next = m_tail->m_next;
1183                         m_tail->m_next = NULL;
1184                 }
1185
1186                 m_prev->m_len = clsize;
1187                 m_freem(m_new);
1188         }
1189
1190         return (error);
1191 }
1192
1193 static int
1194 vtnet_newbuf(struct vtnet_softc *sc)
1195 {
1196         struct mbuf *m;
1197         int error;
1198
1199         m = vtnet_alloc_rxbuf(sc, sc->vtnet_rx_mbuf_count, NULL);
1200         if (m == NULL)
1201                 return (ENOBUFS);
1202
1203         error = vtnet_enqueue_rxbuf(sc, m);
1204         if (error)
1205                 m_freem(m);
1206
1207         return (error);
1208 }
1209
1210 static void
1211 vtnet_discard_merged_rxbuf(struct vtnet_softc *sc, int nbufs)
1212 {
1213         struct virtqueue *vq;
1214         struct mbuf *m;
1215
1216         vq = sc->vtnet_rx_vq;
1217
1218         while (--nbufs > 0) {
1219                 if ((m = virtqueue_dequeue(vq, NULL)) == NULL)
1220                         break;
1221                 vtnet_discard_rxbuf(sc, m);
1222         }
1223 }
1224
1225 static void
1226 vtnet_discard_rxbuf(struct vtnet_softc *sc, struct mbuf *m)
1227 {
1228         int error;
1229
1230         /*
1231          * Requeue the discarded mbuf. This should always be
1232          * successful since it was just dequeued.
1233          */
1234         error = vtnet_enqueue_rxbuf(sc, m);
1235         KASSERT(error == 0, ("cannot requeue discarded mbuf"));
1236 }
1237
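/*
 * Add an Rx mbuf (chain) to the receive virtqueue. Without mergeable
 * buffers, the virtio header lives in the vtnet_rx_header at the front
 * of the cluster and is appended to the sglist as its own segment.
 */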
1238 static int
1239 vtnet_enqueue_rxbuf(struct vtnet_softc *sc, struct mbuf *m)
1240 {
1241         struct sglist sg;
1242         struct sglist_seg segs[VTNET_MAX_RX_SEGS];
1243         struct vtnet_rx_header *rxhdr;
1244         struct virtio_net_hdr *hdr;
1245         uint8_t *mdata;
1246         int offset, error;
1247
1248         ASSERT_SERIALIZED(&sc->vtnet_slz);
1249         if ((sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) == 0)
1250                 KASSERT(m->m_next == NULL, ("chained Rx mbuf"));
1251
1252         sglist_init(&sg, sc->vtnet_rx_nsegs, segs);
1253
1254         mdata = mtod(m, uint8_t *);
1255         offset = 0;
1256
1257         if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1258                 rxhdr = (struct vtnet_rx_header *) mdata;
1259                 hdr = &rxhdr->vrh_hdr;
1260                 offset += sizeof(struct vtnet_rx_header);
1261
1262                 error = sglist_append(&sg, hdr, sc->vtnet_hdr_size);
1263                 KASSERT(error == 0, ("cannot add header to sglist"));
1264         }
1265
1266         error = sglist_append(&sg, mdata + offset, m->m_len - offset);
1267         if (error)
1268                 return (error);
1269
1270         if (m->m_next != NULL) {
1271                 error = sglist_append_mbuf(&sg, m->m_next);
1272                 if (error)
1273                         return (error);
1274         }
1275
1276         return (virtqueue_enqueue(sc->vtnet_rx_vq, m, &sg, 0, sg.sg_nseg));
1277 }
1278
1279 static void
1280 vtnet_vlan_tag_remove(struct mbuf *m)
1281 {
1282         struct ether_vlan_header *evl;
1283
1284         evl = mtod(m, struct ether_vlan_header *);
1285
1286         m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
1287         m->m_flags |= M_VLANTAG;
1288
1289         /* Strip the 802.1Q header. */
1290         bcopy((char *) evl, (char *) evl + ETHER_VLAN_ENCAP_LEN,
1291             ETHER_HDR_LEN - ETHER_TYPE_LEN);
1292         m_adj(m, ETHER_VLAN_ENCAP_LEN);
1293 }
1294
1295 /*
1296  * Alternative method of doing receive checksum offloading. Rather
1297  * than parsing the received frame down to the IP header, use the
1298  * csum_offset to determine which CSUM_* flags are appropriate. We
1299  * can get by with doing this only because the checksum offsets are
1300  * unique for the things we care about.
1301  */
1302 static int
1303 vtnet_rx_csum(struct vtnet_softc *sc, struct mbuf *m,
1304     struct virtio_net_hdr *hdr)
1305 {
1306         struct ether_header *eh;
1307         struct ether_vlan_header *evh;
1308         struct udphdr *udp;
1309         int csum_len;
1310         uint16_t eth_type;
1311
1312         csum_len = hdr->csum_start + hdr->csum_offset;
1313
1314         if (csum_len < sizeof(struct ether_header) + sizeof(struct ip))
1315                 return (1);
1316         if (m->m_len < csum_len)
1317                 return (1);
1318
1319         eh = mtod(m, struct ether_header *);
1320         eth_type = ntohs(eh->ether_type);
1321         if (eth_type == ETHERTYPE_VLAN) {
1322                 evh = mtod(m, struct ether_vlan_header *);
1323                 eth_type = ntohs(evh->evl_proto);
1324         }
1325
1326         if (eth_type != ETHERTYPE_IP && eth_type != ETHERTYPE_IPV6) {
1327                 sc->vtnet_stats.rx_csum_bad_ethtype++;
1328                 return (1);
1329         }
1330
1331         /* Use the offset to determine the appropriate CSUM_* flags. */
1332         switch (hdr->csum_offset) {
1333         case offsetof(struct udphdr, uh_sum):
1334                 if (m->m_len < hdr->csum_start + sizeof(struct udphdr))
1335                         return (1);
1336                 udp = (struct udphdr *)(mtod(m, uint8_t *) + hdr->csum_start);
1337                 if (udp->uh_sum == 0)
1338                         return (0);
1339
1340                 /* FALLTHROUGH */
1341
1342         case offsetof(struct tcphdr, th_sum):
1343                 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1344                 m->m_pkthdr.csum_data = 0xFFFF;
1345                 break;
1346
1347         default:
1348                 sc->vtnet_stats.rx_csum_bad_offset++;
1349                 return (1);
1350         }
1351
1352         sc->vtnet_stats.rx_csum_offloaded++;
1353
1354         return (0);
1355 }
1356
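/*
 * Dequeue the remaining buffers of a mergeable receive and link them
 * onto m_head, replacing each dequeued buffer in the virtqueue.
 */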
1357 static int
1358 vtnet_rxeof_merged(struct vtnet_softc *sc, struct mbuf *m_head, int nbufs)
1359 {
1360         struct ifnet *ifp;
1361         struct virtqueue *vq;
1362         struct mbuf *m, *m_tail;
1363         int len;
1364
1365         ifp = sc->vtnet_ifp;
1366         vq = sc->vtnet_rx_vq;
1367         m_tail = m_head;
1368
1369         while (--nbufs > 0) {
1370                 m = virtqueue_dequeue(vq, &len);
1371                 if (m == NULL) {
1372                         ifp->if_ierrors++;
1373                         goto fail;
1374                 }
1375
1376                 if (vtnet_newbuf(sc) != 0) {
1377                         ifp->if_iqdrops++;
1378                         vtnet_discard_rxbuf(sc, m);
1379                         if (nbufs > 1)
1380                                 vtnet_discard_merged_rxbuf(sc, nbufs);
1381                         goto fail;
1382                 }
1383
1384                 if (m->m_len < len)
1385                         len = m->m_len;
1386
1387                 m->m_len = len;
1388                 m->m_flags &= ~M_PKTHDR;
1389
1390                 m_head->m_pkthdr.len += len;
1391                 m_tail->m_next = m;
1392                 m_tail = m;
1393         }
1394
1395         return (0);
1396
1397 fail:
1398         sc->vtnet_stats.rx_mergeable_failed++;
1399         m_freem(m_head);
1400
1401         return (1);
1402 }
1403
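/*
 * Process up to count frames from the receive virtqueue: strip the
 * virtio header, apply any checksum offload result, and pass the packet
 * up the stack. Returns EAGAIN if the limit was hit with work pending.
 */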
1404 static int
1405 vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp)
1406 {
1407         struct virtio_net_hdr lhdr;
1408         struct ifnet *ifp;
1409         struct virtqueue *vq;
1410         struct mbuf *m;
1411         struct ether_header *eh;
1412         struct virtio_net_hdr *hdr;
1413         struct virtio_net_hdr_mrg_rxbuf *mhdr;
1414         int len, deq, nbufs, adjsz, rx_npkts;
1415
1416         ifp = sc->vtnet_ifp;
1417         vq = sc->vtnet_rx_vq;
1418         hdr = &lhdr;
1419         deq = 0;
1420         rx_npkts = 0;
1421
1422         ASSERT_SERIALIZED(&sc->vtnet_slz);
1423
1424         while (--count >= 0) {
1425                 m = virtqueue_dequeue(vq, &len);
1426                 if (m == NULL)
1427                         break;
1428                 deq++;
1429
1430                 if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
1431                         ifp->if_ierrors++;
1432                         vtnet_discard_rxbuf(sc, m);
1433                         continue;
1434                 }
1435
1436                 if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1437                         nbufs = 1;
1438                         adjsz = sizeof(struct vtnet_rx_header);
1439                         /*
1440                          * Account for our pad between the header and
1441                          * the actual start of the frame.
1442                          */
1443                         len += VTNET_RX_HEADER_PAD;
1444                 } else {
1445                         mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
1446                         nbufs = mhdr->num_buffers;
1447                         adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1448                 }
1449
1450                 if (vtnet_replace_rxbuf(sc, m, len) != 0) {
1451                         ifp->if_iqdrops++;
1452                         vtnet_discard_rxbuf(sc, m);
1453                         if (nbufs > 1)
1454                                 vtnet_discard_merged_rxbuf(sc, nbufs);
1455                         continue;
1456                 }
1457
1458                 m->m_pkthdr.len = len;
1459                 m->m_pkthdr.rcvif = ifp;
1460                 m->m_pkthdr.csum_flags = 0;
1461
1462                 if (nbufs > 1) {
1463                         if (vtnet_rxeof_merged(sc, m, nbufs) != 0)
1464                                 continue;
1465                 }
1466
1467                 ifp->if_ipackets++;
1468
1469                 /*
1470                  * Save a copy of the header before we strip it. For both mergeable
1471                  * and non-mergeable, the VirtIO header is placed first in the
1472                  * mbuf's data. We no longer need num_buffers, so always use a
1473                  * virtio_net_hdr.
1474                  */
1475                 memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
1476                 m_adj(m, adjsz);
1477
1478                 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1479                         eh = mtod(m, struct ether_header *);
1480                         if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1481                                 vtnet_vlan_tag_remove(m);
1482
1483                                 /*
1484                                  * With the 802.1Q header removed, update the
1485                                  * checksum starting location accordingly.
1486                                  */
1487                                 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1488                                         hdr->csum_start -=
1489                                             ETHER_VLAN_ENCAP_LEN;
1490                         }
1491                 }
1492
1493                 if (ifp->if_capenable & IFCAP_RXCSUM &&
1494                     hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1495                         if (vtnet_rx_csum(sc, m, hdr) != 0)
1496                                 sc->vtnet_stats.rx_csum_failed++;
1497                 }
1498
1499                 lwkt_serialize_exit(&sc->vtnet_slz);
1500                 rx_npkts++;
1501                 ifp->if_input(ifp, m, NULL, -1);
1502                 lwkt_serialize_enter(&sc->vtnet_slz);
1503
1504                 /*
1505                  * The interface may have been stopped while we were
1506                  * passing the packet up the network stack.
1507                  */
1508                 if ((ifp->if_flags & IFF_RUNNING) == 0)
1509                         break;
1510         }
1511
1512         virtqueue_notify(vq, &sc->vtnet_slz);
1513
1514         if (rx_npktsp != NULL)
1515                 *rx_npktsp = rx_npkts;
1516
1517         return (count > 0 ? 0 : EAGAIN);
1518 }
1519
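/*
 * Drain the Rx virtqueue up to the process limit, then re-enable the
 * interrupt; if more buffers became available in the meantime, loop and
 * process them as well.
 */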
1520 static void
1521 vtnet_rx_intr_task(void *arg)
1522 {
1523         struct vtnet_softc *sc;
1524         struct ifnet *ifp;
1525         int more;
1526
1527         sc = arg;
1528         ifp = sc->vtnet_ifp;
1529
1530 next:
1531 //      lwkt_serialize_enter(&sc->vtnet_slz);
1532
1533         if ((ifp->if_flags & IFF_RUNNING) == 0) {
1534                 vtnet_enable_rx_intr(sc);
1535 //              lwkt_serialize_exit(&sc->vtnet_slz);
1536                 return;
1537         }
1538
1539         more = vtnet_rxeof(sc, sc->vtnet_rx_process_limit, NULL);
1540         if (!more && vtnet_enable_rx_intr(sc) != 0) {
1541                 vtnet_disable_rx_intr(sc);
1542                 more = 1;
1543         }
1544
1545 //      lwkt_serialize_exit(&sc->vtnet_slz);
1546
1547         if (more) {
1548                 sc->vtnet_stats.rx_task_rescheduled++;
1549                 goto next;
1550         }
1551 }
1552
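     /*
      * Rx virtqueue interrupt handler: mask the interrupt and run the
      * deferred processing loop directly.
      */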
1553 static int
1554 vtnet_rx_vq_intr(void *xsc)
1555 {
1556         struct vtnet_softc *sc;
1557
1558         sc = xsc;
1559
1560         vtnet_disable_rx_intr(sc);
1561         vtnet_rx_intr_task(sc);
1562
1563         return (1);
1564 }
1565
1566 static void
1567 vtnet_enqueue_txhdr(struct vtnet_softc *sc, struct vtnet_tx_header *txhdr)
1568 {
1569         bzero(txhdr, sizeof(*txhdr));
1570         SLIST_INSERT_HEAD(&sc->vtnet_txhdr_free, txhdr, link);
1571 }
1572
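     /*
      * Reclaim completed transmits: dequeue finished buffers from the Tx
      * virtqueue, free their mbufs and return the headers to the free
      * list. OACTIVE is cleared so the stack may queue more packets, and
      * the watchdog is disarmed once the ring is fully drained.
      */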
1573 static void
1574 vtnet_txeof(struct vtnet_softc *sc)
1575 {
1576         struct virtqueue *vq;
1577         struct ifnet *ifp;
1578         struct vtnet_tx_header *txhdr;
1579         int deq;
1580
1581         vq = sc->vtnet_tx_vq;
1582         ifp = sc->vtnet_ifp;
1583         deq = 0;
1584
1585         ASSERT_SERIALIZED(&sc->vtnet_slz);
1586
1587         while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
1588                 deq++;
1589                 ifp->if_opackets++;
1590                 m_freem(txhdr->vth_mbuf);
1591                 vtnet_enqueue_txhdr(sc, txhdr);
1592         }
1593
1594         if (deq > 0) {
1595                 ifq_clr_oactive(&ifp->if_snd);
1596                 if (virtqueue_empty(vq))
1597                         sc->vtnet_watchdog_timer = 0;
1598         }
1599 }
1600
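     /*
      * Fill in the virtio_net_hdr checksum and TSO fields for an outgoing
      * packet. The Ethernet, IP/IPv6 and (for TSO) TCP headers are pulled
      * up as needed; the mbuf chain may be replaced, and NULL is returned
      * if it had to be freed.
      */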
1601 static struct mbuf *
1602 vtnet_tx_offload(struct vtnet_softc *sc, struct mbuf *m,
1603     struct virtio_net_hdr *hdr)
1604 {
1605         struct ifnet *ifp;
1606         struct ether_header *eh;
1607         struct ether_vlan_header *evh;
1608         struct ip *ip;
1609         struct ip6_hdr *ip6;
1610         struct tcphdr *tcp;
1611         int ip_offset;
1612         uint16_t eth_type, csum_start;
1613         uint8_t ip_proto, gso_type;
1614
1615         ifp = sc->vtnet_ifp;
1616         M_ASSERTPKTHDR(m);
1617
1618         ip_offset = sizeof(struct ether_header);
1619         if (m->m_len < ip_offset) {
1620                 if ((m = m_pullup(m, ip_offset)) == NULL)
1621                         return (NULL);
1622         }
1623
1624         eh = mtod(m, struct ether_header *);
1625         eth_type = ntohs(eh->ether_type);
1626         if (eth_type == ETHERTYPE_VLAN) {
1627                 ip_offset = sizeof(struct ether_vlan_header);
1628                 if (m->m_len < ip_offset) {
1629                         if ((m = m_pullup(m, ip_offset)) == NULL)
1630                                 return (NULL);
1631                 }
1632                 evh = mtod(m, struct ether_vlan_header *);
1633                 eth_type = ntohs(evh->evl_proto);
1634         }
1635
1636         switch (eth_type) {
1637         case ETHERTYPE_IP:
1638                 if (m->m_len < ip_offset + sizeof(struct ip)) {
1639                         m = m_pullup(m, ip_offset + sizeof(struct ip));
1640                         if (m == NULL)
1641                                 return (NULL);
1642                 }
1643
1644                 ip = (struct ip *)(mtod(m, uint8_t *) + ip_offset);
1645                 ip_proto = ip->ip_p;
1646                 csum_start = ip_offset + (ip->ip_hl << 2);
1647                 gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1648                 break;
1649
1650         case ETHERTYPE_IPV6:
1651                 if (m->m_len < ip_offset + sizeof(struct ip6_hdr)) {
1652                         m = m_pullup(m, ip_offset + sizeof(struct ip6_hdr));
1653                         if (m == NULL)
1654                                 return (NULL);
1655                 }
1656
1657                 ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + ip_offset);
1658                 /*
1659                  * XXX Assume no extension headers are present. Presently,
1660                  * this will always be true in the case of TSO, and FreeBSD
1661                  * does not perform checksum offloading of IPv6 yet.
1662                  */
1663                 ip_proto = ip6->ip6_nxt;
1664                 csum_start = ip_offset + sizeof(struct ip6_hdr);
1665                 gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1666                 break;
1667
1668         default:
1669                 return (m);
1670         }
1671
1672         if (m->m_pkthdr.csum_flags & VTNET_CSUM_OFFLOAD) {
1673                 hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
1674                 hdr->csum_start = csum_start;
1675                 hdr->csum_offset = m->m_pkthdr.csum_data;
1676
1677                 sc->vtnet_stats.tx_csum_offloaded++;
1678         }
1679
1680         if (m->m_pkthdr.csum_flags & CSUM_TSO) {
1681                 if (ip_proto != IPPROTO_TCP)
1682                         return (m);
1683
1684                 if (m->m_len < csum_start + sizeof(struct tcphdr)) {
1685                         m = m_pullup(m, csum_start + sizeof(struct tcphdr));
1686                         if (m == NULL)
1687                                 return (NULL);
1688                 }
1689
1690                 tcp = (struct tcphdr *)(mtod(m, uint8_t *) + csum_start);
1691                 hdr->gso_type = gso_type;
1692                 hdr->hdr_len = csum_start + (tcp->th_off << 2);
1693                 hdr->gso_size = m->m_pkthdr.tso_segsz;
1694
1695                 if (tcp->th_flags & TH_CWR) {
1696                         /*
1697                          * Drop if we did not negotiate VIRTIO_NET_F_HOST_ECN.
1698                          * ECN support is only configurable globally with the
1699                          * net.inet.tcp.ecn.enable sysctl knob.
1700                          */
1701                         if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
1702                                 if_printf(ifp, "TSO with ECN not supported "
1703                                     "by host\n");
1704                                 m_freem(m);
1705                                 return (NULL);
1706                         }
1707
1708                         hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1709                 }
1710
1711                 sc->vtnet_stats.tx_tso_offloaded++;
1712         }
1713
1714         return (m);
1715 }
1716
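     /*
      * Build a scatter/gather list of the virtio header followed by the
      * mbuf chain and enqueue it on the Tx virtqueue. A chain with too
      * many segments is defragmented once; if that also fails, the mbuf
      * is freed and ENOBUFS returned.
      */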
1717 static int
1718 vtnet_enqueue_txbuf(struct vtnet_softc *sc, struct mbuf **m_head,
1719     struct vtnet_tx_header *txhdr)
1720 {
1721         struct sglist sg;
1722         struct sglist_seg segs[VTNET_MAX_TX_SEGS];
1723         struct virtqueue *vq;
1724         struct mbuf *m;
1725         int error;
1726
1727         vq = sc->vtnet_tx_vq;
1728         m = *m_head;
1729
1730         sglist_init(&sg, sc->vtnet_tx_nsegs, segs);
1731         error = sglist_append(&sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
1732         KASSERT(error == 0 && sg.sg_nseg == 1,
1733             ("%s: error %d adding header to sglist", __func__, error));
1734
1735         error = sglist_append_mbuf(&sg, m);
1736         if (error) {
1737                 m = m_defrag(m, M_NOWAIT);
1738                 if (m == NULL)
1739                         goto fail;
1740
1741                 *m_head = m;
1742                 sc->vtnet_stats.tx_defragged++;
1743
1744                 error = sglist_append_mbuf(&sg, m);
1745                 if (error)
1746                         goto fail;
1747         }
1748
1749         txhdr->vth_mbuf = m;
1750         error = virtqueue_enqueue(vq, txhdr, &sg, sg.sg_nseg, 0);
1751
1752         return (error);
1753
1754 fail:
1755         sc->vtnet_stats.tx_defrag_failed++;
1756         m_freem(*m_head);
1757         *m_head = NULL;
1758
1759         return (ENOBUFS);
1760 }
1761
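     /*
      * Insert the 802.1Q header in software. The mbuf is duplicated first
      * if it is not writable; NULL is returned if any allocation fails.
      */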
1762 static struct mbuf *
1763 vtnet_vlan_tag_insert(struct mbuf *m)
1764 {
1765         struct mbuf *n;
1766         struct ether_vlan_header *evl;
1767
1768         if (M_WRITABLE(m) == 0) {
1769                 n = m_dup(m, M_NOWAIT);
1770                 m_freem(m);
1771                 if ((m = n) == NULL)
1772                         return (NULL);
1773         }
1774
1775         M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
1776         if (m == NULL)
1777                 return (NULL);
1778         if (m->m_len < sizeof(struct ether_vlan_header)) {
1779                 m = m_pullup(m, sizeof(struct ether_vlan_header));
1780                 if (m == NULL)
1781                         return (NULL);
1782         }
1783
1784         /* Insert 802.1Q header into the existing Ethernet header. */
1785         evl = mtod(m, struct ether_vlan_header *);
1786         bcopy((char *) evl + ETHER_VLAN_ENCAP_LEN,
1787               (char *) evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
1788         evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
1789         evl->evl_tag = htons(m->m_pkthdr.ether_vlantag);
1790         m->m_flags &= ~M_VLANTAG;
1791
1792         return (m);
1793 }
1794
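     /*
      * Prepare one packet for transmission: take a Tx header from the
      * free list, insert the VLAN tag and fill in any offload fields,
      * then hand the result to vtnet_enqueue_txbuf(). The header is
      * returned to the free list if anything fails.
      */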
1795 static int
1796 vtnet_encap(struct vtnet_softc *sc, struct mbuf **m_head)
1797 {
1798         struct vtnet_tx_header *txhdr;
1799         struct virtio_net_hdr *hdr;
1800         struct mbuf *m;
1801         int error;
1802
1803         txhdr = SLIST_FIRST(&sc->vtnet_txhdr_free);
1804         if (txhdr == NULL)
1805                 return (ENOBUFS);
1806         SLIST_REMOVE_HEAD(&sc->vtnet_txhdr_free, link);
1807
1808         /*
1809          * Always use the non-mergeable header to simplify things. When
1810          * the mergeable feature is negotiated, the num_buffers field
1811          * must be set to zero. We use vtnet_hdr_size later to enqueue
1812          * the correct header size to the host.
1813          */
1814         hdr = &txhdr->vth_uhdr.hdr;
1815         m = *m_head;
1816
1817         error = ENOBUFS;
1818
1819         if (m->m_flags & M_VLANTAG) {
1820                 //m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
1821                 m = vtnet_vlan_tag_insert(m);
1822                 if ((*m_head = m) == NULL)
1823                         goto fail;
1824                 m->m_flags &= ~M_VLANTAG;
1825         }
1826
1827         if (m->m_pkthdr.csum_flags != 0) {
1828                 m = vtnet_tx_offload(sc, m, hdr);
1829                 if ((*m_head = m) == NULL)
1830                         goto fail;
1831         }
1832
1833         error = vtnet_enqueue_txbuf(sc, m_head, txhdr);
1834 fail:
1835         if (error != 0)
1836                 vtnet_enqueue_txhdr(sc, txhdr);
1837         return (error);
1838 }
1839
1840 static void
1841 vtnet_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
1842 {
1843         struct vtnet_softc *sc;
1844
1845         sc = ifp->if_softc;
1846
1847         ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
1848         lwkt_serialize_enter(&sc->vtnet_slz);
1849         vtnet_start_locked(ifp, ifsq);
1850         lwkt_serialize_exit(&sc->vtnet_slz);
1851 }
1852
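     /*
      * Transmit loop, called with the serializer held. Packets are
      * dequeued until the virtqueue fills or the send queue empties; a
      * packet that cannot be encapsulated is prepended back onto the
      * queue. The host is notified once per batch and the watchdog armed
      * if anything was enqueued.
      */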
1853 static void
1854 vtnet_start_locked(struct ifnet *ifp, struct ifaltq_subque *ifsq)
1855 {
1856         struct vtnet_softc *sc;
1857         struct virtqueue *vq;
1858         struct mbuf *m0;
1859         int enq;
1860
1861         sc = ifp->if_softc;
1862         vq = sc->vtnet_tx_vq;
1863         enq = 0;
1864
1865         ASSERT_SERIALIZED(&sc->vtnet_slz);
1866
1867         if ((ifp->if_flags & IFF_RUNNING) == 0 ||
1868             (sc->vtnet_flags & VTNET_FLAG_LINK) == 0)
1869                 return;
1870
1871 #ifdef VTNET_TX_INTR_MODERATION
1872         if (virtqueue_nused(vq) >= sc->vtnet_tx_size / 2)
1873                 vtnet_txeof(sc);
1874 #endif
1875
1876         while (!ifsq_is_empty(ifsq)) {
1877                 if (virtqueue_full(vq)) {
1878                         ifq_set_oactive(&ifp->if_snd);
1879                         break;
1880                 }
1881
1882                 m0 = ifq_dequeue(&ifp->if_snd);
1883                 if (m0 == NULL)
1884                         break;
1885
1886                 if (vtnet_encap(sc, &m0) != 0) {
1887                         if (m0 == NULL)
1888                                 break;
1889                         ifq_prepend(&ifp->if_snd, m0);
1890                         ifq_set_oactive(&ifp->if_snd);
1891                         break;
1892                 }
1893
1894                 enq++;
1895                 ETHER_BPF_MTAP(ifp, m0);
1896         }
1897
1898         if (enq > 0) {
1899                 virtqueue_notify(vq, &sc->vtnet_slz);
1900                 sc->vtnet_watchdog_timer = VTNET_WATCHDOG_TIMEOUT;
1901         }
1902 }
1903
1904 static void
1905 vtnet_tick(void *xsc)
1906 {
1907         struct vtnet_softc *sc;
1908
1909         sc = xsc;
1910
1911 #if 0
1912         ASSERT_SERIALIZED(&sc->vtnet_slz);
1913 #ifdef VTNET_DEBUG
1914         virtqueue_dump(sc->vtnet_rx_vq);
1915         virtqueue_dump(sc->vtnet_tx_vq);
1916 #endif
1917
1918         vtnet_watchdog(sc);
1919         callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
1920 #endif
1921 }
1922
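     /*
      * Deferred Tx processing: reclaim completed transmits, restart the
      * transmit loop if packets are still queued, and re-enable the Tx
      * interrupt. If the host completed more buffers while the interrupt
      * was being re-enabled, it is masked again and the pass repeated.
      */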
1923 static void
1924 vtnet_tx_intr_task(void *arg)
1925 {
1926         struct vtnet_softc *sc;
1927         struct ifnet *ifp;
1928         struct ifaltq_subque *ifsq;
1929
1930         sc = arg;
1931         ifp = sc->vtnet_ifp;
1932         ifsq = ifq_get_subq_default(&ifp->if_snd);
1933
1934 next:
1935 //      lwkt_serialize_enter(&sc->vtnet_slz);
1936
1937         if ((ifp->if_flags & IFF_RUNNING) == 0) {
1938                 vtnet_enable_tx_intr(sc);
1939 //              lwkt_serialize_exit(&sc->vtnet_slz);
1940                 return;
1941         }
1942
1943         vtnet_txeof(sc);
1944
1945         if (!ifsq_is_empty(ifsq))
1946                 vtnet_start_locked(ifp, ifsq);
1947
1948         if (vtnet_enable_tx_intr(sc) != 0) {
1949                 vtnet_disable_tx_intr(sc);
1950                 sc->vtnet_stats.tx_task_rescheduled++;
1951 //              lwkt_serialize_exit(&sc->vtnet_slz);
1952                 goto next;
1953         }
1954
1955 //      lwkt_serialize_exit(&sc->vtnet_slz);
1956 }
1957
1958 static int
1959 vtnet_tx_vq_intr(void *xsc)
1960 {
1961         struct vtnet_softc *sc;
1962
1963         sc = xsc;
1964
1965         vtnet_disable_tx_intr(sc);
1966         vtnet_tx_intr_task(sc);
1967
1968         return (1);
1969 }
1970
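     /*
      * Stop the interface: disarm the watchdog and tick callout, mask
      * both virtqueue interrupts, reset the host adapter and release any
      * mbufs still held by the Rx and Tx rings.
      */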
1971 static void
1972 vtnet_stop(struct vtnet_softc *sc)
1973 {
1974         device_t dev;
1975         struct ifnet *ifp;
1976
1977         dev = sc->vtnet_dev;
1978         ifp = sc->vtnet_ifp;
1979
1980         ASSERT_SERIALIZED(&sc->vtnet_slz);
1981
1982         sc->vtnet_watchdog_timer = 0;
1983         callout_stop(&sc->vtnet_tick_ch);
1984         ifq_clr_oactive(&ifp->if_snd);
1985         ifp->if_flags &= ~(IFF_RUNNING);
1986
1987         vtnet_disable_rx_intr(sc);
1988         vtnet_disable_tx_intr(sc);
1989
1990         /*
1991          * Stop the host VirtIO adapter. Note this will reset the host
1992          * adapter's state back to the pre-initialized state, so in
1993          * order to make the device usable again, we must drive it
1994          * through virtio_reinit() and virtio_reinit_complete().
1995          */
1996         virtio_stop(dev);
1997
1998         sc->vtnet_flags &= ~VTNET_FLAG_LINK;
1999
2000         vtnet_free_rx_mbufs(sc);
2001         vtnet_free_tx_mbufs(sc);
2002 }
2003
2004 static int
2005 vtnet_virtio_reinit(struct vtnet_softc *sc)
2006 {
2007         device_t dev;
2008         struct ifnet *ifp;
2009         uint64_t features;
2010         int error;
2011
2012         dev = sc->vtnet_dev;
2013         ifp = sc->vtnet_ifp;
2014         features = sc->vtnet_features;
2015
2016         /*
2017          * Re-negotiate with the host, removing any disabled receive
2018          * features. Transmit features are disabled only on our side
2019          * via if_capenable and if_hwassist.
2020          */
2021
2022         if (ifp->if_capabilities & IFCAP_RXCSUM) {
2023                 if ((ifp->if_capenable & IFCAP_RXCSUM) == 0)
2024                         features &= ~VIRTIO_NET_F_GUEST_CSUM;
2025         }
2026
2027 #if 0   /* IFCAP_LRO doesn't exist in DragonFly. */
2028         if (ifp->if_capabilities & IFCAP_LRO) {
2029                 if ((ifp->if_capenable & IFCAP_LRO) == 0)
2030                         features &= ~VTNET_LRO_FEATURES;
2031         }
2032 #endif
2033
2034         if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) {
2035                 if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
2036                         features &= ~VIRTIO_NET_F_CTRL_VLAN;
2037         }
2038
2039         error = virtio_reinit(dev, features);
2040         if (error)
2041                 device_printf(dev, "virtio reinit error %d\n", error);
2042
2043         return (error);
2044 }
2045
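     /*
      * Bring the interface up with the serializer held: stop any current
      * activity, renegotiate features with the host, reprogram the MAC
      * address and offload settings, repopulate the Rx virtqueue and
      * restore the Rx/MAC/VLAN filters before marking the interface
      * running.
      */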
2046 static void
2047 vtnet_init_locked(struct vtnet_softc *sc)
2048 {
2049         device_t dev;
2050         struct ifnet *ifp;
2051         int error;
2052
2053         dev = sc->vtnet_dev;
2054         ifp = sc->vtnet_ifp;
2055
2056         ASSERT_SERIALIZED(&sc->vtnet_slz);
2057
2058         if (ifp->if_flags & IFF_RUNNING)
2059                 return;
2060
2061         /* Stop host's adapter, cancel any pending I/O. */
2062         vtnet_stop(sc);
2063
2064         /* Reinitialize the host device. */
2065         error = vtnet_virtio_reinit(sc);
2066         if (error) {
2067                 device_printf(dev,
2068                     "reinitialization failed, stopping device...\n");
2069                 vtnet_stop(sc);
2070                 return;
2071         }
2072
2073         /* Update host with assigned MAC address. */
2074         bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
2075         vtnet_set_hwaddr(sc);
2076
2077         ifp->if_hwassist = 0;
2078         if (ifp->if_capenable & IFCAP_TXCSUM)
2079                 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
2080         if (ifp->if_capenable & IFCAP_TSO4)
2081                 ifp->if_hwassist |= CSUM_TSO;
2082
2083         error = vtnet_init_rx_vq(sc);
2084         if (error) {
2085                 device_printf(dev,
2086                     "cannot allocate mbufs for Rx virtqueue\n");
2087                 vtnet_stop(sc);
2088                 return;
2089         }
2090
2091         if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
2092                 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
2093                         /* Restore promiscuous and all-multicast modes. */
2094                         vtnet_rx_filter(sc);
2095
2096                         /* Restore filtered MAC addresses. */
2097                         vtnet_rx_filter_mac(sc);
2098                 }
2099
2100                 /* Restore VLAN filters. */
2101                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2102                         vtnet_rx_filter_vlan(sc);
2103         }
2104
2105         /* Re-enable the Rx and Tx virtqueue interrupts. */
2106         vtnet_enable_rx_intr(sc);
2107         vtnet_enable_tx_intr(sc);
2108 
2109
2110         ifp->if_flags |= IFF_RUNNING;
2111         ifq_clr_oactive(&ifp->if_snd);
2112
2113         virtio_reinit_complete(dev);
2114
2115         vtnet_update_link_status(sc);
2116         callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
2117 }
2118
2119 static void
2120 vtnet_init(void *xsc)
2121 {
2122         struct vtnet_softc *sc;
2123
2124         sc = xsc;
2125
2126         lwkt_serialize_enter(&sc->vtnet_slz);
2127         vtnet_init_locked(sc);
2128         lwkt_serialize_exit(&sc->vtnet_slz);
2129 }
2130
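     /*
      * Issue a command on the control virtqueue and busy-wait for the
      * host's reply. The first 'readable' segments of the sglist are
      * device-readable (header and payload); the remaining 'writable'
      * segments receive the acknowledgement.
      */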
2131 static void
2132 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
2133     struct sglist *sg, int readable, int writable)
2134 {
2135         struct virtqueue *vq;
2136         void *c;
2137
2138         vq = sc->vtnet_ctrl_vq;
2139
2140         ASSERT_SERIALIZED(&sc->vtnet_slz);
2141         KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ,
2142             ("no control virtqueue"));
2143         KASSERT(virtqueue_empty(vq),
2144             ("control command already enqueued"));
2145
2146         if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0)
2147                 return;
2148
2149         virtqueue_notify(vq, &sc->vtnet_slz);
2150
2151         /*
2152          * Poll until the command is complete. Previously, we would
2153          * sleep until the control virtqueue interrupt handler woke
2154          * us up, but dropping the serializer leads to serialization
2155          * difficulties.
2156          *
2157          * Furthermore, it appears QEMU/KVM only allocates three MSIX
2158          * vectors. Two of those vectors are needed for the Rx and Tx
2159          * virtqueues. We do not support sharing both a virtqueue and a
2160          * config change notification on the same MSIX vector.
2161          */
2162         c = virtqueue_poll(vq, NULL);
2163         KASSERT(c == cookie, ("unexpected control command response"));
2164 }
2165
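     /*
      * Set the host's notion of the primary MAC address with the
      * VIRTIO_NET_CTRL_MAC_ADDR_SET command. Returns 0 on success or EIO
      * if the host rejects the command.
      */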
2166 static int
2167 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
2168 {
2169         struct {
2170                 struct virtio_net_ctrl_hdr hdr __aligned(2);
2171                 uint8_t pad1;
2172                 char aligned_hwaddr[ETHER_ADDR_LEN] __aligned(8);
2173                 uint8_t pad2;
2174                 uint8_t ack;
2175         } s;
2176         struct sglist_seg segs[3];
2177         struct sglist sg;
2178         int error;
2179
2180         s.hdr.class = VIRTIO_NET_CTRL_MAC;
2181         s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
2182         s.ack = VIRTIO_NET_ERR;
2183
2184         /* Copy the MAC address into physically contiguous memory. */
2185         memcpy(s.aligned_hwaddr, hwaddr, ETHER_ADDR_LEN);
2186
2187         sglist_init(&sg, 3, segs);
2188         error = 0;
2189         error |= sglist_append(&sg, &s.hdr,
2190             sizeof(struct virtio_net_ctrl_hdr));
2191         error |= sglist_append(&sg, s.aligned_hwaddr, ETHER_ADDR_LEN);
2192         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
2193         KASSERT(error == 0 && sg.sg_nseg == 3,
2194             ("%s: error %d adding set MAC msg to sglist", __func__, error));
2195
2196         vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
2197
2198         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
2199 }
2200
2201 static void
2202 vtnet_rx_filter(struct vtnet_softc *sc)
2203 {
2204         device_t dev;
2205         struct ifnet *ifp;
2206
2207         dev = sc->vtnet_dev;
2208         ifp = sc->vtnet_ifp;
2209
2210         ASSERT_SERIALIZED(&sc->vtnet_slz);
2211         KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
2212             ("CTRL_RX feature not negotiated"));
2213
2214         if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
2215                 device_printf(dev, "cannot %s promiscuous mode\n",
2216                     (ifp->if_flags & IFF_PROMISC) ? "enable" : "disable");
2217
2218         if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
2219                 device_printf(dev, "cannot %s all-multicast mode\n",
2220                     (ifp->if_flags & IFF_ALLMULTI) ? "enable" : "disable");
2221 }
2222
2223 static int
2224 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
2225 {
2226         struct sglist_seg segs[3];
2227         struct sglist sg;
2228         struct {
2229                 struct virtio_net_ctrl_hdr hdr __aligned(2);
2230                 uint8_t pad1;
2231                 uint8_t onoff;
2232                 uint8_t pad2;
2233                 uint8_t ack;
2234         } s;
2235         int error;
2236
2237         KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
2238             ("%s: CTRL_RX feature not negotiated", __func__));
2239
2240         s.hdr.class = VIRTIO_NET_CTRL_RX;
2241         s.hdr.cmd = cmd;
2242         s.onoff = !!on;
2243         s.ack = VIRTIO_NET_ERR;
2244
2245         sglist_init(&sg, 3, segs);
2246         error = 0;
2247         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
2248         error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
2249         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
2250         KASSERT(error == 0 && sg.sg_nseg == 3,
2251             ("%s: error %d adding Rx message to sglist", __func__, error));
2252
2253         vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
2254
2255         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
2256 }
2257
2258 static int
2259 vtnet_set_promisc(struct vtnet_softc *sc, int on)
2260 {
2261
2262         return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
2263 }
2264
2265 static int
2266 vtnet_set_allmulti(struct vtnet_softc *sc, int on)
2267 {
2268
2269         return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
2270 }
2271
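     /*
      * Program the host's unicast and multicast MAC filter tables from
      * the interface's current address lists. If either list exceeds
      * VTNET_MAX_MAC_ENTRIES, that table is left empty and the device is
      * placed in promiscuous or all-multicast mode instead.
      */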
2272 static void
2273 vtnet_rx_filter_mac(struct vtnet_softc *sc)
2274 {
2275         struct virtio_net_ctrl_hdr hdr __aligned(2);
2276         struct vtnet_mac_filter *filter;
2277         struct sglist_seg segs[4];
2278         struct sglist sg;
2279         struct ifnet *ifp;
2280         struct ifaddr *ifa;
2281         struct ifaddr_container *ifac;
2282         struct ifmultiaddr *ifma;
2283         int ucnt, mcnt, promisc, allmulti, error;
2284         uint8_t ack;
2285
2286         ifp = sc->vtnet_ifp;
2287         ucnt = 0;
2288         mcnt = 0;
2289         promisc = 0;
2290         allmulti = 0;
2291
2292         ASSERT_SERIALIZED(&sc->vtnet_slz);
2293         KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
2294             ("%s: CTRL_RX feature not negotiated", __func__));
2295
2296         /* Use the MAC filtering table allocated in vtnet_attach. */
2297         filter = sc->vtnet_macfilter;
2298         memset(filter, 0, sizeof(struct vtnet_mac_filter));
2299
2300         /* Unicast MAC addresses: */
2301         //if_addr_rlock(ifp);
2302         TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
2303                 ifa = ifac->ifa;
2304                 if (ifa->ifa_addr->sa_family != AF_LINK)
2305                         continue;
2306                 else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
2307                     sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
2308                         continue;
2309                 else if (ucnt == VTNET_MAX_MAC_ENTRIES) {
2310                         promisc = 1;
2311                         break;
2312                 }
2313
2314                 bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
2315                     &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN);
2316                 ucnt++;
2317         }
2318         //if_addr_runlock(ifp);
2319
2320         if (promisc != 0) {
2321                 filter->vmf_unicast.nentries = 0;
2322                 if_printf(ifp, "more than %d MAC addresses assigned, "
2323                     "falling back to promiscuous mode\n",
2324                     VTNET_MAX_MAC_ENTRIES);
2325         } else
2326                 filter->vmf_unicast.nentries = ucnt;
2327
2328         /* Multicast MAC addresses: */
2329         //if_maddr_rlock(ifp);
2330         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2331                 if (ifma->ifma_addr->sa_family != AF_LINK)
2332                         continue;
2333                 else if (mcnt == VTNET_MAX_MAC_ENTRIES) {
2334                         allmulti = 1;
2335                         break;
2336                 }
2337
2338                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2339                     &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN);
2340                 mcnt++;
2341         }
2342         //if_maddr_runlock(ifp);
2343
2344         if (allmulti != 0) {
2345                 filter->vmf_multicast.nentries = 0;
2346                 if_printf(ifp, "more than %d multicast MAC addresses "
2347                     "assigned, falling back to all-multicast mode\n",
2348                     VTNET_MAX_MAC_ENTRIES);
2349         } else
2350                 filter->vmf_multicast.nentries = mcnt;
2351
2352         if (promisc != 0 && allmulti != 0)
2353                 goto out;
2354
2355         hdr.class = VIRTIO_NET_CTRL_MAC;
2356         hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
2357         ack = VIRTIO_NET_ERR;
2358
2359         sglist_init(&sg, 4, segs);
2360         error = 0;
2361         error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
2362         error |= sglist_append(&sg, &filter->vmf_unicast,
2363             sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN);
2364         error |= sglist_append(&sg, &filter->vmf_multicast,
2365             sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN);
2366         error |= sglist_append(&sg, &ack, sizeof(uint8_t));
2367         KASSERT(error == 0 && sg.sg_nseg == 4,
2368             ("%s: error %d adding MAC filter msg to sglist", __func__, error));
2369
2370         vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
2371
2372         if (ack != VIRTIO_NET_OK)
2373                 if_printf(ifp, "error setting host MAC filter table\n");
2374
2375 out:
2376         if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
2377                 if_printf(ifp, "cannot enable promiscuous mode\n");
2378         if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
2379                 if_printf(ifp, "cannot enable all-multicast mode\n");
2380 }
2381
2382 static int
2383 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
2384 {
2385         struct sglist_seg segs[3];
2386         struct sglist sg;
2387         struct {
2388                 struct virtio_net_ctrl_hdr hdr __aligned(2);
2389                 uint8_t pad1;
2390                 uint16_t tag;
2391                 uint8_t pad2;
2392                 uint8_t ack;
2393         } s;
2394         int error;
2395
2396         s.hdr.class = VIRTIO_NET_CTRL_VLAN;
2397         s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
2398         s.tag = tag;
2399         s.ack = VIRTIO_NET_ERR;
2400
2401         sglist_init(&sg, 3, segs);
2402         error = 0;
2403         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
2404         error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
2405         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
2406         KASSERT(error == 0 && sg.sg_nseg == 3,
2407             ("%s: error %d adding VLAN message to sglist", __func__, error));
2408
2409         vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
2410
2411         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
2412 }
2413
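     /*
      * Reprogram the host VLAN filter from the shadow table, typically
      * after the device has been reinitialized and its filter state lost.
      */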
2414 static void
2415 vtnet_rx_filter_vlan(struct vtnet_softc *sc)
2416 {
2417         uint32_t w;
2418         uint16_t tag;
2419         int i, bit, nvlans;
2420
2421         ASSERT_SERIALIZED(&sc->vtnet_slz);
2422         KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
2423             ("%s: VLAN_FILTER feature not negotiated", __func__));
2424
2425         nvlans = sc->vtnet_nvlans;
2426
2427         /* Enable the filter for each configured VLAN. */
2428         for (i = 0; i < VTNET_VLAN_SHADOW_SIZE && nvlans > 0; i++) {
2429                 w = sc->vtnet_vlan_shadow[i];
2430                 while ((bit = ffs(w) - 1) != -1) {
2431                         w &= ~(1 << bit);
2432                         tag = sizeof(w) * CHAR_BIT * i + bit;
2433                         nvlans--;
2434
2435                         if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
2436                                 device_printf(sc->vtnet_dev,
2437                                     "cannot enable VLAN %d filter\n", tag);
2438                         }
2439                 }
2440         }
2441
2442         KASSERT(nvlans == 0, ("VLAN count incorrect"));
2443 }
2444
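     /*
      * Track a VLAN (un)registration in the shadow bitmap and, when
      * hardware filtering is enabled, mirror the change to the host. The
      * tag selects a 32-bit word (tag >> 5) and a bit within it
      * (tag & 0x1f); for example, VLAN 100 lands in word 3, bit 4.
      */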
2445 static void
2446 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
2447 {
2448         struct ifnet *ifp;
2449         int idx, bit;
2450
2451         ifp = sc->vtnet_ifp;
2452         idx = (tag >> 5) & 0x7F;
2453         bit = tag & 0x1F;
2454
2455         if (tag == 0 || tag > 4095)
2456                 return;
2457
2458         lwkt_serialize_enter(&sc->vtnet_slz);
2459
2460         /* Update shadow VLAN table. */
2461         if (add) {
2462                 sc->vtnet_nvlans++;
2463                 sc->vtnet_vlan_shadow[idx] |= (1 << bit);
2464         } else {
2465                 sc->vtnet_nvlans--;
2466                 sc->vtnet_vlan_shadow[idx] &= ~(1 << bit);
2467         }
2468
2469         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER &&
2470             vtnet_exec_vlan_filter(sc, add, tag) != 0) {
2471                 device_printf(sc->vtnet_dev,
2472                     "cannot %s VLAN %d %s the host filter table\n",
2473                     add ? "add" : "remove", tag, add ? "to" : "from");
2474         }
2475
2476         lwkt_serialize_exit(&sc->vtnet_slz);
2477 }
2478
2479 static void
2480 vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
2481 {
2482
2483         if (ifp->if_softc != arg)
2484                 return;
2485
2486         vtnet_update_vlan_filter(arg, 1, tag);
2487 }
2488
2489 static void
2490 vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
2491 {
2492
2493         if (ifp->if_softc != arg)
2494                 return;
2495
2496         vtnet_update_vlan_filter(arg, 0, tag);
2497 }
2498
2499 static int
2500 vtnet_ifmedia_upd(struct ifnet *ifp)
2501 {
2502         struct vtnet_softc *sc;
2503         struct ifmedia *ifm;
2504
2505         sc = ifp->if_softc;
2506         ifm = &sc->vtnet_media;
2507
2508         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
2509                 return (EINVAL);
2510
2511         return (0);
2512 }
2513
2514 static void
2515 vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
2516 {
2517         struct vtnet_softc *sc;
2518
2519         sc = ifp->if_softc;
2520
2521         ifmr->ifm_status = IFM_AVALID;
2522         ifmr->ifm_active = IFM_ETHER;
2523
2524         lwkt_serialize_enter(&sc->vtnet_slz);
2525         if (vtnet_is_link_up(sc) != 0) {
2526                 ifmr->ifm_status |= IFM_ACTIVE;
2527                 ifmr->ifm_active |= VTNET_MEDIATYPE;
2528         } else
2529                 ifmr->ifm_active |= IFM_NONE;
2530         lwkt_serialize_exit(&sc->vtnet_slz);
2531 }
2532
2533 static void
2534 vtnet_add_statistics(struct vtnet_softc *sc)
2535 {
2536         device_t dev;
2537         struct vtnet_statistics *stats;
2538         struct sysctl_ctx_list *ctx;
2539         struct sysctl_oid *tree;
2540         struct sysctl_oid_list *child;
2541
2542         dev = sc->vtnet_dev;
2543         stats = &sc->vtnet_stats;
2544         ctx = device_get_sysctl_ctx(dev);
2545         tree = device_get_sysctl_tree(dev);
2546         child = SYSCTL_CHILDREN(tree);
2547
2548         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
2549             CTLFLAG_RD, &stats->mbuf_alloc_failed, 0,
2550             "Mbuf cluster allocation failures");
2551
2552         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
2553             CTLFLAG_RD, &stats->rx_frame_too_large, 0,
2554             "Received frame larger than the mbuf chain");
2555         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
2556             CTLFLAG_RD, &stats->rx_enq_replacement_failed, 0,
2557             "Enqueuing the replacement receive mbuf failed");
2558         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
2559             CTLFLAG_RD, &stats->rx_mergeable_failed, 0,
2560             "Mergeable buffers receive failures");
2561         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
2562             CTLFLAG_RD, &stats->rx_csum_bad_ethtype, 0,
2563             "Received checksum offloaded buffer with unsupported "
2564             "Ethernet type");
2565         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
2566             CTLFLAG_RD, &stats->rx_csum_bad_ipproto, 0,
2567             "Received checksum offloaded buffer with incorrect IP protocol");
2568         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
2569             CTLFLAG_RD, &stats->rx_csum_bad_offset, 0,
2570             "Received checksum offloaded buffer with incorrect offset");
2571         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
2572             CTLFLAG_RD, &stats->rx_csum_failed, 0,
2573             "Received buffer checksum offload failed");
2574         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
2575             CTLFLAG_RD, &stats->rx_csum_offloaded, 0,
2576             "Received buffer checksum offload succeeded");
2577         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
2578             CTLFLAG_RD, &stats->rx_task_rescheduled, 0,
2579             "Times the receive interrupt task rescheduled itself");
2580
2581         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
2582             CTLFLAG_RD, &stats->tx_csum_bad_ethtype, 0,
2583             "Aborted transmit of checksum offloaded buffer with unknown "
2584             "Ethernet type");
2585         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
2586             CTLFLAG_RD, &stats->tx_tso_bad_ethtype, 0,
2587             "Aborted transmit of TSO buffer with unknown Ethernet type");
2588         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
2589             CTLFLAG_RD, &stats->tx_defragged, 0,
2590             "Transmit mbufs defragged");
2591         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
2592             CTLFLAG_RD, &stats->tx_defrag_failed, 0,
2593             "Aborted transmit of buffer because defrag failed");
2594         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
2595             CTLFLAG_RD, &stats->tx_csum_offloaded, 0,
2596             "Offloaded checksum of transmitted buffer");
2597         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
2598             CTLFLAG_RD, &stats->tx_tso_offloaded, 0,
2599             "Segmentation offload of transmitted buffer");
2600         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
2601             CTLFLAG_RD, &stats->tx_task_rescheduled, 0,
2602             "Times the transmit interrupt task rescheduled itself");
2603 }
2604
2605 static int
2606 vtnet_enable_rx_intr(struct vtnet_softc *sc)
2607 {
2608
2609         return (virtqueue_enable_intr(sc->vtnet_rx_vq));
2610 }
2611
2612 static void
2613 vtnet_disable_rx_intr(struct vtnet_softc *sc)
2614 {
2615
2616         virtqueue_disable_intr(sc->vtnet_rx_vq);
2617 }
2618
2619 static int
2620 vtnet_enable_tx_intr(struct vtnet_softc *sc)
2621 {
2622
2623 #ifdef VTNET_TX_INTR_MODERATION
2624         return (0);
2625 #else
2626         return (virtqueue_enable_intr(sc->vtnet_tx_vq));
2627 #endif
2628 }
2629
2630 static void
2631 vtnet_disable_tx_intr(struct vtnet_softc *sc)
2632 {
2633
2634         virtqueue_disable_intr(sc->vtnet_tx_vq);
2635 }