if_vtnet, sync with FreeBSD 2/x: Sync vtnet_*_filter functions.
[dragonfly.git] sys/dev/virtual/virtio/net/if_vtnet.c
1 /*-
2  * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 /* Driver for VirtIO network devices. */
28
29 #include <sys/cdefs.h>
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/sockio.h>
35 #include <sys/mbuf.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/socket.h>
39 #include <sys/sysctl.h>
40 #include <sys/taskqueue.h>
41 #include <sys/random.h>
42 #include <sys/sglist.h>
43 #include <sys/serialize.h>
44 #include <sys/bus.h>
45 #include <sys/rman.h>
46
47 #include <net/ethernet.h>
48 #include <net/if.h>
49 #include <net/if_arp.h>
50 #include <net/if_dl.h>
51 #include <net/if_types.h>
52 #include <net/if_media.h>
53 #include <net/vlan/if_vlan_var.h>
54 #include <net/vlan/if_vlan_ether.h>
55 #include <net/ifq_var.h>
56
57 #include <net/bpf.h>
58
59 #include <netinet/in_systm.h>
60 #include <netinet/in.h>
61 #include <netinet/ip.h>
62 #include <netinet/ip6.h>
63 #include <netinet/udp.h>
64 #include <netinet/tcp.h>
65
66 #include <dev/virtual/virtio/virtio/virtio.h>
67 #include <dev/virtual/virtio/virtio/virtqueue.h>
68
69 #include "virtio_net.h"
70 #include "virtio_if.h"
71
72 struct vtnet_statistics {
73         unsigned long           mbuf_alloc_failed;
74
75         unsigned long           rx_frame_too_large;
76         unsigned long           rx_enq_replacement_failed;
77         unsigned long           rx_mergeable_failed;
78         unsigned long           rx_csum_bad_ethtype;
79         unsigned long           rx_csum_bad_start;
80         unsigned long           rx_csum_bad_ipproto;
81         unsigned long           rx_csum_bad_offset;
82         unsigned long           rx_csum_failed;
83         unsigned long           rx_csum_offloaded;
84         unsigned long           rx_task_rescheduled;
85
86         unsigned long           tx_csum_offloaded;
87         unsigned long           tx_tso_offloaded;
88         unsigned long           tx_csum_bad_ethtype;
89         unsigned long           tx_tso_bad_ethtype;
90         unsigned long           tx_task_rescheduled;
91 };
92
93 struct vtnet_softc {
94         device_t                vtnet_dev;
95         struct ifnet            *vtnet_ifp;
96         struct lwkt_serialize   vtnet_slz;
97
98         uint32_t                vtnet_flags;
99 #define VTNET_FLAG_LINK         0x0001
100 #define VTNET_FLAG_SUSPENDED    0x0002
101 #define VTNET_FLAG_MAC          0x0004
102 #define VTNET_FLAG_CTRL_VQ      0x0008
103 #define VTNET_FLAG_CTRL_RX      0x0010
104 #define VTNET_FLAG_CTRL_MAC     0x0020
105 #define VTNET_FLAG_VLAN_FILTER  0x0040
106 #define VTNET_FLAG_TSO_ECN      0x0080
107 #define VTNET_FLAG_MRG_RXBUFS   0x0100
108 #define VTNET_FLAG_LRO_NOMRG    0x0200
109
110         struct virtqueue        *vtnet_rx_vq;
111         struct virtqueue        *vtnet_tx_vq;
112         struct virtqueue        *vtnet_ctrl_vq;
113
114         struct vtnet_tx_header  *vtnet_txhdrarea;
115         uint32_t                vtnet_txhdridx;
116         struct vtnet_mac_filter *vtnet_macfilter;
117
118         int                     vtnet_hdr_size;
119         int                     vtnet_tx_size;
120         int                     vtnet_rx_size;
121         int                     vtnet_rx_process_limit;
122         int                     vtnet_rx_mbuf_size;
123         int                     vtnet_rx_mbuf_count;
124         int                     vtnet_if_flags;
125         int                     vtnet_watchdog_timer;
126         uint64_t                vtnet_features;
127
128         struct task             vtnet_cfgchg_task;
129
130         struct vtnet_statistics vtnet_stats;
131
132         struct callout          vtnet_tick_ch;
133
134         eventhandler_tag        vtnet_vlan_attach;
135         eventhandler_tag        vtnet_vlan_detach;
136
137         struct ifmedia          vtnet_media;
138         /*
139          * Fake media type; the host does not provide us with
140          * any real media information.
141          */
142 #define VTNET_MEDIATYPE         (IFM_ETHER | IFM_1000_T | IFM_FDX)
143         char                    vtnet_hwaddr[ETHER_ADDR_LEN];
144
145         /*
146          * During reset, the host's VLAN filtering table is lost. The
147          * array below is used to restore all the VLANs configured on
148          * this interface after a reset.
149          */
150 #define VTNET_VLAN_SHADOW_SIZE  (4096 / 32)
151         int                     vtnet_nvlans;
152         uint32_t                vtnet_vlan_shadow[VTNET_VLAN_SHADOW_SIZE];
153
154         char                    vtnet_mtx_name[16];
155 };
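
/*
 * Illustrative sketch, not part of the original driver: the VLAN shadow in
 * the softc above is a 4096-bit bitmap stored as 128 32-bit words, so a VLAN
 * tag maps to word (tag >> 5) and bit (tag & 0x1F). A hypothetical helper
 * that marks a tag in the shadow might look like the following; the driver
 * itself maintains the bitmap in vtnet_set_vlan_filter() and replays it in
 * vtnet_rx_filter_vlan() after a reset.
 */
static __inline void
vtnet_vlan_shadow_set_example(struct vtnet_softc *sc, uint16_t tag)
{
	sc->vtnet_vlan_shadow[(tag >> 5) & (VTNET_VLAN_SHADOW_SIZE - 1)] |=
	    1 << (tag & 0x1F);
}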
156
157 /*
158  * When mergeable buffers are not negotiated, the vtnet_rx_header structure
159  * below is placed at the beginning of the mbuf data. Four bytes of pad
160  * both keep the VirtIO header and the data non-contiguous and keep the
161  * frame's payload 4 byte aligned.
162  *
163  * When mergeable buffers are negotiated, the host puts the VirtIO header in
164  * the beginning of the first mbuf's data.
165  */
166 #define VTNET_RX_HEADER_PAD     4
167 struct vtnet_rx_header {
168         struct virtio_net_hdr   vrh_hdr;
169         char                    vrh_pad[VTNET_RX_HEADER_PAD];
170 } __packed;
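
/*
 * Illustrative check, not in the original source: assuming the standard
 * 10-byte virtio_net_hdr, the pad makes the receive header 14 bytes, so
 * after the 14-byte Ethernet header the IP payload begins at offset 28,
 * a 4-byte boundary.
 */
CTASSERT((sizeof(struct vtnet_rx_header) + ETHER_HDR_LEN) % 4 == 0);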
171
172 /*
173  * For each outgoing frame, the vtnet_tx_header below is taken from the
174  * preallocated vtnet_txhdrarea array.
175  */
176 struct vtnet_tx_header {
177         union {
178                 struct virtio_net_hdr           hdr;
179                 struct virtio_net_hdr_mrg_rxbuf mhdr;
180         } vth_uhdr;
181
182         struct mbuf             *vth_mbuf;
183 };
184
185 MALLOC_DEFINE(M_VTNET, "VTNET_TX", "Outgoing VTNET TX frame header");
186
187 /*
188  * The VirtIO specification does not place a limit on the number of MAC
189  * addresses the guest driver may request to be filtered. In practice,
190  * the host is constrained by available resources. To simplify this driver,
191  * impose a reasonably high limit on the number of MAC addresses we will
192  * filter before falling back to promiscuous or all-multicast modes.
193  */
194 #define VTNET_MAX_MAC_ENTRIES   128
195
196 struct vtnet_mac_table {
197         uint32_t                nentries;
198         uint8_t                 macs[VTNET_MAX_MAC_ENTRIES][ETHER_ADDR_LEN];
199 } __packed;
200
201 struct vtnet_mac_filter {
202         struct vtnet_mac_table  vmf_unicast;
203         uint32_t                vmf_pad; /* Make tables non-contiguous. */
204         struct vtnet_mac_table  vmf_multicast;
205 };
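
/*
 * Hypothetical helper, shown only to illustrate how a table above is meant
 * to be used (it is not part of the driver): an address is appended while
 * honoring VTNET_MAX_MAC_ENTRIES, and on overflow the caller falls back to
 * promiscuous or all-multicast mode. vtnet_rx_filter_mac() builds both the
 * unicast and multicast tables along these lines.
 */
static __inline int
vtnet_mac_table_add_example(struct vtnet_mac_table *tbl, const uint8_t *mac)
{
	if (tbl->nentries >= VTNET_MAX_MAC_ENTRIES)
		return (ENOSPC);
	bcopy(mac, tbl->macs[tbl->nentries++], ETHER_ADDR_LEN);
	return (0);
}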
206
207 #define VTNET_WATCHDOG_TIMEOUT  5
208 #define VTNET_CSUM_OFFLOAD      (CSUM_TCP | CSUM_UDP)
209
210 /* Features desired/implemented by this driver. */
211 #define VTNET_FEATURES          \
212     (VIRTIO_NET_F_MAC           | \
213      VIRTIO_NET_F_STATUS        | \
214      VIRTIO_NET_F_CTRL_VQ       | \
215      VIRTIO_NET_F_CTRL_RX       | \
216      VIRTIO_NET_F_CTRL_MAC_ADDR | \
217      VIRTIO_NET_F_CTRL_VLAN     | \
218      VIRTIO_NET_F_CSUM          | \
219      VIRTIO_NET_F_HOST_TSO4     | \
220      VIRTIO_NET_F_HOST_TSO6     | \
221      VIRTIO_NET_F_HOST_ECN      | \
222      VIRTIO_NET_F_GUEST_CSUM    | \
223      VIRTIO_NET_F_GUEST_TSO4    | \
224      VIRTIO_NET_F_GUEST_TSO6    | \
225      VIRTIO_NET_F_GUEST_ECN     | \
226      VIRTIO_NET_F_MRG_RXBUF)
227
228 /*
229  * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
230  * frames larger than 1514 bytes. We do not yet support software LRO
231  * via tcp_lro_rx().
232  */
233 #define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \
234                             VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN)
235
236 #define VTNET_MAX_MTU           65536
237 #define VTNET_MAX_RX_SIZE       65550
238
239 /*
240  * Used to preallocate the Vq indirect descriptors. The first segment
241  * is reserved for the header.
242  */
243 #define VTNET_MIN_RX_SEGS       2
244 #define VTNET_MAX_RX_SEGS       34
245 #define VTNET_MAX_TX_SEGS       34
246
247 #define IFCAP_TSO4              0x00100 /* can do TCP Segmentation Offload */
248 #define IFCAP_TSO6              0x00200 /* can do TCP6 Segmentation Offload */
249 #define IFCAP_LRO               0x00400 /* can do Large Receive Offload */
250 #define IFCAP_VLAN_HWFILTER     0x10000 /* interface hw can filter vlan tag */
251 #define IFCAP_VLAN_HWTSO        0x40000 /* can do IFCAP_TSO on VLANs */
252
253
254 /*
255  * Assert we can receive and transmit the maximum with regular
256  * size clusters.
257  */
258 CTASSERT(((VTNET_MAX_RX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_RX_SIZE);
259 CTASSERT(((VTNET_MAX_TX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_MTU);
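
/*
 * Worked numbers (illustrative, with 2KB MCLBYTES clusters): (34 - 1) * 2048
 * = 67584 bytes, which covers both VTNET_MAX_RX_SIZE (65550) and
 * VTNET_MAX_MTU (65536), leaving the remaining segment for the header.
 */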
260
261 /*
262  * Determine how many mbufs are in each receive buffer. For LRO without
263  * mergeable descriptors, we must allocate an mbuf chain large enough to
264  * hold both the vtnet_rx_header and the maximum receivable data.
265  */
266 #define VTNET_NEEDED_RX_MBUFS(_sc)                                      \
267         ((_sc)->vtnet_flags & VTNET_FLAG_LRO_NOMRG) == 0 ? 1 :          \
268         howmany(sizeof(struct vtnet_rx_header) + VTNET_MAX_RX_SIZE,     \
269         (_sc)->vtnet_rx_mbuf_size)
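
/*
 * Worked example (illustrative, MCLBYTES assumed to be 2048): with
 * LRO_NOMRG, the chain must hold the 14-byte vtnet_rx_header plus
 * VTNET_MAX_RX_SIZE bytes, i.e. howmany(65564, 2048) = 33 clusters.
 * Counting the header as its own sglist segment in vtnet_enqueue_rxbuf()
 * gives 34 segments, exactly VTNET_MAX_RX_SEGS.
 */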
270
271 static int      vtnet_modevent(module_t, int, void *);
272
273 static int      vtnet_probe(device_t);
274 static int      vtnet_attach(device_t);
275 static int      vtnet_detach(device_t);
276 static int      vtnet_suspend(device_t);
277 static int      vtnet_resume(device_t);
278 static int      vtnet_shutdown(device_t);
279 static int      vtnet_config_change(device_t);
280
281 static void     vtnet_negotiate_features(struct vtnet_softc *);
282 static int      vtnet_alloc_virtqueues(struct vtnet_softc *);
283 static void     vtnet_get_hwaddr(struct vtnet_softc *);
284 static void     vtnet_set_hwaddr(struct vtnet_softc *);
285 static int      vtnet_is_link_up(struct vtnet_softc *);
286 static void     vtnet_update_link_status(struct vtnet_softc *);
287 #if 0
288 static void     vtnet_watchdog(struct vtnet_softc *);
289 #endif
290 static void     vtnet_config_change_task(void *, int);
291 static int      vtnet_change_mtu(struct vtnet_softc *, int);
292 static int      vtnet_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
293
294 static int      vtnet_init_rx_vq(struct vtnet_softc *);
295 static void     vtnet_free_rx_mbufs(struct vtnet_softc *);
296 static void     vtnet_free_tx_mbufs(struct vtnet_softc *);
297 static void     vtnet_free_ctrl_vq(struct vtnet_softc *);
298
299 static struct mbuf * vtnet_alloc_rxbuf(struct vtnet_softc *, int,
300                     struct mbuf **);
301 static int      vtnet_replace_rxbuf(struct vtnet_softc *,
302                     struct mbuf *, int);
303 static int      vtnet_newbuf(struct vtnet_softc *);
304 static void     vtnet_discard_merged_rxbuf(struct vtnet_softc *, int);
305 static void     vtnet_discard_rxbuf(struct vtnet_softc *, struct mbuf *);
306 static int      vtnet_enqueue_rxbuf(struct vtnet_softc *, struct mbuf *);
307 static void     vtnet_vlan_tag_remove(struct mbuf *);
308 static int      vtnet_rx_csum(struct vtnet_softc *, struct mbuf *,
309                     struct virtio_net_hdr *);
310 static int      vtnet_rxeof_merged(struct vtnet_softc *, struct mbuf *, int);
311 static int      vtnet_rxeof(struct vtnet_softc *, int, int *);
312 static void     vtnet_rx_intr_task(void *);
313 static int      vtnet_rx_vq_intr(void *);
314
315 static void     vtnet_txeof(struct vtnet_softc *);
316 static struct mbuf * vtnet_tx_offload(struct vtnet_softc *, struct mbuf *,
317                     struct virtio_net_hdr *);
318 static int      vtnet_enqueue_txbuf(struct vtnet_softc *, struct mbuf **,
319                     struct vtnet_tx_header *);
320 static int      vtnet_encap(struct vtnet_softc *, struct mbuf **);
321 static void     vtnet_start_locked(struct ifnet *, struct ifaltq_subque *);
322 static void     vtnet_start(struct ifnet *, struct ifaltq_subque *);
323 static void     vtnet_tick(void *);
324 static void     vtnet_tx_intr_task(void *);
325 static int      vtnet_tx_vq_intr(void *);
326
327 static void     vtnet_stop(struct vtnet_softc *);
328 static int      vtnet_reinit(struct vtnet_softc *);
329 static void     vtnet_init_locked(struct vtnet_softc *);
330 static void     vtnet_init(void *);
331
332 static void     vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
333                     struct sglist *, int, int);
334
335 static int      vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
336 static int      vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
337 static int      vtnet_set_promisc(struct vtnet_softc *, int);
338 static int      vtnet_set_allmulti(struct vtnet_softc *, int);
339 static void     vtnet_rx_filter(struct vtnet_softc *sc);
340 static void     vtnet_rx_filter_mac(struct vtnet_softc *);
341
342 static int      vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
343 static void     vtnet_rx_filter_vlan(struct vtnet_softc *);
344 static void     vtnet_set_vlan_filter(struct vtnet_softc *, int, uint16_t);
345 static void     vtnet_register_vlan(void *, struct ifnet *, uint16_t);
346 static void     vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
347
348 static int      vtnet_ifmedia_upd(struct ifnet *);
349 static void     vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
350
351 static void     vtnet_add_statistics(struct vtnet_softc *);
352
353 static int      vtnet_enable_rx_intr(struct vtnet_softc *);
354 static int      vtnet_enable_tx_intr(struct vtnet_softc *);
355 static void     vtnet_disable_rx_intr(struct vtnet_softc *);
356 static void     vtnet_disable_tx_intr(struct vtnet_softc *);
357
358 /* Tunables. */
359 static int vtnet_csum_disable = 0;
360 TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
361 static int vtnet_tso_disable = 1;
362 TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
363 static int vtnet_lro_disable = 1;
364 TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
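
/*
 * Usage note (illustrative, not from the original source): the knobs above
 * are loader tunables, so they would typically be set from /boot/loader.conf,
 * e.g.:
 *
 *	hw.vtnet.csum_disable=1
 *	hw.vtnet.tso_disable=0
 *	hw.vtnet.lro_disable=0
 */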
365
366 /*
367  * Reducing the number of transmit completion interrupts can
368  * improve performance. To do so, the define below keeps the
369  * Tx vq interrupt disabled and adds calls to vtnet_txeof()
370  * in the start and watchdog paths. The price to pay for this
371  * is that the m_free'ing of transmitted mbufs may be delayed
372  * until the watchdog fires.
373  */
374 #define VTNET_TX_INTR_MODERATION
375
376 static struct virtio_feature_desc vtnet_feature_desc[] = {
377         { VIRTIO_NET_F_CSUM,            "TxChecksum"    },
378         { VIRTIO_NET_F_GUEST_CSUM,      "RxChecksum"    },
379         { VIRTIO_NET_F_MAC,             "MacAddress"    },
380         { VIRTIO_NET_F_GSO,             "TxAllGSO"      },
381         { VIRTIO_NET_F_GUEST_TSO4,      "RxTSOv4"       },
382         { VIRTIO_NET_F_GUEST_TSO6,      "RxTSOv6"       },
383         { VIRTIO_NET_F_GUEST_ECN,       "RxECN"         },
384         { VIRTIO_NET_F_GUEST_UFO,       "RxUFO"         },
385         { VIRTIO_NET_F_HOST_TSO4,       "TxTSOv4"       },
386         { VIRTIO_NET_F_HOST_TSO6,       "TxTSOv6"       },
387         { VIRTIO_NET_F_HOST_ECN,        "TxTSOECN"      },
388         { VIRTIO_NET_F_HOST_UFO,        "TxUFO"         },
389         { VIRTIO_NET_F_MRG_RXBUF,       "MrgRxBuf"      },
390         { VIRTIO_NET_F_STATUS,          "Status"        },
391         { VIRTIO_NET_F_CTRL_VQ,         "ControlVq"     },
392         { VIRTIO_NET_F_CTRL_RX,         "RxMode"        },
393         { VIRTIO_NET_F_CTRL_VLAN,       "VLanFilter"    },
394         { VIRTIO_NET_F_CTRL_RX_EXTRA,   "RxModeExtra"   },
395         { VIRTIO_NET_F_GUEST_ANNOUNCE,  "GuestAnnounce" },
396         { VIRTIO_NET_F_MQ,              "RFS"           },
397         { VIRTIO_NET_F_CTRL_MAC_ADDR,   "SetMacAddress" },
398         { 0, NULL }
399 };
400
401 static device_method_t vtnet_methods[] = {
402         /* Device methods. */
403         DEVMETHOD(device_probe,         vtnet_probe),
404         DEVMETHOD(device_attach,        vtnet_attach),
405         DEVMETHOD(device_detach,        vtnet_detach),
406         DEVMETHOD(device_suspend,       vtnet_suspend),
407         DEVMETHOD(device_resume,        vtnet_resume),
408         DEVMETHOD(device_shutdown,      vtnet_shutdown),
409
410         /* VirtIO methods. */
411         DEVMETHOD(virtio_config_change, vtnet_config_change),
412
413         { 0, 0 }
414 };
415
416 static driver_t vtnet_driver = {
417         "vtnet",
418         vtnet_methods,
419         sizeof(struct vtnet_softc)
420 };
421
422 static devclass_t vtnet_devclass;
423
424 DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass,
425               vtnet_modevent, 0);
426 MODULE_VERSION(vtnet, 1);
427 MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
428
429 static int
430 vtnet_modevent(module_t mod, int type, void *unused)
431 {
432         int error;
433
434         error = 0;
435
436         switch (type) {
437         case MOD_LOAD:
438                 break;
439         case MOD_UNLOAD:
440                 break;
441         case MOD_SHUTDOWN:
442                 break;
443         default:
444                 error = EOPNOTSUPP;
445                 break;
446         }
447
448         return (error);
449 }
450
451 static int
452 vtnet_probe(device_t dev)
453 {
454         if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK)
455                 return (ENXIO);
456
457         device_set_desc(dev, "VirtIO Networking Adapter");
458
459         return (BUS_PROBE_DEFAULT);
460 }
461
462 static int
463 vtnet_attach(device_t dev)
464 {
465         struct vtnet_softc *sc;
466         struct ifnet *ifp;
467         int tx_size, error;
468
469         sc = device_get_softc(dev);
470         sc->vtnet_dev = dev;
471
472         lwkt_serialize_init(&sc->vtnet_slz);
473         callout_init(&sc->vtnet_tick_ch);
474
475         ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
476                      vtnet_ifmedia_sts);
477         ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
478         ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
479
480         vtnet_add_statistics(sc);
481
482         virtio_set_feature_desc(dev, vtnet_feature_desc);
483         vtnet_negotiate_features(sc);
484
485         if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
486                 /* This feature should always be negotiated. */
487                 sc->vtnet_flags |= VTNET_FLAG_MAC;
488         }
489
490         if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
491                 sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
492                 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
493         } else {
494                 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
495         }
496
497         sc->vtnet_rx_mbuf_size = MCLBYTES;
498         sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc);
499
500         if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
501                 sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
502
503                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
504                         sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
505                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
506                         sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
507                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
508                     virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
509                         sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
510         }
511
512         /* Read (or generate) the MAC address for the adapter. */
513         vtnet_get_hwaddr(sc);
514
515         error = vtnet_alloc_virtqueues(sc);
516         if (error) {
517                 device_printf(dev, "cannot allocate virtqueues\n");
518                 goto fail;
519         }
520
521         ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
522         if (ifp == NULL) {
523                 device_printf(dev, "cannot allocate ifnet structure\n");
524                 error = ENOSPC;
525                 goto fail;
526         }
527
528         ifp->if_softc = sc;
529         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
530         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
531         ifp->if_init = vtnet_init;
532         ifp->if_start = vtnet_start;
533         ifp->if_ioctl = vtnet_ioctl;
534
535         sc->vtnet_rx_size = virtqueue_size(sc->vtnet_rx_vq);
536         sc->vtnet_rx_process_limit = sc->vtnet_rx_size;
537
538         tx_size = virtqueue_size(sc->vtnet_tx_vq);
539         sc->vtnet_tx_size = tx_size;
540         sc->vtnet_txhdridx = 0;
541         sc->vtnet_txhdrarea = contigmalloc(
542             ((sc->vtnet_tx_size / 2) + 1) * sizeof(struct vtnet_tx_header),
543             M_VTNET, M_WAITOK, 0, BUS_SPACE_MAXADDR, 4, 0);
544         if (sc->vtnet_txhdrarea == NULL) {
545                 device_printf(dev, "cannot contigmalloc the tx headers\n");
                error = ENOMEM;
546                 goto fail;
547         }
548         sc->vtnet_macfilter = contigmalloc(
549             sizeof(struct vtnet_mac_filter),
550             M_DEVBUF, M_WAITOK, 0, BUS_SPACE_MAXADDR, 4, 0);
551         if (sc->vtnet_macfilter == NULL) {
552                 device_printf(dev,
553                     "cannot contigmalloc the mac filter table\n");
                error = ENOMEM;
554                 goto fail;
555         }
556         ifq_set_maxlen(&ifp->if_snd, tx_size - 1);
557         ifq_set_ready(&ifp->if_snd);
558
559         ether_ifattach(ifp, sc->vtnet_hwaddr, NULL);
560
561         if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) {
562                 /* ifp->if_capabilities |= IFCAP_LINKSTATE; */
563                 kprintf("add dynamic link state\n");
564         }
565
566         /* Tell the upper layer(s) we support long frames. */
567         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
568         ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
569
570         if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
571                 ifp->if_capabilities |= IFCAP_TXCSUM;
572
573                 if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
574                         ifp->if_capabilities |= IFCAP_TSO4;
575                 if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
576                         ifp->if_capabilities |= IFCAP_TSO6;
577                 if (ifp->if_capabilities & IFCAP_TSO)
578                         ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
579
580                 if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
581                         sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
582         }
583
584         if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
585                 ifp->if_capabilities |= IFCAP_RXCSUM;
586
587                 if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
588                     virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
589                         ifp->if_capabilities |= IFCAP_LRO;
590         }
591
592         if (ifp->if_capabilities & IFCAP_HWCSUM) {
593                 /*
594                  * VirtIO does not support VLAN tagging, but we can fake
595                  * it by inserting and removing the 802.1Q header during
596                  * transmit and receive. We are then able to do checksum
597                  * offloading of VLAN frames.
598                  */
599                 ifp->if_capabilities |=
600                         IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
601         }
602
603         ifp->if_capenable = ifp->if_capabilities;
604
605         /*
606          * Capabilities after here are not enabled by default.
607          */
608
609         if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
610                 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
611
612                 sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
613                     vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
614                 sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
615                     vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
616         }
617
618         TASK_INIT(&sc->vtnet_cfgchg_task, 0, vtnet_config_change_task, sc);
619
620         error = virtio_setup_intr(dev, &sc->vtnet_slz);
621         if (error) {
622                 device_printf(dev, "cannot setup virtqueue interrupts\n");
623                 ether_ifdetach(ifp);
624                 goto fail;
625         }
626
627         /*
628          * Device defaults to promiscuous mode for backwards
629          * compatibility. Turn it off if possible.
630          */
631         if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
632                 lwkt_serialize_enter(&sc->vtnet_slz);
633                 if (vtnet_set_promisc(sc, 0) != 0) {
634                         ifp->if_flags |= IFF_PROMISC;
635                         device_printf(dev,
636                             "cannot disable promiscuous mode\n");
637                 }
638                 lwkt_serialize_exit(&sc->vtnet_slz);
639         } else
640                 ifp->if_flags |= IFF_PROMISC;
641
642 fail:
643         if (error)
644                 vtnet_detach(dev);
645
646         return (error);
647 }
648
649 static int
650 vtnet_detach(device_t dev)
651 {
652         struct vtnet_softc *sc;
653         struct ifnet *ifp;
654
655         sc = device_get_softc(dev);
656         ifp = sc->vtnet_ifp;
657
658         if (device_is_attached(dev)) {
659                 lwkt_serialize_enter(&sc->vtnet_slz);
660                 vtnet_stop(sc);
661                 lwkt_serialize_exit(&sc->vtnet_slz);
662
663                 callout_stop(&sc->vtnet_tick_ch);
664                 taskqueue_drain(taskqueue_swi, &sc->vtnet_cfgchg_task);
665
666                 ether_ifdetach(ifp);
667         }
668
669         if (sc->vtnet_vlan_attach != NULL) {
670                 EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
671                 sc->vtnet_vlan_attach = NULL;
672         }
673         if (sc->vtnet_vlan_detach != NULL) {
674                 EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
675                 sc->vtnet_vlan_detach = NULL;
676         }
677
678         if (ifp) {
679                 if_free(ifp);
680                 sc->vtnet_ifp = NULL;
681         }
682
683         if (sc->vtnet_rx_vq != NULL)
684                 vtnet_free_rx_mbufs(sc);
685         if (sc->vtnet_tx_vq != NULL)
686                 vtnet_free_tx_mbufs(sc);
687         if (sc->vtnet_ctrl_vq != NULL)
688                 vtnet_free_ctrl_vq(sc);
689
690         if (sc->vtnet_txhdrarea != NULL) {
691                 contigfree(sc->vtnet_txhdrarea,
692                     ((sc->vtnet_tx_size / 2) + 1) *
693                     sizeof(struct vtnet_tx_header), M_VTNET);
694                 sc->vtnet_txhdrarea = NULL;
695         }
696         if (sc->vtnet_macfilter != NULL) {
697                 contigfree(sc->vtnet_macfilter,
698                     sizeof(struct vtnet_mac_filter), M_DEVBUF);
699                 sc->vtnet_macfilter = NULL;
700         }
701
702         ifmedia_removeall(&sc->vtnet_media);
703
704         return (0);
705 }
706
707 static int
708 vtnet_suspend(device_t dev)
709 {
710         struct vtnet_softc *sc;
711
712         sc = device_get_softc(dev);
713
714         lwkt_serialize_enter(&sc->vtnet_slz);
715         vtnet_stop(sc);
716         sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
717         lwkt_serialize_exit(&sc->vtnet_slz);
718
719         return (0);
720 }
721
722 static int
723 vtnet_resume(device_t dev)
724 {
725         struct vtnet_softc *sc;
726         struct ifnet *ifp;
727
728         sc = device_get_softc(dev);
729         ifp = sc->vtnet_ifp;
730
731         lwkt_serialize_enter(&sc->vtnet_slz);
732         if (ifp->if_flags & IFF_UP)
733                 vtnet_init_locked(sc);
734         sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
735         lwkt_serialize_exit(&sc->vtnet_slz);
736
737         return (0);
738 }
739
740 static int
741 vtnet_shutdown(device_t dev)
742 {
743
744         /*
745          * Suspend already does all of what we need to
746          * do here; we just never expect to be resumed.
747          */
748         return (vtnet_suspend(dev));
749 }
750
751 static int
752 vtnet_config_change(device_t dev)
753 {
754         struct vtnet_softc *sc;
755
756         sc = device_get_softc(dev);
757
758         taskqueue_enqueue(taskqueue_thread[mycpuid], &sc->vtnet_cfgchg_task);
759
760         return (1);
761 }
762
763 static void
764 vtnet_negotiate_features(struct vtnet_softc *sc)
765 {
766         device_t dev;
767         uint64_t mask, features;
768
769         dev = sc->vtnet_dev;
770         mask = 0;
771
772         if (vtnet_csum_disable)
773                 mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
774
775         /*
776          * TSO and LRO are only available when their corresponding
777          * checksum offload feature is also negotiated.
778          */
779
780         if (vtnet_csum_disable || vtnet_tso_disable)
781                 mask |= VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 |
782                     VIRTIO_NET_F_HOST_ECN;
783
784         if (vtnet_csum_disable || vtnet_lro_disable)
785                 mask |= VTNET_LRO_FEATURES;
786
787         features = VTNET_FEATURES & ~mask;
788         features |= VIRTIO_F_NOTIFY_ON_EMPTY;
789         sc->vtnet_features = virtio_negotiate_features(dev, features);
790 }
791
792 static int
793 vtnet_alloc_virtqueues(struct vtnet_softc *sc)
794 {
795         device_t dev;
796         struct vq_alloc_info vq_info[3];
797         int nvqs, rxsegs;
798
799         dev = sc->vtnet_dev;
800         nvqs = 2;
801
802         /*
803          * Indirect descriptors are not needed for the Rx
804          * virtqueue when mergeable buffers are negotiated.
805          * The header is placed inline with the data, not
806          * in a separate descriptor, and mbuf clusters are
807          * always physically contiguous.
808          */
809         if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
810                 rxsegs = sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG ?
811                     VTNET_MAX_RX_SEGS : VTNET_MIN_RX_SEGS;
812         } else
813                 rxsegs = 0;
814
815         VQ_ALLOC_INFO_INIT(&vq_info[0], rxsegs,
816             vtnet_rx_vq_intr, sc, &sc->vtnet_rx_vq,
817             "%s receive", device_get_nameunit(dev));
818
819         VQ_ALLOC_INFO_INIT(&vq_info[1], VTNET_MAX_TX_SEGS,
820             vtnet_tx_vq_intr, sc, &sc->vtnet_tx_vq,
821             "%s transmit", device_get_nameunit(dev));
822
823         if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
824                 nvqs++;
825
826                 VQ_ALLOC_INFO_INIT(&vq_info[2], 0, NULL, NULL,
827                     &sc->vtnet_ctrl_vq, "%s control",
828                     device_get_nameunit(dev));
829         }
830
831         return (virtio_alloc_virtqueues(dev, 0, nvqs, vq_info));
832 }
833
834 static void
835 vtnet_set_hwaddr(struct vtnet_softc *sc)
836 {
837         device_t dev;
838
839         dev = sc->vtnet_dev;
840
841         if ((sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) &&
842             (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)) {
843                 if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0)
844                         device_printf(dev, "unable to set MAC address\n");
845         } else if (sc->vtnet_flags & VTNET_FLAG_MAC) {
846                 virtio_write_device_config(dev,
847                     offsetof(struct virtio_net_config, mac),
848                     sc->vtnet_hwaddr, ETHER_ADDR_LEN);
849         }
850 }
851
852 static void
853 vtnet_get_hwaddr(struct vtnet_softc *sc)
854 {
855         device_t dev;
856
857         dev = sc->vtnet_dev;
858
859         if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
860                 /*
861                  * Generate a random locally administered unicast address.
862                  *
863                  * It would be nice to generate the same MAC address across
864                  * reboots, but it seems all the hosts currently available
865                  * support the MAC feature, so this isn't too important.
866                  */
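                /*
                 * 0xB2 makes the generated address unicast (bit 0x01 clear)
                 * and locally administered (bit 0x02 set).
                 */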
867                 sc->vtnet_hwaddr[0] = 0xB2;
868                 karc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1);
869                 vtnet_set_hwaddr(sc);
870                 return;
871         }
872
873         virtio_read_device_config(dev,
874             offsetof(struct virtio_net_config, mac),
875             sc->vtnet_hwaddr, ETHER_ADDR_LEN);
876 }
877
878 static int
879 vtnet_is_link_up(struct vtnet_softc *sc)
880 {
881         device_t dev;
882         struct ifnet *ifp;
883         uint16_t status;
884
885         dev = sc->vtnet_dev;
886         ifp = sc->vtnet_ifp;
887
888         ASSERT_SERIALIZED(&sc->vtnet_slz);
889
890         status = virtio_read_dev_config_2(dev,
891                         offsetof(struct virtio_net_config, status));
892
893         return ((status & VIRTIO_NET_S_LINK_UP) != 0);
894 }
895
896 static void
897 vtnet_update_link_status(struct vtnet_softc *sc)
898 {
899         device_t dev;
900         struct ifnet *ifp;
901         struct ifaltq_subque *ifsq;
902         int link;
903
904         dev = sc->vtnet_dev;
905         ifp = sc->vtnet_ifp;
906         ifsq = ifq_get_subq_default(&ifp->if_snd);
907
908         link = vtnet_is_link_up(sc);
909
910         if (link && ((sc->vtnet_flags & VTNET_FLAG_LINK) == 0)) {
911                 sc->vtnet_flags |= VTNET_FLAG_LINK;
912                 if (bootverbose)
913                         device_printf(dev, "Link is up\n");
914                 ifp->if_link_state = LINK_STATE_UP;
915                 if_link_state_change(ifp);
916                 if (!ifsq_is_empty(ifsq))
917                         vtnet_start_locked(ifp, ifsq);
918         } else if (!link && (sc->vtnet_flags & VTNET_FLAG_LINK)) {
919                 sc->vtnet_flags &= ~VTNET_FLAG_LINK;
920                 if (bootverbose)
921                         device_printf(dev, "Link is down\n");
922
923                 ifp->if_link_state = LINK_STATE_DOWN;
924                 if_link_state_change(ifp);
925         }
926 }
927
928 #if 0
929 static void
930 vtnet_watchdog(struct vtnet_softc *sc)
931 {
932         struct ifnet *ifp;
933
934         ifp = sc->vtnet_ifp;
935
936 #ifdef VTNET_TX_INTR_MODERATION
937         vtnet_txeof(sc);
938 #endif
939
940         if (sc->vtnet_watchdog_timer == 0 || --sc->vtnet_watchdog_timer)
941                 return;
942
943         if_printf(ifp, "watchdog timeout -- resetting\n");
944 #ifdef VTNET_DEBUG
945         virtqueue_dump(sc->vtnet_tx_vq);
946 #endif
947         ifp->if_oerrors++;
948         ifp->if_flags &= ~IFF_RUNNING;
949         vtnet_init_locked(sc);
950 }
951 #endif
952
953 static void
954 vtnet_config_change_task(void *arg, int pending)
955 {
956         struct vtnet_softc *sc;
957
958         sc = arg;
959
960         lwkt_serialize_enter(&sc->vtnet_slz);
961         vtnet_update_link_status(sc);
962         lwkt_serialize_exit(&sc->vtnet_slz);
963 }
964
965 static int
966 vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data,struct ucred *cr)
967 {
968         struct vtnet_softc *sc;
969         struct ifreq *ifr;
970         int reinit, mask, error;
971
972         sc = ifp->if_softc;
973         ifr = (struct ifreq *) data;
974         reinit = 0;
975         error = 0;
976
977         switch (cmd) {
978         case SIOCSIFMTU:
979                 if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VTNET_MAX_MTU)
980                         error = EINVAL;
981                 else if (ifp->if_mtu != ifr->ifr_mtu) {
982                         lwkt_serialize_enter(&sc->vtnet_slz);
983                         error = vtnet_change_mtu(sc, ifr->ifr_mtu);
984                         lwkt_serialize_exit(&sc->vtnet_slz);
985                 }
986                 break;
987
988         case SIOCSIFFLAGS:
989                 lwkt_serialize_enter(&sc->vtnet_slz);
990                 if ((ifp->if_flags & IFF_UP) == 0) {
991                         if (ifp->if_flags & IFF_RUNNING)
992                                 vtnet_stop(sc);
993                 } else if (ifp->if_flags & IFF_RUNNING) {
994                         if ((ifp->if_flags ^ sc->vtnet_if_flags) &
995                             (IFF_PROMISC | IFF_ALLMULTI)) {
996                                 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
997                                         vtnet_rx_filter(sc);
998                                 else
999                                         error = ENOTSUP;
1000                         }
1001                 } else
1002                         vtnet_init_locked(sc);
1003
1004                 if (error == 0)
1005                         sc->vtnet_if_flags = ifp->if_flags;
1006                 lwkt_serialize_exit(&sc->vtnet_slz);
1007                 break;
1008
1009         case SIOCADDMULTI:
1010         case SIOCDELMULTI:
1011                 lwkt_serialize_enter(&sc->vtnet_slz);
1012                 if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) &&
1013                     (ifp->if_flags & IFF_RUNNING))
1014                         vtnet_rx_filter_mac(sc);
1015                 lwkt_serialize_exit(&sc->vtnet_slz);
1016                 break;
1017
1018         case SIOCSIFMEDIA:
1019         case SIOCGIFMEDIA:
1020                 error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
1021                 break;
1022
1023         case SIOCSIFCAP:
1024                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1025
1026                 lwkt_serialize_enter(&sc->vtnet_slz);
1027
1028                 if (mask & IFCAP_TXCSUM) {
1029                         ifp->if_capenable ^= IFCAP_TXCSUM;
1030                         if (ifp->if_capenable & IFCAP_TXCSUM)
1031                                 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
1032                         else
1033                                 ifp->if_hwassist &= ~VTNET_CSUM_OFFLOAD;
1034                 }
1035
1036                 if (mask & IFCAP_TSO4) {
1037                         ifp->if_capenable ^= IFCAP_TSO4;
1038                         if (ifp->if_capenable & IFCAP_TSO4)
1039                                 ifp->if_hwassist |= CSUM_TSO;
1040                         else
1041                                 ifp->if_hwassist &= ~CSUM_TSO;
1042                 }
1043
1044                 if (mask & IFCAP_RXCSUM) {
1045                         ifp->if_capenable ^= IFCAP_RXCSUM;
1046                         reinit = 1;
1047                 }
1048
1049                 if (mask & IFCAP_LRO) {
1050                         ifp->if_capenable ^= IFCAP_LRO;
1051                         reinit = 1;
1052                 }
1053
1054                 if (mask & IFCAP_VLAN_HWFILTER) {
1055                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1056                         reinit = 1;
1057                 }
1058
1059                 if (mask & IFCAP_VLAN_HWTSO)
1060                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1061
1062                 if (mask & IFCAP_VLAN_HWTAGGING)
1063                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1064
1065                 if (reinit && (ifp->if_flags & IFF_RUNNING)) {
1066                         ifp->if_flags &= ~IFF_RUNNING;
1067                         vtnet_init_locked(sc);
1068                 }
1069                 //VLAN_CAPABILITIES(ifp);
1070
1071                 lwkt_serialize_exit(&sc->vtnet_slz);
1072                 break;
1073
1074         default:
1075                 error = ether_ioctl(ifp, cmd, data);
1076                 break;
1077         }
1078
1079         return (error);
1080 }
1081
1082 static int
1083 vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
1084 {
1085         struct ifnet *ifp;
1086         int new_frame_size, clsize;
1087
1088         ifp = sc->vtnet_ifp;
1089
1090         if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1091                 new_frame_size = sizeof(struct vtnet_rx_header) +
1092                     sizeof(struct ether_vlan_header) + new_mtu;
1093
1094                 if (new_frame_size > MJUM9BYTES)
1095                         return (EINVAL);
1096
1097                 if (new_frame_size <= MCLBYTES)
1098                         clsize = MCLBYTES;
1099                 else
1100                         clsize = MJUM9BYTES;
1101         } else {
1102                 new_frame_size = sizeof(struct virtio_net_hdr_mrg_rxbuf) +
1103                     sizeof(struct ether_vlan_header) + new_mtu;
1104
1105                 if (new_frame_size <= MCLBYTES)
1106                         clsize = MCLBYTES;
1107                 else
1108                         clsize = MJUMPAGESIZE;
1109         }
1110
1111         sc->vtnet_rx_mbuf_size = clsize;
1112         sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc);
1113         KASSERT(sc->vtnet_rx_mbuf_count < VTNET_MAX_RX_SEGS,
1114             ("too many rx mbufs: %d", sc->vtnet_rx_mbuf_count));
1115
1116         ifp->if_mtu = new_mtu;
1117
1118         if (ifp->if_flags & IFF_RUNNING) {
1119                 ifp->if_flags &= ~IFF_RUNNING;
1120                 vtnet_init_locked(sc);
1121         }
1122
1123         return (0);
1124 }
1125
1126 static int
1127 vtnet_init_rx_vq(struct vtnet_softc *sc)
1128 {
1129         struct virtqueue *vq;
1130         int nbufs, error;
1131
1132         vq = sc->vtnet_rx_vq;
1133         nbufs = 0;
1134         error = ENOSPC;
1135
1136         while (!virtqueue_full(vq)) {
1137                 if ((error = vtnet_newbuf(sc)) != 0)
1138                         break;
1139                 nbufs++;
1140         }
1141
1142         if (nbufs > 0) {
1143                 virtqueue_notify(vq, &sc->vtnet_slz);
1144
1145                 /*
1146                  * EMSGSIZE signifies the virtqueue did not have enough
1147                  * entries available to hold the last mbuf. This is not
1148                  * an error. We should not get ENOSPC since we check if
1149                  * the virtqueue is full before attempting to add a
1150                  * buffer.
1151                  */
1152                 if (error == EMSGSIZE)
1153                         error = 0;
1154         }
1155
1156         return (error);
1157 }
1158
1159 static void
1160 vtnet_free_rx_mbufs(struct vtnet_softc *sc)
1161 {
1162         struct virtqueue *vq;
1163         struct mbuf *m;
1164         int last;
1165
1166         vq = sc->vtnet_rx_vq;
1167         last = 0;
1168
1169         while ((m = virtqueue_drain(vq, &last)) != NULL)
1170                 m_freem(m);
1171
1172         KASSERT(virtqueue_empty(vq), ("mbufs remaining in Rx Vq"));
1173 }
1174
1175 static void
1176 vtnet_free_tx_mbufs(struct vtnet_softc *sc)
1177 {
1178         struct virtqueue *vq;
1179         struct vtnet_tx_header *txhdr;
1180         int last;
1181
1182         vq = sc->vtnet_tx_vq;
1183         last = 0;
1184
1185         while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
1186                 m_freem(txhdr->vth_mbuf);
1187         }
1188
1189         KASSERT(virtqueue_empty(vq), ("mbufs remaining in Tx Vq"));
1190 }
1191
1192 static void
1193 vtnet_free_ctrl_vq(struct vtnet_softc *sc)
1194 {
1195         /*
1196          * The control virtqueue is only polled, therefore
1197          * it should already be empty.
1198          */
1199         KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq),
1200                 ("Ctrl Vq not empty"));
1201 }
1202
1203 static struct mbuf *
1204 vtnet_alloc_rxbuf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1205 {
1206         struct mbuf *m_head, *m_tail, *m;
1207         int i, clsize;
1208
1209         clsize = sc->vtnet_rx_mbuf_size;
1210
1211         /*
              * Use m_getcl() instead of m_getjcl(); see the comment around line
              * 2398 of if_mxge.c. XXX m_getcl() only returns MCLBYTES clusters,
              * so this assumes vtnet_rx_mbuf_size is still MCLBYTES.
              */
1212         /* m_head = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, clsize); */
1213         m_head = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1214         if (m_head == NULL)
1215                 goto fail;
1216
1217         m_head->m_len = clsize;
1218         m_tail = m_head;
1219
1220         if (nbufs > 1) {
1221                 KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1222                         ("chained Rx mbuf requested without LRO_NOMRG"));
1223
1224                 for (i = 0; i < nbufs - 1; i++) {
1225                         /* m = m_getjcl(M_DONTWAIT, MT_DATA, 0, clsize); */
1226                         m = m_getcl(M_NOWAIT, MT_DATA, 0);
1227                         if (m == NULL)
1228                                 goto fail;
1229
1230                         m->m_len = clsize;
1231                         m_tail->m_next = m;
1232                         m_tail = m;
1233                 }
1234         }
1235
1236         if (m_tailp != NULL)
1237                 *m_tailp = m_tail;
1238
1239         return (m_head);
1240
1241 fail:
1242         sc->vtnet_stats.mbuf_alloc_failed++;
1243         m_freem(m_head);
1244
1245         return (NULL);
1246 }
1247
1248 static int
1249 vtnet_replace_rxbuf(struct vtnet_softc *sc, struct mbuf *m0, int len0)
1250 {
1251         struct mbuf *m, *m_prev;
1252         struct mbuf *m_new, *m_tail;
1253         int len, clsize, nreplace, error;
1254
1255         m = m0;
1256         m_prev = NULL;
1257         len = len0;
1258
1259         m_tail = NULL;
1260         clsize = sc->vtnet_rx_mbuf_size;
1261         nreplace = 0;
1262
1263         if (m->m_next != NULL)
1264                 KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1265                     ("chained Rx mbuf without LRO_NOMRG"));
1266
1267         /*
1268          * Since LRO_NOMRG mbuf chains are so large, we want to avoid
1269          * allocating an entire chain for each received frame. When
1270          * the received frame's length is less than that of the chain,
1271          * the unused mbufs are reassigned to the new chain.
1272          */
1273         while (len > 0) {
1274                 /*
1275                  * Something is seriously wrong if we received
1276                  * a frame larger than the mbuf chain. Drop it.
1277                  */
1278                 if (m == NULL) {
1279                         sc->vtnet_stats.rx_frame_too_large++;
1280                         return (EMSGSIZE);
1281                 }
1282
1283                 KASSERT(m->m_len == clsize,
1284                     ("mbuf length not expected cluster size: %d",
1285                     m->m_len));
1286
1287                 m->m_len = MIN(m->m_len, len);
1288                 len -= m->m_len;
1289
1290                 m_prev = m;
1291                 m = m->m_next;
1292                 nreplace++;
1293         }
1294
1295         KASSERT(m_prev != NULL, ("m_prev == NULL"));
1296         KASSERT(nreplace <= sc->vtnet_rx_mbuf_count,
1297                 ("too many replacement mbufs: %d/%d", nreplace,
1298                 sc->vtnet_rx_mbuf_count));
1299
1300         m_new = vtnet_alloc_rxbuf(sc, nreplace, &m_tail);
1301         if (m_new == NULL) {
1302                 m_prev->m_len = clsize;
1303                 return (ENOBUFS);
1304         }
1305
1306         /*
1307          * Move unused mbufs, if any, from the original chain
1308          * onto the end of the new chain.
1309          */
1310         if (m_prev->m_next != NULL) {
1311                 m_tail->m_next = m_prev->m_next;
1312                 m_prev->m_next = NULL;
1313         }
1314
1315         error = vtnet_enqueue_rxbuf(sc, m_new);
1316         if (error) {
1317                 /*
1318                  * BAD! We could not enqueue the replacement mbuf chain. We
1319                  * must restore the m0 chain to the original state if it was
1320                  * modified so we can subsequently discard it.
1321                  *
1322                  * NOTE: The replacement is supposed to be an identical copy
1323                  * of the one just dequeued, so this is an unexpected error.
1324                  */
1325                 sc->vtnet_stats.rx_enq_replacement_failed++;
1326
1327                 if (m_tail->m_next != NULL) {
1328                         m_prev->m_next = m_tail->m_next;
1329                         m_tail->m_next = NULL;
1330                 }
1331
1332                 m_prev->m_len = clsize;
1333                 m_freem(m_new);
1334         }
1335
1336         return (error);
1337 }
1338
1339 static int
1340 vtnet_newbuf(struct vtnet_softc *sc)
1341 {
1342         struct mbuf *m;
1343         int error;
1344
1345         m = vtnet_alloc_rxbuf(sc, sc->vtnet_rx_mbuf_count, NULL);
1346         if (m == NULL)
1347                 return (ENOBUFS);
1348
1349         error = vtnet_enqueue_rxbuf(sc, m);
1350         if (error)
1351                 m_freem(m);
1352
1353         return (error);
1354 }
1355
1356 static void
1357 vtnet_discard_merged_rxbuf(struct vtnet_softc *sc, int nbufs)
1358 {
1359         struct virtqueue *vq;
1360         struct mbuf *m;
1361
1362         vq = sc->vtnet_rx_vq;
1363
1364         while (--nbufs > 0) {
1365                 if ((m = virtqueue_dequeue(vq, NULL)) == NULL)
1366                         break;
1367                 vtnet_discard_rxbuf(sc, m);
1368         }
1369 }
1370
1371 static void
1372 vtnet_discard_rxbuf(struct vtnet_softc *sc, struct mbuf *m)
1373 {
1374         int error;
1375
1376         /*
1377          * Requeue the discarded mbuf. This should always be
1378          * successful since it was just dequeued.
1379          */
1380         error = vtnet_enqueue_rxbuf(sc, m);
1381         KASSERT(error == 0, ("cannot requeue discarded mbuf"));
1382 }
1383
1384 static int
1385 vtnet_enqueue_rxbuf(struct vtnet_softc *sc, struct mbuf *m)
1386 {
1387         struct sglist sg;
1388         struct sglist_seg segs[VTNET_MAX_RX_SEGS];
1389         struct vtnet_rx_header *rxhdr;
1390         struct virtio_net_hdr *hdr;
1391         uint8_t *mdata;
1392         int offset, error;
1393
1394         ASSERT_SERIALIZED(&sc->vtnet_slz);
1395         if ((sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) == 0)
1396                 KASSERT(m->m_next == NULL, ("chained Rx mbuf"));
1397
1398         sglist_init(&sg, VTNET_MAX_RX_SEGS, segs);
1399
1400         mdata = mtod(m, uint8_t *);
1401         offset = 0;
1402
1403         if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1404                 rxhdr = (struct vtnet_rx_header *) mdata;
1405                 hdr = &rxhdr->vrh_hdr;
1406                 offset += sizeof(struct vtnet_rx_header);
1407
1408                 error = sglist_append(&sg, hdr, sc->vtnet_hdr_size);
1409                 KASSERT(error == 0, ("cannot add header to sglist"));
1410         }
1411
1412         error = sglist_append(&sg, mdata + offset, m->m_len - offset);
1413         if (error)
1414                 return (error);
1415
1416         if (m->m_next != NULL) {
1417                 error = sglist_append_mbuf(&sg, m->m_next);
1418                 if (error)
1419                         return (error);
1420         }
1421
1422         return (virtqueue_enqueue(sc->vtnet_rx_vq, m, &sg, 0, sg.sg_nseg));
1423 }
1424
1425 static void
1426 vtnet_vlan_tag_remove(struct mbuf *m)
1427 {
1428         struct ether_vlan_header *evl;
1429
1430         evl = mtod(m, struct ether_vlan_header *);
1431
1432         m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
1433         m->m_flags |= M_VLANTAG;
1434
1435         /* Strip the 802.1Q header. */
1436         bcopy((char *) evl, (char *) evl + ETHER_VLAN_ENCAP_LEN,
1437             ETHER_HDR_LEN - ETHER_TYPE_LEN);
1438         m_adj(m, ETHER_VLAN_ENCAP_LEN);
1439 }
1440
1441 /*
1442  * Alternative method of doing receive checksum offloading. Rather
1443  * than parsing the received frame down to the IP header, use the
1444  * csum_offset to determine which CSUM_* flags are appropriate. We
1445  * can get by with doing this only because the checksum offsets are
1446  * unique for the things we care about.
1447  */
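
/*
 * Concretely: uh_sum sits at offset 6 within the UDP header and th_sum at
 * offset 16 within the TCP header, so hdr->csum_offset alone is enough to
 * tell UDP and TCP apart in the switch below.
 */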
1448 static int
1449 vtnet_rx_csum(struct vtnet_softc *sc, struct mbuf *m,
1450     struct virtio_net_hdr *hdr)
1451 {
1452         struct ether_header *eh;
1453         struct ether_vlan_header *evh;
1454         struct udphdr *udp;
1455         int csum_len;
1456         uint16_t eth_type;
1457
1458         csum_len = hdr->csum_start + hdr->csum_offset;
1459
1460         if (csum_len < sizeof(struct ether_header) + sizeof(struct ip))
1461                 return (1);
1462         if (m->m_len < csum_len)
1463                 return (1);
1464
1465         eh = mtod(m, struct ether_header *);
1466         eth_type = ntohs(eh->ether_type);
1467         if (eth_type == ETHERTYPE_VLAN) {
1468                 evh = mtod(m, struct ether_vlan_header *);
1469                 eth_type = ntohs(evh->evl_proto);
1470         }
1471
1472         if (eth_type != ETHERTYPE_IP && eth_type != ETHERTYPE_IPV6) {
1473                 sc->vtnet_stats.rx_csum_bad_ethtype++;
1474                 return (1);
1475         }
1476
1477         /* Use the offset to determine the appropriate CSUM_* flags. */
1478         switch (hdr->csum_offset) {
1479         case offsetof(struct udphdr, uh_sum):
1480                 if (m->m_len < hdr->csum_start + sizeof(struct udphdr))
1481                         return (1);
1482                 udp = (struct udphdr *)(mtod(m, uint8_t *) + hdr->csum_start);
1483                 if (udp->uh_sum == 0)
1484                         return (0);
1485
1486                 /* FALLTHROUGH */
1487
1488         case offsetof(struct tcphdr, th_sum):
1489                 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1490                 m->m_pkthdr.csum_data = 0xFFFF;
1491                 break;
1492
1493         default:
1494                 sc->vtnet_stats.rx_csum_bad_offset++;
1495                 return (1);
1496         }
1497
1498         sc->vtnet_stats.rx_csum_offloaded++;
1499
1500         return (0);
1501 }
1502
1503 static int
1504 vtnet_rxeof_merged(struct vtnet_softc *sc, struct mbuf *m_head, int nbufs)
1505 {
1506         struct ifnet *ifp;
1507         struct virtqueue *vq;
1508         struct mbuf *m, *m_tail;
1509         int len;
1510
1511         ifp = sc->vtnet_ifp;
1512         vq = sc->vtnet_rx_vq;
1513         m_tail = m_head;
1514
1515         while (--nbufs > 0) {
1516                 m = virtqueue_dequeue(vq, &len);
1517                 if (m == NULL) {
1518                         ifp->if_ierrors++;
1519                         goto fail;
1520                 }
1521
1522                 if (vtnet_newbuf(sc) != 0) {
1523                         ifp->if_iqdrops++;
1524                         vtnet_discard_rxbuf(sc, m);
1525                         if (nbufs > 1)
1526                                 vtnet_discard_merged_rxbuf(sc, nbufs);
1527                         goto fail;
1528                 }
1529
1530                 if (m->m_len < len)
1531                         len = m->m_len;
1532
1533                 m->m_len = len;
1534                 m->m_flags &= ~M_PKTHDR;
1535
1536                 m_head->m_pkthdr.len += len;
1537                 m_tail->m_next = m;
1538                 m_tail = m;
1539         }
1540
1541         return (0);
1542
1543 fail:
1544         sc->vtnet_stats.rx_mergeable_failed++;
1545         m_freem(m_head);
1546
1547         return (1);
1548 }
1549
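/*
 * Service the receive virtqueue: dequeue up to 'count' completed
 * buffers, replace them with fresh mbufs, strip the VirtIO header,
 * apply VLAN and checksum offload results, and hand each frame to
 * if_input() with the serializer released. Returns EAGAIN when the
 * process limit was exhausted, otherwise 0. If rx_npktsp is not NULL,
 * it is filled with the number of packets passed up the stack.
 */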
1550 static int
1551 vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp)
1552 {
1553         struct virtio_net_hdr lhdr;
1554         struct ifnet *ifp;
1555         struct virtqueue *vq;
1556         struct mbuf *m;
1557         struct ether_header *eh;
1558         struct virtio_net_hdr *hdr;
1559         struct virtio_net_hdr_mrg_rxbuf *mhdr;
1560         int len, deq, nbufs, adjsz, rx_npkts;
1561
1562         ifp = sc->vtnet_ifp;
1563         vq = sc->vtnet_rx_vq;
1564         hdr = &lhdr;
1565         deq = 0;
1566         rx_npkts = 0;
1567
1568         ASSERT_SERIALIZED(&sc->vtnet_slz);
1569
1570         while (--count >= 0) {
1571                 m = virtqueue_dequeue(vq, &len);
1572                 if (m == NULL)
1573                         break;
1574                 deq++;
1575
1576                 if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
1577                         ifp->if_ierrors++;
1578                         vtnet_discard_rxbuf(sc, m);
1579                         continue;
1580                 }
1581
1582                 if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1583                         nbufs = 1;
1584                         adjsz = sizeof(struct vtnet_rx_header);
1585                         /*
1586                          * Account for our pad between the header and
1587                          * the actual start of the frame.
1588                          */
1589                         len += VTNET_RX_HEADER_PAD;
1590                 } else {
1591                         mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
1592                         nbufs = mhdr->num_buffers;
1593                         adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1594                 }
1595
1596                 if (vtnet_replace_rxbuf(sc, m, len) != 0) {
1597                         ifp->if_iqdrops++;
1598                         vtnet_discard_rxbuf(sc, m);
1599                         if (nbufs > 1)
1600                                 vtnet_discard_merged_rxbuf(sc, nbufs);
1601                         continue;
1602                 }
1603
1604                 m->m_pkthdr.len = len;
1605                 m->m_pkthdr.rcvif = ifp;
1606                 m->m_pkthdr.csum_flags = 0;
1607
1608                 if (nbufs > 1) {
1609                         if (vtnet_rxeof_merged(sc, m, nbufs) != 0)
1610                                 continue;
1611                 }
1612
1613                 ifp->if_ipackets++;
1614
1615                 /*
1616                  * Save copy of header before we strip it. For both mergeable
1617                  * and non-mergeable, the VirtIO header is placed first in the
1618                  * mbuf's data. We no longer need num_buffers, so always use a
1619                  * virtio_net_hdr.
1620                  */
1621                 memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
1622                 m_adj(m, adjsz);
1623
1624                 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1625                         eh = mtod(m, struct ether_header *);
1626                         if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1627                                 vtnet_vlan_tag_remove(m);
1628
1629                                 /*
1630                                  * With the 802.1Q header removed, update the
1631                                  * checksum starting location accordingly.
1632                                  */
1633                                 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1634                                         hdr->csum_start -=
1635                                             ETHER_VLAN_ENCAP_LEN;
1636                         }
1637                 }
1638

1639                 if ((ifp->if_capenable & IFCAP_RXCSUM) &&
1640                     (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
1641                         if (vtnet_rx_csum(sc, m, hdr) != 0)
1642                                 sc->vtnet_stats.rx_csum_failed++;
1643                 }
1644
1645                 lwkt_serialize_exit(&sc->vtnet_slz);
1646                 rx_npkts++;
1647                 ifp->if_input(ifp, m, NULL, -1);
1648                 lwkt_serialize_enter(&sc->vtnet_slz);
1649
1650                 /*
1651                  * The interface may have been stopped while we were
1652                  * passing the packet up the network stack.
1653                  */
1654                 if ((ifp->if_flags & IFF_RUNNING) == 0)
1655                         break;
1656         }
1657
1658         virtqueue_notify(vq, &sc->vtnet_slz);
1659
1660         if (rx_npktsp != NULL)
1661                 *rx_npktsp = rx_npkts;
1662
1663         return (count > 0 ? 0 : EAGAIN);
1664 }
1665
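/*
 * Receive interrupt task: drain the receive virtqueue and re-enable
 * the interrupt. If new buffers arrived while re-enabling, disable it
 * again and loop to reprocess.
 */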
1666 static void
1667 vtnet_rx_intr_task(void *arg)
1668 {
1669         struct vtnet_softc *sc;
1670         struct ifnet *ifp;
1671         int more;
1672
1673         sc = arg;
1674         ifp = sc->vtnet_ifp;
1675
1676 next:
1677 //      lwkt_serialize_enter(&sc->vtnet_slz);
1678
1679         if ((ifp->if_flags & IFF_RUNNING) == 0) {
1680                 vtnet_enable_rx_intr(sc);
1681 //              lwkt_serialize_exit(&sc->vtnet_slz);
1682                 return;
1683         }
1684
1685         more = vtnet_rxeof(sc, sc->vtnet_rx_process_limit, NULL);
1686         if (!more && vtnet_enable_rx_intr(sc) != 0) {
1687                 vtnet_disable_rx_intr(sc);
1688                 more = 1;
1689         }
1690
1691 //      lwkt_serialize_exit(&sc->vtnet_slz);
1692
1693         if (more) {
1694                 sc->vtnet_stats.rx_task_rescheduled++;
1695                 goto next;
1696         }
1697 }
1698
1699 static int
1700 vtnet_rx_vq_intr(void *xsc)
1701 {
1702         struct vtnet_softc *sc;
1703
1704         sc = xsc;
1705
1706         vtnet_disable_rx_intr(sc);
1707         vtnet_rx_intr_task(sc);
1708
1709         return (1);
1710 }
1711
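/*
 * Reclaim completed transmit descriptors: free the mbufs of finished
 * transmits, clear OACTIVE when anything was reclaimed, and stop the
 * watchdog once the virtqueue is empty.
 */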
1712 static void
1713 vtnet_txeof(struct vtnet_softc *sc)
1714 {
1715         struct virtqueue *vq;
1716         struct ifnet *ifp;
1717         struct vtnet_tx_header *txhdr;
1718         int deq;
1719
1720         vq = sc->vtnet_tx_vq;
1721         ifp = sc->vtnet_ifp;
1722         deq = 0;
1723
1724         ASSERT_SERIALIZED(&sc->vtnet_slz);
1725
1726         while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
1727                 deq++;
1728                 ifp->if_opackets++;
1729                 m_freem(txhdr->vth_mbuf);
1730         }
1731
1732         if (deq > 0) {
1733                 ifq_clr_oactive(&ifp->if_snd);
1734                 if (virtqueue_empty(vq))
1735                         sc->vtnet_watchdog_timer = 0;
1736         }
1737 }
1738
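/*
 * Fill in the VirtIO header for checksum and TSO offload of an
 * outgoing frame. The Ethernet and IP headers are pulled up as needed
 * to locate the checksum start offset; the mbuf chain may be replaced,
 * so the (possibly new) chain is returned, or NULL if it was dropped.
 */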
1739 static struct mbuf *
1740 vtnet_tx_offload(struct vtnet_softc *sc, struct mbuf *m,
1741     struct virtio_net_hdr *hdr)
1742 {
1743         struct ifnet *ifp;
1744         struct ether_header *eh;
1745         struct ether_vlan_header *evh;
1746         struct ip *ip;
1747         struct ip6_hdr *ip6;
1748         struct tcphdr *tcp;
1749         int ip_offset;
1750         uint16_t eth_type, csum_start;
1751         uint8_t ip_proto, gso_type;
1752
1753         ifp = sc->vtnet_ifp;
1754         M_ASSERTPKTHDR(m);
1755
1756         ip_offset = sizeof(struct ether_header);
1757         if (m->m_len < ip_offset) {
1758                 if ((m = m_pullup(m, ip_offset)) == NULL)
1759                         return (NULL);
1760         }
1761
1762         eh = mtod(m, struct ether_header *);
1763         eth_type = ntohs(eh->ether_type);
1764         if (eth_type == ETHERTYPE_VLAN) {
1765                 ip_offset = sizeof(struct ether_vlan_header);
1766                 if (m->m_len < ip_offset) {
1767                         if ((m = m_pullup(m, ip_offset)) == NULL)
1768                                 return (NULL);
1769                 }
1770                 evh = mtod(m, struct ether_vlan_header *);
1771                 eth_type = ntohs(evh->evl_proto);
1772         }
1773
1774         switch (eth_type) {
1775         case ETHERTYPE_IP:
1776                 if (m->m_len < ip_offset + sizeof(struct ip)) {
1777                         m = m_pullup(m, ip_offset + sizeof(struct ip));
1778                         if (m == NULL)
1779                                 return (NULL);
1780                 }
1781
1782                 ip = (struct ip *)(mtod(m, uint8_t *) + ip_offset);
1783                 ip_proto = ip->ip_p;
1784                 csum_start = ip_offset + (ip->ip_hl << 2);
1785                 gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1786                 break;
1787
1788         case ETHERTYPE_IPV6:
1789                 if (m->m_len < ip_offset + sizeof(struct ip6_hdr)) {
1790                         m = m_pullup(m, ip_offset + sizeof(struct ip6_hdr));
1791                         if (m == NULL)
1792                                 return (NULL);
1793                 }
1794
1795                 ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + ip_offset);
1796                 /*
1797                  * XXX Assume no extension headers are present. Presently,
1798                  * this will always be true in the case of TSO, and FreeBSD
1799                  * does not perform checksum offloading of IPv6 yet.
1800                  */
1801                 ip_proto = ip6->ip6_nxt;
1802                 csum_start = ip_offset + sizeof(struct ip6_hdr);
1803                 gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1804                 break;
1805
1806         default:
1807                 return (m);
1808         }
1809
1810         if (m->m_pkthdr.csum_flags & VTNET_CSUM_OFFLOAD) {
1811                 hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
1812                 hdr->csum_start = csum_start;
1813                 hdr->csum_offset = m->m_pkthdr.csum_data;
1814
1815                 sc->vtnet_stats.tx_csum_offloaded++;
1816         }
1817
1818         if (m->m_pkthdr.csum_flags & CSUM_TSO) {
1819                 if (ip_proto != IPPROTO_TCP)
1820                         return (m);
1821
1822                 if (m->m_len < csum_start + sizeof(struct tcphdr)) {
1823                         m = m_pullup(m, csum_start + sizeof(struct tcphdr));
1824                         if (m == NULL)
1825                                 return (NULL);
1826                 }
1827
1828                 tcp = (struct tcphdr *)(mtod(m, uint8_t *) + csum_start);
1829                 hdr->gso_type = gso_type;
1830                 hdr->hdr_len = csum_start + (tcp->th_off << 2);
1831                 hdr->gso_size = m->m_pkthdr.tso_segsz;
1832
1833                 if (tcp->th_flags & TH_CWR) {
1834                         /*
1835                          * Drop if we did not negotiate VIRTIO_NET_F_HOST_ECN.
1836                          * ECN support is only configurable globally with the
1837                          * net.inet.tcp.ecn.enable sysctl knob.
1838                          */
1839                         if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
1840                                 if_printf(ifp, "TSO with ECN not supported "
1841                                     "by host\n");
1842                                 m_freem(m);
1843                                 return (NULL);
1844                         }
1845
1846                         hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1847                 }
1848
1849                 sc->vtnet_stats.tx_tso_offloaded++;
1850         }
1851
1852         return (m);
1853 }
1854
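/*
 * Build the scatter/gather list for the VirtIO header and mbuf chain
 * and enqueue it on the transmit virtqueue. A chain with too many
 * segments is defragmented once; on failure the mbuf is freed and
 * ENOBUFS is returned.
 */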
1855 static int
1856 vtnet_enqueue_txbuf(struct vtnet_softc *sc, struct mbuf **m_head,
1857     struct vtnet_tx_header *txhdr)
1858 {
1859         struct sglist sg;
1860         struct sglist_seg segs[VTNET_MAX_TX_SEGS];
1861         struct virtqueue *vq;
1862         struct mbuf *m;
1863         int collapsed, error;
1864
1865         vq = sc->vtnet_tx_vq;
1866         m = *m_head;
1867         collapsed = 0;
1868
1869         sglist_init(&sg, VTNET_MAX_TX_SEGS, segs);
1870         error = sglist_append(&sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
1871         KASSERT(error == 0 && sg.sg_nseg == 1,
1872             ("cannot add header to sglist"));
1873
1874 again:
1875         error = sglist_append_mbuf(&sg, m);
1876         if (error) {
1877                 if (collapsed)
1878                         goto fail;
1879
1880                 //m = m_collapse(m, M_NOWAIT, VTNET_MAX_TX_SEGS - 1);
1881                 m = m_defrag(m, M_NOWAIT);
1882                 if (m == NULL)
1883                         goto fail;
1884
1885                 *m_head = m;
1886                 collapsed = 1;
1887                 goto again;
1888         }
1889
1890         txhdr->vth_mbuf = m;
1891
1892         return (virtqueue_enqueue(vq, txhdr, &sg, sg.sg_nseg, 0));
1893
1894 fail:
1895         m_freem(*m_head);
1896         *m_head = NULL;
1897
1898         return (ENOBUFS);
1899 }
1900
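/*
 * Software VLAN encapsulation: prepend room for the 802.1Q header,
 * shift the Ethernet header down, and insert the tag carried in the
 * mbuf packet header. Returns the (possibly replaced) mbuf, or NULL
 * on allocation failure.
 */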
1901 static struct mbuf *
1902 vtnet_vlan_tag_insert(struct mbuf *m)
1903 {
1904         struct mbuf *n;
1905         struct ether_vlan_header *evl;
1906
1907         if (M_WRITABLE(m) == 0) {
1908                 n = m_dup(m, M_NOWAIT);
1909                 m_freem(m);
1910                 if ((m = n) == NULL)
1911                         return (NULL);
1912         }
1913
1914         M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
1915         if (m == NULL)
1916                 return (NULL);
1917         if (m->m_len < sizeof(struct ether_vlan_header)) {
1918                 m = m_pullup(m, sizeof(struct ether_vlan_header));
1919                 if (m == NULL)
1920                         return (NULL);
1921         }
1922
1923         /* Insert 802.1Q header into the existing Ethernet header. */
1924         evl = mtod(m, struct ether_vlan_header *);
1925         bcopy((char *) evl + ETHER_VLAN_ENCAP_LEN,
1926               (char *) evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
1927         evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
1928         evl->evl_tag = htons(m->m_pkthdr.ether_vlantag);
1929         m->m_flags &= ~M_VLANTAG;
1930
1931         return (m);
1932 }
1933
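/*
 * Prepare a single frame for transmission: software-encapsulate any
 * VLAN tag, fill in the VirtIO header for offloads as needed, and
 * enqueue the result. The per-ring header slot index is advanced on
 * success.
 */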
1934 static int
1935 vtnet_encap(struct vtnet_softc *sc, struct mbuf **m_head)
1936 {
1937         struct vtnet_tx_header *txhdr;
1938         struct virtio_net_hdr *hdr;
1939         struct mbuf *m;
1940         int error;
1941
1942         txhdr = &sc->vtnet_txhdrarea[sc->vtnet_txhdridx];
1943         memset(txhdr, 0, sizeof(struct vtnet_tx_header));
1944
1945         /*
1946          * Always use the non-mergeable header to simplify things. When
1947          * the mergeable feature is negotiated, the num_buffers field
1948          * must be set to zero. We use vtnet_hdr_size later to enqueue
1949          * the correct header size to the host.
1950          */
1951         hdr = &txhdr->vth_uhdr.hdr;
1952         m = *m_head;
1953
1954         error = ENOBUFS;
1955
1956         if (m->m_flags & M_VLANTAG) {
1957                 //m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
1958                 m = vtnet_vlan_tag_insert(m);
1959                 if ((*m_head = m) == NULL)
1960                         goto fail;
1961                 m->m_flags &= ~M_VLANTAG;
1962         }
1963
1964         if (m->m_pkthdr.csum_flags != 0) {
1965                 m = vtnet_tx_offload(sc, m, hdr);
1966                 if ((*m_head = m) == NULL)
1967                         goto fail;
1968         }
1969
1970         error = vtnet_enqueue_txbuf(sc, m_head, txhdr);
1971         if (error == 0)
1972                 sc->vtnet_txhdridx =
1973                     (sc->vtnet_txhdridx + 1) % ((sc->vtnet_tx_size / 2) + 1);
1974 fail:
1975         return (error);
1976 }
1977
1978 static void
1979 vtnet_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
1980 {
1981         struct vtnet_softc *sc;
1982
1983         sc = ifp->if_softc;
1984
1985         ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
1986         lwkt_serialize_enter(&sc->vtnet_slz);
1987         vtnet_start_locked(ifp, ifsq);
1988         lwkt_serialize_exit(&sc->vtnet_slz);
1989 }
1990
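/*
 * Transmit as many queued frames as the virtqueue will accept. Must be
 * called with the serializer held; kicks the host and arms the
 * watchdog if anything was enqueued.
 */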
1991 static void
1992 vtnet_start_locked(struct ifnet *ifp, struct ifaltq_subque *ifsq)
1993 {
1994         struct vtnet_softc *sc;
1995         struct virtqueue *vq;
1996         struct mbuf *m0;
1997         int enq;
1998
1999         sc = ifp->if_softc;
2000         vq = sc->vtnet_tx_vq;
2001         enq = 0;
2002
2003         ASSERT_SERIALIZED(&sc->vtnet_slz);
2004
2005         if ((ifp->if_flags & IFF_RUNNING) == 0 ||
2006             (sc->vtnet_flags & VTNET_FLAG_LINK) == 0)
2007                 return;
2008
2009 #ifdef VTNET_TX_INTR_MODERATION
2010         if (virtqueue_nused(vq) >= sc->vtnet_tx_size / 2)
2011                 vtnet_txeof(sc);
2012 #endif
2013
2014         while (!ifsq_is_empty(ifsq)) {
2015                 if (virtqueue_full(vq)) {
2016                         ifq_set_oactive(&ifp->if_snd);
2017                         break;
2018                 }
2019
2020                 m0 = ifq_dequeue(&ifp->if_snd);
2021                 if (m0 == NULL)
2022                         break;
2023
2024                 if (vtnet_encap(sc, &m0) != 0) {
2025                         if (m0 == NULL)
2026                                 break;
2027                         ifq_prepend(&ifp->if_snd, m0);
2028                         ifq_set_oactive(&ifp->if_snd);
2029                         break;
2030                 }
2031
2032                 enq++;
2033                 ETHER_BPF_MTAP(ifp, m0);
2034         }
2035
2036         if (enq > 0) {
2037                 virtqueue_notify(vq, &sc->vtnet_slz);
2038                 sc->vtnet_watchdog_timer = VTNET_WATCHDOG_TIMEOUT;
2039         }
2040 }
2041
2042 static void
2043 vtnet_tick(void *xsc)
2044 {
2045         struct vtnet_softc *sc;
2046
2047         sc = xsc;
2048
2049 #if 0
2050         ASSERT_SERIALIZED(&sc->vtnet_slz);
2051 #ifdef VTNET_DEBUG
2052         virtqueue_dump(sc->vtnet_rx_vq);
2053         virtqueue_dump(sc->vtnet_tx_vq);
2054 #endif
2055
2056         vtnet_watchdog(sc);
2057         callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
2058 #endif
2059 }
2060
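/*
 * Transmit completion task: reclaim finished transmits, restart the
 * send queue if frames are pending, and re-enable the interrupt. If
 * more completions arrived while re-enabling, disable it and loop.
 */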
2061 static void
2062 vtnet_tx_intr_task(void *arg)
2063 {
2064         struct vtnet_softc *sc;
2065         struct ifnet *ifp;
2066         struct ifaltq_subque *ifsq;
2067
2068         sc = arg;
2069         ifp = sc->vtnet_ifp;
2070         ifsq = ifq_get_subq_default(&ifp->if_snd);
2071
2072 next:
2073 //      lwkt_serialize_enter(&sc->vtnet_slz);
2074
2075         if ((ifp->if_flags & IFF_RUNNING) == 0) {
2076                 vtnet_enable_tx_intr(sc);
2077 //              lwkt_serialize_exit(&sc->vtnet_slz);
2078                 return;
2079         }
2080
2081         vtnet_txeof(sc);
2082
2083         if (!ifsq_is_empty(ifsq))
2084                 vtnet_start_locked(ifp, ifsq);
2085
2086         if (vtnet_enable_tx_intr(sc) != 0) {
2087                 vtnet_disable_tx_intr(sc);
2088                 sc->vtnet_stats.tx_task_rescheduled++;
2089 //              lwkt_serialize_exit(&sc->vtnet_slz);
2090                 goto next;
2091         }
2092
2093 //      lwkt_serialize_exit(&sc->vtnet_slz);
2094 }
2095
2096 static int
2097 vtnet_tx_vq_intr(void *xsc)
2098 {
2099         struct vtnet_softc *sc;
2100
2101         sc = xsc;
2102
2103         vtnet_disable_tx_intr(sc);
2104         vtnet_tx_intr_task(sc);
2105
2106         return (1);
2107 }
2108
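/*
 * Stop the interface: disable interrupts, reset the host adapter
 * (which discards all virtqueue state), and release any mbufs still
 * held by the receive and transmit rings.
 */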
2109 static void
2110 vtnet_stop(struct vtnet_softc *sc)
2111 {
2112         device_t dev;
2113         struct ifnet *ifp;
2114
2115         dev = sc->vtnet_dev;
2116         ifp = sc->vtnet_ifp;
2117
2118         ASSERT_SERIALIZED(&sc->vtnet_slz);
2119
2120         sc->vtnet_watchdog_timer = 0;
2121         callout_stop(&sc->vtnet_tick_ch);
2122         ifq_clr_oactive(&ifp->if_snd);
2123         ifp->if_flags &= ~(IFF_RUNNING);
2124
2125         vtnet_disable_rx_intr(sc);
2126         vtnet_disable_tx_intr(sc);
2127
2128         /*
2129          * Stop the host VirtIO adapter. Note this will reset the host
2130          * adapter's state back to the pre-initialized state, so in
2131          * order to make the device usable again, we must drive it
2132          * through virtio_reinit() and virtio_reinit_complete().
2133          */
2134         virtio_stop(dev);
2135
2136         sc->vtnet_flags &= ~VTNET_FLAG_LINK;
2137
2138         vtnet_free_rx_mbufs(sc);
2139         vtnet_free_tx_mbufs(sc);
2140 }
2141
2142 static int
2143 vtnet_reinit(struct vtnet_softc *sc)
2144 {
2145         struct ifnet *ifp;
2146         uint64_t features;
2147
2148         ifp = sc->vtnet_ifp;
2149         features = sc->vtnet_features;
2150
2151         /*
2152          * Re-negotiate with the host, removing any disabled receive
2153          * features. Transmit features are disabled only on our side
2154          * via if_capenable and if_hwassist.
2155          */
2156
2157         if (ifp->if_capabilities & IFCAP_RXCSUM) {
2158                 if ((ifp->if_capenable & IFCAP_RXCSUM) == 0)
2159                         features &= ~VIRTIO_NET_F_GUEST_CSUM;
2160         }
2161
2162         if (ifp->if_capabilities & IFCAP_LRO) {
2163                 if ((ifp->if_capenable & IFCAP_LRO) == 0)
2164                         features &= ~VTNET_LRO_FEATURES;
2165         }
2166
2167         if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) {
2168                 if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
2169                         features &= ~VIRTIO_NET_F_CTRL_VLAN;
2170         }
2171
2172         return (virtio_reinit(sc->vtnet_dev, features));
2173 }
2174
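/*
 * Bring the interface up: reset and renegotiate with the host, program
 * the MAC address, refill the receive virtqueue, restore the Rx, MAC
 * and VLAN filters, enable interrupts, and mark the interface running.
 */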
2175 static void
2176 vtnet_init_locked(struct vtnet_softc *sc)
2177 {
2178         device_t dev;
2179         struct ifnet *ifp;
2180         int error;
2181
2182         dev = sc->vtnet_dev;
2183         ifp = sc->vtnet_ifp;
2184
2185         ASSERT_SERIALIZED(&sc->vtnet_slz);
2186
2187         if (ifp->if_flags & IFF_RUNNING)
2188                 return;
2189
2190         /* Stop host's adapter, cancel any pending I/O. */
2191         vtnet_stop(sc);
2192
2193         /* Reinitialize the host device. */
2194         error = vtnet_reinit(sc);
2195         if (error) {
2196                 device_printf(dev,
2197                     "reinitialization failed, stopping device...\n");
2198                 vtnet_stop(sc);
2199                 return;
2200         }
2201
2202         /* Update host with assigned MAC address. */
2203         bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
2204         vtnet_set_hwaddr(sc);
2205
2206         ifp->if_hwassist = 0;
2207         if (ifp->if_capenable & IFCAP_TXCSUM)
2208                 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
2209         if (ifp->if_capenable & IFCAP_TSO4)
2210                 ifp->if_hwassist |= CSUM_TSO;
2211
2212         error = vtnet_init_rx_vq(sc);
2213         if (error) {
2214                 device_printf(dev,
2215                     "cannot allocate mbufs for Rx virtqueue\n");
2216                 vtnet_stop(sc);
2217                 return;
2218         }
2219
2220         if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
2221                 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
2222                         /* Restore promiscuous and all-multicast modes. */
2223                         vtnet_rx_filter(sc);
2224
2225                         /* Restore filtered MAC addresses. */
2226                         vtnet_rx_filter_mac(sc);
2227                 }
2228
2229                 /* Restore VLAN filters. */
2230                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2231                         vtnet_rx_filter_vlan(sc);
2232         }
2233
2235         vtnet_enable_rx_intr(sc);
2236         vtnet_enable_tx_intr(sc);
2238
2239         ifp->if_flags |= IFF_RUNNING;
2240         ifq_clr_oactive(&ifp->if_snd);
2241
2242         virtio_reinit_complete(dev);
2243
2244         vtnet_update_link_status(sc);
2245         callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
2246 }
2247
2248 static void
2249 vtnet_init(void *xsc)
2250 {
2251         struct vtnet_softc *sc;
2252
2253         sc = xsc;
2254
2255         lwkt_serialize_enter(&sc->vtnet_slz);
2256         vtnet_init_locked(sc);
2257         lwkt_serialize_exit(&sc->vtnet_slz);
2258 }
2259
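/*
 * Submit a command on the control virtqueue and busy-poll for the
 * host's completion. The caller supplies an sglist with 'readable'
 * readable and 'writable' writable segments; only one control command
 * may be outstanding at a time.
 */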
2260 static void
2261 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
2262     struct sglist *sg, int readable, int writable)
2263 {
2264         struct virtqueue *vq;
2265         void *c;
2266
2267         vq = sc->vtnet_ctrl_vq;
2268
2269         ASSERT_SERIALIZED(&sc->vtnet_slz);
2270         KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ,
2271             ("no control virtqueue"));
2272         KASSERT(virtqueue_empty(vq),
2273             ("control command already enqueued"));
2274
2275         if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0)
2276                 return;
2277
2278         virtqueue_notify(vq, &sc->vtnet_slz);
2279
2280         /*
2281          * Poll until the command is complete. Previously, we would
2282          * sleep until the control virtqueue interrupt handler woke
2283          * us up, but dropping the serializer here leads to
2284          * synchronization difficulties.
2285          *
2286          * Furthermore, it appears QEMU/KVM only allocates three MSIX
2287          * vectors. Two of those vectors are needed for the Rx and Tx
2288          * virtqueues. We do not support sharing both a virtqueue and
2289          * the config changed notification on the same MSIX vector.
2290          */
2291         c = virtqueue_poll(vq, NULL);
2292         KASSERT(c == cookie, ("unexpected control command response"));
2293 }
2294
2295 static int
2296 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
2297 {
2298         struct {
2299                 struct virtio_net_ctrl_hdr hdr __aligned(2);
2300                 uint8_t pad1;
2301                 char aligned_hwaddr[ETHER_ADDR_LEN] __aligned(8);
2302                 uint8_t pad2;
2303                 uint8_t ack;
2304         } s;
2305         struct sglist_seg segs[3];
2306         struct sglist sg;
2307         int error;
2308
2309         s.hdr.class = VIRTIO_NET_CTRL_MAC;
2310         s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
2311         s.ack = VIRTIO_NET_ERR;
2312
2313         /* Copy the MAC address into physically contiguous memory. */
2314         memcpy(s.aligned_hwaddr, hwaddr, ETHER_ADDR_LEN);
2315
2316         sglist_init(&sg, 3, segs);
2317         error = 0;
2318         error |= sglist_append(&sg, &s.hdr,
2319             sizeof(struct virtio_net_ctrl_hdr));
2320         error |= sglist_append(&sg, s.aligned_hwaddr, ETHER_ADDR_LEN);
2321         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
2322         KASSERT(error == 0 && sg.sg_nseg == 3,
2323             ("%s: error %d adding set MAC msg to sglist", __func__, error));
2324
2325         vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
2326
2327         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
2328 }
2329
2330 static void
2331 vtnet_rx_filter(struct vtnet_softc *sc)
2332 {
2333         device_t dev;
2334         struct ifnet *ifp;
2335
2336         dev = sc->vtnet_dev;
2337         ifp = sc->vtnet_ifp;
2338
2339         ASSERT_SERIALIZED(&sc->vtnet_slz);
2340         KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
2341             ("CTRL_RX feature not negotiated"));
2342
2343         if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
2344                 device_printf(dev, "cannot %s promiscuous mode\n",
2345                     ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
2346
2347         if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
2348                 device_printf(dev, "cannot %s all-multicast mode\n",
2349                     ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
2350 }
2351
2352 static int
2353 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
2354 {
2355         struct virtio_net_ctrl_hdr hdr __aligned(2);
2356         struct sglist_seg segs[3];
2357         struct sglist sg;
2358         uint8_t onoff, ack;
2359         int error;
2360
2361         if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
2362                 return (ENOTSUP);
2363
2364         error = 0;
2365
2366         hdr.class = VIRTIO_NET_CTRL_RX;
2367         hdr.cmd = cmd;
2368         onoff = !!on;
2369         ack = VIRTIO_NET_ERR;
2370
2371         sglist_init(&sg, 3, segs);
2372         error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
2373         error |= sglist_append(&sg, &onoff, sizeof(uint8_t));
2374         error |= sglist_append(&sg, &ack, sizeof(uint8_t));
2375         KASSERT(error == 0 && sg.sg_nseg == 3,
2376             ("error adding Rx filter message to sglist"));
2377
2378         vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
2379
2380         return (ack == VIRTIO_NET_OK ? 0 : EIO);
2381 }
2382
2383 static int
2384 vtnet_set_promisc(struct vtnet_softc *sc, int on)
2385 {
2386
2387         return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
2388 }
2389
2390 static int
2391 vtnet_set_allmulti(struct vtnet_softc *sc, int on)
2392 {
2393
2394         return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
2395 }
2396
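/*
 * Program the host's unicast and multicast MAC filter tables from the
 * interface address lists. If either table would overflow
 * VTNET_MAX_MAC_ENTRIES, fall back to promiscuous or all-multicast
 * mode instead.
 */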
2397 static void
2398 vtnet_rx_filter_mac(struct vtnet_softc *sc)
2399 {
2400         struct virtio_net_ctrl_hdr hdr __aligned(2);
2401         struct vtnet_mac_filter *filter;
2402         struct sglist_seg segs[4];
2403         struct sglist sg;
2404         struct ifnet *ifp;
2405         struct ifaddr *ifa;
2406         struct ifaddr_container *ifac;
2407         struct ifmultiaddr *ifma;
2408         int ucnt, mcnt, promisc, allmulti, error;
2409         uint8_t ack;
2410
2411         ifp = sc->vtnet_ifp;
2412         ucnt = 0;
2413         mcnt = 0;
2414         promisc = 0;
2415         allmulti = 0;
2416
2417         ASSERT_SERIALIZED(&sc->vtnet_slz);
2418         KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
2419             ("%s: CTRL_RX feature not negotiated", __func__));
2420
2421         /* Use the MAC filtering table allocated in vtnet_attach. */
2422         filter = sc->vtnet_macfilter;
2423         memset(filter, 0, sizeof(struct vtnet_mac_filter));
2424
2425         /* Unicast MAC addresses: */
2426         //if_addr_rlock(ifp);
2427         TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
2428                 ifa = ifac->ifa;
2429                 if (ifa->ifa_addr->sa_family != AF_LINK)
2430                         continue;
2431                 else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
2432                     sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
2433                         continue;
2434                 else if (ucnt == VTNET_MAX_MAC_ENTRIES) {
2435                         promisc = 1;
2436                         break;
2437                 }
2438
2439                 bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
2440                     &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN);
2441                 ucnt++;
2442         }
2443         //if_addr_runlock(ifp);
2444
2445         if (promisc != 0) {
2446                 filter->vmf_unicast.nentries = 0;
2447                 if_printf(ifp, "more than %d MAC addresses assigned, "
2448                     "falling back to promiscuous mode\n",
2449                     VTNET_MAX_MAC_ENTRIES);
2450         } else
2451                 filter->vmf_unicast.nentries = ucnt;
2452
2453         /* Multicast MAC addresses: */
2454         //if_maddr_rlock(ifp);
2455         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2456                 if (ifma->ifma_addr->sa_family != AF_LINK)
2457                         continue;
2458                 else if (mcnt == VTNET_MAX_MAC_ENTRIES) {
2459                         allmulti = 1;
2460                         break;
2461                 }
2462
2463                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2464                     &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN);
2465                 mcnt++;
2466         }
2467         //if_maddr_runlock(ifp);
2468
2469         if (allmulti != 0) {
2470                 filter->vmf_multicast.nentries = 0;
2471                 if_printf(ifp, "more than %d multicast MAC addresses "
2472                     "assigned, falling back to all-multicast mode\n",
2473                     VTNET_MAX_MAC_ENTRIES);
2474         } else
2475                 filter->vmf_multicast.nentries = mcnt;
2476
2477         if (promisc != 0 && allmulti != 0)
2478                 goto out;
2479
2480         hdr.class = VIRTIO_NET_CTRL_MAC;
2481         hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
2482         ack = VIRTIO_NET_ERR;
2483
2484         sglist_init(&sg, 4, segs);
2485         error = 0;
2486         error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
2487         error |= sglist_append(&sg, &filter->vmf_unicast,
2488             sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN);
2489         error |= sglist_append(&sg, &filter->vmf_multicast,
2490             sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN);
2491         error |= sglist_append(&sg, &ack, sizeof(uint8_t));
2492         KASSERT(error == 0 && sg.sg_nseg == 4,
2493             ("%s: error %d adding MAC filter msg to sglist", __func__, error));
2494
2495         vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
2496
2497         if (ack != VIRTIO_NET_OK)
2498                 if_printf(ifp, "error setting host MAC filter table\n");
2499
2500 out:
2501         if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
2502                 if_printf(ifp, "cannot enable promiscuous mode\n");
2503         if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
2504                 if_printf(ifp, "cannot enable all-multicast mode\n");
2505 }
2506
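/*
 * Issue a single VIRTIO_NET_CTRL_VLAN add or delete command for 'tag'
 * over the control virtqueue.
 */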
2507 static int
2508 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
2509 {
2510         struct sglist_seg segs[3];
2511         struct sglist sg;
2512         struct {
2513                 struct virtio_net_ctrl_hdr hdr __aligned(2);
2514                 uint8_t pad1;
2515                 uint16_t tag;
2516                 uint8_t pad2;
2517                 uint8_t ack;
2518         } s;
2519         int error;
2520
2521         s.hdr.class = VIRTIO_NET_CTRL_VLAN;
2522         s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
2523         s.tag = tag;
2524         s.ack = VIRTIO_NET_ERR;
2525
2526         sglist_init(&sg, 3, segs);
2527         error = 0;
2528         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
2529         error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
2530         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
2531         KASSERT(error == 0 && sg.sg_nseg == 3,
2532             ("%s: error %d adding VLAN message to sglist", __func__, error));
2533
2534         vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
2535
2536         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
2537 }
2538
2539 static void
2540 vtnet_rx_filter_vlan(struct vtnet_softc *sc)
2541 {
2542         device_t dev;
2543         uint32_t w, mask;
2544         uint16_t tag;
2545         int i, nvlans, error;
2546
2547         ASSERT_SERIALIZED(&sc->vtnet_slz);
2548         KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
2549             ("VLAN_FILTER feature not negotiated"));
2550
2551         dev = sc->vtnet_dev;
2552         nvlans = sc->vtnet_nvlans;
2553         error = 0;
2554
2555         /* Enable filtering for each configured VLAN. */
2556         for (i = 0; i < VTNET_VLAN_SHADOW_SIZE && nvlans > 0; i++) {
2557                 w = sc->vtnet_vlan_shadow[i];
2558                 for (mask = 1, tag = i * 32; w != 0; mask <<= 1, tag++) {
2559                         if ((w & mask) != 0) {
2560                                 w &= ~mask;
2561                                 nvlans--;
2562                                 if (vtnet_exec_vlan_filter(sc, 1, tag) != 0)
2563                                         error++;
2564                         }
2565                 }
2566         }
2567
2568         KASSERT(nvlans == 0, ("VLAN count incorrect"));
2569         if (error)
2570                 device_printf(dev, "cannot restore VLAN filter table\n");
2571 }
2572
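/*
 * Track a VLAN tag in the software shadow table and, when hardware
 * filtering is enabled, mirror the change to the host filter table.
 * The tag maps to word (tag >> 5) and bit (tag & 0x1F); for example,
 * VLAN 100 lands in word 3, bit 4.
 */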
2573 static void
2574 vtnet_set_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
2575 {
2576         struct ifnet *ifp;
2577         int idx, bit;
2578
2579         KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
2580             ("VLAN_FILTER feature not negotiated"));
2581
2582         if ((tag == 0) || (tag > 4095))
2583                 return;
2584
2585         ifp = sc->vtnet_ifp;
2586         idx = (tag >> 5) & 0x7F;
2587         bit = tag & 0x1F;
2588
2589         lwkt_serialize_enter(&sc->vtnet_slz);
2590
2591         /* Update shadow VLAN table. */
2592         if (add) {
2593                 sc->vtnet_nvlans++;
2594                 sc->vtnet_vlan_shadow[idx] |= (1 << bit);
2595         } else {
2596                 sc->vtnet_nvlans--;
2597                 sc->vtnet_vlan_shadow[idx] &= ~(1 << bit);
2598         }
2599
2600         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
2601                 if (vtnet_exec_vlan_filter(sc, add, tag) != 0) {
2602                         device_printf(sc->vtnet_dev,
2603                             "cannot %s VLAN %d %s the host filter table\n",
2604                             add ? "add" : "remove", tag,
2605                             add ? "to" : "from");
2606                 }
2607         }
2608
2609         lwkt_serialize_exit(&sc->vtnet_slz);
2610 }
2611
2612 static void
2613 vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
2614 {
2615
2616         if (ifp->if_softc != arg)
2617                 return;
2618
2619         vtnet_set_vlan_filter(arg, 1, tag);
2620 }
2621
2622 static void
2623 vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
2624 {
2625
2626         if (ifp->if_softc != arg)
2627                 return;
2628
2629         vtnet_set_vlan_filter(arg, 0, tag);
2630 }
2631
2632 static int
2633 vtnet_ifmedia_upd(struct ifnet *ifp)
2634 {
2635         struct vtnet_softc *sc;
2636         struct ifmedia *ifm;
2637
2638         sc = ifp->if_softc;
2639         ifm = &sc->vtnet_media;
2640
2641         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
2642                 return (EINVAL);
2643
2644         return (0);
2645 }
2646
2647 static void
2648 vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
2649 {
2650         struct vtnet_softc *sc;
2651
2652         sc = ifp->if_softc;
2653
2654         ifmr->ifm_status = IFM_AVALID;
2655         ifmr->ifm_active = IFM_ETHER;
2656
2657         lwkt_serialize_enter(&sc->vtnet_slz);
2658         if (vtnet_is_link_up(sc) != 0) {
2659                 ifmr->ifm_status |= IFM_ACTIVE;
2660                 ifmr->ifm_active |= VTNET_MEDIATYPE;
2661         } else
2662                 ifmr->ifm_active |= IFM_NONE;
2663         lwkt_serialize_exit(&sc->vtnet_slz);
2664 }
2665
2666 static void
2667 vtnet_add_statistics(struct vtnet_softc *sc)
2668 {
2669         device_t dev;
2670         struct vtnet_statistics *stats;
2671         struct sysctl_ctx_list *ctx;
2672         struct sysctl_oid *tree;
2673         struct sysctl_oid_list *child;
2674
2675         dev = sc->vtnet_dev;
2676         stats = &sc->vtnet_stats;
2677         ctx = device_get_sysctl_ctx(dev);
2678         tree = device_get_sysctl_tree(dev);
2679         child = SYSCTL_CHILDREN(tree);
2680
2681         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_failed",
2682             CTLFLAG_RD, &stats->mbuf_alloc_failed,
2683             "Mbuf cluster allocation failures");
2684         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_frame_too_large",
2685             CTLFLAG_RD, &stats->rx_frame_too_large,
2686             "Received frame larger than the mbuf chain");
2687         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
2688             CTLFLAG_RD, &stats->rx_enq_replacement_failed,
2689             "Enqueuing the replacement receive mbuf failed");
2690         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_mergeable_failed",
2691             CTLFLAG_RD, &stats->rx_mergeable_failed,
2692             "Mergeable buffers receive failures");
2693         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
2694             CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
2695             "Received checksum offloaded buffer with unsupported "
2696             "Ethernet type");
2697         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_start",
2698             CTLFLAG_RD, &stats->rx_csum_bad_start,
2699             "Received checksum offloaded buffer with incorrect start offset");
2700         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
2701             CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
2702             "Received checksum offloaded buffer with incorrect IP protocol");
2703         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_offset",
2704             CTLFLAG_RD, &stats->rx_csum_bad_offset,
2705             "Received checksum offloaded buffer with incorrect offset");
2706         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_failed",
2707             CTLFLAG_RD, &stats->rx_csum_failed,
2708             "Received buffer checksum offload failed");
2709         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_offloaded",
2710             CTLFLAG_RD, &stats->rx_csum_offloaded,
2711             "Received buffer checksum offload succeeded");
2712         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_task_rescheduled",
2713             CTLFLAG_RD, &stats->rx_task_rescheduled,
2714             "Times the receive interrupt task rescheduled itself");
2715
2716         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_csum_offloaded",
2717             CTLFLAG_RD, &stats->tx_csum_offloaded,
2718             "Offloaded checksum of transmitted buffer");
2719         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_tso_offloaded",
2720             CTLFLAG_RD, &stats->tx_tso_offloaded,
2721             "Segmentation offload of transmitted buffer");
2722         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
2723             CTLFLAG_RD, &stats->tx_csum_bad_ethtype,
2724             "Aborted transmit of checksum offloaded buffer with unknown "
2725             "Ethernet type");
2726         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
2727             CTLFLAG_RD, &stats->tx_tso_bad_ethtype,
2728             "Aborted transmit of TSO buffer with unknown Ethernet type");
2729         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_task_rescheduled",
2730             CTLFLAG_RD, &stats->tx_task_rescheduled,
2731             "Times the transmit interrupt task rescheduled itself");
2732 }
2733
2734 static int
2735 vtnet_enable_rx_intr(struct vtnet_softc *sc)
2736 {
2737
2738         return (virtqueue_enable_intr(sc->vtnet_rx_vq));
2739 }
2740
2741 static void
2742 vtnet_disable_rx_intr(struct vtnet_softc *sc)
2743 {
2744
2745         virtqueue_disable_intr(sc->vtnet_rx_vq);
2746 }
2747
2748 static int
2749 vtnet_enable_tx_intr(struct vtnet_softc *sc)
2750 {
2751
2752 #ifdef VTNET_TX_INTR_MODERATION
2753         return (0);
2754 #else
2755         return (virtqueue_enable_intr(sc->vtnet_tx_vq));
2756 #endif
2757 }
2758
2759 static void
2760 vtnet_disable_tx_intr(struct vtnet_softc *sc)
2761 {
2762
2763         virtqueue_disable_intr(sc->vtnet_tx_vq);
2764 }