2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Sepherosa Ziehau <sepherosa@gmail.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #include <sys/param.h>
36 #include <sys/endian.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
40 #include <sys/serialize.h>
41 #include <sys/socket.h>
42 #include <sys/sockio.h>
43 #include <sys/sysctl.h>
45 #include <machine/md_var.h>
46 #include <machine/cothread.h>
48 #include <net/ethernet.h>
51 #include <net/if_arp.h>
52 #include <net/ifq_var.h>
54 #include <netinet/in_var.h>
57 #include <net/tap/if_tap.h>
65 #define VKE_DEVNAME "vke"
67 #define VKE_CHUNK 8 /* number of mbufs to queue before interrupting */
69 #define NETFIFOSIZE 256
70 #define NETFIFOMASK (NETFIFOSIZE -1)
71 #define NETFIFOINDEX(u) ((u) & NETFIFOMASK)
73 #define VKE_COTD_RUN 0
74 #define VKE_COTD_EXIT 1
75 #define VKE_COTD_DEAD 2
78 struct mbuf *array[NETFIFOSIZE];
82 typedef struct vke_fifo *fifo_t;
99 fifo_t sc_txfifo_done;
102 struct sysctl_ctx_list sc_sysctl_ctx;
103 struct sysctl_oid *sc_sysctl_tree;
105 int sc_tap_unit; /* unit of backend tap(4) */
106 in_addr_t sc_addr; /* address */
107 in_addr_t sc_mask; /* netmask */
110 static void vke_start(struct ifnet *);
111 static void vke_init(void *);
112 static int vke_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
114 static int vke_attach(const struct vknetif_info *, int);
115 static int vke_stop(struct vke_softc *);
116 static int vke_init_addr(struct ifnet *, in_addr_t, in_addr_t);
117 static void vke_tx_intr(cothread_t cotd);
118 static void vke_tx_thread(cothread_t cotd);
119 static void vke_rx_intr(cothread_t cotd);
120 static void vke_rx_thread(cothread_t cotd);
122 static int vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m);
123 static struct mbuf *vke_txfifo_dequeue(struct vke_softc *sc);
125 static int vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m);
126 static struct mbuf * vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm);
128 static struct mbuf *vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *nm);
129 static struct mbuf *vke_rxfifo_sniff(struct vke_softc *sc);
132 vke_sysinit(void *arg __unused)
136 KASSERT(NetifNum <= VKNETIF_MAX, ("too many netifs: %d", NetifNum));
139 for (i = 0; i < NetifNum; ++i) {
140 if (vke_attach(&NetifInfo[i], unit) == 0)
144 SYSINIT(vke, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, vke_sysinit, NULL);
147 * vke_txfifo_done_enqueue() - Add an mbuf to the transmit done fifo. Since
148 * the cothread cannot free transmit mbufs after processing we put them on
149 * the done fifo so the kernel can free them.
152 vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m)
154 fifo_t fifo = sc->sc_txfifo_done;
156 while (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex)) {
160 fifo->array[NETFIFOINDEX(fifo->windex)] = m;
167 * vke_txfifo_done_dequeue() - Remove an mbuf from the transmit done fifo.
170 vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm)
172 fifo_t fifo = sc->sc_txfifo_done;
175 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
178 m = fifo->array[NETFIFOINDEX(fifo->rindex)];
179 fifo->array[NETFIFOINDEX(fifo->rindex)] = nm;
186 * vke_txfifo_enqueue() - Add an mbuf to the transmit fifo.
189 vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m)
191 fifo_t fifo = sc->sc_txfifo;
193 if (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex))
196 fifo->array[NETFIFOINDEX(fifo->windex)] = m;
204 * vke_txfifo_dequeue() - Return next mbuf on the transmit fifo if one
208 vke_txfifo_dequeue(struct vke_softc *sc)
210 fifo_t fifo = sc->sc_txfifo;
213 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
216 m = fifo->array[NETFIFOINDEX(fifo->rindex)];
217 fifo->array[NETFIFOINDEX(fifo->rindex)] = NULL;
225 vke_txfifo_empty(struct vke_softc *sc)
227 fifo_t fifo = sc->sc_txfifo;
229 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
235 * vke_rxfifo_dequeue() - Return next mbuf on the receive fifo if one
236 * exists, replacing it with newm, which should point to a newly allocated
240 vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *newm)
242 fifo_t fifo = sc->sc_rxfifo;
245 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
248 m = fifo->array[NETFIFOINDEX(fifo->rindex)];
249 fifo->array[NETFIFOINDEX(fifo->rindex)] = newm;
256 * Return the next mbuf if available but do NOT remove it from the FIFO.
259 vke_rxfifo_sniff(struct vke_softc *sc)
261 fifo_t fifo = sc->sc_rxfifo;
264 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
267 m = fifo->array[NETFIFOINDEX(fifo->rindex)];
275 struct vke_softc *sc = xsc;
276 struct ifnet *ifp = &sc->arpcom.ac_if;
279 ASSERT_SERIALIZED(ifp->if_serializer);
283 ifp->if_flags |= IFF_RUNNING;
284 ifp->if_flags &= ~IFF_OACTIVE;
286 sc->sc_txfifo = kmalloc(sizeof(*sc->sc_txfifo), M_DEVBUF, M_WAITOK);
287 sc->sc_txfifo_done = kmalloc(sizeof(*sc->sc_txfifo_done), M_DEVBUF, M_WAITOK);
289 sc->sc_rxfifo = kmalloc(sizeof(*sc->sc_rxfifo), M_DEVBUF, M_WAITOK);
290 for (i = 0; i < NETFIFOSIZE; i++) {
291 sc->sc_rxfifo->array[i] = m_getcl(MB_WAIT, MT_DATA, M_PKTHDR);
292 sc->sc_txfifo->array[i] = NULL;
293 sc->sc_txfifo_done->array[i] = NULL;
296 sc->cotd_tx_exit = sc->cotd_rx_exit = VKE_COTD_RUN;
297 sc->cotd_tx = cothread_create(vke_tx_thread, vke_tx_intr, sc, "vke_tx");
298 sc->cotd_rx = cothread_create(vke_rx_thread, vke_rx_intr, sc, "vke_rx");
300 if (sc->sc_addr != 0) {
301 in_addr_t addr, mask;
307 * Make sure vkernel assigned
308 * address will not be added
314 vke_init_addr(ifp, addr, mask);
320 * Called from kernel.
322 * NOTE: We can't make any kernel callbacks while holding cothread lock
323 * because the cothread lock is not governed by the kernel scheduler
324 * (so mplock, tokens, etc will not be released).
327 vke_start(struct ifnet *ifp)
329 struct vke_softc *sc = ifp->if_softc;
331 cothread_t cotd = sc->cotd_tx;
334 ASSERT_SERIALIZED(ifp->if_serializer);
336 if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
340 while ((m = ifq_dequeue(&ifp->if_snd, NULL)) != NULL) {
341 if (vke_txfifo_enqueue(sc, m) != -1) {
342 if (count++ == VKE_CHUNK) {
343 cothread_lock(cotd, 0);
344 cothread_signal(cotd);
345 cothread_unlock(cotd, 0);
353 cothread_lock(cotd, 0);
354 cothread_signal(cotd);
355 cothread_unlock(cotd, 0);
360 vke_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
362 struct vke_softc *sc = ifp->if_softc;
365 ASSERT_SERIALIZED(ifp->if_serializer);
369 if (ifp->if_flags & IFF_UP) {
370 if ((ifp->if_flags & IFF_RUNNING) == 0)
373 if (ifp->if_flags & IFF_RUNNING)
382 case SIOCGIFSTATUS: {
383 struct ifstat *ifs = (struct ifstat *)data;
386 len = strlen(ifs->ascii);
387 if (len < sizeof(ifs->ascii)) {
388 ksnprintf(ifs->ascii + len, sizeof(ifs->ascii) - len,
389 "\tBacked by tap%d\n", sc->sc_tap_unit);
394 if (((struct ifaddr *)data)->ifa_addr->sa_family == AF_INET) {
396 * If we are explicitly requested to change address,
397 * we should invalidate address/netmask passed in
398 * from vkernel command line.
405 error = ether_ioctl(ifp, cmd, data);
412 vke_stop(struct vke_softc *sc)
414 struct ifnet *ifp = &sc->arpcom.ac_if;
417 ASSERT_SERIALIZED(ifp->if_serializer);
419 ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
423 cothread_lock(sc->cotd_tx, 0);
424 if (sc->cotd_tx_exit == VKE_COTD_RUN)
425 sc->cotd_tx_exit = VKE_COTD_EXIT;
426 cothread_signal(sc->cotd_tx);
427 cothread_unlock(sc->cotd_tx, 0);
428 cothread_delete(&sc->cotd_tx);
431 cothread_lock(sc->cotd_rx, 0);
432 if (sc->cotd_rx_exit == VKE_COTD_RUN)
433 sc->cotd_rx_exit = VKE_COTD_EXIT;
434 cothread_signal(sc->cotd_rx);
435 cothread_unlock(sc->cotd_rx, 0);
436 cothread_delete(&sc->cotd_rx);
439 for (i = 0; i < NETFIFOSIZE; i++) {
440 if (sc->sc_rxfifo && sc->sc_rxfifo->array[i]) {
441 m_freem(sc->sc_rxfifo->array[i]);
442 sc->sc_rxfifo->array[i] = NULL;
444 if (sc->sc_txfifo && sc->sc_txfifo->array[i]) {
445 m_freem(sc->sc_txfifo->array[i]);
446 sc->sc_txfifo->array[i] = NULL;
448 if (sc->sc_txfifo_done && sc->sc_txfifo_done->array[i]) {
449 m_freem(sc->sc_txfifo_done->array[i]);
450 sc->sc_txfifo_done->array[i] = NULL;
455 kfree(sc->sc_txfifo, M_DEVBUF);
456 sc->sc_txfifo = NULL;
459 if (sc->sc_txfifo_done) {
460 kfree(sc->sc_txfifo_done, M_DEVBUF);
461 sc->sc_txfifo_done = NULL;
465 kfree(sc->sc_rxfifo, M_DEVBUF);
466 sc->sc_rxfifo = NULL;
475 * vke_rx_intr() is the interrupt function for the receive cothread.
478 vke_rx_intr(cothread_t cotd)
482 struct vke_softc *sc = cotd->arg;
483 struct ifnet *ifp = &sc->arpcom.ac_if;
484 static int count = 0;
486 ifnet_serialize_all(ifp);
487 cothread_lock(cotd, 0);
489 if (sc->cotd_rx_exit != VKE_COTD_RUN) {
490 cothread_unlock(cotd, 0);
491 ifnet_deserialize_all(ifp);
494 cothread_unlock(cotd, 0);
496 while ((m = vke_rxfifo_sniff(sc)) != NULL) {
497 nm = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
499 vke_rxfifo_dequeue(sc, nm);
500 ifp->if_input(ifp, m);
501 if (count++ == VKE_CHUNK) {
502 cothread_lock(cotd, 0);
503 cothread_signal(cotd);
504 cothread_unlock(cotd, 0);
508 vke_rxfifo_dequeue(sc, m);
513 cothread_lock(cotd, 0);
514 cothread_signal(cotd);
515 cothread_unlock(cotd, 0);
517 ifnet_deserialize_all(ifp);
521 * vke_tx_intr() is the interrupt function for the transmit cothread.
522 * Calls vke_start() to handle processing transmit mbufs.
525 vke_tx_intr(cothread_t cotd)
527 struct vke_softc *sc = cotd->arg;
528 struct ifnet *ifp = &sc->arpcom.ac_if;
531 ifnet_serialize_all(ifp);
532 cothread_lock(cotd, 0);
533 if (sc->cotd_tx_exit != VKE_COTD_RUN) {
534 cothread_unlock(cotd, 0);
535 ifnet_deserialize_all(ifp);
538 cothread_unlock(cotd, 0);
541 * Free TX mbufs that have already been processed before starting new
542 * ones, to be pipeline friendly.
544 while ((m = vke_txfifo_done_dequeue(sc, NULL)) != NULL) {
548 if ((ifp->if_flags & IFF_RUNNING) == 0)
551 ifnet_deserialize_all(ifp);
555 * vke_rx_thread() is the body of the receive cothread.
558 vke_rx_thread(cothread_t cotd)
561 struct vke_softc *sc = cotd->arg;
562 struct ifnet *ifp = &sc->arpcom.ac_if;
563 fifo_t fifo = sc->sc_rxfifo;
569 /* Select timeout cannot be infinite since we need to check for
570 * the exit flag sc->cotd_rx_exit.
578 while (sc->cotd_rx_exit == VKE_COTD_RUN) {
580 * Wait for the RX FIFO to be loaded with
583 if (NETFIFOINDEX(fifo->windex + 1) ==
584 NETFIFOINDEX(fifo->rindex)) {
590 * Load data into the rx fifo
592 m = fifo->array[NETFIFOINDEX(fifo->windex)];
595 n = read(sc->sc_fd, mtod(m, void *), MCLBYTES);
598 m->m_pkthdr.rcvif = ifp;
599 m->m_pkthdr.len = m->m_len = n;
602 if (count++ == VKE_CHUNK) {
611 FD_SET(sc->sc_fd, &fdset);
613 if (select(sc->sc_fd + 1, &fdset, NULL, NULL, &tv) == -1) {
614 kprintf(VKE_DEVNAME "%d: select failed for "
615 "TAP device\n", sc->sc_unit);
621 sc->cotd_rx_exit = VKE_COTD_DEAD;
625 * vke_tx_thread() is the body of the transmit cothread.
628 vke_tx_thread(cothread_t cotd)
631 struct vke_softc *sc = cotd->arg;
632 struct ifnet *ifp = &sc->arpcom.ac_if;
635 while (sc->cotd_tx_exit == VKE_COTD_RUN) {
637 * Write outgoing packets to the TAP interface
639 m = vke_txfifo_dequeue(sc);
641 if (m->m_pkthdr.len <= MCLBYTES) {
642 m_copydata(m, 0, m->m_pkthdr.len, sc->sc_txbuf);
643 sc->sc_txbuf_len = m->m_pkthdr.len;
645 if (write(sc->sc_fd, sc->sc_txbuf,
646 sc->sc_txbuf_len) < 0) {
652 if (count++ == VKE_CHUNK) {
656 vke_txfifo_done_enqueue(sc, m);
662 cothread_lock(cotd, 1);
663 if (vke_txfifo_empty(sc))
665 cothread_unlock(cotd, 1);
669 sc->cotd_tx_exit = VKE_COTD_DEAD;
673 vke_attach(const struct vknetif_info *info, int unit)
675 struct vke_softc *sc;
677 struct tapinfo tapinfo;
678 uint8_t enaddr[ETHER_ADDR_LEN];
681 KKASSERT(info->tap_fd >= 0);
685 * This is only a TAP device if tap_unit is non-negative. If
686 * connecting to a virtual socket we generate a unique MAC.
688 if (info->tap_unit >= 0) {
689 if (ioctl(fd, TAPGIFINFO, &tapinfo) < 0) {
690 kprintf(VKE_DEVNAME "%d: ioctl(TAPGIFINFO) "
691 "failed: %s\n", unit, strerror(errno));
695 if (ioctl(fd, SIOCGIFADDR, enaddr) < 0) {
696 kprintf(VKE_DEVNAME "%d: ioctl(SIOCGIFADDR) "
697 "failed: %s\n", unit, strerror(errno));
701 int fd = open("/dev/urandom", O_RDONLY);
703 read(fd, enaddr + 2, 4);
706 enaddr[4] = (int)getpid() >> 8;
707 enaddr[5] = (int)getpid() & 255;
712 sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
714 sc->sc_txbuf = kmalloc(MCLBYTES, M_DEVBUF, M_WAITOK);
717 sc->sc_tap_unit = info->tap_unit;
718 sc->sc_addr = info->netif_addr;
719 sc->sc_mask = info->netif_mask;
721 ifp = &sc->arpcom.ac_if;
722 if_initname(ifp, VKE_DEVNAME, sc->sc_unit);
724 /* NB: after if_initname() */
725 sysctl_ctx_init(&sc->sc_sysctl_ctx);
726 sc->sc_sysctl_tree = SYSCTL_ADD_NODE(&sc->sc_sysctl_ctx,
727 SYSCTL_STATIC_CHILDREN(_hw),
728 OID_AUTO, ifp->if_xname,
730 if (sc->sc_sysctl_tree == NULL) {
731 kprintf(VKE_DEVNAME "%d: can't add sysctl node\n", unit);
733 SYSCTL_ADD_INT(&sc->sc_sysctl_ctx,
734 SYSCTL_CHILDREN(sc->sc_sysctl_tree),
735 OID_AUTO, "tap_unit",
736 CTLFLAG_RD, &sc->sc_tap_unit, 0,
737 "Backend tap(4) unit");
741 ifp->if_ioctl = vke_ioctl;
742 ifp->if_start = vke_start;
743 ifp->if_init = vke_init;
744 ifp->if_mtu = tapinfo.mtu;
745 ifp->if_baudrate = tapinfo.baudrate;
746 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
747 ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
748 ifq_set_ready(&ifp->if_snd);
752 ether_ifattach(ifp, enaddr, NULL);
754 if (bootverbose && sc->sc_addr != 0) {
755 if_printf(ifp, "pre-configured "
756 "address 0x%08x, netmask 0x%08x\n",
757 ntohl(sc->sc_addr), ntohl(sc->sc_mask));
764 vke_init_addr(struct ifnet *ifp, in_addr_t addr, in_addr_t mask)
766 struct ifaliasreq ifra;
767 struct sockaddr_in *sin;
770 ASSERT_SERIALIZED(ifp->if_serializer);
773 if_printf(ifp, "add pre-configured "
774 "address 0x%08x, netmask 0x%08x\n",
775 ntohl(addr), ntohl(mask));
778 bzero(&ifra, sizeof(ifra));
780 /* NB: no need to set ifaliasreq.ifra_name */
782 sin = (struct sockaddr_in *)&ifra.ifra_addr;
783 sin->sin_family = AF_INET;
784 sin->sin_len = sizeof(*sin);
785 sin->sin_addr.s_addr = addr;
788 sin = (struct sockaddr_in *)&ifra.ifra_mask;
789 sin->sin_len = sizeof(*sin);
790 sin->sin_addr.s_addr = mask;
794 * Temporarily release serializer, in_control() will hold
795 * it again before calling ifnet.if_ioctl().
797 ifnet_deserialize_all(ifp);
798 ret = in_control(NULL, SIOCAIFADDR, (caddr_t)&ifra, ifp, NULL);
799 ifnet_serialize_all(ifp);