2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Sepherosa Ziehau <sepherosa@gmail.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #include <sys/param.h>
36 #include <sys/endian.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
40 #include <sys/serialize.h>
41 #include <sys/socket.h>
42 #include <sys/sockio.h>
43 #include <sys/sysctl.h>
45 #include <machine/md_var.h>
46 #include <machine/cothread.h>
48 #include <net/ethernet.h>
51 #include <net/if_arp.h>
52 #include <net/ifq_var.h>
54 #include <netinet/in_var.h>
57 #include <net/tap/if_tap.h>
65 #define VKE_DEVNAME "vke" /* interface name passed to if_initname(); also prefixes kprintf diagnostics */
67 #define VKE_CHUNK 8 /* number of mbufs to queue before interrupting */
69 #define NETFIFOSIZE 256 /* slots per FIFO array; must remain a power of two for NETFIFOMASK to be a valid mask */
70 #define NETFIFOMASK (NETFIFOSIZE -1) /* low-bit mask derived from NETFIFOSIZE */
71 #define NETFIFOINDEX(u) ((u) & NETFIFOMASK) /* wrap an index value onto a slot of the FIFO array */
73 #define VKE_COTD_RUN 0 /* stored by vke_init(); the cothread bodies loop while their flag equals this */
74 #define VKE_COTD_EXIT 1 /* stored by vke_stop() (only if the flag was still RUN) to request exit */
75 #define VKE_COTD_DEAD 2 /* stored by vke_rx_thread()/vke_tx_thread() themselves */
78 struct mbuf *array[NETFIFOSIZE]; /* ring of mbuf pointers; slots are addressed through NETFIFOINDEX() */
82 typedef struct vke_fifo *fifo_t; /* pointer typedef: every fifo_t in this file is dereferenced with -> */
99 fifo_t sc_txfifo_done; /* mbufs the TX cothread has finished with, waiting for the kernel to free them */
102 struct sysctl_ctx_list sc_sysctl_ctx; /* initialised by sysctl_ctx_init() in vke_attach() */
103 struct sysctl_oid *sc_sysctl_tree; /* per-interface node added under _hw in vke_attach() */
105 int sc_tap_unit; /* unit of backend tap(4) */
106 in_addr_t sc_addr; /* pre-configured address copied from vknetif_info; 0 is treated as "none" */
107 in_addr_t sc_mask; /* pre-configured netmask copied from vknetif_info */
110 static void vke_start(struct ifnet *, struct ifaltq_subque *); /* installed as ifp->if_start */
111 static void vke_init(void *); /* installed as ifp->if_init */
112 static int vke_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *); /* installed as ifp->if_ioctl */
114 static int vke_attach(const struct vknetif_info *, int);
115 static int vke_stop(struct vke_softc *);
116 static int vke_init_addr(struct ifnet *, in_addr_t, in_addr_t);
117 static void vke_tx_intr(cothread_t cotd); /* interrupt function of the "vke_tx" cothread */
118 static void vke_tx_thread(cothread_t cotd); /* body of the "vke_tx" cothread */
119 static void vke_rx_intr(cothread_t cotd); /* interrupt function of the "vke_rx" cothread */
120 static void vke_rx_thread(cothread_t cotd); /* body of the "vke_rx" cothread */
122 static int vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m);
123 static struct mbuf *vke_txfifo_dequeue(struct vke_softc *sc);
125 static int vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m);
126 static struct mbuf * vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm);
128 static struct mbuf *vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *nm);
129 static struct mbuf *vke_rxfifo_sniff(struct vke_softc *sc);
132 vke_sysinit(void *arg __unused) /* boot-time hook: attaches one vke interface per NetifInfo[] entry */
136 KASSERT(NetifNum <= VKNETIF_MAX, ("too many netifs: %d", NetifNum)); /* sanity-check the configured interface count */
139 for (i = 0; i < NetifNum; ++i) { /* walk every described network interface */
140 if (vke_attach(&NetifInfo[i], unit) == 0) /* a return of 0 is the case acted upon here */
144 SYSINIT(vke, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, vke_sysinit, NULL); /* registers vke_sysinit at SI_SUB_DRIVERS / SI_ORDER_MIDDLE */
147 * vke_txfifo_done_enqueue() - Add an mbuf to the transmit done fifo. Since
148 * the cothread cannot free transmit mbufs after processing we put them on
149 * the done fifo so the kernel can free them.
152 vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m)
154 fifo_t fifo = sc->sc_txfifo_done;
156 while (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex)) { /* slot after windex is rindex: ring is full, so loop until that changes */
160 fifo->array[NETFIFOINDEX(fifo->windex)] = m; /* park m in the current write slot */
167 * vke_txfifo_done_dequeue() - Remove an mbuf from the transmit done fifo.
170 vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm)
172 fifo_t fifo = sc->sc_txfifo_done;
175 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex)) /* read slot equals write slot: nothing queued */
178 m = fifo->array[NETFIFOINDEX(fifo->rindex)]; /* take the mbuf at the read slot ... */
179 fifo->array[NETFIFOINDEX(fifo->rindex)] = nm; /* ... and leave nm behind (vke_tx_intr() passes NULL) */
186 * vke_txfifo_enqueue() - Add an mbuf to the transmit fifo.
189 vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m)
191 fifo_t fifo = sc->sc_txfifo;
193 if (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex)) /* ring full; vke_start() tests the result against -1, presumably the value returned on this path */
196 fifo->array[NETFIFOINDEX(fifo->windex)] = m; /* store m in the current write slot */
204 * vke_txfifo_dequeue() - Return next mbuf on the transmit fifo if one
208 vke_txfifo_dequeue(struct vke_softc *sc)
210 fifo_t fifo = sc->sc_txfifo;
213 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex)) /* read slot equals write slot: FIFO is empty */
216 m = fifo->array[NETFIFOINDEX(fifo->rindex)]; /* fetch the mbuf at the read slot */
217 fifo->array[NETFIFOINDEX(fifo->rindex)] = NULL; /* clear the slot so vke_stop() does not free this mbuf again */
225 vke_txfifo_empty(struct vke_softc *sc) /* emptiness test for the transmit FIFO, used by vke_tx_thread() */
227 fifo_t fifo = sc->sc_txfifo;
229 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex)) /* same empty condition the dequeue helpers use */
235 * vke_rxfifo_dequeue() - Return next mbuf on the receive fifo if one
236 * exists replacing it with newm which should point to a newly allocated
240 vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *newm)
242 fifo_t fifo = sc->sc_rxfifo;
245 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex)) /* read slot equals write slot: nothing received */
248 m = fifo->array[NETFIFOINDEX(fifo->rindex)]; /* take the filled mbuf out of the read slot ... */
249 fifo->array[NETFIFOINDEX(fifo->rindex)] = newm; /* ... and install newm so the slot always holds a buffer for vke_rx_thread() */
256 * Return the next mbuf if available but do NOT remove it from the FIFO.
259 vke_rxfifo_sniff(struct vke_softc *sc)
261 fifo_t fifo = sc->sc_rxfifo;
264 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex)) /* read slot equals write slot: nothing to peek at */
267 m = fifo->array[NETFIFOINDEX(fifo->rindex)]; /* read the slot without modifying it */
275 struct vke_softc *sc = xsc; /* the opaque argument is the interface's softc */
276 struct ifnet *ifp = &sc->arpcom.ac_if;
279 ASSERT_SERIALIZED(ifp->if_serializer); /* caller must already hold the interface serializer */
283 ifp->if_flags |= IFF_RUNNING;
284 ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd));
286 sc->sc_txfifo = kmalloc(sizeof(*sc->sc_txfifo), M_DEVBUF, M_WAITOK); /* NOTE(review): allocated without M_ZERO and no rindex/windex reset is visible in this excerpt - confirm the indices are initialised */
287 sc->sc_txfifo_done = kmalloc(sizeof(*sc->sc_txfifo_done), M_DEVBUF, M_WAITOK);
289 sc->sc_rxfifo = kmalloc(sizeof(*sc->sc_rxfifo), M_DEVBUF, M_WAITOK);
290 for (i = 0; i < NETFIFOSIZE; i++) {
291 sc->sc_rxfifo->array[i] = m_getcl(MB_WAIT, MT_DATA, M_PKTHDR); /* every RX slot gets a cluster mbuf up front; vke_rx_thread() reads into these */
292 sc->sc_txfifo->array[i] = NULL; /* TX rings start with empty slots */
293 sc->sc_txfifo_done->array[i] = NULL;
296 sc->cotd_tx_exit = sc->cotd_rx_exit = VKE_COTD_RUN; /* set the run state before the cothreads are created */
297 sc->cotd_tx = cothread_create(vke_tx_thread, vke_tx_intr, sc, "vke_tx");
298 sc->cotd_rx = cothread_create(vke_rx_thread, vke_rx_intr, sc, "vke_rx");
300 if (sc->sc_addr != 0) { /* an address was pre-configured for this interface */
301 in_addr_t addr, mask;
307 * Make sure vkernel assigned
308 * address will not be added
314 vke_init_addr(ifp, addr, mask); /* result is not used on this line */
320 * Called from kernel.
322 * NOTE: We can't make any kernel callbacks while holding cothread lock
323 * because the cothread lock is not governed by the kernel scheduler
324 * (so mplock, tokens, etc will not be released).
327 vke_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
329 struct vke_softc *sc = ifp->if_softc;
331 cothread_t cotd = sc->cotd_tx; /* the transmit cothread is the one woken from here */
334 ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
335 ASSERT_SERIALIZED(ifp->if_serializer);
337 if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq)) /* interface not running, or subqueue marked output-active */
341 while ((m = ifsq_dequeue(ifsq, NULL)) != NULL) { /* drain the send subqueue */
342 if (vke_txfifo_enqueue(sc, m) != -1) { /* anything other than -1 counts as queued */
343 if (count++ == VKE_CHUNK) { /* batch threshold reached: wake the TX cothread while still queueing */
344 cothread_lock(cotd, 0); /* the signal is issued with the cothread lock held */
345 cothread_signal(cotd);
346 cothread_unlock(cotd, 0);
354 cothread_lock(cotd, 0); /* second wakeup site; any condition guarding it is not visible in this excerpt */
355 cothread_signal(cotd);
356 cothread_unlock(cotd, 0);
361 vke_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr) /* if_ioctl handler installed by vke_attach() */
363 struct vke_softc *sc = ifp->if_softc;
366 ASSERT_SERIALIZED(ifp->if_serializer); /* caller must hold the interface serializer */
370 if (ifp->if_flags & IFF_UP) { /* interface administratively up */
371 if ((ifp->if_flags & IFF_RUNNING) == 0) /* up but not yet running */
374 if (ifp->if_flags & IFF_RUNNING) /* evaluated on the not-up side; the action taken is not visible here */
383 case SIOCGIFSTATUS: {
384 struct ifstat *ifs = (struct ifstat *)data;
387 len = strlen(ifs->ascii); /* append after whatever status text is already present */
388 if (len < sizeof(ifs->ascii)) { /* only append when the length leaves room in the buffer */
389 ksnprintf(ifs->ascii + len, sizeof(ifs->ascii) - len,
390 "\tBacked by tap%d\n", sc->sc_tap_unit);
395 if (((struct ifaddr *)data)->ifa_addr->sa_family == AF_INET) { /* here data is interpreted as a struct ifaddr */
397 * If we are explicitly requested to change address,
398 * we should invalidate address/netmask passed in
399 * from vkernel command line.
406 error = ether_ioctl(ifp, cmd, data); /* delegate to the generic Ethernet ioctl handler */
413 vke_stop(struct vke_softc *sc) /* stops the interface: ends both cothreads and releases the FIFOs */
415 struct ifnet *ifp = &sc->arpcom.ac_if;
418 ASSERT_SERIALIZED(ifp->if_serializer);
420 ifp->if_flags &= ~IFF_RUNNING;
421 ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd));
425 cothread_lock(sc->cotd_tx, 0); /* change the TX exit flag under the cothread lock */
426 if (sc->cotd_tx_exit == VKE_COTD_RUN) /* only RUN -> EXIT; a flag that is already EXIT or DEAD is left alone */
427 sc->cotd_tx_exit = VKE_COTD_EXIT;
428 cothread_signal(sc->cotd_tx); /* signal the TX cothread after updating the flag */
429 cothread_unlock(sc->cotd_tx, 0);
430 cothread_delete(&sc->cotd_tx);
433 cothread_lock(sc->cotd_rx, 0); /* same sequence for the RX cothread */
434 if (sc->cotd_rx_exit == VKE_COTD_RUN)
435 sc->cotd_rx_exit = VKE_COTD_EXIT;
436 cothread_signal(sc->cotd_rx);
437 cothread_unlock(sc->cotd_rx, 0);
438 cothread_delete(&sc->cotd_rx);
441 for (i = 0; i < NETFIFOSIZE; i++) { /* free every mbuf still sitting in any of the three rings */
442 if (sc->sc_rxfifo && sc->sc_rxfifo->array[i]) {
443 m_freem(sc->sc_rxfifo->array[i]);
444 sc->sc_rxfifo->array[i] = NULL;
446 if (sc->sc_txfifo && sc->sc_txfifo->array[i]) {
447 m_freem(sc->sc_txfifo->array[i]);
448 sc->sc_txfifo->array[i] = NULL;
450 if (sc->sc_txfifo_done && sc->sc_txfifo_done->array[i]) {
451 m_freem(sc->sc_txfifo_done->array[i]);
452 sc->sc_txfifo_done->array[i] = NULL;
457 kfree(sc->sc_txfifo, M_DEVBUF); /* release the ring itself and clear the pointer */
458 sc->sc_txfifo = NULL;
461 if (sc->sc_txfifo_done) {
462 kfree(sc->sc_txfifo_done, M_DEVBUF);
463 sc->sc_txfifo_done = NULL;
467 kfree(sc->sc_rxfifo, M_DEVBUF);
468 sc->sc_rxfifo = NULL;
477 * vke_rx_intr() is the interrupt function for the receive cothread.
480 vke_rx_intr(cothread_t cotd)
484 struct vke_softc *sc = cotd->arg;
485 struct ifnet *ifp = &sc->arpcom.ac_if;
486 static int count = 0; /* static: the value persists across calls and is shared by every vke instance */
488 ifnet_serialize_all(ifp); /* interface serializers first, then the cothread lock */
489 cothread_lock(cotd, 0);
491 if (sc->cotd_rx_exit != VKE_COTD_RUN) { /* cothread is exiting or dead: drop both locks */
492 cothread_unlock(cotd, 0);
493 ifnet_deserialize_all(ifp);
496 cothread_unlock(cotd, 0); /* cothread lock is released before the FIFO is processed */
498 while ((m = vke_rxfifo_sniff(sc)) != NULL) { /* peek first; the slot is only consumed below */
499 nm = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR); /* non-blocking allocation of a replacement cluster mbuf */
501 vke_rxfifo_dequeue(sc, nm); /* swap nm into the slot ... */
502 ifp->if_input(ifp, m); /* ... and hand the received mbuf to the interface input routine */
503 if (count++ == VKE_CHUNK) { /* batch threshold reached: signal the RX cothread */
504 cothread_lock(cotd, 0);
505 cothread_signal(cotd);
506 cothread_unlock(cotd, 0);
510 vke_rxfifo_dequeue(sc, m); /* re-installs m itself in the slot; presumably the allocation-failure path - the branch structure is not visible here */
515 cothread_lock(cotd, 0); /* another signal to the RX cothread; any guarding condition is not visible */
516 cothread_signal(cotd);
517 cothread_unlock(cotd, 0);
519 ifnet_deserialize_all(ifp);
523 * vke_tx_intr() is the interrupt function for the transmit cothread.
524 * Calls vke_start() to handle processing transmit mbufs.
527 vke_tx_intr(cothread_t cotd)
529 struct vke_softc *sc = cotd->arg;
530 struct ifnet *ifp = &sc->arpcom.ac_if;
533 ifnet_serialize_all(ifp); /* interface serializers first, then the cothread lock */
534 cothread_lock(cotd, 0);
535 if (sc->cotd_tx_exit != VKE_COTD_RUN) { /* cothread is exiting or dead: drop both locks */
536 cothread_unlock(cotd, 0);
537 ifnet_deserialize_all(ifp);
540 cothread_unlock(cotd, 0); /* cothread lock is released before the done FIFO is drained */
543 * Free TX mbufs that have been processed before starting new
544 * ones going to be pipeline friendly.
546 while ((m = vke_txfifo_done_dequeue(sc, NULL)) != NULL) { /* drain the done FIFO, leaving NULL in each slot */
550 if ((ifp->if_flags & IFF_RUNNING) == 0) /* tests for a stopped interface; the action taken is not visible here */
553 ifnet_deserialize_all(ifp);
557 * vke_rx_thread() is the body of the receive cothread.
560 vke_rx_thread(cothread_t cotd)
563 struct vke_softc *sc = cotd->arg;
564 struct ifnet *ifp = &sc->arpcom.ac_if;
565 fifo_t fifo = sc->sc_rxfifo;
/*
 * Flow visible in this excerpt:
 *  - loop while sc->cotd_rx_exit stays VKE_COTD_RUN;
 *  - detect a full ring (slot after windex equals rindex);
 *  - otherwise take the mbuf already sitting in the write slot and
 *    read() up to MCLBYTES from sc->sc_fd straight into its data area;
 *  - for a received packet, bump ipackets and set rcvif, m_pkthdr.len
 *    and m_len to the byte count n;
 *  - compare count against VKE_CHUNK for batching;
 *  - select() on sc->sc_fd with timeout tv, logging a message if
 *    select() returns -1;
 *  - finally store VKE_COTD_DEAD in sc->cotd_rx_exit.
 * The conditions linking these steps are not visible here.
 */
571 /* Select timeout cannot be infinite since we need to check for
572 * the exit flag sc->cotd_rx_exit.
580 while (sc->cotd_rx_exit == VKE_COTD_RUN) {
582 * Wait for the RX FIFO to be loaded with
585 if (NETFIFOINDEX(fifo->windex + 1) ==
586 NETFIFOINDEX(fifo->rindex)) {
592 * Load data into the rx fifo
594 m = fifo->array[NETFIFOINDEX(fifo->windex)];
597 n = read(sc->sc_fd, mtod(m, void *), MCLBYTES);
599 IFNET_STAT_INC(ifp, ipackets, 1);
600 m->m_pkthdr.rcvif = ifp;
601 m->m_pkthdr.len = m->m_len = n;
604 if (count++ == VKE_CHUNK) {
613 FD_SET(sc->sc_fd, &fdset);
615 if (select(sc->sc_fd + 1, &fdset, NULL, NULL, &tv) == -1) {
616 kprintf(VKE_DEVNAME "%d: select failed for "
617 "TAP device\n", sc->sc_unit);
623 sc->cotd_rx_exit = VKE_COTD_DEAD;
627 * vke_tx_thread() is the body of the transmit cothread.
630 vke_tx_thread(cothread_t cotd)
633 struct vke_softc *sc = cotd->arg;
634 struct ifnet *ifp = &sc->arpcom.ac_if;
637 while (sc->cotd_tx_exit == VKE_COTD_RUN) { /* keep running until vke_stop() changes the flag */
639 * Write outgoing packets to the TAP interface
641 m = vke_txfifo_dequeue(sc);
643 if (m->m_pkthdr.len <= MCLBYTES) { /* only packets that fit sc_txbuf (allocated with MCLBYTES bytes) are written */
644 m_copydata(m, 0, m->m_pkthdr.len, sc->sc_txbuf); /* copy the whole packet into the contiguous bounce buffer */
645 sc->sc_txbuf_len = m->m_pkthdr.len;
647 if (write(sc->sc_fd, sc->sc_txbuf, /* a negative return counts as an output error; short writes are not distinguished */
648 sc->sc_txbuf_len) < 0) {
649 IFNET_STAT_INC(ifp, oerrors, 1);
651 IFNET_STAT_INC(ifp, opackets, 1);
654 if (count++ == VKE_CHUNK) { /* batch threshold; the action taken is not visible in this excerpt */
658 vke_txfifo_done_enqueue(sc, m); /* the cothread cannot free mbufs itself, so m goes back via the done FIFO */
664 cothread_lock(cotd, 1); /* emptiness is re-checked under the cothread lock */
665 if (vke_txfifo_empty(sc))
667 cothread_unlock(cotd, 1);
671 sc->cotd_tx_exit = VKE_COTD_DEAD; /* record that this cothread body is finished */
675 vke_attach(const struct vknetif_info *info, int unit) /* creates and attaches one vke interface described by info */
677 struct vke_softc *sc;
679 struct tapinfo tapinfo;
680 uint8_t enaddr[ETHER_ADDR_LEN];
683 KKASSERT(info->tap_fd >= 0); /* a valid backend descriptor is required */
687 * This is only a TAP device if tap_unit is non-zero. If
688 * connecting to a virtual socket we generate a unique MAC.
690 if (info->tap_unit >= 0) { /* NOTE: the test is >= 0, so unit 0 also takes the TAP path despite the "non-zero" wording above */
691 if (ioctl(fd, TAPGIFINFO, &tapinfo) < 0) { /* query the backend; tapinfo.mtu and tapinfo.baudrate are used further down */
692 kprintf(VKE_DEVNAME "%d: ioctl(TAPGIFINFO) "
693 "failed: %s\n", unit, strerror(errno));
697 if (ioctl(fd, SIOCGIFADDR, enaddr) < 0) { /* fetch the Ethernet address into enaddr */
698 kprintf(VKE_DEVNAME "%d: ioctl(SIOCGIFADDR) "
699 "failed: %s\n", unit, strerror(errno));
703 int fd = open("/dev/urandom", O_RDONLY); /* non-TAP path: this local fd is declared in the inner scope */
705 read(fd, enaddr + 2, 4); /* random bytes for enaddr[2..5]; NOTE(review): the read() result is ignored */
708 enaddr[4] = (int)getpid() >> 8; /* bytes 4 and 5 are then overwritten with the process id */
709 enaddr[5] = (int)getpid() & 255;
714 sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO); /* softc starts zero-filled */
716 sc->sc_txbuf = kmalloc(MCLBYTES, M_DEVBUF, M_WAITOK); /* bounce buffer used by vke_tx_thread() */
719 sc->sc_tap_unit = info->tap_unit;
720 sc->sc_addr = info->netif_addr;
721 sc->sc_mask = info->netif_mask;
723 ifp = &sc->arpcom.ac_if;
724 if_initname(ifp, VKE_DEVNAME, sc->sc_unit);
726 /* NB: after if_initname() */
727 sysctl_ctx_init(&sc->sc_sysctl_ctx);
728 sc->sc_sysctl_tree = SYSCTL_ADD_NODE(&sc->sc_sysctl_ctx,
729 SYSCTL_STATIC_CHILDREN(_hw),
730 OID_AUTO, ifp->if_xname,
732 if (sc->sc_sysctl_tree == NULL) { /* failure to add the node is reported with kprintf */
733 kprintf(VKE_DEVNAME "%d: can't add sysctl node\n", unit);
735 SYSCTL_ADD_INT(&sc->sc_sysctl_ctx,
736 SYSCTL_CHILDREN(sc->sc_sysctl_tree),
737 OID_AUTO, "tap_unit",
738 CTLFLAG_RD, &sc->sc_tap_unit, 0,
739 "Backend tap(4) unit");
743 ifp->if_ioctl = vke_ioctl;
744 ifp->if_start = vke_start;
745 ifp->if_init = vke_init;
746 ifp->if_mtu = tapinfo.mtu; /* NOTE(review): tapinfo is only visibly filled by TAPGIFINFO on the TAP path - confirm it is initialised otherwise */
747 ifp->if_baudrate = tapinfo.baudrate;
748 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
749 ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
750 ifq_set_ready(&ifp->if_snd);
754 ether_ifattach(ifp, enaddr, NULL); /* attach as an Ethernet interface with the address chosen above */
756 if (bootverbose && sc->sc_addr != 0) { /* report the pre-configured address only on verbose boots */
757 if_printf(ifp, "pre-configured "
758 "address 0x%08x, netmask 0x%08x\n",
759 ntohl(sc->sc_addr), ntohl(sc->sc_mask));
766 vke_init_addr(struct ifnet *ifp, in_addr_t addr, in_addr_t mask) /* adds the pre-configured IPv4 address/netmask to ifp via SIOCAIFADDR */
768 struct ifaliasreq ifra;
769 struct sockaddr_in *sin;
772 ASSERT_SERIALIZED(ifp->if_serializer); /* entered with the interface serializer held */
775 if_printf(ifp, "add pre-configured "
776 "address 0x%08x, netmask 0x%08x\n",
777 ntohl(addr), ntohl(mask));
780 bzero(&ifra, sizeof(ifra)); /* start from an all-zero request */
782 /* NB: no need to set ifaliasreq.ifra_name */
784 sin = (struct sockaddr_in *)&ifra.ifra_addr; /* fill in the address part of the request */
785 sin->sin_family = AF_INET;
786 sin->sin_len = sizeof(*sin);
787 sin->sin_addr.s_addr = addr;
790 sin = (struct sockaddr_in *)&ifra.ifra_mask; /* netmask part: only sin_len and the address are set here */
791 sin->sin_len = sizeof(*sin);
792 sin->sin_addr.s_addr = mask;
796 * Temporarily release serializer, in_control() will hold
797 * it again before calling ifnet.if_ioctl().
799 ifnet_deserialize_all(ifp);
800 ret = in_control(NULL, SIOCAIFADDR, (caddr_t)&ifra, ifp, NULL);
801 ifnet_serialize_all(ifp); /* re-acquire so the function leaves with the serializer held, as on entry */