2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Sepherosa Ziehau <sepherosa@gmail.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/dev/virtual/net/if_vke.c,v 1.10 2008/05/27 23:44:46 dillon Exp $
37 #include <sys/param.h>
38 #include <sys/endian.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
42 #include <sys/serialize.h>
43 #include <sys/socket.h>
44 #include <sys/sockio.h>
45 #include <sys/sysctl.h>
47 #include <machine/md_var.h>
48 #include <machine/cothread.h>
50 #include <net/ethernet.h>
53 #include <net/if_arp.h>
54 #include <net/ifq_var.h>
56 #include <netinet/in_var.h>
59 #include <net/tap/if_tap.h>
/*
 * NOTE(review): this excerpt is an elided paste -- each line carries a
 * fused original line number and many lines are missing (struct
 * openings, blank lines).  Comments below describe only what is
 * visible here.
 */
67 #define VKE_DEVNAME "vke"
69 #define VKE_CHUNK 8 /* number of mbufs to queue before interrupting */
/*
 * Ring sizing: power-of-two so free-running windex/rindex counters can
 * be reduced to array slots with a simple mask (NETFIFOINDEX).
 */
71 #define NETFIFOSIZE 256
72 #define NETFIFOMASK (NETFIFOSIZE -1)
73 #define NETFIFOINDEX(u) ((u) & NETFIFOMASK)
/* Cothread lifecycle states used by the sc->cotd_{tx,rx}_exit handshake. */
75 #define VKE_COTD_RUN 0
76 #define VKE_COTD_EXIT 1
77 #define VKE_COTD_DEAD 2
/*
 * Fields below belong to struct vke_fifo / struct vke_softc whose
 * opening lines are not visible in this excerpt.
 */
80 struct mbuf *array[NETFIFOSIZE];
84 typedef struct vke_fifo *fifo_t;
101 fifo_t sc_txfifo_done; /* tx mbufs the cothread is done with */
104 struct sysctl_ctx_list sc_sysctl_ctx;
105 struct sysctl_oid *sc_sysctl_tree;
107 int sc_tap_unit; /* unit of backend tap(4) */
108 in_addr_t sc_addr; /* address */
109 in_addr_t sc_mask; /* netmask */
/* Forward declarations: ifnet callbacks and cothread entry points. */
112 static void vke_start(struct ifnet *);
113 static void vke_init(void *);
114 static int vke_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
116 static int vke_attach(const struct vknetif_info *, int);
117 static int vke_stop(struct vke_softc *);
118 static int vke_init_addr(struct ifnet *, in_addr_t, in_addr_t);
119 static void vke_tx_intr(cothread_t cotd);
120 static void vke_tx_thread(cothread_t cotd);
121 static void vke_rx_intr(cothread_t cotd);
122 static void vke_rx_thread(cothread_t cotd);
/* Ring helpers; the enqueue side returns -1 when the ring is full
 * (see the caller check in vke_start()). */
124 static int vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m);
125 static struct mbuf *vke_txfifo_dequeue(struct vke_softc *sc);
127 static int vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m);
128 static struct mbuf * vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm);
130 static struct mbuf *vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *nm);
131 static struct mbuf *vke_rxfifo_sniff(struct vke_softc *sc);
/*
 * vke_sysinit() - SYSINIT hook run at driver-attach time: attach one
 * vke(4) interface for each configured vkernel netif.
 * NOTE(review): loop setup lines ('i'/'unit' declarations, brace
 * closers) are elided in this excerpt.
 */
134 vke_sysinit(void *arg __unused)
138 KASSERT(NetifNum <= VKNETIF_MAX, ("too many netifs: %d\n", NetifNum));
141 for (i = 0; i < NetifNum; ++i) {
142 if (vke_attach(&NetifInfo[i], unit) == 0)
146 SYSINIT(vke, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, vke_sysinit, NULL);
149 * vke_txfifo_done_enqueue() - Add an mbuf to the transmit done fifo. Since
150 * the cothread cannot free transmit mbufs after processing we put them on
151 * the done fifo so the kernel can free them.
154 vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m)
156 fifo_t fifo = sc->sc_txfifo_done;
/*
 * Ring is "full" when advancing windex would collide with rindex (one
 * slot is sacrificed to distinguish full from empty).  The return
 * statements and the windex advance are elided in this excerpt.
 */
158 if (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex))
161 fifo->array[NETFIFOINDEX(fifo->windex)] = m;
168 * vke_txfifo_done_dequeue() - Remove an mbuf from the transmit done fifo.
171 vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm)
173 fifo_t fifo = sc->sc_txfifo_done;
/* Ring is empty when the read and write indices coincide. */
176 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
/* Swap in the caller-supplied replacement (callers pass NULL here). */
179 m = fifo->array[NETFIFOINDEX(fifo->rindex)];
180 fifo->array[NETFIFOINDEX(fifo->rindex)] = nm;
187 * vke_txfifo_enqueue() - Add an mbuf to the transmit fifo. Wake up the
188 * cothread via cothread_signal().
191 vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m)
193 fifo_t fifo = sc->sc_txfifo;
194 cothread_t cotd = sc->cotd_tx;
/* Full check: one slot sacrificed to disambiguate full from empty. */
196 if (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex))
199 fifo->array[NETFIFOINDEX(fifo->windex)] = m;
/* Kick the tx cothread so it notices the newly queued packet. */
201 cothread_signal(cotd);
208 * vke_txfifo_dequeue() - Return next mbuf on the transmit fifo if one
212 vke_txfifo_dequeue(struct vke_softc *sc)
214 fifo_t fifo = sc->sc_txfifo;
/* Empty when indices coincide. */
217 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
/* Clear the slot -- unlike the rx ring, tx slots get no replacement. */
220 m = fifo->array[NETFIFOINDEX(fifo->rindex)];
221 fifo->array[NETFIFOINDEX(fifo->rindex)] = NULL;
229 * vke_rxfifo_dequeue() - Return next mbuf on the receive fifo if one
230 * exists replacing it with newm which should point to a newly allocated
234 vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *newm)
236 fifo_t fifo = sc->sc_rxfifo;
/* Empty when indices coincide. */
239 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
/* Replace the consumed slot so the rx cothread always has a buffer
 * available to read() into. */
242 m = fifo->array[NETFIFOINDEX(fifo->rindex)];
243 fifo->array[NETFIFOINDEX(fifo->rindex)] = newm;
/*
 * vke_rxfifo_sniff() - peek at the head of the receive ring.
 */
250 * Return the next mbuf if available but do NOT remove it from the FIFO.
253 vke_rxfifo_sniff(struct vke_softc *sc)
255 fifo_t fifo = sc->sc_rxfifo;
/* Empty when indices coincide; rindex is intentionally not advanced. */
258 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
261 m = fifo->array[NETFIFOINDEX(fifo->rindex)];
/*
 * vke_init() - ifnet if_init handler (enclosing 'static void vke_init'
 * line is elided in this excerpt).  Marks the interface running, plumbs
 * any vkernel-supplied address, allocates the three rings (rx slots
 * pre-loaded with mbuf clusters) and starts the tx/rx cothreads.
 * Caller must hold the interface serializer.
 */
269 struct vke_softc *sc = xsc;
270 struct ifnet *ifp = &sc->arpcom.ac_if;
273 ASSERT_SERIALIZED(ifp->if_serializer);
278 ifp->if_flags |= IFF_RUNNING;
279 ifp->if_flags &= ~IFF_OACTIVE;
/* Plumb the address pre-configured on the vkernel command line. */
281 if (sc->sc_addr != 0) {
282 in_addr_t addr, mask;
288 * Make sure vkernel assigned
289 * address will not be added
295 vke_init_addr(ifp, addr, mask);
/* M_WAITOK allocations sleep rather than fail. */
298 sc->sc_txfifo = kmalloc(sizeof(*sc->sc_txfifo), M_DEVBUF, M_WAITOK);
299 sc->sc_txfifo_done = kmalloc(sizeof(*sc->sc_txfifo_done), M_DEVBUF, M_WAITOK);
301 sc->sc_rxfifo = kmalloc(sizeof(*sc->sc_rxfifo), M_DEVBUF, M_WAITOK);
/* Pre-load every rx slot with a cluster for the rx cothread. */
302 for (i = 0; i < NETFIFOSIZE; i++) {
303 sc->sc_rxfifo->array[i] = m_getcl(MB_WAIT, MT_DATA, M_PKTHDR);
304 sc->sc_txfifo->array[i] = NULL;
305 sc->sc_txfifo_done->array[i] = NULL;
/* Start the cothreads that perform the actual tap(4) read/write. */
308 sc->cotd_tx_exit = sc->cotd_rx_exit = VKE_COTD_RUN;
309 sc->cotd_tx = cothread_create(vke_tx_thread, vke_tx_intr, sc, "vke_tx");
310 sc->cotd_rx = cothread_create(vke_rx_thread, vke_rx_intr, sc, "vke_rx");
/*
 * vke_start() - ifnet if_start handler: drain the interface send queue
 * into the tx ring, batching cothread wakeups (every VKE_CHUNK packets
 * plus a final signal).  Caller must hold the interface serializer.
 */
314 vke_start(struct ifnet *ifp)
316 struct vke_softc *sc = ifp->if_softc;
318 cothread_t cotd = sc->cotd_tx;
321 ASSERT_SERIALIZED(ifp->if_serializer);
/* Nothing to do unless RUNNING is set and OACTIVE is clear. */
323 if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
326 cothread_lock(cotd, 0);
329 while ((m = ifq_dequeue(&ifp->if_snd, NULL)) != NULL) {
/* -1 from the enqueue means the tx ring is full. */
330 if (vke_txfifo_enqueue(sc, m) != -1) {
/* Batch signals: wake the cothread every VKE_CHUNK mbufs. */
331 if (count++ == VKE_CHUNK) {
332 cothread_signal(cotd);
341 cothread_signal(cotd);
344 cothread_unlock(cotd, 0);
/*
 * vke_ioctl() - ifnet ioctl handler.  Visible cases: IFF_UP toggling
 * (init/stop), SIOCGIFSTATUS (report backing tap unit) and an AF_INET
 * address change; everything else falls through to ether_ioctl().
 * NOTE(review): several case labels and returns are elided in this
 * excerpt.  Caller must hold the interface serializer.
 */
348 vke_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
350 struct vke_softc *sc = ifp->if_softc;
353 ASSERT_SERIALIZED(ifp->if_serializer);
/* Bring the interface up or down to track IFF_UP. */
357 if (ifp->if_flags & IFF_UP) {
358 if ((ifp->if_flags & IFF_RUNNING) == 0)
361 if (ifp->if_flags & IFF_RUNNING)
370 case SIOCGIFSTATUS: {
371 struct ifstat *ifs = (struct ifstat *)data;
374 len = strlen(ifs->ascii);
/* Append backing-tap info without overflowing the fixed buffer. */
375 if (len < sizeof(ifs->ascii)) {
376 ksnprintf(ifs->ascii + len, sizeof(ifs->ascii) - len,
377 "\tBacked by tap%d\n", sc->sc_tap_unit);
382 if (((struct ifaddr *)data)->ifa_addr->sa_family == AF_INET) {
384 * If we are explicitly requested to change address,
385 * we should invalidate address/netmask passed in
386 * from vkernel command line.
393 error = ether_ioctl(ifp, cmd, data);
/*
 * vke_stop() - tear down the interface: clear RUNNING/OACTIVE, request
 * both cothreads to exit via the VKE_COTD_EXIT handshake, then free
 * every mbuf left in the three rings and the rings themselves.
 * Caller must hold the interface serializer.
 */
400 vke_stop(struct vke_softc *sc)
402 struct ifnet *ifp = &sc->arpcom.ac_if;
405 ASSERT_SERIALIZED(ifp->if_serializer);
407 ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
/* Ask the tx cothread to exit and delete it. */
411 cothread_lock(sc->cotd_tx, 0);
412 if (sc->cotd_tx_exit == VKE_COTD_RUN)
413 sc->cotd_tx_exit = VKE_COTD_EXIT;
414 cothread_signal(sc->cotd_tx);
415 cothread_unlock(sc->cotd_tx, 0);
416 cothread_delete(&sc->cotd_tx);
/* Likewise for the rx cothread. */
419 cothread_lock(sc->cotd_rx, 0);
420 if (sc->cotd_rx_exit == VKE_COTD_RUN)
421 sc->cotd_rx_exit = VKE_COTD_EXIT;
422 cothread_signal(sc->cotd_rx);
423 cothread_unlock(sc->cotd_rx, 0);
424 cothread_delete(&sc->cotd_rx);
/* Free any mbufs still parked in the rings. */
427 for (i = 0; i < NETFIFOSIZE; i++) {
428 if (sc->sc_rxfifo && sc->sc_rxfifo->array[i]) {
429 m_freem(sc->sc_rxfifo->array[i]);
430 sc->sc_rxfifo->array[i] = NULL;
432 if (sc->sc_txfifo && sc->sc_txfifo->array[i]) {
433 m_freem(sc->sc_txfifo->array[i]);
434 sc->sc_txfifo->array[i] = NULL;
436 if (sc->sc_txfifo_done && sc->sc_txfifo_done->array[i]) {
437 m_freem(sc->sc_txfifo_done->array[i]);
438 sc->sc_txfifo_done->array[i] = NULL;
/* Release the rings; NULL the pointers so a re-init starts clean. */
443 kfree(sc->sc_txfifo, M_DEVBUF);
444 sc->sc_txfifo = NULL;
447 if (sc->sc_txfifo_done) {
448 kfree(sc->sc_txfifo_done, M_DEVBUF);
449 sc->sc_txfifo_done = NULL;
453 kfree(sc->sc_rxfifo, M_DEVBUF);
454 sc->sc_rxfifo = NULL;
463 * vke_rx_intr() is the interrupt function for the receive cothread.
466 vke_rx_intr(cothread_t cotd)
470 struct vke_softc *sc = cotd->arg;
471 struct ifnet *ifp = &sc->arpcom.ac_if;
/* NOTE(review): function-scope 'static' means this counter persists
 * across calls and is shared by ALL vke instances -- confirm intended. */
472 static int count = 0;
474 ifnet_serialize_all(ifp);
475 cothread_lock(cotd, 0);
/* Bail out early if the cothread is being torn down. */
477 if (sc->cotd_rx_exit != VKE_COTD_RUN) {
478 cothread_unlock(cotd, 0);
479 ifnet_deserialize_all(ifp);
/* Feed received packets to the stack, replacing each consumed ring
 * slot with a fresh cluster; on allocation failure (MB_DONTWAIT) the
 * packet is recycled back into its slot, i.e. dropped. */
483 while ((m = vke_rxfifo_sniff(sc)) != NULL) {
484 nm = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
486 vke_rxfifo_dequeue(sc, nm);
487 ifp->if_input(ifp, m);
/* Signal the cothread every VKE_CHUNK packets to keep it fed. */
488 if (count++ == VKE_CHUNK) {
489 cothread_signal(cotd);
493 vke_rxfifo_dequeue(sc, m);
498 cothread_signal(cotd);
500 cothread_unlock(cotd, 0);
501 ifnet_deserialize_all(ifp);
505 * vke_tx_intr() is the interrupt function for the transmit cothread.
506 * Calls vke_start() to handle processing transmit mbufs.
509 vke_tx_intr(cothread_t cotd)
511 struct vke_softc *sc = cotd->arg;
512 struct ifnet *ifp = &sc->arpcom.ac_if;
515 ifnet_serialize_all(ifp);
516 cothread_lock(cotd, 0);
/* Bail out early if tear-down is in progress. */
518 if (sc->cotd_tx_exit != VKE_COTD_RUN) {
519 cothread_unlock(cotd, 0);
520 ifnet_deserialize_all(ifp);
524 if ((ifp->if_flags & IFF_RUNNING) == 0)
527 /* Free TX mbufs that have been processed */
528 while ((m = vke_txfifo_done_dequeue(sc, NULL)) != NULL) {
532 cothread_unlock(cotd, 0);
534 ifnet_deserialize_all(ifp);
/*
 * Runs on the host side: select()s on the tap fd, read()s frames into
 * pre-loaded ring clusters and advances windex, interrupting the
 * kernel side every VKE_CHUNK packets.  Exits when cotd_rx_exit
 * leaves VKE_COTD_RUN and marks itself VKE_COTD_DEAD.
 */
538 * vke_rx_thread() is the body of the receive cothread.
541 vke_rx_thread(cothread_t cotd)
544 struct vke_softc *sc = cotd->arg;
545 struct ifnet *ifp = &sc->arpcom.ac_if;
547 fifo_t fifo = sc->sc_rxfifo;
551 /* Select timeout cannot be infinite since we need to check for
552 * the exit flag sc->cotd_rx_exit.
559 cothread_lock(cotd, 1);
564 while (sc->cotd_rx_exit == VKE_COTD_RUN) {
565 /* Wait for the RX FIFO to drain */
566 while (NETFIFOINDEX(fifo->windex + 1) ==
567 NETFIFOINDEX(fifo->rindex)) {
/* Read a frame into the pre-loaded cluster at windex; drop the
 * cothread lock around the potentially blocking read(). */
571 if ((m = fifo->array[NETFIFOINDEX(fifo->windex)]) !=
573 cothread_unlock(cotd, 1);
574 n = read(sc->sc_fd, mtod(m, void *), MCLBYTES);
575 cothread_lock(cotd, 1);
579 m->m_pkthdr.rcvif = ifp;
580 m->m_pkthdr.len = m->m_len = n;
/* Interrupt the kernel side every VKE_CHUNK packets. */
583 if (count++ == VKE_CHUNK) {
594 if (sc->cotd_rx_exit != VKE_COTD_RUN)
597 cothread_unlock(cotd, 1);
599 /* Set up data for select() call */
600 FD_SET(sc->sc_fd, &fdset);
602 if (select(sc->sc_fd + 1, &fdset, NULL, NULL, &tv) == -1)
603 kprintf(VKE_DEVNAME "%d: select failed for TAP device\n", sc->sc_unit);
605 cothread_lock(cotd, 1);
/* Tell the kernel side we are gone before dropping the lock. */
608 sc->cotd_rx_exit = VKE_COTD_DEAD;
609 cothread_unlock(cotd, 1);
/*
 * Drains the tx ring: copies each mbuf into the bounce buffer,
 * write()s it to the tap fd, and hands the mbuf to the done ring so
 * the kernel can free it.  Exits when cotd_tx_exit leaves
 * VKE_COTD_RUN and marks itself VKE_COTD_DEAD.
 */
613 * vke_tx_thread() is the body of the transmit cothread.
616 vke_tx_thread(cothread_t cotd)
619 struct vke_softc *sc = cotd->arg;
620 struct ifnet *ifp = &sc->arpcom.ac_if;
623 cothread_lock(cotd, 1);
625 while (sc->cotd_tx_exit == VKE_COTD_RUN) {
626 /* Write outgoing packets to the TAP interface */
627 while ((m = vke_txfifo_dequeue(sc)) != NULL) {
/* Packets larger than the MCLBYTES bounce buffer are skipped.
 * Drop the cothread lock around the blocking write(). */
628 if (m->m_pkthdr.len <= MCLBYTES) {
629 m_copydata(m, 0, m->m_pkthdr.len, sc->sc_txbuf);
630 sc->sc_txbuf_len = m->m_pkthdr.len;
631 cothread_unlock(cotd, 1);
633 if (write(sc->sc_fd, sc->sc_txbuf, sc->sc_txbuf_len) < 0) {
634 cothread_lock(cotd, 1);
637 cothread_lock(cotd, 1);
/* Hand the mbuf back so the kernel side can free it. */
638 vke_txfifo_done_enqueue(sc, m);
/* Interrupt the kernel side every VKE_CHUNK packets. */
640 if (count++ == VKE_CHUNK) {
652 cothread_wait(cotd); /* interlocks cothread lock */
655 sc->cotd_tx_exit = VKE_COTD_DEAD;
656 cothread_unlock(cotd, 1);
/*
 * vke_attach() - create and attach one vke(4) ifnet backed by the
 * given vkernel netif info.  For a real tap backend, query the tap for
 * MTU/baudrate (TAPGIFINFO) and MAC (SIOCGIFADDR); otherwise
 * synthesize a MAC from /dev/urandom plus the pid.  NOTE(review):
 * error-path and enaddr-prefix lines are elided in this excerpt.
 */
660 vke_attach(const struct vknetif_info *info, int unit)
662 struct vke_softc *sc;
664 struct tapinfo tapinfo;
665 uint8_t enaddr[ETHER_ADDR_LEN];
668 KKASSERT(info->tap_fd >= 0);
672 * This is only a TAP device if tap_unit is non-zero. If
673 * connecting to a virtual socket we generate a unique MAC.
675 if (info->tap_unit >= 0) {
676 if (ioctl(fd, TAPGIFINFO, &tapinfo) < 0) {
677 kprintf(VKE_DEVNAME "%d: ioctl(TAPGIFINFO) "
678 "failed: %s\n", unit, strerror(errno));
682 if (ioctl(fd, SIOCGIFADDR, enaddr) < 0) {
683 kprintf(VKE_DEVNAME "%d: ioctl(SIOCGIFADDR) "
684 "failed: %s\n", unit, strerror(errno));
/* Synthesize a MAC: random middle bytes, pid in the tail bytes.
 * NOTE(review): read() result is unchecked -- a short read would
 * leave enaddr partially uninitialized; confirm acceptable. */
688 int fd = open("/dev/urandom", O_RDONLY);
690 read(fd, enaddr + 2, 4);
693 enaddr[4] = (int)getpid() >> 8;
694 enaddr[5] = (int)getpid() & 255;
699 sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
/* Bounce buffer used by the tx cothread (one cluster max). */
701 sc->sc_txbuf = kmalloc(MCLBYTES, M_DEVBUF, M_WAITOK);
704 sc->sc_tap_unit = info->tap_unit;
705 sc->sc_addr = info->netif_addr;
706 sc->sc_mask = info->netif_mask;
708 ifp = &sc->arpcom.ac_if;
709 if_initname(ifp, VKE_DEVNAME, sc->sc_unit);
711 /* NB: after if_initname() */
712 sysctl_ctx_init(&sc->sc_sysctl_ctx);
713 sc->sc_sysctl_tree = SYSCTL_ADD_NODE(&sc->sc_sysctl_ctx,
714 SYSCTL_STATIC_CHILDREN(_hw),
715 OID_AUTO, ifp->if_xname,
/* Sysctl failure is non-fatal; the interface still attaches. */
717 if (sc->sc_sysctl_tree == NULL) {
718 kprintf(VKE_DEVNAME "%d: can't add sysctl node\n", unit);
720 SYSCTL_ADD_INT(&sc->sc_sysctl_ctx,
721 SYSCTL_CHILDREN(sc->sc_sysctl_tree),
722 OID_AUTO, "tap_unit",
723 CTLFLAG_RD, &sc->sc_tap_unit, 0,
724 "Backend tap(4) unit");
/* Standard ifnet setup.  NOTE(review): when tap_unit < 0, tapinfo
 * looks uninitialized at this point -- defaults are presumably set in
 * elided lines; confirm against the full source. */
728 ifp->if_ioctl = vke_ioctl;
729 ifp->if_start = vke_start;
730 ifp->if_init = vke_init;
731 ifp->if_mtu = tapinfo.mtu;
732 ifp->if_baudrate = tapinfo.baudrate;
733 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
734 ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
735 ifq_set_ready(&ifp->if_snd);
739 ether_ifattach(ifp, enaddr, NULL);
741 if (bootverbose && sc->sc_addr != 0) {
742 if_printf(ifp, "pre-configured "
743 "address 0x%08x, netmask 0x%08x\n",
744 ntohl(sc->sc_addr), ntohl(sc->sc_mask));
751 vke_init_addr(struct ifnet *ifp, in_addr_t addr, in_addr_t mask)
753 struct ifaliasreq ifra;
754 struct sockaddr_in *sin;
757 ASSERT_SERIALIZED(ifp->if_serializer);
760 if_printf(ifp, "add pre-configured "
761 "address 0x%08x, netmask 0x%08x\n",
762 ntohl(addr), ntohl(mask));
765 bzero(&ifra, sizeof(ifra));
767 /* NB: no need to set ifaliasreq.ifra_name */
769 sin = (struct sockaddr_in *)&ifra.ifra_addr;
770 sin->sin_family = AF_INET;
771 sin->sin_len = sizeof(*sin);
772 sin->sin_addr.s_addr = addr;
775 sin = (struct sockaddr_in *)&ifra.ifra_mask;
776 sin->sin_len = sizeof(*sin);
777 sin->sin_addr.s_addr = mask;
781 * Temporarily release serializer, in_control() will hold
782 * it again before calling ifnet.if_ioctl().
784 ifnet_deserialize_all(ifp);
785 ret = in_control(NULL, SIOCAIFADDR, (caddr_t)&ifra, ifp, NULL);
786 ifnet_serialize_all(ifp);