2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Sepherosa Ziehau <sepherosa@gmail.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/dev/virtual/net/if_vke.c,v 1.10 2008/05/27 23:44:46 dillon Exp $
37 #include <sys/param.h>
38 #include <sys/endian.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
42 #include <sys/serialize.h>
43 #include <sys/socket.h>
44 #include <sys/sockio.h>
45 #include <sys/sysctl.h>
47 #include <machine/md_var.h>
48 #include <machine/cothread.h>
50 #include <net/ethernet.h>
53 #include <net/if_arp.h>
54 #include <net/ifq_var.h>
56 #include <netinet/in_var.h>
59 #include <net/tap/if_tap.h>
/* Driver name: prefixes log messages and is the base name given to if_initname(). */
67 #define VKE_DEVNAME "vke"
69 #define VKE_CHUNK 8 /* number of mbufs to queue before interrupting */
/*
 * FIFO geometry.  NETFIFOINDEX() wraps an index by masking with
 * NETFIFOSIZE - 1, which is only a valid modulo while NETFIFOSIZE is a
 * power of two.
 */
71 #define NETFIFOSIZE 256
72 #define NETFIFOMASK (NETFIFOSIZE -1)
73 #define NETFIFOINDEX(u) ((u) & NETFIFOMASK)
/*
 * Values of cotd_tx_exit / cotd_rx_exit: vke_init() stores RUN,
 * vke_stop() replaces RUN with EXIT, and vke_tx_thread() /
 * vke_rx_thread() store DEAD themselves.
 */
75 #define VKE_COTD_RUN 0
76 #define VKE_COTD_EXIT 1
77 #define VKE_COTD_DEAD 2
/* Slot storage of one FIFO; the helpers below index it through NETFIFOINDEX(). */
80 struct mbuf *array[NETFIFOSIZE];
/* Pointer typedef used for the softc's tx, tx-done and rx FIFO members. */
84 typedef struct vke_fifo *fifo_t;
/* FIFO of mbufs the tx cothread has finished with (see vke_txfifo_done_enqueue()). */
101 fifo_t sc_txfifo_done;
/* Per-interface sysctl context and the node created under it in vke_attach(). */
104 struct sysctl_ctx_list sc_sysctl_ctx;
105 struct sysctl_oid *sc_sysctl_tree;
/* The three fields below are copied from struct vknetif_info in vke_attach(). */
107 int sc_tap_unit; /* unit of backend tap(4) */
108 in_addr_t sc_addr; /* address */
109 in_addr_t sc_mask; /* netmask */
/* ifnet entry points; vke_attach() installs them as if_start, if_init and if_ioctl. */
112 static void vke_start(struct ifnet *);
113 static void vke_init(void *);
114 static int vke_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
/* Attach, stop and pre-configured address helpers. */
116 static int vke_attach(const struct vknetif_info *, int);
117 static int vke_stop(struct vke_softc *);
118 static int vke_init_addr(struct ifnet *, in_addr_t, in_addr_t);
/* Interrupt/body function pairs handed to cothread_create() in vke_init(). */
119 static void vke_tx_intr(cothread_t cotd);
120 static void vke_tx_thread(cothread_t cotd);
121 static void vke_rx_intr(cothread_t cotd);
122 static void vke_rx_thread(cothread_t cotd);
/* Transmit FIFO: filled by vke_start(), drained by vke_tx_thread(). */
124 static int vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m);
125 static struct mbuf *vke_txfifo_dequeue(struct vke_softc *sc);
/* Transmit-done FIFO: filled by vke_tx_thread(), drained by vke_tx_intr(). */
127 static int vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m);
128 static struct mbuf * vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm);
/* Receive FIFO: filled by vke_rx_thread(), drained by vke_rx_intr(). */
130 static struct mbuf *vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *nm);
131 static struct mbuf *vke_rxfifo_sniff(struct vke_softc *sc);
/* SYSINIT hook: calls vke_attach() once for each of the NetifNum entries in NetifInfo[]. */
134 vke_sysinit(void *arg __unused)
/* Sanity check: the configured interface count must not exceed VKNETIF_MAX. */
138 KASSERT(NetifNum <= VKNETIF_MAX, ("too many netifs: %d\n", NetifNum));
/* The statement run when vke_attach() returns 0 is not visible in this excerpt. */
141 for (i = 0; i < NetifNum; ++i) {
142 if (vke_attach(&NetifInfo[i], unit) == 0)
/* Register vke_sysinit at SI_SUB_DRIVERS / SI_ORDER_MIDDLE with a NULL argument. */
146 SYSINIT(vke, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, vke_sysinit, NULL);
149 * vke_txfifo_done_enqueue() - Add an mbuf to the transmit done fifo. Since
150 * the cothread cannot free transmit mbufs after processing we put them on
151 * the done fifo so the kernel can free them.
/* sc: interface softc; m: mbuf to place on the transmit-done FIFO. */
154 vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m)
156 fifo_t fifo = sc->sc_txfifo_done;
/*
 * Full test: advancing windex by one would land on rindex.  This loops
 * for as long as that holds; the loop body is not visible in this excerpt.
 */
158 while (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex)) {
/* Store the mbuf in the slot at the current write index. */
162 fifo->array[NETFIFOINDEX(fifo->windex)] = m;
169 * vke_txfifo_done_dequeue() - Remove an mbuf from the transmit done fifo.
/* sc: interface softc; nm: value left behind in the vacated slot (vke_tx_intr() passes NULL). */
172 vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm)
174 fifo_t fifo = sc->sc_txfifo_done;
/* Empty test: the read index has caught up with the write index. */
177 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
/* Take the mbuf at the read index and put nm in its place. */
180 m = fifo->array[NETFIFOINDEX(fifo->rindex)];
181 fifo->array[NETFIFOINDEX(fifo->rindex)] = nm;
188 * vke_txfifo_enqueue() - Add an mbuf to the transmit fifo.
/* sc: interface softc; m: mbuf to queue.  vke_start() compares the return value against -1. */
191 vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m)
193 fifo_t fifo = sc->sc_txfifo;
/* Full test: advancing windex by one would land on rindex. */
195 if (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex))
/* Store the mbuf in the slot at the current write index. */
198 fifo->array[NETFIFOINDEX(fifo->windex)] = m;
206 * vke_txfifo_dequeue() - Return next mbuf on the transmit fifo if one
/* sc: interface softc whose transmit FIFO is read; called from vke_tx_thread(). */
210 vke_txfifo_dequeue(struct vke_softc *sc)
212 fifo_t fifo = sc->sc_txfifo;
/* Empty test: the read index has caught up with the write index. */
215 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
/* Take the mbuf at the read index and clear its slot. */
218 m = fifo->array[NETFIFOINDEX(fifo->rindex)];
219 fifo->array[NETFIFOINDEX(fifo->rindex)] = NULL;
/* Emptiness check for the transmit FIFO; vke_tx_thread() calls it under the cothread lock. */
227 vke_txfifo_empty(struct vke_softc *sc)
229 fifo_t fifo = sc->sc_txfifo;
/* Empty when the read index equals the write index (both taken modulo NETFIFOSIZE). */
231 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
237 * vke_rxfifo_dequeue() - Return next mbuf on the receive fifo if one
238 * exists replacing it with newm which should point to a newly allocated
/*
 * sc: interface softc; newm: mbuf stored into the vacated slot.
 * The forward prototype names the second parameter nm; the definition
 * uses newm.
 */
242 vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *newm)
244 fifo_t fifo = sc->sc_rxfifo;
/* Empty test: the read index has caught up with the write index. */
247 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
/* Take the mbuf at the read index and put newm in its place. */
250 m = fifo->array[NETFIFOINDEX(fifo->rindex)];
251 fifo->array[NETFIFOINDEX(fifo->rindex)] = newm;
258 * Return the next mbuf if available but do NOT remove it from the FIFO.
/* sc: interface softc whose receive FIFO is inspected; called from vke_rx_intr(). */
261 vke_rxfifo_sniff(struct vke_softc *sc)
263 fifo_t fifo = sc->sc_rxfifo;
/* Empty test: the read index has caught up with the write index. */
266 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
/* Read the mbuf at the read index; the slot itself is left untouched. */
269 m = fifo->array[NETFIFOINDEX(fifo->rindex)];
/* NOTE(review): presumably the body of vke_init(); its header line is not visible in this excerpt. */
277 struct vke_softc *sc = xsc;
278 struct ifnet *ifp = &sc->arpcom.ac_if;
281 ASSERT_SERIALIZED(ifp->if_serializer);
/* Mark the interface running and clear the output-active flag. */
285 ifp->if_flags |= IFF_RUNNING;
286 ifp->if_flags &= ~IFF_OACTIVE;
/* Allocate the tx, tx-done and rx FIFOs; each kmalloc() is passed M_WAITOK. */
288 sc->sc_txfifo = kmalloc(sizeof(*sc->sc_txfifo), M_DEVBUF, M_WAITOK);
289 sc->sc_txfifo_done = kmalloc(sizeof(*sc->sc_txfifo_done), M_DEVBUF, M_WAITOK);
291 sc->sc_rxfifo = kmalloc(sizeof(*sc->sc_rxfifo), M_DEVBUF, M_WAITOK);
/* Pre-load every rx slot with a packet-header mbuf cluster; both tx FIFOs start with NULL slots. */
292 for (i = 0; i < NETFIFOSIZE; i++) {
293 sc->sc_rxfifo->array[i] = m_getcl(MB_WAIT, MT_DATA, M_PKTHDR);
294 sc->sc_txfifo->array[i] = NULL;
295 sc->sc_txfifo_done->array[i] = NULL;
/* Set both exit flags to RUN, then create the tx and rx cothreads with their body and interrupt functions. */
298 sc->cotd_tx_exit = sc->cotd_rx_exit = VKE_COTD_RUN;
299 sc->cotd_tx = cothread_create(vke_tx_thread, vke_tx_intr, sc, "vke_tx");
300 sc->cotd_rx = cothread_create(vke_rx_thread, vke_rx_intr, sc, "vke_rx");
/* A non-zero sc_addr leads to vke_init_addr(); where addr and mask get their values is not visible in this excerpt. */
302 if (sc->sc_addr != 0) {
303 in_addr_t addr, mask;
309 * Make sure vkernel assigned
310 * address will not be added
316 vke_init_addr(ifp, addr, mask);
322 * Called from kernel.
324 * NOTE: We can't make any kernel callbacks while holding cothread lock
325 * because the cothread lock is not governed by the kernel scheduler
326 * (so mplock, tokens, etc will not be released).
/* if_start handler: moves packets from the interface send queue to the tx FIFO and wakes the tx cothread. */
329 vke_start(struct ifnet *ifp)
331 struct vke_softc *sc = ifp->if_softc;
333 cothread_t cotd = sc->cotd_tx;
336 ASSERT_SERIALIZED(ifp->if_serializer);
/* True unless RUNNING is set and OACTIVE is clear; the guarded statement is not visible in this excerpt. */
338 if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
/* Drain the send queue; vke_txfifo_enqueue() results other than -1 take the branch below. */
342 while ((m = ifq_dequeue(&ifp->if_snd, NULL)) != NULL) {
343 if (vke_txfifo_enqueue(sc, m) != -1) {
/* Signal the tx cothread when count, read before its increment, equals VKE_CHUNK. */
344 if (count++ == VKE_CHUNK) {
345 cothread_lock(cotd, 0);
346 cothread_signal(cotd);
347 cothread_unlock(cotd, 0);
/* A second signal of the tx cothread; its guarding condition, if any, is not visible in this excerpt. */
355 cothread_lock(cotd, 0);
356 cothread_signal(cotd);
357 cothread_unlock(cotd, 0);
/* if_ioctl handler; the switch statement and its case labels are only partly visible in this excerpt. */
362 vke_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
364 struct vke_softc *sc = ifp->if_softc;
367 ASSERT_SERIALIZED(ifp->if_serializer);
/* Tests IFF_UP, then IFF_RUNNING in both directions; the actions taken are not visible in this excerpt. */
371 if (ifp->if_flags & IFF_UP) {
372 if ((ifp->if_flags & IFF_RUNNING) == 0)
375 if (ifp->if_flags & IFF_RUNNING)
/* Status query: append the backing tap unit to the status text when room remains in ifs->ascii. */
384 case SIOCGIFSTATUS: {
385 struct ifstat *ifs = (struct ifstat *)data;
388 len = strlen(ifs->ascii);
389 if (len < sizeof(ifs->ascii)) {
390 ksnprintf(ifs->ascii + len, sizeof(ifs->ascii) - len,
391 "\tBacked by tap%d\n", sc->sc_tap_unit);
/* data is treated as a struct ifaddr here and its address family compared with AF_INET. */
396 if (((struct ifaddr *)data)->ifa_addr->sa_family == AF_INET) {
398 * If we are explicitly requested to change address,
399 * we should invalidate address/netmask passed in
400 * from vkernel command line.
/* Hand the request to ether_ioctl() and keep its result in error. */
407 error = ether_ioctl(ifp, cmd, data);
/* Stops the interface: clears the running flags, shuts down both cothreads and frees the FIFOs. */
414 vke_stop(struct vke_softc *sc)
416 struct ifnet *ifp = &sc->arpcom.ac_if;
419 ASSERT_SERIALIZED(ifp->if_serializer);
421 ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
/* Tx cothread: under its lock change RUN to EXIT and signal it, then unlock and delete it. */
425 cothread_lock(sc->cotd_tx, 0);
426 if (sc->cotd_tx_exit == VKE_COTD_RUN)
427 sc->cotd_tx_exit = VKE_COTD_EXIT;
428 cothread_signal(sc->cotd_tx);
429 cothread_unlock(sc->cotd_tx, 0);
430 cothread_delete(&sc->cotd_tx);
/* Rx cothread: same sequence as for the tx cothread. */
433 cothread_lock(sc->cotd_rx, 0);
434 if (sc->cotd_rx_exit == VKE_COTD_RUN)
435 sc->cotd_rx_exit = VKE_COTD_EXIT;
436 cothread_signal(sc->cotd_rx);
437 cothread_unlock(sc->cotd_rx, 0);
438 cothread_delete(&sc->cotd_rx);
/* Free every mbuf still held in a slot of any FIFO that exists, and NULL the slot. */
441 for (i = 0; i < NETFIFOSIZE; i++) {
442 if (sc->sc_rxfifo && sc->sc_rxfifo->array[i]) {
443 m_freem(sc->sc_rxfifo->array[i]);
444 sc->sc_rxfifo->array[i] = NULL;
446 if (sc->sc_txfifo && sc->sc_txfifo->array[i]) {
447 m_freem(sc->sc_txfifo->array[i]);
448 sc->sc_txfifo->array[i] = NULL;
450 if (sc->sc_txfifo_done && sc->sc_txfifo_done->array[i]) {
451 m_freem(sc->sc_txfifo_done->array[i]);
452 sc->sc_txfifo_done->array[i] = NULL;
/* Release the FIFO structures and NULL the softc pointers; only the tx-done NULL guard is visible in this excerpt. */
457 kfree(sc->sc_txfifo, M_DEVBUF);
458 sc->sc_txfifo = NULL;
461 if (sc->sc_txfifo_done) {
462 kfree(sc->sc_txfifo_done, M_DEVBUF);
463 sc->sc_txfifo_done = NULL;
467 kfree(sc->sc_rxfifo, M_DEVBUF);
468 sc->sc_rxfifo = NULL;
477 * vke_rx_intr() is the interrupt function for the receive cothread.
/* cotd: the receive cothread; its arg field holds the interface softc. */
480 vke_rx_intr(cothread_t cotd)
484 struct vke_softc *sc = cotd->arg;
485 struct ifnet *ifp = &sc->arpcom.ac_if;
/* NOTE(review): count is function-static, so it persists across calls and is shared by all vke interfaces - confirm this is intended. */
486 static int count = 0;
/* Serialize the interface first, then take the cothread lock to read the exit flag. */
488 ifnet_serialize_all(ifp);
489 cothread_lock(cotd, 0);
/* Exit flag no longer RUN: release both locks (the statement that follows is not visible in this excerpt). */
491 if (sc->cotd_rx_exit != VKE_COTD_RUN) {
492 cothread_unlock(cotd, 0);
493 ifnet_deserialize_all(ifp);
496 cothread_unlock(cotd, 0);
/* For each pending rx mbuf, try to allocate a replacement cluster without waiting. */
498 while ((m = vke_rxfifo_sniff(sc)) != NULL) {
499 nm = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
/* Swap the replacement into the slot and pass the received mbuf to if_input. */
501 vke_rxfifo_dequeue(sc, nm);
502 ifp->if_input(ifp, m);
/* Signal the rx cothread when count, read before its increment, equals VKE_CHUNK. */
503 if (count++ == VKE_CHUNK) {
504 cothread_lock(cotd, 0);
505 cothread_signal(cotd);
506 cothread_unlock(cotd, 0);
/* Puts the same mbuf back into its slot; presumably the path taken when no replacement could be allocated - TODO confirm. */
510 vke_rxfifo_dequeue(sc, m);
/* A further signal of the rx cothread; its guarding condition, if any, is not visible in this excerpt. */
515 cothread_lock(cotd, 0);
516 cothread_signal(cotd);
517 cothread_unlock(cotd, 0);
519 ifnet_deserialize_all(ifp);
523 * vke_tx_intr() is the interrupt function for the transmit cothread.
524 * Calls vke_start() to handle processing transmit mbufs.
/* cotd: the transmit cothread; its arg field holds the interface softc. */
527 vke_tx_intr(cothread_t cotd)
529 struct vke_softc *sc = cotd->arg;
530 struct ifnet *ifp = &sc->arpcom.ac_if;
/* Serialize the interface first, then take the cothread lock to read the exit flag. */
533 ifnet_serialize_all(ifp);
534 cothread_lock(cotd, 0);
/* Exit flag no longer RUN: release both locks (the statement that follows is not visible in this excerpt). */
535 if (sc->cotd_tx_exit != VKE_COTD_RUN) {
536 cothread_unlock(cotd, 0);
537 ifnet_deserialize_all(ifp);
540 cothread_unlock(cotd, 0);
543 * Free TX mbufs that have been processed before starting new
544 * ones going to be pipeline friendly.
/* Pull finished mbufs off the tx-done FIFO, leaving NULL in each slot; the loop body is not visible in this excerpt. */
546 while ((m = vke_txfifo_done_dequeue(sc, NULL)) != NULL) {
/* Tests whether the interface is no longer running; the action taken is not visible in this excerpt. */
550 if ((ifp->if_flags & IFF_RUNNING) == 0)
553 ifnet_deserialize_all(ifp);
557 * vke_rx_thread() is the body of the receive cothread.
/* cotd: the receive cothread; its arg field holds the interface softc. */
560 vke_rx_thread(cothread_t cotd)
563 struct vke_softc *sc = cotd->arg;
564 struct ifnet *ifp = &sc->arpcom.ac_if;
565 fifo_t fifo = sc->sc_rxfifo;
571 /* Select timeout cannot be infinite since we need to check for
572 * the exit flag sc->cotd_rx_exit.
/* Main loop: runs while the exit flag is still RUN. */
580 while (sc->cotd_rx_exit == VKE_COTD_RUN) {
582 * Wait for the RX FIFO to be loaded with
/* Full test: advancing windex by one would land on rindex. */
585 if (NETFIFOINDEX(fifo->windex + 1) ==
586 NETFIFOINDEX(fifo->rindex)) {
592 * Load data into the rx fifo
/* Use the mbuf already sitting in the write slot and read up to MCLBYTES from sc_fd into its data area. */
594 m = fifo->array[NETFIFOINDEX(fifo->windex)];
597 n = read(sc->sc_fd, mtod(m, void *), MCLBYTES);
/* Record the receiving interface and set both the packet and mbuf lengths to n; any check on n is not visible in this excerpt. */
600 m->m_pkthdr.rcvif = ifp;
601 m->m_pkthdr.len = m->m_len = n;
/* Chunk counter test; the action taken is not visible in this excerpt. */
604 if (count++ == VKE_CHUNK) {
/* Wait for sc_fd to become readable, bounded by the timeout in tv; a select() failure is logged. */
613 FD_SET(sc->sc_fd, &fdset);
615 if (select(sc->sc_fd + 1, &fdset, NULL, NULL, &tv) == -1) {
616 kprintf(VKE_DEVNAME "%d: select failed for "
617 "TAP device\n", sc->sc_unit);
/* Record that this cothread body is finished. */
623 sc->cotd_rx_exit = VKE_COTD_DEAD;
627 * vke_tx_thread() is the body of the transmit cothread.
/* cotd: the transmit cothread; its arg field holds the interface softc. */
630 vke_tx_thread(cothread_t cotd)
633 struct vke_softc *sc = cotd->arg;
634 struct ifnet *ifp = &sc->arpcom.ac_if;
/* Main loop: runs while the exit flag is still RUN. */
637 while (sc->cotd_tx_exit == VKE_COTD_RUN) {
639 * Write outgoing packets to the TAP interface
641 m = vke_txfifo_dequeue(sc);
/* Only packets whose total length fits in MCLBYTES are copied into sc_txbuf; the other branch is not visible in this excerpt. */
643 if (m->m_pkthdr.len <= MCLBYTES) {
/* Flatten the mbuf chain into the contiguous sc_txbuf and remember its length. */
644 m_copydata(m, 0, m->m_pkthdr.len, sc->sc_txbuf);
645 sc->sc_txbuf_len = m->m_pkthdr.len;
/* Write the packet to sc_fd; what happens on a negative result is not visible in this excerpt. */
647 if (write(sc->sc_fd, sc->sc_txbuf,
648 sc->sc_txbuf_len) < 0) {
/* Chunk counter test; the action taken is not visible in this excerpt. */
654 if (count++ == VKE_CHUNK) {
/* Pass the mbuf to the tx-done FIFO; vke_tx_intr() dequeues it from there. */
658 vke_txfifo_done_enqueue(sc, m);
/* Check for an empty tx FIFO under the cothread lock; this is the only lock call here passing 1 as the second argument, and the action taken is not visible in this excerpt. */
664 cothread_lock(cotd, 1);
665 if (vke_txfifo_empty(sc))
667 cothread_unlock(cotd, 1);
/* Record that this cothread body is finished. */
671 sc->cotd_tx_exit = VKE_COTD_DEAD;
/*
 * Creates and attaches one vke interface.
 * info: backend description (tap_fd, tap_unit, netif_addr, netif_mask);
 * unit: unit number used in log messages.
 */
675 vke_attach(const struct vknetif_info *info, int unit)
677 struct vke_softc *sc;
679 struct tapinfo tapinfo;
680 uint8_t enaddr[ETHER_ADDR_LEN];
683 KKASSERT(info->tap_fd >= 0);
687 * This is only a TAP device if tap_unit is non-zero. If
688 * connecting to a virtual socket we generate a unique MAC.
/* NOTE(review): the test below is tap_unit >= 0, so unit 0 also takes the TAP path; the "non-zero" wording above does not match. */
690 if (info->tap_unit >= 0) {
/* TAP path: fetch the tap interface info and its ethernet address; each failure is logged. */
691 if (ioctl(fd, TAPGIFINFO, &tapinfo) < 0) {
692 kprintf(VKE_DEVNAME "%d: ioctl(TAPGIFINFO) "
693 "failed: %s\n", unit, strerror(errno));
697 if (ioctl(fd, SIOCGIFADDR, enaddr) < 0) {
698 kprintf(VKE_DEVNAME "%d: ioctl(SIOCGIFADDR) "
699 "failed: %s\n", unit, strerror(errno));
/*
 * Other path: read() stores four bytes from /dev/urandom in enaddr[2..5]
 * and its return value is discarded; enaddr[4] and enaddr[5] are then
 * overwritten from the pid, so only enaddr[2] and enaddr[3] keep random
 * bytes.
 */
703 int fd = open("/dev/urandom", O_RDONLY);
705 read(fd, enaddr + 2, 4);
708 enaddr[4] = (int)getpid() >> 8;
709 enaddr[5] = (int)getpid() & 255;
/* Zeroed softc plus an MCLBYTES buffer that vke_tx_thread() copies outgoing packets into. */
714 sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
716 sc->sc_txbuf = kmalloc(MCLBYTES, M_DEVBUF, M_WAITOK);
/* Copy the backend unit and any pre-configured address/netmask out of info. */
719 sc->sc_tap_unit = info->tap_unit;
720 sc->sc_addr = info->netif_addr;
721 sc->sc_mask = info->netif_mask;
723 ifp = &sc->arpcom.ac_if;
724 if_initname(ifp, VKE_DEVNAME, sc->sc_unit);
726 /* NB: after if_initname() */
/* Create a sysctl node named after the interface under _hw. */
727 sysctl_ctx_init(&sc->sc_sysctl_ctx);
728 sc->sc_sysctl_tree = SYSCTL_ADD_NODE(&sc->sc_sysctl_ctx,
729 SYSCTL_STATIC_CHILDREN(_hw),
730 OID_AUTO, ifp->if_xname,
/* A NULL node is logged; how that branch relates to the tap_unit entry added below is not visible in this excerpt. */
732 if (sc->sc_sysctl_tree == NULL) {
733 kprintf(VKE_DEVNAME "%d: can't add sysctl node\n", unit);
735 SYSCTL_ADD_INT(&sc->sc_sysctl_ctx,
736 SYSCTL_CHILDREN(sc->sc_sysctl_tree),
737 OID_AUTO, "tap_unit",
738 CTLFLAG_RD, &sc->sc_tap_unit, 0,
739 "Backend tap(4) unit");
/* Install the driver entry points. */
743 ifp->if_ioctl = vke_ioctl;
744 ifp->if_start = vke_start;
745 ifp->if_init = vke_init;
/* NOTE(review): tapinfo is only seen being filled by the TAPGIFINFO ioctl on the tap_unit >= 0 path - confirm it is initialised on the other path. */
746 ifp->if_mtu = tapinfo.mtu;
747 ifp->if_baudrate = tapinfo.baudrate;
748 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
749 ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
750 ifq_set_ready(&ifp->if_snd);
/* Attach as an ethernet interface with the address prepared in enaddr. */
754 ether_ifattach(ifp, enaddr, NULL);
/* With bootverbose set, report a pre-configured address and netmask converted with ntohl(). */
756 if (bootverbose && sc->sc_addr != 0) {
757 if_printf(ifp, "pre-configured "
758 "address 0x%08x, netmask 0x%08x\n",
759 ntohl(sc->sc_addr), ntohl(sc->sc_mask));
/*
 * Adds an IPv4 address to the interface through in_control(SIOCAIFADDR).
 * ifp: interface; addr / mask: address and netmask, passed to ntohl()
 * when printed and stored unchanged in the request.
 */
766 vke_init_addr(struct ifnet *ifp, in_addr_t addr, in_addr_t mask)
768 struct ifaliasreq ifra;
769 struct sockaddr_in *sin;
772 ASSERT_SERIALIZED(ifp->if_serializer);
/* Log the address being added; any condition guarding this message is not visible in this excerpt. */
775 if_printf(ifp, "add pre-configured "
776 "address 0x%08x, netmask 0x%08x\n",
777 ntohl(addr), ntohl(mask));
/* Start from an all-zero request. */
780 bzero(&ifra, sizeof(ifra));
782 /* NB: no need to set ifaliasreq.ifra_name */
/* Address sockaddr: family, length and address are all set. */
784 sin = (struct sockaddr_in *)&ifra.ifra_addr;
785 sin->sin_family = AF_INET;
786 sin->sin_len = sizeof(*sin);
787 sin->sin_addr.s_addr = addr;
/* Netmask sockaddr: the visible lines set only length and address, not sin_family. */
790 sin = (struct sockaddr_in *)&ifra.ifra_mask;
791 sin->sin_len = sizeof(*sin);
792 sin->sin_addr.s_addr = mask;
796 * Temporarily release serializer, in_control() will hold
797 * it again before calling ifnet.if_ioctl().
/* Drop the serializer around in_control() and take it again afterwards; the result is kept in ret. */
799 ifnet_deserialize_all(ifp);
800 ret = in_control(NULL, SIOCAIFADDR, (caddr_t)&ifra, ifp, NULL);
801 ifnet_serialize_all(ifp);