2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Sepherosa Ziehau <sepherosa@gmail.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/dev/virtual/net/if_vke.c,v 1.10 2008/05/27 23:44:46 dillon Exp $
37 #include <sys/param.h>
38 #include <sys/endian.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
42 #include <sys/serialize.h>
43 #include <sys/socket.h>
44 #include <sys/sockio.h>
45 #include <sys/sysctl.h>
47 #include <machine/md_var.h>
48 #include <machine/cothread.h>
50 #include <net/ethernet.h>
53 #include <net/if_arp.h>
54 #include <net/ifq_var.h>
56 #include <netinet/in_var.h>
59 #include <net/tap/if_tap.h>
/* Driver name and tunables for the vke virtual-kernel ethernet interface. */
67 #define VKE_DEVNAME "vke"
69 #define VKE_CHUNK 8 /* number of mbufs to queue before interrupting */
/*
 * FIFO ring geometry.  NETFIFOSIZE is a power of two so NETFIFOMASK can
 * reduce a free-running index to an array slot via a single AND.
 */
71 #define NETFIFOSIZE 256
72 #define NETFIFOMASK (NETFIFOSIZE -1)
73 #define NETFIFOINDEX(u) ((u) & NETFIFOMASK)
/*
 * Cothread lifecycle states used by the tx/rx exit handshake in
 * vke_stop(): the kernel sets EXIT, the cothread acknowledges with DEAD.
 */
75 #define VKE_COTD_RUN 0
76 #define VKE_COTD_EXIT 1
77 #define VKE_COTD_DEAD 2
/*
 * mbuf ring shared between the kernel and a cothread.
 * NOTE(review): this listing appears to have elided the enclosing
 * struct vke_fifo declaration and its windex/rindex members, which the
 * NETFIFOINDEX() users below clearly rely on — confirm against the
 * full source.
 */
80 struct mbuf *array[NETFIFOSIZE];
84 typedef struct vke_fifo *fifo_t;
/*
 * Selected vke_softc members (the surrounding struct declaration is
 * elided in this listing).
 */
101 fifo_t sc_txfifo_done; /* tx mbufs the cothread is finished with */
104 struct sysctl_ctx_list sc_sysctl_ctx;
105 struct sysctl_oid *sc_sysctl_tree;
107 int sc_tap_unit; /* unit of backend tap(4) */
108 in_addr_t sc_addr; /* address */
109 in_addr_t sc_mask; /* netmask */
/* ifnet entry points */
112 static void vke_start(struct ifnet *);
113 static void vke_init(void *);
114 static int vke_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
/* attach/detach and address setup */
116 static int vke_attach(const struct vknetif_info *, int);
117 static int vke_stop(struct vke_softc *);
118 static int vke_init_addr(struct ifnet *, in_addr_t, in_addr_t);
/* cothread bodies and their kernel-side interrupt functions */
119 static void vke_tx_intr(cothread_t cotd);
120 static void vke_tx_thread(cothread_t cotd);
121 static void vke_rx_intr(cothread_t cotd);
122 static void vke_rx_thread(cothread_t cotd);
/* tx/rx FIFO helpers */
124 static int vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m);
125 static struct mbuf *vke_txfifo_dequeue(struct vke_softc *sc);
127 static int vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m);
128 static struct mbuf * vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm);
130 static struct mbuf *vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *nm);
131 static struct mbuf *vke_rxfifo_sniff(struct vke_softc *sc);
/*
 * Boot-time attach: create one vke interface per configured vkernel
 * netif.  NOTE(review): return-type line, locals and loop body are
 * elided in this listing — 'unit' presumably increments on successful
 * attach; confirm against the full source.
 */
134 vke_sysinit(void *arg __unused)
/* Refuse to start with more netifs than the static tables can hold. */
138 KASSERT(NetifNum <= VKNETIF_MAX, ("too many netifs: %d\n", NetifNum));
141 for (i = 0; i < NetifNum; ++i) {
142 if (vke_attach(&NetifInfo[i], unit) == 0)
/* Run after drivers are ready, in the standard driver-init pass. */
146 SYSINIT(vke, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, vke_sysinit, NULL);
149 * vke_txfifo_done_enqueue() - Add an mbuf to the transmit done fifo. Since
150 * the cothread cannot free transmit mbufs after processing we put them on
151 * the done fifo so the kernel can free them.
154 vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m)
156 fifo_t fifo = sc->sc_txfifo_done;
/* Ring full when advancing windex would collide with rindex. */
158 if (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex))
/* Store at the write slot; NOTE(review): the windex advance and the
 * return statements are elided in this listing — presumably returns -1
 * on full, 0 on success like vke_txfifo_enqueue(); confirm. */
161 fifo->array[NETFIFOINDEX(fifo->windex)] = m;
168 * vke_txfifo_done_dequeue() - Remove an mbuf from the transmit done fifo.
/* 'nm' replaces the dequeued slot (may be NULL, see vke_tx_intr()). */
171 vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm)
173 fifo_t fifo = sc->sc_txfifo_done;
/* Ring empty when read and write indices coincide. */
176 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
/* Swap out the current read slot; NOTE(review): rindex advance and
 * return of 'm' are elided in this listing. */
179 m = fifo->array[NETFIFOINDEX(fifo->rindex)];
180 fifo->array[NETFIFOINDEX(fifo->rindex)] = nm;
187 * vke_txfifo_enqueue() - Add an mbuf to the transmit fifo. Wake up the
188 * cothread via cothread_signal().
191 vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m)
193 fifo_t fifo = sc->sc_txfifo;
194 cothread_t cotd = sc->cotd_tx;
/* Ring full: advancing windex would collide with rindex. */
196 if (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex))
199 fifo->array[NETFIFOINDEX(fifo->windex)] = m;
/* Kick the tx cothread so it notices the new packet.
 * NOTE(review): windex advance and return values elided in this
 * listing; callers (vke_start) treat -1 as the failure value. */
201 cothread_signal(cotd);
208 * vke_txfifo_dequeue() - Return next mbuf on the transmit fifo if one
/* exists; slot is cleared to NULL after removal (run by the tx cothread). */
212 vke_txfifo_dequeue(struct vke_softc *sc)
214 fifo_t fifo = sc->sc_txfifo;
/* Empty when read and write indices coincide. */
217 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
220 m = fifo->array[NETFIFOINDEX(fifo->rindex)];
/* NOTE(review): rindex advance and return of 'm' elided in listing. */
221 fifo->array[NETFIFOINDEX(fifo->rindex)] = NULL;
229 * vke_rxfifo_dequeue() - Return next mbuf on the receice fifo if one
230 * exists replacing it with newm which should point to a newly allocated
/* mbuf cluster (the rx ring is kept fully populated with buffers). */
234 vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *newm)
236 fifo_t fifo = sc->sc_rxfifo;
/* Empty when read and write indices coincide. */
239 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
/* Swap the replacement buffer into the read slot.
 * NOTE(review): rindex advance and return of 'm' elided in listing. */
242 m = fifo->array[NETFIFOINDEX(fifo->rindex)];
243 fifo->array[NETFIFOINDEX(fifo->rindex)] = newm;
250 * Return the next mbuf if available but do NOT remove it from the FIFO.
253 vke_rxfifo_sniff(struct vke_softc *sc)
255 fifo_t fifo = sc->sc_rxfifo;
/* Empty when read and write indices coincide; peek otherwise. */
258 if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
261 m = fifo->array[NETFIFOINDEX(fifo->rindex)];
/*
 * vke_init() - ifnet if_init entry point: bring the interface up.
 * Allocates the three mbuf FIFOs, pre-populates the rx ring with
 * clusters, and spawns the tx/rx cothreads.  Caller holds the ifnet
 * serializer.
 */
269 struct vke_softc *sc = xsc;
270 struct ifnet *ifp = &sc->arpcom.ac_if;
273 ASSERT_SERIALIZED(ifp->if_serializer);
278 ifp->if_flags |= IFF_RUNNING;
279 ifp->if_flags &= ~IFF_OACTIVE;
/* Apply the address/netmask handed down on the vkernel command line. */
281 if (sc->sc_addr != 0) {
282 in_addr_t addr, mask;
288 * Make sure vkernel assigned
289 * address will not be added
295 vke_init_addr(ifp, addr, mask);
/* M_WAITOK: allocation cannot fail, no NULL checks needed here. */
298 sc->sc_txfifo = kmalloc(sizeof(*sc->sc_txfifo), M_DEVBUF, M_WAITOK);
299 sc->sc_txfifo_done = kmalloc(sizeof(*sc->sc_txfifo_done), M_DEVBUF, M_WAITOK);
301 sc->sc_rxfifo = kmalloc(sizeof(*sc->sc_rxfifo), M_DEVBUF, M_WAITOK);
/* rx ring starts full of empty clusters; tx rings start empty. */
302 for (i = 0; i < NETFIFOSIZE; i++) {
303 sc->sc_rxfifo->array[i] = m_getcl(MB_WAIT, MT_DATA, M_PKTHDR);
304 sc->sc_txfifo->array[i] = NULL;
305 sc->sc_txfifo_done->array[i] = NULL;
/* Mark both cothreads runnable before creating them. */
308 sc->cotd_tx_exit = sc->cotd_rx_exit = VKE_COTD_RUN;
309 sc->cotd_tx = cothread_create(vke_tx_thread, vke_tx_intr, sc, "vke_tx");
310 sc->cotd_rx = cothread_create(vke_rx_thread, vke_rx_intr, sc, "vke_rx");
/*
 * vke_start() - ifnet if_start entry point: drain the send queue into
 * the tx FIFO, signalling the tx cothread every VKE_CHUNK packets to
 * amortize wakeup cost.  Caller holds the ifnet serializer.
 */
314 vke_start(struct ifnet *ifp)
316 struct vke_softc *sc = ifp->if_softc;
318 cothread_t cotd = sc->cotd_tx;
321 ASSERT_SERIALIZED(ifp->if_serializer);
/* Bail unless running and not already active. */
323 if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
329 while ((m = ifq_dequeue(&ifp->if_snd, NULL)) != NULL) {
330 if (vke_txfifo_enqueue(sc, m) != -1) {
331 if (count++ == VKE_CHUNK) {
332 cothread_signal(cotd);
/* Final signal for any leftover partial chunk.
 * NOTE(review): the enqueue-failure path and count reset are elided
 * in this listing. */
341 cothread_signal(cotd);
344 cothread_unlock(cotd);
/*
 * vke_ioctl() - ifnet if_ioctl entry point.  Handles up/down flag
 * transitions, status reporting, and address changes; everything else
 * falls through to ether_ioctl().  Caller holds the ifnet serializer.
 */
348 vke_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
350 struct vke_softc *sc = ifp->if_softc;
353 ASSERT_SERIALIZED(ifp->if_serializer);
/* SIOCSIFFLAGS (case label elided): init on up, stop on down. */
357 if (ifp->if_flags & IFF_UP) {
358 if ((ifp->if_flags & IFF_RUNNING) == 0)
361 if (ifp->if_flags & IFF_RUNNING)
370 case SIOCGIFSTATUS: {
371 struct ifstat *ifs = (struct ifstat *)data;
/* Append backend tap(4) info to whatever status text is present. */
374 len = strlen(ifs->ascii);
375 if (len < sizeof(ifs->ascii)) {
376 ksnprintf(ifs->ascii + len, sizeof(ifs->ascii) - len,
377 "\tBacked by tap%d\n", sc->sc_tap_unit);
/* SIOCSIFADDR path (case label elided): an explicit address change
 * invalidates the vkernel command-line preconfiguration. */
382 if (((struct ifaddr *)data)->ifa_addr->sa_family == AF_INET) {
384 * If we are explicitly requested to change address,
385 * we should invalidate address/netmask passed in
386 * from vkernel command line.
393 error = ether_ioctl(ifp, cmd, data);
/*
 * vke_stop() - Bring the interface down: ask both cothreads to exit,
 * wait for each to acknowledge with VKE_COTD_DEAD, then free all mbufs
 * still sitting in the FIFOs and the FIFOs themselves.  Caller holds
 * the ifnet serializer.
 */
400 vke_stop(struct vke_softc *sc)
402 struct ifnet *ifp = &sc->arpcom.ac_if;
405 ASSERT_SERIALIZED(ifp->if_serializer);
407 ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
/* Handshake the tx cothread out: EXIT under its lock, then wait for
 * the DEAD acknowledgement (wait-loop body elided in this listing). */
411 cothread_lock(sc->cotd_tx);
412 if (sc->cotd_tx_exit == VKE_COTD_RUN)
413 sc->cotd_tx_exit = VKE_COTD_EXIT;
414 cothread_signal(sc->cotd_tx);
415 cothread_unlock(sc->cotd_tx);
416 while (sc->cotd_tx_exit != VKE_COTD_DEAD) {
/* Same handshake for the rx cothread. */
421 cothread_lock(sc->cotd_rx);
422 if (sc->cotd_rx_exit == VKE_COTD_RUN)
423 sc->cotd_rx_exit = VKE_COTD_EXIT;
424 cothread_signal(sc->cotd_rx);
425 cothread_unlock(sc->cotd_rx);
426 while (sc->cotd_rx_exit != VKE_COTD_DEAD) {
/* Both cothreads are dead; safe to free every mbuf left in the rings. */
431 for (i = 0; i < NETFIFOSIZE; i++) {
432 if (sc->sc_rxfifo && sc->sc_rxfifo->array[i]) {
433 m_freem(sc->sc_rxfifo->array[i]);
434 sc->sc_rxfifo->array[i] = NULL;
436 if (sc->sc_txfifo && sc->sc_txfifo->array[i]) {
437 m_freem(sc->sc_txfifo->array[i]);
438 sc->sc_txfifo->array[i] = NULL;
440 if (sc->sc_txfifo_done && sc->sc_txfifo_done->array[i]) {
441 m_freem(sc->sc_txfifo_done->array[i]);
442 sc->sc_txfifo_done->array[i] = NULL;
/* Release the ring structures; NULL so a re-init starts clean. */
447 kfree(sc->sc_txfifo, M_DEVBUF);
448 sc->sc_txfifo = NULL;
451 if (sc->sc_txfifo_done) {
452 kfree(sc->sc_txfifo_done, M_DEVBUF);
453 sc->sc_txfifo_done = NULL;
457 kfree(sc->sc_rxfifo, M_DEVBUF);
458 sc->sc_rxfifo = NULL;
467 * vke_rx_intr() is the interrupt function for the receive cothread.
/* Runs in kernel context: pulls completed packets off the rx FIFO,
 * replaces each slot with a fresh cluster, and feeds them to if_input. */
470 vke_rx_intr(cothread_t cotd)
474 struct vke_softc *sc = cotd->arg;
475 struct ifnet *ifp = &sc->arpcom.ac_if;
/* NOTE(review): 'static' makes this counter shared by every vke
 * instance and persistent across calls — looks like a throttle for
 * cothread_signal() every VKE_CHUNK packets, but confirm whether
 * per-call/per-softc state was intended. */
476 static int count = 0;
478 ifnet_serialize_all(ifp);
/* If we are being torn down, just drop the lock and leave. */
481 if (sc->cotd_rx_exit != VKE_COTD_RUN) {
482 cothread_unlock(cotd);
483 ifnet_deserialize_all(ifp);
487 while ((m = vke_rxfifo_sniff(sc)) != NULL) {
488 nm = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
/* Got a replacement cluster: swap it in and hand the packet up. */
490 vke_rxfifo_dequeue(sc, nm);
491 ifp->if_input(ifp, m);
492 if (count++ == VKE_CHUNK) {
493 cothread_signal(cotd);
/* Allocation failed (else-branch elided): recycle the same mbuf back
 * into the ring, dropping the packet rather than the buffer. */
497 vke_rxfifo_dequeue(sc, m);
502 cothread_signal(cotd);
504 cothread_unlock(cotd);
505 ifnet_deserialize_all(ifp);
509 * vke_tx_intr() is the interrupt function for the transmit cothread.
510 * Calls vke_start() to handle processing transmit mbufs.
513 vke_tx_intr(cothread_t cotd)
515 struct vke_softc *sc = cotd->arg;
516 struct ifnet *ifp = &sc->arpcom.ac_if;
519 ifnet_serialize_all(ifp);
/* If we are being torn down, just drop the lock and leave. */
522 if (sc->cotd_tx_exit != VKE_COTD_RUN) {
523 cothread_unlock(cotd);
524 ifnet_deserialize_all(ifp);
528 if ((ifp->if_flags & IFF_RUNNING) == 0)
531 /* Free TX mbufs that have been processed */
532 while ((m = vke_txfifo_done_dequeue(sc, NULL)) != NULL) {
536 cothread_unlock(cotd);
/* NOTE(review): the m_freem() call inside the loop and the vke_start()
 * invocation mentioned in the header comment are elided here. */
538 ifnet_deserialize_all(ifp);
542 * vke_rx_thread() is the body of the receive cothread.
/* Userspace side: reads packets from the tap fd into the pre-allocated
 * rx ring, signalling the kernel every VKE_CHUNK packets, and uses a
 * finite select() timeout so the exit flag is polled regularly. */
545 vke_rx_thread(cothread_t cotd)
548 struct vke_softc *sc = cotd->arg;
549 struct ifnet *ifp = &sc->arpcom.ac_if;
551 fifo_t fifo = sc->sc_rxfifo;
555 /* Select timeout cannot be infinite since we need to check for
556 * the exit flag sc->cotd_rx_exit.
568 while (sc->cotd_rx_exit == VKE_COTD_RUN) {
569 /* Wait for the RX FIFO to drain */
570 while (NETFIFOINDEX(fifo->windex + 1) ==
571 NETFIFOINDEX(fifo->rindex)) {
/* Ring has room and a buffer is staged at the write slot: drop the
 * cothread lock around the (blocking) read of one frame. */
575 if ((m = fifo->array[NETFIFOINDEX(fifo->windex)]) !=
577 cothread_unlock(cotd);
578 n = read(sc->sc_fd, mtod(m, void *), MCLBYTES);
/* Successful read: finish the mbuf header with the frame length.
 * NOTE(review): the n<=0 error path, windex advance, and the relock
 * are elided in this listing. */
584 m->m_pkthdr.rcvif = ifp;
585 m->m_pkthdr.len = m->m_len = n;
/* Batch notification: signal the kernel every VKE_CHUNK frames. */
588 if (count++ == VKE_CHUNK) {
599 if (sc->cotd_rx_exit != VKE_COTD_RUN)
602 cothread_unlock(cotd);
604 /* Set up data for select() call */
605 FD_SET(sc->sc_fd, &fdset);
607 if (select(sc->sc_fd + 1, &fdset, NULL, NULL, &tv) == -1)
608 kprintf(VKE_DEVNAME "%d: select failed for TAP device\n", sc->sc_unit);
/* Acknowledge the exit request so vke_stop() can proceed. */
613 sc->cotd_rx_exit = VKE_COTD_DEAD;
614 cothread_unlock(cotd);
618 * vke_tx_thread() is the body of the transmit cothread.
/* Userspace side: copies each queued mbuf into a bounce buffer, writes
 * it to the tap fd, and returns the mbuf via the done FIFO so the
 * kernel can free it. */
621 vke_tx_thread(cothread_t cotd)
624 struct vke_softc *sc = cotd->arg;
625 struct ifnet *ifp = &sc->arpcom.ac_if;
630 while (sc->cotd_tx_exit == VKE_COTD_RUN) {
631 /* Write outgoing packets to the TAP interface */
632 while ((m = vke_txfifo_dequeue(sc)) != NULL) {
/* Oversized frames (> MCLBYTES) are skipped; the else-branch is
 * elided in this listing. */
633 if (m->m_pkthdr.len <= MCLBYTES) {
634 m_copydata(m, 0, m->m_pkthdr.len, sc->sc_txbuf);
635 sc->sc_txbuf_len = m->m_pkthdr.len;
/* Drop the cothread lock around the (blocking) write. */
636 cothread_unlock(cotd);
637 if (write(sc->sc_fd, sc->sc_txbuf, sc->sc_txbuf_len) < 0) {
/* Hand the mbuf back to the kernel for freeing. */
642 vke_txfifo_done_enqueue(sc, m);
/* Batch notification: signal the kernel every VKE_CHUNK frames
 * (signal call elided in this listing). */
644 if (count++ == VKE_CHUNK) {
656 cothread_wait(cotd); /* interlocks cothread lock */
/* Acknowledge the exit request so vke_stop() can proceed. */
659 sc->cotd_tx_exit = VKE_COTD_DEAD;
660 cothread_unlock(cotd);
/*
 * vke_attach() - Create and attach one vke interface backed by the
 * given vknetif_info (tap fd, optional preconfigured address/netmask).
 * Queries MTU/MAC from the tap device when one is present, otherwise
 * synthesizes a MAC; then registers the ifnet and a sysctl subtree.
 */
664 vke_attach(const struct vknetif_info *info, int unit)
666 struct vke_softc *sc;
668 struct tapinfo tapinfo;
669 uint8_t enaddr[ETHER_ADDR_LEN];
672 KKASSERT(info->tap_fd >= 0);
676 * This is only a TAP device if tap_unit is non-zero. If
677 * connecting to a virtual socket we generate a unique MAC.
679 if (info->tap_unit >= 0) {
/* Pull MTU/baudrate and the MAC address from the backend tap(4). */
680 if (ioctl(fd, TAPGIFINFO, &tapinfo) < 0) {
681 kprintf(VKE_DEVNAME "%d: ioctl(TAPGIFINFO) "
682 "failed: %s\n", unit, strerror(errno));
686 if (ioctl(fd, SIOCGIFADDR, enaddr) < 0) {
687 kprintf(VKE_DEVNAME "%d: ioctl(SIOCGIFADDR) "
688 "failed: %s\n", unit, strerror(errno));
/* No tap backend: synthesize a MAC from /dev/urandom plus the pid.
 * NOTE(review): the read() result is unchecked and the locally-
 * administered/unicast bits appear to be set on elided lines —
 * confirm against the full source. */
692 int fd = open("/dev/urandom", O_RDONLY);
694 read(fd, enaddr + 2, 4);
697 enaddr[4] = (int)getpid() >> 8;
698 enaddr[5] = (int)getpid() & 255;
703 sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
/* Persistent bounce buffer for vke_tx_thread(), one cluster in size. */
705 sc->sc_txbuf = kmalloc(MCLBYTES, M_DEVBUF, M_WAITOK);
708 sc->sc_tap_unit = info->tap_unit;
709 sc->sc_addr = info->netif_addr;
710 sc->sc_mask = info->netif_mask;
712 ifp = &sc->arpcom.ac_if;
713 if_initname(ifp, VKE_DEVNAME, sc->sc_unit);
715 /* NB: after if_initname() */
716 sysctl_ctx_init(&sc->sc_sysctl_ctx);
717 sc->sc_sysctl_tree = SYSCTL_ADD_NODE(&sc->sc_sysctl_ctx,
718 SYSCTL_STATIC_CHILDREN(_hw),
719 OID_AUTO, ifp->if_xname,
/* Sysctl failure is non-fatal: log and continue without the node. */
721 if (sc->sc_sysctl_tree == NULL) {
722 kprintf(VKE_DEVNAME "%d: can't add sysctl node\n", unit);
724 SYSCTL_ADD_INT(&sc->sc_sysctl_ctx,
725 SYSCTL_CHILDREN(sc->sc_sysctl_tree),
726 OID_AUTO, "tap_unit",
727 CTLFLAG_RD, &sc->sc_tap_unit, 0,
728 "Backend tap(4) unit");
/* Standard ethernet ifnet setup; MTU/baudrate come from the tap. */
732 ifp->if_ioctl = vke_ioctl;
733 ifp->if_start = vke_start;
734 ifp->if_init = vke_init;
735 ifp->if_mtu = tapinfo.mtu;
736 ifp->if_baudrate = tapinfo.baudrate;
737 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
738 ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
739 ifq_set_ready(&ifp->if_snd);
743 ether_ifattach(ifp, enaddr, NULL);
745 if (bootverbose && sc->sc_addr != 0) {
746 if_printf(ifp, "pre-configured "
747 "address 0x%08x, netmask 0x%08x\n",
748 ntohl(sc->sc_addr), ntohl(sc->sc_mask));
/*
 * vke_init_addr() - Assign the vkernel-preconfigured IPv4 address and
 * netmask to the interface via in_control(SIOCAIFADDR).  Caller holds
 * the ifnet serializer; it is dropped around in_control() which takes
 * it itself.
 */
755 vke_init_addr(struct ifnet *ifp, in_addr_t addr, in_addr_t mask)
757 struct ifaliasreq ifra;
758 struct sockaddr_in *sin;
761 ASSERT_SERIALIZED(ifp->if_serializer);
764 if_printf(ifp, "add pre-configured "
765 "address 0x%08x, netmask 0x%08x\n",
766 ntohl(addr), ntohl(mask));
769 bzero(&ifra, sizeof(ifra));
771 /* NB: no need to set ifaliasreq.ifra_name */
/* Fill in the address... */
773 sin = (struct sockaddr_in *)&ifra.ifra_addr;
774 sin->sin_family = AF_INET;
775 sin->sin_len = sizeof(*sin);
776 sin->sin_addr.s_addr = addr;
/* ...and the netmask (sin_family is not required for the mask). */
779 sin = (struct sockaddr_in *)&ifra.ifra_mask;
780 sin->sin_len = sizeof(*sin);
781 sin->sin_addr.s_addr = mask;
785 * Temporarily release serializer, in_control() will hold
786 * it again before calling ifnet.if_ioctl().
788 ifnet_deserialize_all(ifp);
789 ret = in_control(NULL, SIOCAIFADDR, (caddr_t)&ifra, ifp, NULL);
790 ifnet_serialize_all(ifp);