2 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * -------------------------------------------------------------------------
29 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
30 * Nottingham University 1987.
34 * $FreeBSD: src/sys/net/if_tap.c,v 1.3.2.3 2002/04/14 21:41:48 luigi Exp $
35 * $DragonFly: src/sys/net/tap/if_tap.c,v 1.36 2007/07/03 17:40:51 dillon Exp $
36 * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
41 #include <sys/param.h>
43 #include <sys/device.h>
44 #include <sys/filedesc.h>
45 #include <sys/filio.h>
46 #include <sys/kernel.h>
47 #include <sys/malloc.h>
51 #include <sys/signalvar.h>
52 #include <sys/socket.h>
53 #include <sys/sockio.h>
54 #include <sys/sysctl.h>
55 #include <sys/systm.h>
56 #include <sys/thread2.h>
57 #include <sys/ttycom.h>
59 #include <sys/vnode.h>
60 #include <sys/serialize.h>
63 #include <net/ethernet.h>
65 #include <net/ifq_var.h>
66 #include <net/if_arp.h>
67 #include <net/route.h>
69 #include <netinet/in.h>
71 #include "if_tapvar.h"
75 #define CDEV_NAME "tap"
76 #define CDEV_MAJOR 149
77 #define TAPDEBUG if (tapdebug) if_printf
81 #define VMNET_DEV_MASK 0x00010000
84 static int tapmodevent (module_t, int, void *);
87 static void tapcreate (cdev_t);
89 /* network interface */
90 static void tapifstart (struct ifnet *);
91 static int tapifioctl (struct ifnet *, u_long, caddr_t,
93 static void tapifinit (void *);
95 /* character device */
96 static d_open_t tapopen;
97 static d_close_t tapclose;
98 static d_read_t tapread;
99 static d_write_t tapwrite;
100 static d_ioctl_t tapioctl;
101 static d_poll_t tappoll;
102 static d_kqfilter_t tapkqfilter;
104 static struct dev_ops tap_ops = {
105 { CDEV_NAME, CDEV_MAJOR, 0 },
112 .d_kqfilter = tapkqfilter
115 static int taprefcnt = 0; /* module ref. counter */
116 static int taplastunit = -1; /* max. open unit number */
117 static int tapdebug = 0; /* debug flag */
119 MALLOC_DECLARE(M_TAP);
120 MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
121 SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");
122 DEV_MODULE(if_tap, tapmodevent, NULL);
127 * module event handler
130 tapmodevent(module_t mod, int type, void *data)
132 static int attached = 0;
133 struct ifnet *ifp = NULL;
141 dev_ops_add(&tap_ops, 0, 0);
149 dev_ops_remove(&tap_ops, 0, 0);
151 /* XXX: maintain tap ifs in a local list */
153 while (unit <= taplastunit) {
154 TAILQ_FOREACH(ifp, &ifnet, if_link) {
155 if ((strcmp(ifp->if_dname, TAP) == 0) ||
156 (strcmp(ifp->if_dname, VMNET) == 0)) {
157 if (ifp->if_dunit == unit)
163 struct tap_softc *tp = ifp->if_softc;
165 TAPDEBUG(ifp, "detached. minor = %#x, " \
166 "taplastunit = %d\n",
167 minor(tp->tap_dev), taplastunit);
170 destroy_dev(tp->tap_dev);
191 * to create interface
194 tapcreate(cdev_t dev)
196 struct ifnet *ifp = NULL;
197 struct tap_softc *tp = NULL;
198 uint8_t ether_addr[ETHER_ADDR_LEN];
202 /* allocate driver storage and create device */
203 MALLOC(tp, struct tap_softc *, sizeof(*tp), M_TAP, M_WAITOK);
204 bzero(tp, sizeof(*tp));
206 /* select device: tap or vmnet */
207 if (minor(dev) & VMNET_DEV_MASK) {
209 unit = lminor(dev) & 0xff;
210 tp->tap_flags |= TAP_VMNET;
217 tp->tap_dev = make_dev(&tap_ops, minor(dev), UID_ROOT, GID_WHEEL,
218 0600, "%s%d", name, unit);
219 tp->tap_dev->si_drv1 = dev->si_drv1 = tp;
220 reference_dev(tp->tap_dev); /* so we can destroy it later */
222 /* generate fake MAC address: 00 bd xx xx xx unit_no */
223 ether_addr[0] = 0x00;
224 ether_addr[1] = 0xbd;
225 bcopy(&ticks, &ether_addr[2], 3); /* bytes 2..4: first 3 bytes of ticks as stored in memory */
226 ether_addr[5] = (u_char)unit;
228 /* fill the rest and attach interface */
232 if_initname(ifp, name, unit);
233 if (unit > taplastunit)
236 ifp->if_init = tapifinit;
237 ifp->if_start = tapifstart;
238 ifp->if_ioctl = tapifioctl;
239 ifp->if_mtu = ETHERMTU;
240 ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
241 ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
242 ifq_set_ready(&ifp->if_snd);
244 ether_ifattach(ifp, ether_addr, NULL);
246 tp->tap_flags |= TAP_INITED;
248 TAPDEBUG(ifp, "created. minor = %#x\n", minor(tp->tap_dev));
255 * to open tunnel. must be superuser
258 tapopen(struct dev_open_args *ap)
260 cdev_t dev = ap->a_head.a_dev;
261 struct tap_softc *tp = NULL;
262 struct ifnet *ifp = NULL;
265 if ((error = suser_cred(ap->a_cred, 0)) != 0)
273 ifp = &tp->arpcom.ac_if;
275 ifp = &tp->arpcom.ac_if;
277 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
279 /* Announce the return of the interface. */
280 rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
283 if (tp->tap_flags & TAP_OPEN) {
288 bcopy(tp->arpcom.ac_enaddr, tp->ether_addr, sizeof(tp->ether_addr));
290 tp->tap_td = curthread;
291 tp->tap_flags |= TAP_OPEN;
294 TAPDEBUG(ifp, "opened. minor = %#x, refcnt = %d, taplastunit = %d\n",
295 minor(tp->tap_dev), taprefcnt, taplastunit);
305 * close the device - mark i/f down & delete routing info
308 tapclose(struct dev_close_args *ap)
310 cdev_t dev = ap->a_head.a_dev;
311 struct tap_softc *tp = dev->si_drv1;
312 struct ifnet *ifp = &tp->tap_if;
314 /* junk all pending output */
317 lwkt_serialize_enter(ifp->if_serializer);
318 ifq_purge(&ifp->if_snd);
319 lwkt_serialize_exit(ifp->if_serializer);
322 * do not bring the interface down, and do not do anything with the
323 * interface, if we are in VMnet mode. just close the device.
326 if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) {
327 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
329 /* Announce the departure of the interface. */
330 rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
333 lwkt_serialize_enter(ifp->if_serializer);
334 if (ifp->if_flags & IFF_RUNNING) {
335 /* find internet addresses and delete routes */
336 struct ifaddr *ifa = NULL;
338 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
339 if (ifa->ifa_addr->sa_family == AF_INET) {
340 rtinit(ifa, (int)RTM_DELETE, 0);
342 /* remove address from interface */
344 sizeof(*(ifa->ifa_addr)));
345 bzero(ifa->ifa_dstaddr,
346 sizeof(*(ifa->ifa_dstaddr)));
347 bzero(ifa->ifa_netmask,
348 sizeof(*(ifa->ifa_netmask)));
352 ifp->if_flags &= ~IFF_RUNNING;
354 lwkt_serialize_exit(ifp->if_serializer);
357 funsetown(tp->tap_sigio);
358 selwakeup(&tp->tap_rsel);
360 tp->tap_flags &= ~TAP_OPEN;
366 if_printf(ifp, "minor = %#x, refcnt = %d is out of sync. "
367 "set refcnt to 0\n", minor(tp->tap_dev), taprefcnt);
370 TAPDEBUG(ifp, "closed. minor = %#x, refcnt = %d, taplastunit = %d\n",
371 minor(tp->tap_dev), taprefcnt, taplastunit);
381 * Network interface initialization function (called with if serializer held)
388 struct tap_softc *tp = (struct tap_softc *)xtp;
389 struct ifnet *ifp = &tp->tap_if;
391 TAPDEBUG(ifp, "initializing, minor = %#x\n", minor(tp->tap_dev));
393 ifp->if_flags |= IFF_RUNNING;
394 ifp->if_flags &= ~IFF_OACTIVE;
396 /* attempt to start output */
404 * Process an ioctl request on network interface (called with if serializer
410 tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
412 struct tap_softc *tp = (struct tap_softc *)(ifp->if_softc);
413 struct ifstat *ifs = NULL;
420 dummy = ether_ioctl(ifp, cmd, data);
424 if ((tp->tap_flags & TAP_VMNET) == 0) {
426 * Only for non-vmnet tap(4)
428 if (ifp->if_flags & IFF_UP) {
429 if ((ifp->if_flags & IFF_RUNNING) == 0)
434 case SIOCADDMULTI: /* XXX -- just like vmnet does */
439 ifs = (struct ifstat *)data;
440 dummy = strlen(ifs->ascii);
441 if (tp->tap_td != NULL && dummy < sizeof(ifs->ascii)) {
442 if (tp->tap_td->td_proc) {
443 ksnprintf(ifs->ascii + dummy,
444 sizeof(ifs->ascii) - dummy,
445 "\tOpened by pid %d\n",
446 (int)tp->tap_td->td_proc->p_pid);
448 ksnprintf(ifs->ascii + dummy,
449 sizeof(ifs->ascii) - dummy,
450 "\tOpened by td %p\n", tp->tap_td);
466 * Queue packets from higher level ready to put out (called with if serializer
472 tapifstart(struct ifnet *ifp)
474 struct tap_softc *tp = ifp->if_softc;
476 TAPDEBUG(ifp, "starting, minor = %#x\n", minor(tp->tap_dev));
479 * do not junk pending output if we are in VMnet mode.
480 * XXX: can this do any harm because of queue overflow?
483 if (((tp->tap_flags & TAP_VMNET) == 0) &&
484 ((tp->tap_flags & TAP_READY) != TAP_READY)) {
485 TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
486 minor(tp->tap_dev), tp->tap_flags);
488 ifq_purge(&ifp->if_snd);
492 ifp->if_flags |= IFF_OACTIVE;
494 if (!ifq_is_empty(&ifp->if_snd)) {
495 if (tp->tap_flags & TAP_RWAIT) {
496 tp->tap_flags &= ~TAP_RWAIT;
499 KNOTE(&tp->tap_rsel.si_note, 0);
501 if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
503 pgsigio(tp->tap_sigio, SIGIO, 0);
508 * selwakeup is not MPSAFE. tapifstart is.
511 selwakeup(&tp->tap_rsel);
513 ifp->if_opackets ++; /* obytes are counted in ether_output */
516 ifp->if_flags &= ~IFF_OACTIVE;
523 * The ops interface is now pretty minimal. Called via fileops with nothing
529 tapioctl(struct dev_ioctl_args *ap)
531 cdev_t dev = ap->a_head.a_dev;
532 caddr_t data = ap->a_data;
533 struct tap_softc *tp = dev->si_drv1;
534 struct ifnet *ifp = &tp->tap_if;
535 struct tapinfo *tapp = NULL;
540 lwkt_serialize_enter(ifp->if_serializer);
545 tapp = (struct tapinfo *)data;
546 ifp->if_mtu = tapp->mtu;
547 ifp->if_type = tapp->type;
548 ifp->if_baudrate = tapp->baudrate;
552 tapp = (struct tapinfo *)data;
553 tapp->mtu = ifp->if_mtu;
554 tapp->type = ifp->if_type;
555 tapp->baudrate = ifp->if_baudrate;
559 tapdebug = *(int *)data;
563 *(int *)data = tapdebug;
568 tp->tap_flags |= TAP_ASYNC;
570 tp->tap_flags &= ~TAP_ASYNC;
575 if ((mb = ifq_poll(&ifp->if_snd)) != NULL) {
576 for(; mb != NULL; mb = mb->m_next)
577 *(int *)data += mb->m_len;
582 error = fsetown(*(int *)data, &tp->tap_sigio);
586 *(int *)data = fgetown(tp->tap_sigio);
589 /* this is deprecated, FIOSETOWN should be used instead */
591 error = fsetown(-(*(int *)data), &tp->tap_sigio);
594 /* this is deprecated, FIOGETOWN should be used instead */
596 *(int *)data = -fgetown(tp->tap_sigio);
599 /* VMware/VMnet port ioctl's */
601 case SIOCGIFFLAGS: /* get ifnet flags */
602 bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
605 case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
608 f &= ~IFF_CANTCHANGE;
610 ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
613 case OSIOCGIFADDR: /* get MAC address of the remote side */
615 bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
618 case SIOCSIFADDR: /* set MAC address of the remote side */
619 bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
626 lwkt_serialize_exit(ifp->if_serializer);
634 * The ops read interface - reads a packet at a time, or at
635 * least as much of a packet as can be read.
637 * Called from the fileops interface with nothing held.
642 tapread(struct dev_read_args *ap)
644 cdev_t dev = ap->a_head.a_dev;
645 struct uio *uio = ap->a_uio;
646 struct tap_softc *tp = dev->si_drv1;
647 struct ifnet *ifp = &tp->tap_if;
648 struct mbuf *m0 = NULL;
651 TAPDEBUG(ifp, "reading, minor = %#x\n", minor(tp->tap_dev));
653 if ((tp->tap_flags & TAP_READY) != TAP_READY) {
654 TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
655 minor(tp->tap_dev), tp->tap_flags);
660 tp->tap_flags &= ~TAP_RWAIT;
662 /* sleep until we get a packet */
664 lwkt_serialize_enter(ifp->if_serializer);
665 m0 = ifq_dequeue(&ifp->if_snd, NULL);
667 if (ap->a_ioflag & IO_NDELAY) {
668 lwkt_serialize_exit(ifp->if_serializer);
669 return (EWOULDBLOCK);
671 tp->tap_flags |= TAP_RWAIT;
673 tsleep_interlock(tp);
674 lwkt_serialize_exit(ifp->if_serializer);
675 error = tsleep(tp, PCATCH, "taprd", 0);
680 lwkt_serialize_exit(ifp->if_serializer);
682 } while (m0 == NULL);
686 /* xfer packet to user space */
687 while ((m0 != NULL) && (uio->uio_resid > 0) && (error == 0)) {
688 len = min(uio->uio_resid, m0->m_len);
692 error = uiomove(mtod(m0, caddr_t), len, uio);
697 TAPDEBUG(ifp, "dropping mbuf, minor = %#x\n",
708 * The ops write interface - an atomic write is a packet - or else!
710 * Called from the fileops interface with nothing held.
715 tapwrite(struct dev_write_args *ap)
717 cdev_t dev = ap->a_head.a_dev;
718 struct uio *uio = ap->a_uio;
719 struct tap_softc *tp = dev->si_drv1;
720 struct ifnet *ifp = &tp->tap_if;
721 struct mbuf *top = NULL, **mp = NULL, *m = NULL;
722 int error = 0, tlen, mlen;
724 TAPDEBUG(ifp, "writing, minor = %#x\n", minor(tp->tap_dev));
726 if (uio->uio_resid == 0)
729 if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
730 TAPDEBUG(ifp, "invalid packet len = %d, minor = %#x\n",
731 uio->uio_resid, minor(tp->tap_dev));
735 tlen = uio->uio_resid;
737 /* get a header mbuf */
738 MGETHDR(m, MB_DONTWAIT, MT_DATA);
745 while ((error == 0) && (uio->uio_resid > 0)) {
746 m->m_len = min(mlen, uio->uio_resid);
747 error = uiomove(mtod(m, caddr_t), m->m_len, uio);
750 if (uio->uio_resid > 0) {
751 MGET(m, MB_DONTWAIT, MT_DATA);
766 top->m_pkthdr.len = tlen;
767 top->m_pkthdr.rcvif = ifp;
770 * Ethernet bridge and bpf are handled in ether_input
772 * adjust mbuf and give packet to the ether_input
774 lwkt_serialize_enter(ifp->if_serializer);
775 ifp->if_input(ifp, top);
776 ifp->if_ipackets ++; /* ibytes are counted in ether_input */
777 lwkt_serialize_exit(ifp->if_serializer);
785 * The poll interface, this is only useful on reads really. The write
786 * detect always returns true, write never blocks anyway, it either
787 * accepts the packet or drops it
789 * Called from the fileops interface with nothing held.
794 tappoll(struct dev_poll_args *ap)
796 cdev_t dev = ap->a_head.a_dev;
797 struct tap_softc *tp = dev->si_drv1;
798 struct ifnet *ifp = &tp->tap_if;
801 TAPDEBUG(ifp, "polling, minor = %#x\n", minor(tp->tap_dev));
803 lwkt_serialize_enter(ifp->if_serializer);
804 if (ap->a_events & (POLLIN | POLLRDNORM)) {
805 if (!ifq_is_empty(&ifp->if_snd)) {
807 "has data in queue. minor = %#x\n",
810 revents |= (ap->a_events & (POLLIN | POLLRDNORM));
812 TAPDEBUG(ifp, "waiting for data, minor = %#x\n",
816 selrecord(curthread, &tp->tap_rsel);
820 lwkt_serialize_exit(ifp->if_serializer);
822 if (ap->a_events & (POLLOUT | POLLWRNORM))
823 revents |= (ap->a_events & (POLLOUT | POLLWRNORM));
824 ap->a_events = revents;
829 * tapkqfilter - called from the fileops interface with nothing held
833 static int filt_tapread(struct knote *kn, long hint);
834 static void filt_tapdetach(struct knote *kn);
835 static struct filterops tapread_filtops =
836 { 1, NULL, filt_tapdetach, filt_tapread };
839 tapkqfilter(struct dev_kqfilter_args *ap)
841 cdev_t dev = ap->a_head.a_dev;
842 struct knote *kn = ap->a_kn;
843 struct tap_softc *tp;
852 switch(kn->kn_filter) {
854 list = &tp->tap_rsel.si_note;
855 kn->kn_fop = &tapread_filtops;
856 kn->kn_hook = (void *)tp;
866 SLIST_INSERT_HEAD(list, kn, kn_selnext);
873 filt_tapread(struct knote *kn, long hint)
875 struct tap_softc *tp = (void *)kn->kn_hook;
876 struct ifnet *ifp = &tp->tap_if;
878 if (ifq_is_empty(&ifp->if_snd) == 0) {
886 filt_tapdetach(struct knote *kn)
888 struct tap_softc *tp = (void *)kn->kn_hook;
890 SLIST_REMOVE(&tp->tap_rsel.si_note, kn, knote, kn_selnext);