2 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * -------------------------------------------------------------------------
29 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
30 * Nottingham University 1987.
34 * $FreeBSD: src/sys/net/if_tap.c,v 1.3.2.3 2002/04/14 21:41:48 luigi Exp $
35 * $DragonFly: src/sys/net/tap/if_tap.c,v 1.24 2005/12/30 18:37:09 dillon Exp $
36 * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
41 #include <sys/param.h>
43 #include <sys/filedesc.h>
44 #include <sys/filio.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
50 #include <sys/signalvar.h>
51 #include <sys/socket.h>
52 #include <sys/sockio.h>
53 #include <sys/sysctl.h>
54 #include <sys/systm.h>
55 #include <sys/thread2.h>
56 #include <sys/ttycom.h>
58 #include <sys/vnode.h>
59 #include <sys/serialize.h>
62 #include <net/ethernet.h>
64 #include <net/ifq_var.h>
65 #include <net/if_arp.h>
66 #include <net/route.h>
68 #include <netinet/in.h>
70 #include "if_tapvar.h"
74 #define CDEV_NAME "tap" /* name used for the cdevsw entry and for the M_TAP malloc type */
75 #define CDEV_MAJOR 149 /* major number placed in the cdevsw entry */
76 #define TAPDEBUG if (tapdebug) if_printf /* if_printf gated on tapdebug; expands to a bare `if`, so never follow a use with an unbraced `else` */
80 #define VMNET_DEV_MASK 0x00010000 /* minor-number bit that selects vmnet mode when a unit is created */
83 static int tapmodevent (module_t, int, void *); /* module event handler handed to DEV_MODULE() */
86 static void tapcreate (dev_t); /* builds softc, device node and ifnet for one unit */
88 /* network interface */
89 static void tapifstart (struct ifnet *); /* installed as ifp->if_start */
90 static int tapifioctl (struct ifnet *, u_long, caddr_t, /* installed as ifp->if_ioctl */
92 static void tapifinit (void *); /* installed as ifp->if_init */
94 /* character device */
95 static d_open_t tapopen; /* privilege check, then marks the unit TAP_OPEN */
96 static d_close_t tapclose; /* purges queued output and clears TAP_OPEN */
97 static d_read_t tapread; /* hands one packet from the send queue to userland */
98 static d_write_t tapwrite; /* feeds one userland packet to ifp->if_input */
99 static d_ioctl_t tapioctl; /* device-side ioctls (tapinfo, debug, SIGIO owner, vmnet) */
100 static d_poll_t tappoll; /* readable when the send queue is non-empty; always writable */
102 static struct cdevsw tap_cdevsw = { /* device switch for tap nodes; positional initializer, so entry order matters */
103 /* dev name */ CDEV_NAME,
104 /* dev major */ CDEV_MAJOR,
110 /* close */ tapclose,
112 /* write */ tapwrite,
113 /* ioctl */ tapioctl,
116 /* strategy */ nostrategy,
121 static int taprefcnt = 0; /* module reference counter; reported in the open/close debug messages */
122 static int taplastunit = -1; /* highest unit number seen so far; -1 until a unit exists */
123 static int tapdebug = 0; /* debug flag: gates TAPDEBUG, settable via sysctl and the device ioctl */
125 MALLOC_DECLARE(M_TAP);
126 MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface"); /* malloc type used for softc allocations */
127 SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, ""); /* read/write knob under the _debug node, backed by tapdebug */
128 DEV_MODULE(if_tap, tapmodevent, NULL); /* registers tapmodevent as this module's event handler */
133 * module event handler
136 tapmodevent(module_t mod, int type, void *data)
/*
 * Module event handler registered through DEV_MODULE().
 *
 * Visible behaviour: one path adds tap_cdevsw with cdevsw_add(); the
 * other removes it with cdevsw_remove() and then, for every unit number
 * up to taplastunit, searches the global ifnet list for an interface
 * whose driver name is TAP or VMNET and whose unit matches, and destroys
 * the device node stored in its softc.
 *
 * NOTE(review): the dispatch on `type`, the use of `attached` and the
 * return values are not shown on the lines below -- confirm before
 * relying on this summary.
 */
138 static int attached = 0;
139 struct ifnet *ifp = NULL;
147 cdevsw_add(&tap_cdevsw, 0, 0); /* make the tap device switch known to the kernel */
155 cdevsw_remove(&tap_cdevsw, 0, 0); /* undo the cdevsw_add() above */
157 /* XXX: maintain tap ifs in a local list */
159 while (unit <= taplastunit) { /* visit every unit number up to the highest one recorded */
160 TAILQ_FOREACH(ifp, &ifnet, if_link) { /* linear search of the global interface list */
161 if ((strcmp(ifp->if_dname, TAP) == 0) ||
162 (strcmp(ifp->if_dname, VMNET) == 0)) {
163 if (ifp->if_dunit == unit)
169 struct tap_softc *tp = ifp->if_softc; /* per-unit driver state hangs off the ifnet */
171 TAPDEBUG(ifp, "detached. minor = %#x, " \
172 "taplastunit = %d\n",
173 minor(tp->tap_dev), taplastunit);
176 destroy_dev(tp->tap_dev); /* remove the device node kept in the softc */
197 * to create interface
/*
 * Build one tap/vmnet unit for the device `dev`.
 *
 * Steps visible here: allocate and zero the softc, switch to vmnet mode
 * when the minor number carries VMNET_DEV_MASK, create the device node
 * and cross-link it with the softc through si_drv1, synthesise a MAC
 * address, fill in the ifnet callbacks and queue, attach the interface
 * with ether_ifattach(), and finally mark the softc TAP_INITED.
 *
 * NOTE(review): the declarations of `name` and `unit` and the assignment
 * of `ifp` are not shown on the lines below.
 */
202 struct ifnet *ifp = NULL;
203 struct tap_softc *tp = NULL;
204 uint8_t ether_addr[ETHER_ADDR_LEN];
208 /* allocate driver storage and create device */
209 MALLOC(tp, struct tap_softc *, sizeof(*tp), M_TAP, M_WAITOK);
210 bzero(tp, sizeof(*tp));
212 /* select device: tap or vmnet */
213 if (minor(dev) & VMNET_DEV_MASK) {
215 unit = lminor(dev) & 0xff;
216 tp->tap_flags |= TAP_VMNET;
223 tp->tap_dev = make_dev(&tap_cdevsw, minor(dev), UID_ROOT, GID_WHEEL,
224 0600, "%s%d", name, unit);
225 tp->tap_dev->si_drv1 = dev->si_drv1 = tp;
226 reference_dev(tp->tap_dev); /* so we can destroy it later */
228 /* generate fake MAC address: 00 bd xx xx xx unit_no */
229 ether_addr[0] = 0x00;
230 ether_addr[1] = 0xbd;
/*
 * The tick counter supplies only the three "xx" bytes, so it must land
 * in bytes 2..4.  Copying it to the start of the array would overwrite
 * the 00:bd prefix set above, leave byte 4 uninitialised, and could
 * produce a group (multicast) address in byte 0.
 */
231 bcopy(&ticks, &ether_addr[2], 3);
232 ether_addr[5] = (u_char)unit;
234 /* fill the rest and attach interface */
238 if_initname(ifp, name, unit);
239 if (unit > taplastunit)
242 ifp->if_init = tapifinit;
243 ifp->if_start = tapifstart;
244 ifp->if_ioctl = tapifioctl;
245 ifp->if_mtu = ETHERMTU;
246 ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
247 ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
248 ifq_set_ready(&ifp->if_snd);
250 ether_ifattach(ifp, ether_addr, NULL); /* attach using the synthesised MAC address */
252 tp->tap_flags |= TAP_INITED; /* unit is now fully constructed */
254 TAPDEBUG(ifp, "created. minor = %#x\n", minor(tp->tap_dev));
261 * to open tunnel. must be superuser
264 tapopen(dev_t dev, int flag, int mode, d_thread_t *td)
/*
 * Character-device open entry point.
 *
 * dev        - device node being opened
 * flag, mode - not referenced on the lines shown below
 * td         - calling thread; handed to suser() for the privilege check
 *
 * Visible steps: require superuser, test whether the unit is already
 * open, snapshot the interface MAC address into the softc, set TAP_OPEN.
 * NOTE(review): how `tp` is obtained and what is returned on each path
 * are not shown here.
 */
266 struct tap_softc *tp = NULL;
269 if ((error = suser(td)) != 0) /* only the superuser may open the tunnel */
278 if (tp->tap_flags & TAP_OPEN) /* unit already has an opener */
281 bcopy(tp->arpcom.ac_enaddr, tp->ether_addr, sizeof(tp->ether_addr)); /* seed the softc copy of the MAC from the interface's address */
284 tp->tap_flags |= TAP_OPEN;
287 TAPDEBUG(&tp->arpcom.ac_if,
288 "opened. minor = %#x, refcnt = %d, taplastunit = %d\n",
289 minor(tp->tap_dev), taprefcnt, taplastunit);
298 * close the device - mark i/f down & delete routing info
301 tapclose(dev_t dev, int foo, int bar, d_thread_t *td)
/*
 * Character-device close entry point.
 *
 * dev      - device node being closed; its si_drv1 points at the softc
 * foo, bar - not referenced on the lines shown below
 * td       - not referenced on the lines shown below
 *
 * Visible steps: purge the send queue under the interface serializer;
 * unless the unit is in vmnet mode, and only if the interface is up,
 * delete routes for and zero out its AF_INET addresses and clear
 * IFF_RUNNING; release the SIGIO owner, wake select/poll waiters and
 * clear TAP_OPEN.
 */
303 struct tap_softc *tp = dev->si_drv1;
304 struct ifnet *ifp = &tp->tap_if;
306 /* junk all pending output */
308 lwkt_serialize_enter(ifp->if_serializer);
309 ifq_purge(&ifp->if_snd);
310 lwkt_serialize_exit(ifp->if_serializer);
313 * do not bring the interface down, and do not anything with
314 * interface, if we are in VMnet mode. just close the device.
317 if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) {
319 lwkt_serialize_enter(ifp->if_serializer); /* interface state is changed under its serializer */
320 if (ifp->if_flags & IFF_RUNNING) {
321 /* find internet addresses and delete routes */
322 struct ifaddr *ifa = NULL;
324 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
325 if (ifa->ifa_addr->sa_family == AF_INET) { /* only IPv4 addresses are touched */
326 rtinit(ifa, (int)RTM_DELETE, 0);
328 /* remove address from interface */
330 sizeof(*(ifa->ifa_addr)));
331 bzero(ifa->ifa_dstaddr,
332 sizeof(*(ifa->ifa_dstaddr)));
333 bzero(ifa->ifa_netmask,
334 sizeof(*(ifa->ifa_netmask)));
338 ifp->if_flags &= ~IFF_RUNNING;
340 lwkt_serialize_exit(ifp->if_serializer);
343 funsetown(tp->tap_sigio); /* drop the SIGIO owner registered through the ioctl path */
344 selwakeup(&tp->tap_rsel); /* wake anyone recorded on the read selinfo by tappoll */
346 tp->tap_flags &= ~TAP_OPEN; /* the unit may be opened again */
352 if_printf(ifp, "minor = %#x, refcnt = %d is out of sync. "
353 "set refcnt to 0\n", minor(tp->tap_dev), taprefcnt);
356 TAPDEBUG(ifp, "closed. minor = %#x, refcnt = %d, taplastunit = %d\n",
357 minor(tp->tap_dev), taprefcnt, taplastunit);
366 * network interface initialization function
/*
 * Installed as ifp->if_init by the create path.  `xtp` is the softc
 * passed as an untyped pointer.  Marks the interface running and not
 * output-active, then (per the comment below) tries to start output.
 * NOTE(review): the signature and the call that starts output are not
 * shown on the lines below.
 */
371 struct tap_softc *tp = (struct tap_softc *)xtp;
372 struct ifnet *ifp = &tp->tap_if;
374 TAPDEBUG(ifp, "initializing, minor = %#x\n", minor(tp->tap_dev));
376 ifp->if_flags |= IFF_RUNNING;
377 ifp->if_flags &= ~IFF_OACTIVE;
379 /* attempt to start output */
387 * Process an ioctl request on network interface
392 tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
/*
 * Interface-side ioctl handler (installed as ifp->if_ioctl).
 *
 * ifp  - interface the request targets; its if_softc is the tap softc
 * cmd  - ioctl command
 * data - command argument; treated as a struct ifstat on the status path
 * cr   - not referenced on the lines shown below
 *
 * Visible behaviour: some commands are forwarded to ether_ioctl(); the
 * status path appends a line naming the opener (pid, or thread pointer
 * when the thread has no process) to the text already in ifs->ascii.
 * NOTE(review): most case labels and the return statement are not shown
 * here.
 */
394 struct tap_softc *tp = (struct tap_softc *)(ifp->if_softc);
395 struct ifstat *ifs = NULL;
402 dummy = ether_ioctl(ifp, cmd, data); /* generic Ethernet handling */
405 case SIOCSIFFLAGS: /* XXX -- just like vmnet does */
411 ifs = (struct ifstat *)data;
412 dummy = strlen(ifs->ascii); /* append after whatever text is already present */
413 if (tp->tap_td != NULL && dummy < sizeof(ifs->ascii)) { /* only when an opener is recorded and space remains */
414 if (tp->tap_td->td_proc) {
415 snprintf(ifs->ascii + dummy,
416 sizeof(ifs->ascii) - dummy,
417 "\tOpened by pid %d\n",
418 (int)tp->tap_td->td_proc->p_pid);
420 snprintf(ifs->ascii + dummy,
421 sizeof(ifs->ascii) - dummy,
422 "\tOpened by td %p\n", tp->tap_td);
438 * queue packets from higher level ready to put out
441 tapifstart(struct ifnet *ifp)
/*
 * Interface start routine (installed as ifp->if_start).
 *
 * Nothing is transmitted here: packets stay on ifp->if_snd until the
 * read entry point dequeues them.  This routine only notifies the
 * userland side that data is waiting (clearing TAP_RWAIT, SIGIO,
 * selwakeup) and counts an output packet.  When the unit is not in
 * vmnet mode and not TAP_READY, the queue is purged instead.
 */
443 struct tap_softc *tp = ifp->if_softc;
445 TAPDEBUG(ifp, "starting, minor = %#x\n", minor(tp->tap_dev));
448 * do not junk pending output if we are in VMnet mode.
449 * XXX: can this do any harm because of queue overflow?
452 if (((tp->tap_flags & TAP_VMNET) == 0) &&
453 ((tp->tap_flags & TAP_READY) != TAP_READY)) {
454 TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
455 minor(tp->tap_dev), tp->tap_flags);
457 ifq_purge(&ifp->if_snd); /* throw queued output away while the unit is not ready */
461 ifp->if_flags |= IFF_OACTIVE; /* held for the duration of the notification; cleared at the end */
463 if (!ifq_is_empty(&ifp->if_snd)) {
464 if (tp->tap_flags & TAP_RWAIT) { /* tapread sets TAP_RWAIT before it sleeps */
465 tp->tap_flags &= ~TAP_RWAIT;
469 if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) /* async mode on and an owner registered */
470 pgsigio(tp->tap_sigio, SIGIO, 0);
473 * selwakeup is not MPSAFE. tapifstart is.
476 selwakeup(&tp->tap_rsel); /* wake pollers recorded by tappoll */
478 ifp->if_opackets ++; /* obytes are counted in ether_output */
481 ifp->if_flags &= ~IFF_OACTIVE;
488 * the cdevsw interface is now pretty minimal
491 tapioctl(dev_t dev, u_long cmd, caddr_t data, int flag, d_thread_t *td)
/*
 * Character-device ioctl entry point.
 *
 * dev  - device node; its si_drv1 points at the softc
 * cmd  - ioctl command
 * data - command argument, interpreted per command (struct tapinfo,
 *        int, interface flags or a MAC address)
 * flag, td - not referenced on the lines shown below
 *
 * The whole dispatch runs with the interface serializer held.
 * NOTE(review): most case labels and the return statement are not shown
 * here; the per-line notes describe only what each statement does.
 */
493 struct tap_softc *tp = dev->si_drv1;
494 struct ifnet *ifp = &tp->tap_if;
495 struct tapinfo *tapp = NULL;
500 lwkt_serialize_enter(ifp->if_serializer);
505 tapp = (struct tapinfo *)data; /* set path: copy caller's tapinfo into the interface */
506 ifp->if_mtu = tapp->mtu; /* no range check is applied on these lines */
507 ifp->if_type = tapp->type;
508 ifp->if_baudrate = tapp->baudrate;
512 tapp = (struct tapinfo *)data; /* get path: report the interface's current values */
513 tapp->mtu = ifp->if_mtu;
514 tapp->type = ifp->if_type;
515 tapp->baudrate = ifp->if_baudrate;
519 tapdebug = *(int *)data; /* set the driver-wide debug flag */
523 *(int *)data = tapdebug; /* read the driver-wide debug flag */
531 tp->tap_flags |= TAP_ASYNC; /* enable SIGIO delivery from tapifstart */
533 tp->tap_flags &= ~TAP_ASYNC;
538 if ((mb = ifq_poll(&ifp->if_snd)) != NULL) {
539 for(; mb != NULL; mb = mb->m_next) /* walk the mbuf chain returned by ifq_poll() */
540 *(int *)data += mb->m_len; /* accumulate its total byte count */
545 error = fsetown(*(int *)data, &tp->tap_sigio);
549 *(int *)data = fgetown(tp->tap_sigio);
552 /* this is deprecated, FIOSETOWN should be used instead */
554 error = fsetown(-(*(int *)data), &tp->tap_sigio); /* same as above but with the sign of the id flipped */
557 /* this is deprecated, FIOGETOWN should be used instead */
559 *(int *)data = -fgetown(tp->tap_sigio);
562 /* VMware/VMnet port ioctl's */
564 case SIOCGIFFLAGS: /* get ifnet flags */
565 bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
568 case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
571 f &= ~IFF_CANTCHANGE; /* drop the bits a caller is not allowed to alter */
573 ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE); /* keep the interface's own protected bits */
576 case OSIOCGIFADDR: /* get MAC address of the remote side */
578 bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
581 case SIOCSIFADDR: /* set MAC address of the remote side */
582 bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
589 lwkt_serialize_exit(ifp->if_serializer);
597 * the cdevsw read interface - reads a packet at a time, or at
598 * least as much of a packet as can be read
601 tapread(dev_t dev, struct uio *uio, int flag)
/*
 * Character-device read entry point.
 *
 * dev  - device node; its si_drv1 points at the softc
 * uio  - destination described by the caller; uio_resid bounds the copy
 * flag - IO_NDELAY selects non-blocking behaviour
 *
 * Visible behaviour: bail out when the unit is not TAP_READY; otherwise
 * loop dequeuing from ifp->if_snd under the serializer.  With nothing
 * queued, set TAP_RWAIT and either return EWOULDBLOCK (IO_NDELAY) or
 * sleep interruptibly on the softc.  Once a packet is obtained, copy its
 * mbufs out with uiomove() until the packet or the caller's buffer is
 * exhausted.
 * NOTE(review): the not-ready return value and the mbuf free/advance
 * steps are not shown on the lines below.
 */
603 struct tap_softc *tp = dev->si_drv1;
604 struct ifnet *ifp = &tp->tap_if;
605 struct mbuf *m0 = NULL;
608 TAPDEBUG(ifp, "reading, minor = %#x\n", minor(tp->tap_dev));
610 if ((tp->tap_flags & TAP_READY) != TAP_READY) {
611 TAPDEBUG(ifp, "not ready. minor = %#x, tap_flags = 0x%x\n",
612 minor(tp->tap_dev), tp->tap_flags);
617 tp->tap_flags &= ~TAP_RWAIT; /* start from a clean wait state */
619 /* sleep until we get a packet */
621 lwkt_serialize_enter(ifp->if_serializer);
622 m0 = ifq_dequeue(&ifp->if_snd, NULL);
624 tp->tap_flags |= TAP_RWAIT; /* tells tapifstart that a reader is waiting */
625 tsleep_interlock(tp); /* called before the serializer is dropped, ahead of the tsleep() below */
626 lwkt_serialize_exit(ifp->if_serializer);
627 if (flag & IO_NDELAY)
628 return (EWOULDBLOCK);
629 error = tsleep(tp, PCATCH, "taprd", 0); /* PCATCH: the sleep can be interrupted by a signal */
633 lwkt_serialize_exit(ifp->if_serializer);
635 } while (m0 == NULL);
639 /* xfer packet to user space */
640 while ((m0 != NULL) && (uio->uio_resid > 0) && (error == 0)) {
641 len = min(uio->uio_resid, m0->m_len); /* never copy more than the caller asked for */
645 error = uiomove(mtod(m0, caddr_t), len, uio);
650 TAPDEBUG(ifp, "dropping mbuf, minor = %#x\n",
662 * the cdevsw write interface - an atomic write is a packet - or else!
665 tapwrite(dev_t dev, struct uio *uio, int flag)
/*
 * Character-device write entry point: one write() is one packet.
 *
 * dev  - device node; its si_drv1 points at the softc
 * uio  - source described by the caller; uio_resid is the packet length
 * flag - not referenced on the lines shown below
 *
 * Visible behaviour: a zero-length write is special-cased; lengths below
 * zero or above TAPMRU are rejected; the data is copied into an mbuf
 * chain (allocated with MB_DONTWAIT, so allocation may fail rather than
 * block), the packet header is filled in, and the chain is handed to
 * ifp->if_input under the interface serializer.
 * NOTE(review): the return values and the allocation-failure handling
 * are not shown on the lines below.
 */
667 struct tap_softc *tp = dev->si_drv1;
668 struct ifnet *ifp = &tp->tap_if;
669 struct mbuf *top = NULL, **mp = NULL, *m = NULL;
670 int error = 0, tlen, mlen;
672 TAPDEBUG(ifp, "writting, minor = %#x\n", minor(tp->tap_dev));
674 if (uio->uio_resid == 0)
677 if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
678 TAPDEBUG(ifp, "invalid packet len = %d, minor = %#x\n",
679 uio->uio_resid, minor(tp->tap_dev));
683 tlen = uio->uio_resid; /* remember the full length; uio_resid shrinks as data is copied */
685 /* get a header mbuf */
686 MGETHDR(m, MB_DONTWAIT, MT_DATA);
693 while ((error == 0) && (uio->uio_resid > 0)) {
694 m->m_len = min(mlen, uio->uio_resid); /* fill this mbuf up to its capacity */
695 error = uiomove(mtod(m, caddr_t), m->m_len, uio);
698 if (uio->uio_resid > 0) { /* more data left: chain another mbuf */
699 MGET(m, MB_DONTWAIT, MT_DATA);
714 top->m_pkthdr.len = tlen;
715 top->m_pkthdr.rcvif = ifp; /* packet appears to have been received on this interface */
718 * Ethernet bridge and bpf are handled in ether_input
720 * adjust mbuf and give packet to the ether_input
722 lwkt_serialize_enter(ifp->if_serializer);
723 ifp->if_input(ifp, top);
724 ifp->if_ipackets ++; /* ibytes are counted in ether_input */
725 lwkt_serialize_exit(ifp->if_serializer);
734 * the poll interface, this is only useful on reads
735 * really. the write detect always returns true, write never blocks
736 * anyway, it either accepts the packet or drops it
739 tappoll(dev_t dev, int events, d_thread_t *td)
/*
 * Character-device poll entry point.
 *
 * dev    - device node; its si_drv1 points at the softc
 * events - poll events the caller is interested in
 * td     - calling thread, recorded on the read selinfo when it must wait
 *
 * Read events are reported when ifp->if_snd is non-empty; otherwise the
 * thread is registered with selrecord() so a later selwakeup() on
 * tap_rsel can notify it.  Write events are always reported.
 * NOTE(review): the declaration of `revents` and the return statement
 * are not shown on the lines below.
 */
741 struct tap_softc *tp = dev->si_drv1;
742 struct ifnet *ifp = &tp->tap_if;
745 TAPDEBUG(ifp, "polling, minor = %#x\n", minor(tp->tap_dev));
747 lwkt_serialize_enter(ifp->if_serializer); /* queue check and selrecord happen under the serializer */
748 if (events & (POLLIN | POLLRDNORM)) {
749 if (!ifq_is_empty(&ifp->if_snd)) {
751 "has data in queue. minor = %#x\n",
754 revents |= (events & (POLLIN | POLLRDNORM)); /* report only the read bits that were requested */
757 TAPDEBUG(ifp, "waiting for data, minor = %#x\n",
760 selrecord(td, &tp->tap_rsel);
763 lwkt_serialize_exit(ifp->if_serializer);
765 if (events & (POLLOUT | POLLWRNORM)) /* writes never block, so always writable */
766 revents |= (events & (POLLOUT | POLLWRNORM));