2 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * -------------------------------------------------------------------------
29 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
30 * Nottingham University 1987.
34 * $FreeBSD: src/sys/net/if_tap.c,v 1.3.2.3 2002/04/14 21:41:48 luigi Exp $
35 * $DragonFly: src/sys/net/tap/if_tap.c,v 1.2 2003/06/17 04:28:48 dillon Exp $
36 * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
41 #include <sys/param.h>
43 #include <sys/filedesc.h>
44 #include <sys/filio.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
50 #include <sys/signalvar.h>
51 #include <sys/socket.h>
52 #include <sys/sockio.h>
53 #include <sys/sysctl.h>
54 #include <sys/systm.h>
55 #include <sys/ttycom.h>
57 #include <sys/vnode.h>
60 #include <net/ethernet.h>
62 #include <net/if_arp.h>
63 #include <net/route.h>
65 #include <netinet/in.h>
67 #include <net/if_tapvar.h>
68 #include <net/if_tap.h>
/* Base name of the device; also the short name given to the M_TAP malloc type. */
71 #define CDEV_NAME "tap"
/* Major number stored in the "dev major" slot of tap_cdevsw. */
72 #define CDEV_MAJOR 149
/*
 * Debug printf: prints only while the tapdebug flag is non-zero.
 * Expands to a bare `if (...) printf`, so an `else` written directly after a
 * TAPDEBUG(...) statement would bind to this hidden `if`.
 */
73 #define TAPDEBUG if (tapdebug) printf
/* Bit tested in minor(dev) by the interface-creation code to select vmnet mode. */
77 #define VMNET_DEV_MASK 0x00010000
/*
 * Forward declarations for everything private to this driver: the module
 * event handler, the interface constructor, the three ifnet callbacks
 * (installed as if_start / if_ioctl / if_init) and the character-device
 * entry points referenced from tap_cdevsw.
 */
80 static int tapmodevent __P((module_t, int, void *));
83 static void tapcreate __P((dev_t));
85 /* network interface */
86 static void tapifstart __P((struct ifnet *));
87 static int tapifioctl __P((struct ifnet *, u_long, caddr_t));
88 static void tapifinit __P((void *));
90 /* character device */
91 static d_open_t tapopen;
92 static d_close_t tapclose;
93 static d_read_t tapread;
94 static d_write_t tapwrite;
95 static d_ioctl_t tapioctl;
96 static d_poll_t tappoll;
/*
 * Character-device switch table for the tap device; passed to cdevsw_add(),
 * cdevsw_remove() and make_dev() elsewhere in this file.
 * NOTE(review): some slot lines and the closing brace are not present in this
 * listing (embedded line numbers jump 98, 100, 102, ...); only the visible
 * slots are annotated.
 */
98 static struct cdevsw tap_cdevsw = {
100 /* close */ tapclose,
102 /* write */ tapwrite,
103 /* ioctl */ tapioctl,
106 /* strategy */ nostrategy,
107 /* dev name */ CDEV_NAME,
108 /* dev major */ CDEV_MAJOR,
/* Driver-wide state. taprefcnt and taplastunit are reported in the open/close debug messages. */
115 static int taprefcnt = 0; /* module ref. counter */
116 static int taplastunit = -1; /* max. open unit number */
/* tapdebug gates every TAPDEBUG() message; it can be changed through the sysctl below and through tapioctl. */
117 static int tapdebug = 0; /* debug flag */
/* M_TAP is the malloc type used when allocating struct tap_softc. */
119 MALLOC_DECLARE(M_TAP);
120 MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
/* Publishes tapdebug read-write (CTLFLAG_RW) as if_tap_debug under the _debug sysctl node. */
121 SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");
/* Registers tapmodevent as the event handler for module if_tap. */
122 DEV_MODULE(if_tap, tapmodevent, NULL);
/*
 * Module event handler registered through DEV_MODULE.
 * Visible behaviour: one path adds tap_cdevsw to the device switch; another
 * removes it and then, for every unit number up to taplastunit, finds the
 * matching tap/vmnet ifnet, detaches it from the Ethernet layer and destroys
 * its device node.
 * NOTE(review): the case labels, braces, `unit` declaration and return
 * statements are not present in this listing -- confirm which event type
 * drives which path against the full source.
 */
127 * module event handler
130 tapmodevent(mod, type, data)
135 static int attached = 0;
136 struct ifnet *ifp = NULL;
144 cdevsw_add(&tap_cdevsw);
152 cdevsw_remove(&tap_cdevsw);
/* visit every unit number up to the highest one recorded in taplastunit */
155 while (unit <= taplastunit) {
/* search the global interface list for a TAP- or VMNET-named ifnet with this unit */
157 TAILQ_FOREACH(ifp, &ifnet, if_link)
158 if ((strcmp(ifp->if_name, TAP) == 0) ||
159 (strcmp(ifp->if_name, VMNET) == 0))
160 if (ifp->if_unit == unit)
/* the softc hangs off the ifnet; it holds the device node to destroy */
165 struct tap_softc *tp = ifp->if_softc;
167 TAPDEBUG("detaching %s%d. minor = %#x, " \
168 "taplastunit = %d\n",
169 ifp->if_name, unit, minor(tp->tap_dev),
/* detach from the Ethernet layer first, then remove the device node */
173 ether_ifdetach(ifp, 1);
175 destroy_dev(tp->tap_dev);
/*
 * Interface constructor (declared above as tapcreate(dev_t)).
 * Visible steps: allocate and zero a tap_softc, pick tap or vmnet mode from
 * the minor number, create the device node, synthesise a MAC address, fill
 * in the ifnet callbacks/flags, attach as an Ethernet interface and mark the
 * softc TAP_INITED.
 * NOTE(review): the function header and several statements (e.g. the `name`
 * and `unit` setup for the tap case, the `ifp` assignment) are not present
 * in this listing.
 */
196 * to create interface
202 struct ifnet *ifp = NULL;
203 struct tap_softc *tp = NULL;
204 unsigned short macaddr_hi;
208 /* allocate driver storage and create device */
209 MALLOC(tp, struct tap_softc *, sizeof(*tp), M_TAP, M_WAITOK);
210 bzero(tp, sizeof(*tp));
212 /* select device: tap or vmnet */
213 if (minor(dev) & VMNET_DEV_MASK) {
/* vmnet: unit is the low 8 bits of lminor(dev); remember the mode in tap_flags */
215 unit = lminor(dev) & 0xff;
216 tp->tap_flags |= TAP_VMNET;
/* device node is owned by root:wheel with mode 0600 and named "<name><unit>" */
223 tp->tap_dev = make_dev(&tap_cdevsw, minor(dev), UID_ROOT, GID_WHEEL,
224 0600, "%s%d", name, unit);
/* both the new node and the dev passed in point at the softc; the cdevsw handlers read it back from dev->si_drv1 */
225 tp->tap_dev->si_drv1 = dev->si_drv1 = tp;
227 /* generate fake MAC address: 00 bd xx xx xx unit_no */
228 macaddr_hi = htons(0x00bd);
229 bcopy(&macaddr_hi, &tp->arpcom.ac_enaddr[0], sizeof(short));
/*
 * NOTE(review): this copies sizeof(long) bytes starting at byte 2. With the
 * six-byte layout described above that only fits when long is 4 bytes; on a
 * platform with 8-byte long it would write past byte 5 (and read past
 * `ticks` if that is an int) -- confirm the types of ticks and ac_enaddr.
 */
230 bcopy(&ticks, &tp->arpcom.ac_enaddr[2], sizeof(long));
/* the last byte is then overwritten with the unit number */
231 tp->arpcom.ac_enaddr[5] = (u_char)unit;
233 /* fill the rest and attach interface */
238 if (unit > taplastunit)
/* install the driver's ifnet callbacks; output goes through the generic ether_output */
242 ifp->if_init = tapifinit;
243 ifp->if_output = ether_output;
244 ifp->if_start = tapifstart;
245 ifp->if_ioctl = tapifioctl;
246 ifp->if_mtu = ETHERMTU;
247 ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
248 ifp->if_snd.ifq_maxlen = ifqmaxlen;
251 ether_ifattach(ifp, 1);
254 tp->tap_flags |= TAP_INITED;
256 TAPDEBUG("interface %s%d created. minor = %#x\n",
257 ifp->if_name, ifp->if_unit, minor(tp->tap_dev));
/*
 * cdevsw open entry point.
 * Visible steps: require suser(p) to succeed, test whether the device is
 * already marked TAP_OPEN, snapshot the interface MAC address into
 * tp->ether_addr, record the opening process id and set TAP_OPEN.
 * NOTE(review): the lines that obtain `tp` from the device, the actions
 * taken when the checks fail, and the return statements are not present in
 * this listing.
 */
264 * to open tunnel. must be superuser
267 tapopen(dev, flag, mode, p)
273 struct tap_softc *tp = NULL;
/* only the superuser may open the device; a non-zero suser() result is kept in `error` */
276 if ((error = suser(p)) != 0)
/* the device is already open if TAP_OPEN is set */
285 if (tp->tap_flags & TAP_OPEN)
/* start with the "remote side" address equal to the interface's own MAC */
288 bcopy(tp->arpcom.ac_enaddr, tp->ether_addr, sizeof(tp->ether_addr));
/* remembered so the interface status ioctl can report "Opened by PID" */
290 tp->tap_pid = p->p_pid;
291 tp->tap_flags |= TAP_OPEN;
294 TAPDEBUG("%s%d is open. minor = %#x, refcnt = %d, taplastunit = %d\n",
295 tp->tap_if.if_name, tp->tap_if.if_unit,
296 minor(tp->tap_dev), taprefcnt, taplastunit);
/*
 * cdevsw close entry point.
 * Visible steps: dequeue pending output from if_snd; for a non-VMnet
 * interface that is IFF_UP and IFF_RUNNING, delete the routes of every
 * AF_INET address and zero that address's addr/dstaddr/netmask, then clear
 * IFF_RUNNING; finally drop the SIGIO owner, wake pollers and clear TAP_OPEN.
 * NOTE(review): loop constructs, the refcount bookkeeping and the return
 * statement are not present in this listing.
 */
305 * close the device - mark i/f down & delete routing info
308 tapclose(dev, foo, bar, p)
315 struct tap_softc *tp = dev->si_drv1;
316 struct ifnet *ifp = &tp->tap_if;
317 struct mbuf *m = NULL;
319 /* junk all pending output */
323 IF_DEQUEUE(&ifp->if_snd, m);
330 * do not bring the interface down, and do not anything with
331 * interface, if we are in VMnet mode. just close the device.
334 if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & IFF_UP)) {
337 if (ifp->if_flags & IFF_RUNNING) {
338 /* find internet addresses and delete routes */
339 struct ifaddr *ifa = NULL;
341 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
/* only IPv4 (AF_INET) addresses are touched */
342 if (ifa->ifa_addr->sa_family == AF_INET) {
343 rtinit(ifa, (int)RTM_DELETE, 0);
345 /* remove address from interface */
/* NOTE(review): the next line is the tail of a call whose first line is missing here; by symmetry with the two bzero calls below it presumably zeroes ifa->ifa_addr -- confirm */
347 sizeof(*(ifa->ifa_addr)));
348 bzero(ifa->ifa_dstaddr,
349 sizeof(*(ifa->ifa_dstaddr)));
350 bzero(ifa->ifa_netmask,
351 sizeof(*(ifa->ifa_netmask)));
355 ifp->if_flags &= ~IFF_RUNNING;
/* release the SIGIO owner set through tapioctl and wake anything recorded on the read selinfo */
360 funsetown(tp->tap_sigio);
361 selwakeup(&tp->tap_rsel);
363 tp->tap_flags &= ~TAP_OPEN;
/* diagnostic for a reference count that went out of sync (the guarding condition is not visible here) */
369 printf("%s%d minor = %#x, refcnt = %d is out of sync. " \
370 "set refcnt to 0\n", ifp->if_name, ifp->if_unit,
371 minor(tp->tap_dev), taprefcnt);
374 TAPDEBUG("%s%d is closed. minor = %#x, refcnt = %d, taplastunit = %d\n",
375 ifp->if_name, ifp->if_unit, minor(tp->tap_dev),
376 taprefcnt, taplastunit);
/*
 * ifnet if_init callback (installed as ifp->if_init by the constructor).
 * Receives the softc as an opaque pointer (xtp), marks the interface
 * IFF_RUNNING and clears IFF_OACTIVE.
 * NOTE(review): the function header and the call that starts output are not
 * present in this listing.
 */
385 * network interface initialization function
391 struct tap_softc *tp = (struct tap_softc *)xtp;
392 struct ifnet *ifp = &tp->tap_if;
394 TAPDEBUG("initializing %s%d, minor = %#x\n",
395 ifp->if_name, ifp->if_unit, minor(tp->tap_dev));
/* running, and not currently busy transmitting */
397 ifp->if_flags |= IFF_RUNNING;
398 ifp->if_flags &= ~IFF_OACTIVE;
400 /* attempt to start output */
/*
 * ifnet if_ioctl callback.
 * Visible behaviour: some requests are forwarded to ether_ioctl();
 * SIOCSIFFLAGS has its own case; an interface-status request appends
 * "Opened by PID <pid>" to the caller's status text.
 * NOTE(review): most case labels, the `dummy` declaration and the return
 * statement are not present in this listing.
 */
408 * Process an ioctl request on network interface
411 tapifioctl(ifp, cmd, data)
416 struct tap_softc *tp = (struct tap_softc *)(ifp->if_softc);
417 struct ifstat *ifs = NULL;
/* generic Ethernet handling; the result is kept in `dummy` */
425 dummy = ether_ioctl(ifp, cmd, data);
429 case SIOCSIFFLAGS: /* XXX -- just like vmnet does */
/* status request: `data` is a struct ifstat whose ascii buffer already holds text */
436 ifs = (struct ifstat *)data;
/* `dummy` is reused here as the current length of that text */
437 dummy = strlen(ifs->ascii);
/* append only when a process has the device open and the buffer is not already full */
438 if (tp->tap_pid != 0 && dummy < sizeof(ifs->ascii))
439 snprintf(ifs->ascii + dummy,
440 sizeof(ifs->ascii) - dummy,
441 "\tOpened by PID %d\n", tp->tap_pid);
/*
 * ifnet if_start callback (installed as ifp->if_start by the constructor).
 * Visible behaviour: when the interface is not in VMnet mode and not
 * TAP_READY, the send queue is dequeued instead of delivered. Otherwise
 * IFF_OACTIVE is set and, if packets are queued, a waiting reader's
 * TAP_RWAIT flag is cleared, SIGIO is posted for async owners, pollers are
 * woken and if_opackets is incremented; IFF_OACTIVE is then cleared.
 * NOTE(review): the function header, loop constructs, the wakeup call and
 * the early return are not present in this listing.
 */
456 * queue packets from higher level ready to put out
462 struct tap_softc *tp = ifp->if_softc;
465 TAPDEBUG("%s%d starting, minor = %#x\n",
466 ifp->if_name, ifp->if_unit, minor(tp->tap_dev));
469 * do not junk pending output if we are in VMnet mode.
470 * XXX: can this do any harm because of queue overflow?
/* TAP_READY is a multi-bit mask, hence the `!= TAP_READY` comparison rather than a plain bit test */
473 if (((tp->tap_flags & TAP_VMNET) == 0) &&
474 ((tp->tap_flags & TAP_READY) != TAP_READY)) {
475 struct mbuf *m = NULL;
477 TAPDEBUG("%s%d not ready. minor = %#x, tap_flags = 0x%x\n",
478 ifp->if_name, ifp->if_unit,
479 minor(tp->tap_dev), tp->tap_flags);
483 IF_DEQUEUE(&ifp->if_snd, m);
494 ifp->if_flags |= IFF_OACTIVE;
496 if (ifp->if_snd.ifq_len != 0) {
/* a reader set TAP_RWAIT in tapread before sleeping; clear it now that data is queued */
497 if (tp->tap_flags & TAP_RWAIT) {
498 tp->tap_flags &= ~TAP_RWAIT;
/* async mode: signal the registered owner, if any */
502 if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL))
503 pgsigio(tp->tap_sigio, SIGIO, 0);
/* wake pollers recorded by tappoll */
505 selwakeup(&tp->tap_rsel);
506 ifp->if_opackets ++; /* obytes are counted in ether_output */
509 ifp->if_flags &= ~IFF_OACTIVE;
/*
 * cdevsw ioctl entry point.
 * Visible request groups: set/get interface parameters through struct
 * tapinfo (mtu, type, baudrate); set/get the tapdebug flag; toggle
 * TAP_ASYNC; report the byte count of the first queued packet; set/get the
 * SIGIO owner (plus deprecated variants that negate the id); and the
 * VMware/VMnet requests for interface flags and the remote MAC address.
 * NOTE(review): most case labels, break/return statements and the default
 * case are not present in this listing -- the grouping comments below are
 * inferred from the visible statements only.
 */
517 * the cdevsw interface is now pretty minimal
520 tapioctl(dev, cmd, data, flag, p)
527 struct tap_softc *tp = dev->si_drv1;
528 struct ifnet *ifp = &tp->tap_if;
529 struct tapinfo *tapp = NULL;
/* set: copy caller-supplied values into the ifnet (no range check is visible here) */
535 tapp = (struct tapinfo *)data;
536 ifp->if_mtu = tapp->mtu;
537 ifp->if_type = tapp->type;
538 ifp->if_baudrate = tapp->baudrate;
/* get: copy the same three fields back out */
543 tapp = (struct tapinfo *)data;
544 tapp->mtu = ifp->if_mtu;
545 tapp->type = ifp->if_type;
546 tapp->baudrate = ifp->if_baudrate;
/* set / get the driver-wide debug flag */
550 tapdebug = *(int *)data;
554 *(int *)data = tapdebug;
/* enable / disable SIGIO delivery from tapifstart */
563 tp->tap_flags |= TAP_ASYNC;
565 tp->tap_flags &= ~TAP_ASYNC;
/* sum m_len along the m_next chain of the packet at the head of the send queue */
571 if (ifp->if_snd.ifq_head) {
572 struct mbuf *mb = ifp->if_snd.ifq_head;
574 for(*(int *)data = 0;mb != NULL;mb = mb->m_next)
575 *(int *)data += mb->m_len;
/* SIGIO owner: set and get */
583 return (fsetown(*(int *)data, &tp->tap_sigio));
586 *(int *)data = fgetown(tp->tap_sigio);
589 /* this is deprecated, FIOSETOWN should be used instead */
591 return (fsetown(-(*(int *)data), &tp->tap_sigio));
593 /* this is deprecated, FIOGETOWN should be used instead */
595 *(int *)data = -fgetown(tp->tap_sigio);
598 /* VMware/VMnet port ioctl's */
600 case SIOCGIFFLAGS: /* get ifnet flags */
601 bcopy(&ifp->if_flags, data, sizeof(ifp->if_flags));
604 case VMIO_SIOCSIFFLAGS: { /* VMware/VMnet SIOCSIFFLAGS */
605 short f = *(short *)data;
/* drop the bits the caller may not change ... */
608 f &= ~IFF_CANTCHANGE;
/* ... and keep the interface's current values for exactly those bits */
612 ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
616 case OSIOCGIFADDR: /* get MAC address of the remote side */
618 bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
621 case SIOCSIFADDR: /* set MAC address of the remote side */
622 bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
/*
 * cdevsw read entry point: hands one queued outbound packet to user space.
 * Visible steps: check that the device is TAP_READY; loop dequeuing from
 * if_snd, returning EWOULDBLOCK for non-blocking reads on an empty queue and
 * otherwise sleeping with TAP_RWAIT set; tap the packet to bpf when a
 * listener is attached; copy the mbuf chain out with uiomove() until the
 * user buffer is full, the chain ends or an error occurs.
 * NOTE(review): several statements (error returns, splimp/splx use of `s`,
 * mbuf freeing, the bpf call itself) are not present in this listing.
 */
635 * the cdevsw read interface - reads a packet at a time, or at
636 * least as much of a packet as can be read
639 tapread(dev, uio, flag)
644 struct tap_softc *tp = dev->si_drv1;
645 struct ifnet *ifp = &tp->tap_if;
646 struct mbuf *m0 = NULL;
647 int error = 0, len, s;
649 TAPDEBUG("%s%d reading, minor = %#x\n",
650 ifp->if_name, ifp->if_unit, minor(tp->tap_dev));
652 if ((tp->tap_flags & TAP_READY) != TAP_READY) {
653 TAPDEBUG("%s%d not ready. minor = %#x, tap_flags = 0x%x\n",
654 ifp->if_name, ifp->if_unit,
655 minor(tp->tap_dev), tp->tap_flags);
/* start from a clean state: we are not waiting yet */
660 tp->tap_flags &= ~TAP_RWAIT;
662 /* sleep until we get a packet */
665 IF_DEQUEUE(&ifp->if_snd, m0);
/* non-blocking read with nothing queued */
669 if (flag & IO_NDELAY)
670 return (EWOULDBLOCK);
/* announce the waiter (tapifstart clears this flag), then sleep on the softc address; PCATCH is passed and the timeout argument is 0 */
672 tp->tap_flags |= TAP_RWAIT;
673 error = tsleep((caddr_t)tp,PCATCH|(PZERO+1),"taprd",0);
677 } while (m0 == NULL);
679 /* feed packet to bpf */
680 if (ifp->if_bpf != NULL)
683 /* xfer packet to user space */
684 while ((m0 != NULL) && (uio->uio_resid > 0) && (error == 0)) {
/* never copy more than the user buffer can still take */
685 len = min(uio->uio_resid, m0->m_len);
689 error = uiomove(mtod(m0, caddr_t), len, uio);
/* logged when part of the packet is left over after the copy loop */
694 TAPDEBUG("%s%d dropping mbuf, minor = %#x\n",
695 ifp->if_name, ifp->if_unit, minor(tp->tap_dev));
/*
 * cdevsw write entry point: one write() is one inbound Ethernet frame.
 * Visible steps: special-case a zero-length write; reject lengths that are
 * negative or larger than TAPMRU; build an mbuf chain from the user buffer
 * (header mbuf first, further mbufs while data remains, all M_DONTWAIT);
 * set the packet header length and receiving interface; strip the Ethernet
 * header and pass header + payload to ether_input(); count the packet.
 * NOTE(review): the error returns, the allocation-failure handling, the
 * `mlen` setup and the chain-linking statements are not present in this
 * listing.
 */
706 * the cdevsw write interface - an atomic write is a packet - or else!
709 tapwrite(dev, uio, flag)
714 struct tap_softc *tp = dev->si_drv1;
715 struct ifnet *ifp = &tp->tap_if;
716 struct mbuf *top = NULL, **mp = NULL, *m = NULL;
717 struct ether_header *eh = NULL;
718 int error = 0, tlen, mlen;
720 TAPDEBUG("%s%d writting, minor = %#x\n",
721 ifp->if_name, ifp->if_unit, minor(tp->tap_dev));
723 if (uio->uio_resid == 0)
726 if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
727 TAPDEBUG("%s%d invalid packet len = %d, minor = %#x\n",
728 ifp->if_name, ifp->if_unit,
729 uio->uio_resid, minor(tp->tap_dev));
/* remember the total length now; uiomove() consumes uio_resid as it copies */
733 tlen = uio->uio_resid;
735 /* get a header mbuf */
736 MGETHDR(m, M_DONTWAIT, MT_DATA);
743 while ((error == 0) && (uio->uio_resid > 0)) {
/* fill the current mbuf with at most mlen bytes */
744 m->m_len = min(mlen, uio->uio_resid);
745 error = uiomove(mtod(m, caddr_t), m->m_len, uio);
/* more data left: get another (non-header) mbuf for the next round */
748 if (uio->uio_resid > 0) {
749 MGET(m, M_DONTWAIT, MT_DATA);
764 top->m_pkthdr.len = tlen;
765 top->m_pkthdr.rcvif = ifp;
768 * Ethernet bridge and bpf are handled in ether_input
770 * adjust mbuf and give packet to the ether_input
/* keep a pointer to the header, trim it off the front of the chain, then pass both to ether_input */
773 eh = mtod(top, struct ether_header *);
774 m_adj(top, sizeof(struct ether_header));
775 ether_input(ifp, eh, top);
776 ifp->if_ipackets ++; /* ibytes are counted in ether_input */
/*
 * cdevsw poll entry point.
 * Read events (POLLIN/POLLRDNORM) are reported ready when the interface send
 * queue is non-empty; otherwise the caller is recorded on tap_rsel so a later
 * selwakeup() can notify it. Write events (POLLOUT/POLLWRNORM) are always
 * reported ready.
 * NOTE(review): the `revents` declaration, the else branch structure and the
 * return statement are not present in this listing.
 */
785 * the poll interface, this is only useful on reads
786 * really. the write detect always returns true, write never blocks
787 * anyway, it either accepts the packet or drops it
790 tappoll(dev, events, p)
795 struct tap_softc *tp = dev->si_drv1;
796 struct ifnet *ifp = &tp->tap_if;
799 TAPDEBUG("%s%d polling, minor = %#x\n",
800 ifp->if_name, ifp->if_unit, minor(tp->tap_dev));
803 if (events & (POLLIN | POLLRDNORM)) {
804 if (ifp->if_snd.ifq_len > 0) {
805 TAPDEBUG("%s%d have data in queue. len = %d, " \
806 "minor = %#x\n", ifp->if_name, ifp->if_unit,
807 ifp->if_snd.ifq_len, minor(tp->tap_dev));
/* report only the read events the caller actually asked for */
809 revents |= (events & (POLLIN | POLLRDNORM));
812 TAPDEBUG("%s%d waiting for data, minor = %#x\n",
813 ifp->if_name, ifp->if_unit, minor(tp->tap_dev));
/* nothing queued: register the caller for a later wakeup */
815 selrecord(p, &tp->tap_rsel);
/* writes never block, so requested write events are always ready */
819 if (events & (POLLOUT | POLLWRNORM))
820 revents |= (events & (POLLOUT | POLLWRNORM));