2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $
35 #include <sys/cdefs.h>
37 #include "opt_inet6.h"
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
43 #include <sys/malloc.h>
45 #include <sys/module.h>
46 #include <sys/rmlock.h>
47 #include <sys/socket.h>
48 #include <sys/sockio.h>
50 #include <sys/errno.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
57 #include <machine/cpu.h>
60 #include <net/if_var.h>
61 #include <net/if_private.h>
62 #include <net/if_clone.h>
63 #include <net/if_types.h>
64 #include <net/netisr.h>
65 #include <net/route.h>
69 #include <netinet/in.h>
70 #include <netinet/in_systm.h>
71 #include <netinet/ip.h>
72 #include <netinet/ip_ecn.h>
74 #include <netinet/in_var.h>
75 #include <netinet/ip_var.h>
80 #include <netinet/in.h>
82 #include <netinet6/in6_var.h>
83 #include <netinet/ip6.h>
84 #include <netinet6/ip6_ecn.h>
85 #include <netinet6/ip6_var.h>
88 #include <netinet/ip_encap.h>
89 #include <net/ethernet.h>
90 #include <net/if_bridgevar.h>
91 #include <net/if_gif.h>
93 #include <security/mac/mac_framework.h>
/* Interface name prefix; passed to if_initname() and if_clone_simple(). */
95 static const char gifname[] = "gif";
/* malloc type used for the softc allocation and when freeing gif_hdr. */
97 MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
/*
 * Lock taken exclusively by gif_reassign(), gif_clone_destroy() and
 * gif_ioctl(); gif_delete_tunnel() asserts that it is held exclusively.
 */
98 static struct sx gif_ioctl_sx;
99 SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");
/*
 * Optional hook pointers.  Every call site in this file tests the pointer
 * against NULL before calling through it.
 * NOTE(review): the "ng_" prefix suggests these are filled in by a netgraph
 * node; the code that assigns them is not part of this file -- confirm.
 */
101 void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
102 void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
103 void (*ng_gif_attach_p)(struct ifnet *ifp);
104 void (*ng_gif_detach_p)(struct ifnet *ifp);
/*
 * Forward declarations: the ifnet method implementations installed by
 * gif_clone_create(), the cloner callbacks handed to if_clone_simple(),
 * and the tunnel teardown helper.
 */
107 static void gif_reassign(struct ifnet *, struct vnet *, char *);
109 static void gif_delete_tunnel(struct gif_softc *);
110 static int gif_ioctl(struct ifnet *, u_long, caddr_t);
111 static int gif_transmit(struct ifnet *, struct mbuf *);
112 static void gif_qflush(struct ifnet *);
113 static int gif_clone_create(struct if_clone *, int, caddr_t);
114 static void gif_clone_destroy(struct ifnet *);
/* Cloner handle: set by vnet_gif_init(), detached by vnet_gif_uninit(). */
115 VNET_DEFINE_STATIC(struct if_clone *, gif_cloner);
116 #define V_gif_cloner VNET(gif_cloner)
/* Sysctl node "gif" under _net_link; the max_nesting OID hangs off it. */
118 SYSCTL_DECL(_net_link);
119 static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
120 "Generic Tunnel Interface");
123 * This macro sets the default upper limit on the nesting of gif tunnels.
124 * Because a large value combined with a careless configuration can crash
125 * the system, nested tunnels are not allowed by default.
126 * If you need to configure nested gif tunnels, you can define this macro
127 * in your kernel configuration file. However, if you do so, please be
128 * careful to configure the tunnels so that they do not form a loop.
/* Compile-time default used to initialize max_gif_nesting. */
130 #define MAX_GIF_NEST 1
/*
 * Run-time nesting limit, exported read-write as the "max_nesting" OID
 * under the gif sysctl node (CTLFLAG_VNET).  gif_transmit() passes
 * V_max_gif_nesting to if_tunnel_check_nesting().
 */
132 VNET_DEFINE_STATIC(int, max_gif_nesting) = MAX_GIF_NEST;
133 #define V_max_gif_nesting VNET(max_gif_nesting)
134 SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW,
135 &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");
/*
 * Cloner "create" callback (registered in vnet_gif_init()).
 *
 * Allocates a zeroed softc, allocates an IFT_GIF ifnet for it, fills in
 * the interface defaults and method pointers, then attaches the interface
 * and its BPF tap.
 *
 *   ifc    - cloner handle; not referenced in the visible lines.
 *   unit   - unit number, combined with gifname by if_initname().
 *   params - not referenced in the visible lines.
 *
 * NOTE(review): the return statement and braces are not visible in this
 * excerpt; the prototype declares an int return.
 */
138 gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
140 struct gif_softc *sc;
/* The result is used without a NULL check; the request is M_WAITOK. */
142 sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
/* Start with the FIB number of the creating thread's process. */
143 sc->gif_fibnum = curthread->td_proc->p_fibnum;
144 GIF2IFP(sc) = if_alloc(IFT_GIF);
145 GIF2IFP(sc)->if_softc = sc;
146 if_initname(GIF2IFP(sc), gifname, unit);
148 GIF2IFP(sc)->if_addrlen = 0;
149 GIF2IFP(sc)->if_mtu = GIF_MTU;
150 GIF2IFP(sc)->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
/* Method table: all handlers are defined in this file. */
151 GIF2IFP(sc)->if_ioctl = gif_ioctl;
152 GIF2IFP(sc)->if_transmit = gif_transmit;
153 GIF2IFP(sc)->if_qflush = gif_qflush;
154 GIF2IFP(sc)->if_output = gif_output;
156 GIF2IFP(sc)->if_reassign = gif_reassign;
/* Advertise and enable link-state reporting. */
158 GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
159 GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
160 if_attach(GIF2IFP(sc));
/*
 * DLT_NULL with a sizeof(u_int32_t) header: gif_transmit() and gif_input()
 * tap packets with an address-family word passed as the extra header.
 */
161 bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
/* Optional attach hook; skipped when unset. */
162 if (ng_gif_attach_p != NULL)
163 (*ng_gif_attach_p)(GIF2IFP(sc));
/*
 * if_reassign handler (installed by gif_clone_create()).
 *
 * Tears down the tunnel configuration via gif_delete_tunnel() while
 * holding gif_ioctl_sx exclusively.  Both new_vnet and the trailing
 * char * argument are marked __unused.
 *
 * NOTE(review): the statement that initializes sc is not visible in this
 * excerpt; presumably it is loaded from ifp->if_softc -- confirm.
 */
170 gif_reassign(struct ifnet *ifp, struct vnet *new_vnet __unused,
171 char *unused __unused)
173 struct gif_softc *sc;
175 sx_xlock(&gif_ioctl_sx);
178 gif_delete_tunnel(sc);
179 sx_xunlock(&gif_ioctl_sx);
/*
 * Cloner "destroy" callback (registered in vnet_gif_init()).
 *
 * Under the exclusive gif_ioctl_sx lock: deletes the tunnel configuration,
 * calls the optional detach hook, and clears ifp->if_softc.
 *
 * NOTE(review): several lines are missing from this excerpt (the
 * initialization of sc, whatever sits between the detach hook and the
 * if_softc reset, and everything after the unlock).  Do not treat the
 * visible lines as the complete teardown sequence.
 */
184 gif_clone_destroy(struct ifnet *ifp)
186 struct gif_softc *sc;
188 sx_xlock(&gif_ioctl_sx);
190 gif_delete_tunnel(sc);
/* Optional detach hook; skipped when unset. */
191 if (ng_gif_detach_p != NULL)
192 (*ng_gif_detach_p)(ifp);
195 ifp->if_softc = NULL;
196 sx_xunlock(&gif_ioctl_sx);
/*
 * VNET initializer: registers the "gif" cloner with gif_clone_create() and
 * gif_clone_destroy() as its callbacks and stores the handle in
 * V_gif_cloner.  Run through VNET_SYSINIT at SI_SUB_PSEUDO / SI_ORDER_ANY.
 *
 * NOTE(review): original lines between the if_clone_simple() call and
 * VNET_SYSINIT are not visible in this excerpt.
 */
204 vnet_gif_init(const void *unused __unused)
207 V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
208 gif_clone_destroy, 0);
216 VNET_SYSINIT(vnet_gif_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
217 vnet_gif_init, NULL);
/*
 * VNET teardown counterpart of vnet_gif_init(): detaches the cloner stored
 * in V_gif_cloner.  Run through VNET_SYSUNINIT at SI_SUB_PSEUDO /
 * SI_ORDER_ANY.
 *
 * NOTE(review): original lines between if_clone_detach() and
 * VNET_SYSUNINIT are not visible in this excerpt.
 */
220 vnet_gif_uninit(const void *unused __unused)
223 if_clone_detach(V_gif_cloner);
231 VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
232 vnet_gif_uninit, NULL);
/*
 * NOTE(review): only the signature is visible in this excerpt.  Presumably
 * this is the module event handler referenced from gif_mod -- confirm
 * against the full file before documenting its behaviour.
 */
235 gifmodevent(module_t mod, int type, void *data)
/*
 * Module descriptor and registration for "if_gif" (version 1), declared at
 * SI_SUB_PSEUDO / SI_ORDER_ANY.
 * NOTE(review): the initializer fields of gif_mod are not visible in this
 * excerpt.
 */
248 static moduledata_t gif_mod = {
254 DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
255 MODULE_VERSION(if_gif, 1);
/*
 * Body of a hash-table constructor: allocates an array of GIF_HASH_SIZE
 * gif_list heads and initializes each one with CK_LIST_INIT().
 *
 * NOTE(review): the function header, the remaining malloc() arguments, the
 * declaration of i and the return statement are not visible in this
 * excerpt.  Presumably this is the counterpart of gif_hashdestroy() --
 * confirm.
 */
260 struct gif_list *hash;
263 hash = malloc(sizeof(struct gif_list) * GIF_HASH_SIZE,
265 for (i = 0; i < GIF_HASH_SIZE; i++)
266 CK_LIST_INIT(&hash[i]);
/*
 * Takes a gif_list array.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * releases the array -- confirm.
 */
272 gif_hashdestroy(struct gif_list *hash)
/* Tag value handed to if_tunnel_check_nesting() by gif_transmit(). */
278 #define MTAG_GIF 1080679712
/*
 * if_transmit handler: encapsulate one packet and hand it to the outer
 * address family's output routine.
 *
 *   ifp - the gif interface.
 *   m   - packet to send; gif_output() has stored the inner address
 *         family in m->m_pkthdr.csum_data.
 *
 * Visible steps, in order:
 *   1. MAC framework transmit check.
 *   2. Reject test: interface in monitor mode, not up, not running, no
 *      outer family configured, or nesting check failed.
 *   3. Recover the inner af, clear M_BCAST/M_MCAST, set the FIB, tap BPF
 *      and bump the output counters.
 *   4. Inner-family handling: IPv4 and IPv6 headers are pulled up and the
 *      ECN helpers are called; for EtherIP a header is prepended and its
 *      version field set; otherwise error is set to EAFNOSUPPORT.
 *   5. Dispatch on sc->gif_family to in_gif_output() / in6_gif_output().
 *   6. Output-error counter increment.
 *
 * NOTE(review): case labels, local declarations (af, proto, ecn, t, ip,
 * ip6, error), m_pullup()/M_PREPEND failure handling, the body of the
 * reject branch, labels and the return are not visible in this excerpt.
 */
280 gif_transmit(struct ifnet *ifp, struct mbuf *m)
282 struct gif_softc *sc;
283 struct etherip_header *eth;
297 error = mac_ifnet_check_transmit(ifp, m);
/* Reject test; only the nesting check assigns error here. */
305 if ((ifp->if_flags & IFF_MONITOR) != 0 ||
306 (ifp->if_flags & IFF_UP) == 0 ||
307 (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
308 sc->gif_family == 0 ||
309 (error = if_tunnel_check_nesting(ifp, m, MTAG_GIF,
310 V_max_gif_nesting)) != 0) {
314 /* Now pull back the af that we stashed in the csum_data. */
318 af = m->m_pkthdr.csum_data;
319 m->m_flags &= ~(M_BCAST|M_MCAST);
/* Send using the FIB configured on the tunnel. */
320 M_SETFIB(m, sc->gif_fibnum);
321 BPF_MTAP2(ifp, &af, sizeof(af), m);
322 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
323 if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
324 /* inner AF-specific encapsulation */
/* Inner IPv4: make the IP header contiguous before touching ip_tos. */
329 proto = IPPROTO_IPV4;
330 if (m->m_len < sizeof(struct ip))
331 m = m_pullup(m, sizeof(struct ip));
336 ip = mtod(m, struct ip *);
/* IFF_LINK1 selects ECN_ALLOWED; otherwise ECN_NOCARE is used. */
337 ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
338 ECN_NOCARE, &ecn, &ip->ip_tos);
/* Inner IPv6: same pattern, using the flow word. */
343 proto = IPPROTO_IPV6;
344 if (m->m_len < sizeof(struct ip6_hdr))
345 m = m_pullup(m, sizeof(struct ip6_hdr));
351 ip6 = mtod(m, struct ip6_hdr *);
352 ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
353 ECN_NOCARE, &t, &ip6->ip6_flow);
/* Take 8 bits starting at bit 20 of the host-order value of t. */
354 ecn = (ntohl(t) >> 20) & 0xff;
/* EtherIP: prepend the EtherIP header (non-sleeping allocation). */
358 proto = IPPROTO_ETHERIP;
359 M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT);
364 eth = mtod(m, struct etherip_header *);
366 eth->eip_ver = ETHERIP_VERSION;
/* Error assigned on a path whose label is not visible in this excerpt. */
370 error = EAFNOSUPPORT;
374 /* XXX should we check if our outer source is legal? */
375 /* dispatch to output logic based on outer AF */
376 switch (sc->gif_family) {
379 error = in_gif_output(ifp, m, proto, ecn);
384 error = in6_gif_output(ifp, m, proto, ecn);
392 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
/*
 * if_qflush handler installed by gif_clone_create().  The ifp argument is
 * marked __unused.
 * NOTE(review): the body is not visible in this excerpt.
 */
397 gif_qflush(struct ifnet *ifp __unused)
/*
 * if_output handler: determine the address family of the packet, store it
 * in m->m_pkthdr.csum_data, and pass the packet to ifp->if_transmit
 * (gif_transmit() for interfaces created here).  Returns whatever
 * if_transmit returns.
 *
 *   ifp - the gif interface; asserted (KASSERT) to have no bridge attached.
 *   m   - packet to send.
 *   dst - destination sockaddr.  For AF_UNSPEC or pseudo_AF_HDRCMPLT the
 *         address family is copied out of dst->sa_data.
 *
 * NOTE(review): the second line of the parameter list (which declares ro),
 * the declaration of af, and the line between the memcpy() and the
 * RO_GET_FAMILY() assignment are not visible in this excerpt; the latter
 * is presumably an "else" -- confirm.
 */
403 gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
408 KASSERT(ifp->if_bridge == NULL,
409 ("%s: unexpectedly called with bridge attached", __func__));
411 /* BPF writes need to be handled specially. */
412 if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT)
413 memcpy(&af, dst->sa_data, sizeof(af));
415 af = RO_GET_FAMILY(ro, dst);
417 * Now save the af in the inbound pkt csum data, this is a cheat since
418 * we are using the inbound csum_data field to carry the af over to
419 * the gif_transmit() routine, avoiding using yet another mtag.
421 m->m_pkthdr.csum_data = af;
422 return (ifp->if_transmit(ifp, m));
/*
 * Input path for a packet received on a gif tunnel.
 *
 *   m     - the received packet.
 *   ifp   - the gif interface; recorded as the packet's rcvif.
 *   proto - selects the inner-protocol handling (an IPPROTO_ETHERIP case
 *           label is visible; the other labels are elided).
 *   ecn   - value passed to ip_ecn_egress() / ip6_ecn_egress().
 *
 * Visible steps, in order:
 *   1. Per-protocol preparation: pull up the inner IPv4/IPv6 header and
 *      run the ECN egress helper.
 *   2. MAC label creation, BPF tap, monitor-mode accounting, optional
 *      input hook.
 *   3. EtherIP: size and version checks, strip the EtherIP header, then
 *      either bridge input or the optional "orphan" hook.
 *   4. Input counters, FIB selection from ifp->if_fib, netisr_dispatch().
 *   5. Input-error counter increment.
 *
 * NOTE(review): most case labels, local declarations (af, af1, isr, n, t,
 * ip, ip6), drop/goto statements and the bodies of several branches are
 * not visible in this excerpt.  The comments below describe only what
 * each visible line does.
 */
426 gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn)
428 struct etherip_header *eip;
436 struct ether_header *eh;
437 struct ifnet *oldifp;
447 m->m_pkthdr.rcvif = ifp;
/* Inner IPv4: make the header contiguous, then apply ECN egress. */
453 if (m->m_len < sizeof(struct ip))
454 m = m_pullup(m, sizeof(struct ip));
457 ip = mtod(m, struct ip *);
/* A zero return enters a branch whose body is not visible here. */
458 if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
459 ECN_NOCARE, &ecn, &ip->ip_tos) == 0) {
/* Inner IPv6: same pattern; ecn is shifted to bit 20 and byte-swapped. */
468 if (m->m_len < sizeof(struct ip6_hdr))
469 m = m_pullup(m, sizeof(struct ip6_hdr));
472 t = htonl((uint32_t)ecn << 20);
473 ip6 = mtod(m, struct ip6_hdr *);
474 if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
475 ECN_NOCARE, &t, &ip6->ip6_flow) == 0) {
481 case IPPROTO_ETHERIP:
490 mac_ifnet_create_mbuf(ifp, m);
/* Tap listeners, passing af1 as the extra header. */
493 if (bpf_peers_present(ifp->if_bpf)) {
495 bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
/* Monitor mode: the packet is counted; the rest of the branch is elided. */
498 if ((ifp->if_flags & IFF_MONITOR) != 0) {
499 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
500 if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
/* Optional input hook; it receives &m. */
505 if (ng_gif_input_p != NULL) {
506 (*ng_gif_input_p)(ifp, &m, af);
512 * Put the packet to the network layer input queue according to the
513 * specified address family.
514 * Note: older versions of gif_input directly called network layer
515 * input functions, e.g. ip6_input, here. We changed the policy to
516 * prevent too many recursive calls of such input functions, which
517 * might cause kernel panic. But the change may introduce another
518 * problem; if the input queue is full, packets are discarded.
519 * The kernel stack overflow really happened, and we believed
520 * queue-full rarely occurs, so we changed the policy.
/* Combined size of the EtherIP header and an Ethernet header. */
534 n = sizeof(struct etherip_header) +
535 sizeof(struct ether_header);
540 eip = mtod(m, struct etherip_header *);
541 if (eip->eip_ver != ETHERIP_VERSION) {
542 /* discard unknown versions */
/* Strip the EtherIP header. */
547 m_adj_decap(m, sizeof(struct etherip_header));
549 m->m_flags &= ~(M_BCAST|M_MCAST);
550 m->m_pkthdr.rcvif = ifp;
/* With a bridge attached, classify the frame and hand it to the bridge. */
552 if (ifp->if_bridge) {
554 eh = mtod(m, struct ether_header *);
555 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
556 if (ETHER_IS_BROADCAST(eh->ether_dhost))
557 m->m_flags |= M_BCAST;
559 m->m_flags |= M_MCAST;
560 if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
562 BRIDGE_INPUT(ifp, m);
564 if (m != NULL && ifp != oldifp) {
566 * The bridge gave us back itself or one of the
567 * members for which the frame is addressed.
/* Optional orphan hook; skipped when unset. */
578 if (ng_gif_input_orphan_p != NULL)
579 (*ng_gif_input_orphan_p)(ifp, m, af);
/* Count the packet and queue it using the interface's FIB. */
585 if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
586 if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
587 M_SETFIB(m, ifp->if_fib);
588 netisr_dispatch(isr, m);
/* Error accounting; the label that leads here is not visible. */
591 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
/*
 * if_ioctl handler for gif interfaces.
 *
 *   ifp  - the gif interface.
 *   cmd  - ioctl command.
 *   data - command argument; viewed as a struct ifreq through ifr.
 *
 * Visible behaviour:
 *   - One path sets IFF_UP; another validates ifr_mtu against
 *     [GIF_MTU_MIN, GIF_MTU_MAX] and stores it.  Both appear before
 *     gif_ioctl_sx is taken.
 *   - Under the exclusive gif_ioctl_sx lock: tunnel deletion, IPv4 and
 *     IPv6 endpoint requests forwarded to in_gif_ioctl() /
 *     in6_gif_ioctl(), get/set of the tunnel FIB, and get/set of the
 *     tunnel options.  Both set paths call priv_check() with PRIV_NET_GIF.
 *   - After a successful request on a configured tunnel, cmd is compared
 *     against SIOCSIFPHYADDR / SIOCSIFPHYADDR_IN6 and the link state is
 *     raised.
 *
 * NOTE(review): most case labels, break/return statements, error
 * assignments, the declarations of error and options, and the
 * initialization of sc are not visible in this excerpt.
 */
595 gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
597 struct ifreq *ifr = (struct ifreq*)data;
598 struct gif_softc *sc;
604 ifp->if_flags |= IFF_UP;
/* MTU request: range test, then store. */
611 if (ifr->ifr_mtu < GIF_MTU_MIN ||
612 ifr->ifr_mtu > GIF_MTU_MAX)
615 ifp->if_mtu = ifr->ifr_mtu;
618 sx_xlock(&gif_ioctl_sx);
/*
 * NOTE(review): the statement controlled by this test is elided (original
 * line 628); the gif_delete_tunnel() call below is not necessarily it.
 */
627 if (sc->gif_family == 0)
629 gif_delete_tunnel(sc);
/* IPv4 tunnel endpoint requests. */
633 case SIOCGIFPSRCADDR:
634 case SIOCGIFPDSTADDR:
635 error = in_gif_ioctl(sc, cmd, data);
/* IPv6 tunnel endpoint requests. */
639 case SIOCSIFPHYADDR_IN6:
640 case SIOCGIFPSRCADDR_IN6:
641 case SIOCGIFPDSTADDR_IN6:
642 error = in6_gif_ioctl(sc, cmd, data);
/* Report the tunnel FIB. */
646 ifr->ifr_fib = sc->gif_fibnum;
/* Set the tunnel FIB: privilege check, bound test, then store. */
649 if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
651 if (ifr->ifr_fib >= rt_numfibs)
654 sc->gif_fibnum = ifr->ifr_fib;
/* Report the option word to the caller. */
657 options = sc->gif_options;
658 error = copyout(&options, ifr_data_get_ptr(ifr),
/* Set the option word: privilege check, copy in, validate, apply. */
662 if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
664 error = copyin(ifr_data_get_ptr(ifr), &options,
/* Tests for bits outside GIF_OPTMASK. */
668 if (options & ~GIF_OPTMASK) {
672 if (sc->gif_options != options) {
673 switch (sc->gif_family) {
676 error = in_gif_setopts(sc, options);
681 error = in6_gif_setopts(sc, options);
685 /* No need to invoke AF-handler */
686 sc->gif_options = options;
694 if (error == 0 && sc->gif_family != 0) {
697 cmd == SIOCSIFPHYADDR ||
700 cmd == SIOCSIFPHYADDR_IN6 ||
703 if_link_state_change(ifp, LINK_STATE_UP);
707 sx_xunlock(&gif_ioctl_sx);
/*
 * Remove the tunnel configuration from a softc.
 *
 * The caller must hold gif_ioctl_sx exclusively (asserted).  When an outer
 * family is configured, the softc is unlinked from the srchash and chain
 * lists and gif_hdr is freed.  The interface's IFF_DRV_RUNNING flag is
 * cleared and the link state is reported down.
 *
 * NOTE(review): lines are missing between the existing comment and the
 * free() call, and between free() and the flag update, so the excerpt
 * shows neither the wait step that the comment describes nor where the
 * "if" block closes.
 */
712 gif_delete_tunnel(struct gif_softc *sc)
715 sx_assert(&gif_ioctl_sx, SA_XLOCKED);
716 if (sc->gif_family != 0) {
717 CK_LIST_REMOVE(sc, srchash);
718 CK_LIST_REMOVE(sc, chain);
719 /* Wait until it becomes safe to free gif_hdr */
721 free(sc->gif_hdr, M_GIF);
724 GIF2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
725 if_link_state_change(GIF2IFP(sc), LINK_STATE_DOWN);