1 /* $FreeBSD: src/sys/netinet6/ip6_output.c,v 1.13.2.18 2003/01/24 05:11:35 sam Exp $ */
2 /* $DragonFly: src/sys/netinet6/ip6_output.c,v 1.37 2008/09/04 09:08:22 hasso Exp $ */
3 /* $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */
6 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the project nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * Copyright (c) 1982, 1986, 1988, 1990, 1993
36 * The Regents of the University of California. All rights reserved.
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
69 #include "opt_ip6fw.h"
71 #include "opt_inet6.h"
72 #include "opt_ipsec.h"
74 #include <sys/param.h>
75 #include <sys/malloc.h>
77 #include <sys/errno.h>
78 #include <sys/protosw.h>
79 #include <sys/socket.h>
80 #include <sys/socketvar.h>
81 #include <sys/systm.h>
82 #include <sys/kernel.h>
86 #include <sys/thread2.h>
87 #include <sys/msgport2.h>
90 #include <net/route.h>
93 #include <netinet/in.h>
94 #include <netinet/in_var.h>
95 #include <netinet6/in6_var.h>
96 #include <netinet/ip6.h>
97 #include <netinet/icmp6.h>
98 #include <netinet6/ip6_var.h>
99 #include <netinet/in_pcb.h>
100 #include <netinet6/nd6.h>
101 #include <netinet6/ip6protosw.h>
104 #include <netinet6/ipsec.h>
106 #include <netinet6/ipsec6.h>
108 #include <netproto/key/key.h>
112 #include <netproto/ipsec/ipsec.h>
113 #include <netproto/ipsec/ipsec6.h>
114 #include <netproto/ipsec/key.h>
117 #include <net/ip6fw/ip6_fw.h>
119 #include <net/net_osdep.h>
121 static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");
124 struct mbuf *ip6e_ip6;
125 struct mbuf *ip6e_hbh;
126 struct mbuf *ip6e_dest1;
127 struct mbuf *ip6e_rthdr;
128 struct mbuf *ip6e_dest2;
131 static int ip6_pcbopt (int, u_char *, int, struct ip6_pktopts **, int);
132 static int ip6_setpktoption (int, u_char *, int, struct ip6_pktopts *,
134 static int ip6_pcbopts (struct ip6_pktopts **, struct mbuf *,
135 struct socket *, struct sockopt *);
136 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
137 static int ip6_setmoptions (int, struct ip6_moptions **, struct mbuf *);
138 static int ip6_getmoptions (int, struct ip6_moptions *, struct mbuf **);
139 static int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
140 struct ifnet *, struct in6_addr *, u_long *, int *);
141 static int copyexthdr (void *, struct mbuf **);
142 static int ip6_insertfraghdr (struct mbuf *, struct mbuf *, int,
144 static int ip6_insert_jumboopt (struct ip6_exthdrs *, u_int32_t);
145 static struct mbuf *ip6_splithdr (struct mbuf *);
146 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
149 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
150 * header (with pri, len, nxt, hlim, src, dst).
151 * This function may modify ver and hlim only.
152 * The mbuf chain containing the packet will be freed.
153 * The mbuf opt, if present, will not be freed.
155 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
156 * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one,
157 * which is rt_rmx.rmx_mtu.
160 ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
161 int flags, struct ip6_moptions *im6o,
162 struct ifnet **ifpp, /* XXX: just for statistics */
165 struct ip6_hdr *ip6, *mhip6;
166 struct ifnet *ifp, *origifp;
170 int hlen, tlen, len, off;
171 struct route_in6 ip6route;
172 struct sockaddr_in6 *dst;
174 struct in6_ifaddr *ia = NULL;
176 int alwaysfrag, dontfrag;
177 u_int32_t optlen, plen = 0, unfragpartlen;
178 struct ip6_exthdrs exthdrs;
179 struct in6_addr finaldst;
180 struct route_in6 *ro_pmtu = NULL;
181 boolean_t hdrsplit = FALSE;
182 boolean_t needipsec = FALSE;
184 boolean_t needipsectun = FALSE;
185 struct secpolicy *sp = NULL;
186 struct socket *so = inp ? inp->inp_socket : NULL;
188 ip6 = mtod(m, struct ip6_hdr *);
191 boolean_t needipsectun = FALSE;
192 struct secpolicy *sp = NULL;
194 ip6 = mtod(m, struct ip6_hdr *);
197 bzero(&exthdrs, sizeof exthdrs);
200 if ((error = copyexthdr(opt->ip6po_hbh, &exthdrs.ip6e_hbh)))
202 if ((error = copyexthdr(opt->ip6po_dest1, &exthdrs.ip6e_dest1)))
204 if ((error = copyexthdr(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr)))
206 if ((error = copyexthdr(opt->ip6po_dest2, &exthdrs.ip6e_dest2)))
211 /* get a security policy for this packet */
213 sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
215 sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
218 ipsec6stat.out_inval++;
225 switch (sp->policy) {
226 case IPSEC_POLICY_DISCARD:
228 * This packet is just discarded.
230 ipsec6stat.out_polvio++;
233 case IPSEC_POLICY_BYPASS:
234 case IPSEC_POLICY_NONE:
235 /* no need to do IPsec. */
239 case IPSEC_POLICY_IPSEC:
240 if (sp->req == NULL) {
241 error = key_spdacquire(sp); /* acquire a policy */
247 case IPSEC_POLICY_ENTRUST:
249 kprintf("ip6_output: Invalid policy found. %d\n", sp->policy);
253 /* get a security policy for this packet */
255 sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
257 sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error);
260 newipsecstat.ips_out_inval++;
267 switch (sp->policy) {
268 case IPSEC_POLICY_DISCARD:
270 * This packet is just discarded.
272 newipsecstat.ips_out_polvio++;
275 case IPSEC_POLICY_BYPASS:
276 case IPSEC_POLICY_NONE:
277 /* no need to do IPsec. */
281 case IPSEC_POLICY_IPSEC:
282 if (sp->req == NULL) {
283 error = key_spdacquire(sp); /* acquire a policy */
289 case IPSEC_POLICY_ENTRUST:
291 kprintf("ip6_output: Invalid policy found. %d\n", sp->policy);
293 #endif /* FAST_IPSEC */
296 * Calculate the total length of the extension header chain.
297 * Keep the length of the unfragmentable part for fragmentation.
299 optlen = m_lengthm(exthdrs.ip6e_hbh, NULL) +
300 m_lengthm(exthdrs.ip6e_dest1, NULL) +
301 m_lengthm(exthdrs.ip6e_rthdr, NULL);
303 unfragpartlen = optlen + sizeof(struct ip6_hdr);
305 /* NOTE: we don't add AH/ESP length here. do that later. */
306 optlen += m_lengthm(exthdrs.ip6e_dest2, NULL);
309 * If we need IPsec, or there is at least one extension header,
310 * separate IP6 header from the payload.
312 if ((needipsec || optlen) && !hdrsplit) {
313 exthdrs.ip6e_ip6 = ip6_splithdr(m);
314 if (exthdrs.ip6e_ip6 == NULL) {
318 m = exthdrs.ip6e_ip6;
323 ip6 = mtod(m, struct ip6_hdr *);
325 /* adjust mbuf packet header length */
326 m->m_pkthdr.len += optlen;
327 plen = m->m_pkthdr.len - sizeof(*ip6);
329 /* If this is a jumbo payload, insert a jumbo payload option. */
330 if (plen > IPV6_MAXPACKET) {
332 exthdrs.ip6e_ip6 = ip6_splithdr(m);
333 if (exthdrs.ip6e_ip6 == NULL) {
337 m = exthdrs.ip6e_ip6;
341 ip6 = mtod(m, struct ip6_hdr *);
342 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
346 ip6->ip6_plen = htons(plen);
349 * Concatenate headers and fill in next header fields.
350 * Here we have, on "m"
352 * and we insert headers accordingly. Finally, we should be getting:
353 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
355 * during the header composing process, "m" points to IPv6 header.
356 * "mprev" points to an extension header prior to esp.
359 nexthdrp = &ip6->ip6_nxt;
363 * we treat dest2 specially. this makes IPsec processing
364 * much easier. the goal here is to make mprev point to the
365 * mbuf prior to dest2.
367 * result: IPv6 dest2 payload
368 * m and mprev will point to IPv6 header.
370 if (exthdrs.ip6e_dest2) {
372 panic("assumption failed: hdr not split");
373 exthdrs.ip6e_dest2->m_next = m->m_next;
374 m->m_next = exthdrs.ip6e_dest2;
375 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
376 ip6->ip6_nxt = IPPROTO_DSTOPTS;
380 * Place m1 after mprev.
382 #define MAKE_CHAIN(m1, mprev, nexthdrp, i)\
386 panic("assumption failed: hdr not split");\
387 *mtod(m1, u_char *) = *nexthdrp;\
389 nexthdrp = mtod(m1, u_char *);\
390 m1->m_next = mprev->m_next;\
397 * result: IPv6 hbh dest1 rthdr dest2 payload
398 * m will point to IPv6 header. mprev will point to the
399 * extension header prior to dest2 (rthdr in the above case).
401 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
402 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS);
403 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING);
405 #if defined(IPSEC) || defined(FAST_IPSEC)
407 struct ipsec_output_state state;
409 struct ip6_rthdr *rh = NULL;
412 * pointers after IPsec headers are not valid any more.
413 * other pointers need great care too.
414 * (IPsec routines should not mangle mbufs prior to AH/ESP)
416 exthdrs.ip6e_dest2 = NULL;
418 if (exthdrs.ip6e_rthdr) {
419 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
420 segleft_org = rh->ip6r_segleft;
421 rh->ip6r_segleft = 0;
424 bzero(&state, sizeof state);
426 error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
430 /* mbuf is already reclaimed in ipsec6_output_trans. */
440 kprintf("ip6_output (ipsec): error code %d\n",
444 /* don't show these error codes to the user */
450 if (exthdrs.ip6e_rthdr) {
451 /* ah6_output doesn't modify mbuf chain */
452 rh->ip6r_segleft = segleft_org;
458 * If there is a routing header, replace destination address field
459 * with the first hop of the routing header.
461 if (exthdrs.ip6e_rthdr) {
462 struct ip6_rthdr *rh;
464 finaldst = ip6->ip6_dst;
465 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
466 switch (rh->ip6r_type) {
467 default: /* is it possible? */
473 /* Source address validation */
474 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
475 !(flags & IPV6_DADOUTPUT)) {
477 ip6stat.ip6s_badscope++;
480 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
482 ip6stat.ip6s_badscope++;
486 ip6stat.ip6s_localout++;
493 bzero(ro, sizeof(*ro));
496 if (opt && opt->ip6po_rthdr)
497 ro = &opt->ip6po_route;
498 dst = (struct sockaddr_in6 *)&ro->ro_dst;
500 * If there is a cached route,
501 * check that it is to the same destination
502 * and is still up. If not, free it and try again.
504 if (ro->ro_rt != NULL &&
505 (!(ro->ro_rt->rt_flags & RTF_UP) || dst->sin6_family != AF_INET6 ||
506 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
510 if (ro->ro_rt == NULL) {
511 bzero(dst, sizeof(*dst));
512 dst->sin6_family = AF_INET6;
513 dst->sin6_len = sizeof(struct sockaddr_in6);
514 dst->sin6_addr = ip6->ip6_dst;
516 #if defined(IPSEC) || defined(FAST_IPSEC)
517 if (needipsec && needipsectun) {
518 struct ipsec_output_state state;
521 * All the extension headers will become inaccessible
522 * (since they can be encrypted).
523 * Don't panic, we need no more updates to extension headers
524 * on inner IPv6 packet (since they are now encapsulated).
526 * IPv6 [ESP|AH] IPv6 [extension headers] payload
528 bzero(&exthdrs, sizeof(exthdrs));
529 exthdrs.ip6e_ip6 = m;
531 bzero(&state, sizeof(state));
533 state.ro = (struct route *)ro;
534 state.dst = (struct sockaddr *)dst;
536 error = ipsec6_output_tunnel(&state, sp, flags);
539 ro = (struct route_in6 *)state.ro;
540 dst = (struct sockaddr_in6 *)state.dst;
542 /* mbuf is already reclaimed in ipsec6_output_tunnel. */
553 kprintf("ip6_output (ipsec): error code %d\n", error);
556 /* don't show these error codes to the user */
563 exthdrs.ip6e_ip6 = m;
567 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
570 #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa))
571 #define sin6tosa(sin6) ((struct sockaddr *)(sin6))
573 * interface selection comes here
574 * if an interface is specified from an upper layer,
577 if (ro->ro_rt == NULL) {
579 * non-bsdi always clone routes, if parent is
582 rtalloc((struct route *)ro);
584 if (ro->ro_rt == NULL) {
585 ip6stat.ip6s_noroute++;
586 error = EHOSTUNREACH;
587 /* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */
590 ia = ifatoia6(ro->ro_rt->rt_ifa);
591 ifp = ro->ro_rt->rt_ifp;
593 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
594 dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway;
595 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
597 in6_ifstat_inc(ifp, ifs6_out_request);
600 * Check if the outgoing interface conflicts with
601 * the interface specified by ifi6_ifindex (if specified).
602 * Note that loopback interface is always okay.
603 * (this may happen when we are sending a packet to one of
604 * our own addresses.)
606 if (opt && opt->ip6po_pktinfo
607 && opt->ip6po_pktinfo->ipi6_ifindex) {
608 if (!(ifp->if_flags & IFF_LOOPBACK)
609 && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) {
610 ip6stat.ip6s_noroute++;
611 in6_ifstat_inc(ifp, ifs6_out_discard);
612 error = EHOSTUNREACH;
617 if (opt && opt->ip6po_hlim != -1)
618 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
621 struct in6_multi *in6m;
623 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
626 * See if the caller provided any multicast options
630 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
631 if (im6o->im6o_multicast_ifp != NULL)
632 ifp = im6o->im6o_multicast_ifp;
634 ip6->ip6_hlim = ip6_defmcasthlim;
637 * See if the caller provided the outgoing interface
638 * as an ancillary data.
639 * Boundary check for ifindex is assumed to be already done.
641 if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex)
642 ifp = ifindex2ifnet[opt->ip6po_pktinfo->ipi6_ifindex];
645 * If the destination is a node-local scope multicast,
646 * the packet should be loop-backed only.
648 if (IN6_IS_ADDR_MC_NODELOCAL(&ip6->ip6_dst)) {
650 * If the outgoing interface is already specified,
651 * it should be a loopback interface.
653 if (ifp && !(ifp->if_flags & IFF_LOOPBACK)) {
654 ip6stat.ip6s_badscope++;
655 error = ENETUNREACH; /* XXX: better error? */
656 /* XXX correct ifp? */
657 in6_ifstat_inc(ifp, ifs6_out_discard);
664 if (opt && opt->ip6po_hlim != -1)
665 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
668 * If caller did not provide an interface lookup a
669 * default in the routing table. This is either a
670 * default for the specified group (i.e. a host
671 * route), or a multicast default (a route for the
675 if (ro->ro_rt == NULL) {
677 rtpurelookup((struct sockaddr *)&ro->ro_dst);
679 if (ro->ro_rt == NULL) {
680 ip6stat.ip6s_noroute++;
681 error = EHOSTUNREACH;
682 /* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */
685 ia = ifatoia6(ro->ro_rt->rt_ifa);
686 ifp = ro->ro_rt->rt_ifp;
690 if (!(flags & IPV6_FORWARDING))
691 in6_ifstat_inc(ifp, ifs6_out_request);
692 in6_ifstat_inc(ifp, ifs6_out_mcast);
695 * Confirm that the outgoing interface supports multicast.
697 if (!(ifp->if_flags & IFF_MULTICAST)) {
698 ip6stat.ip6s_noroute++;
699 in6_ifstat_inc(ifp, ifs6_out_discard);
703 IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
705 (im6o == NULL || im6o->im6o_multicast_loop)) {
707 * If we belong to the destination multicast group
708 * on the outgoing interface, and the caller did not
709 * forbid loopback, loop back a copy.
711 ip6_mloopback(ifp, m, dst);
714 * If we are acting as a multicast router, perform
715 * multicast forwarding as if the packet had just
716 * arrived on the interface to which we are about
717 * to send. The multicast forwarding function
718 * recursively calls this function, using the
719 * IPV6_FORWARDING flag to prevent infinite recursion.
721 * Multicasts that are looped back by ip6_mloopback(),
722 * above, will be forwarded by the ip6_input() routine,
725 if (ip6_mrouter && !(flags & IPV6_FORWARDING)) {
726 if (ip6_mforward(ip6, ifp, m) != 0) {
733 * Multicasts with a hoplimit of zero may be looped back,
734 * above, but must not be transmitted on a network.
735 * Also, multicasts addressed to the loopback interface
736 * are not sent -- the above call to ip6_mloopback() will
737 * loop back a copy if this host actually belongs to the
738 * destination group on the loopback interface.
740 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK)) {
747 * Fill the outgoing interface to tell the upper layer
748 * to increment per-interface statistics.
753 /* Determine path MTU. */
754 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
759 * The caller of this function may specify to use the minimum MTU
761 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
762 * setting. The logic is a bit complicated; by default, unicast
763 * packets will follow path MTU while multicast packets will be sent at
764 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
765 * including unicast ones will be sent at the minimum MTU. Multicast
766 * packets will always be sent at the minimum MTU unless
767 * IP6PO_MINMTU_DISABLE is explicitly specified.
768 * See RFC 3542 for more details.
770 if (mtu > IPV6_MMTU) {
771 if ((flags & IPV6_MINMTU))
773 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
775 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
777 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
782 /* Fake scoped addresses */
783 if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
785 * If source or destination address is a scoped address, and
786 * the packet is going to be sent to a loopback interface,
787 * we should keep the original interface.
791 * XXX: this is a very experimental and temporary solution.
792 * We eventually have sockaddr_in6 and use the sin6_scope_id
793 * field of the structure here.
794 * We rely on the consistency between two scope zone ids
795 * of source and destination, which should already be assured.
796 * Larger scopes than link will be supported in the future.
799 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
800 origifp = ifindex2ifnet[ntohs(ip6->ip6_src.s6_addr16[1])];
801 else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
802 origifp = ifindex2ifnet[ntohs(ip6->ip6_dst.s6_addr16[1])];
804 * XXX: origifp can be NULL even in those two cases above.
805 * For example, if we remove the (only) link-local address
806 * from the loopback interface, and try to send to a link-local
807 * address without link-id information. Then the source
808 * address is ::1, and the destination address is the
809 * link-local address with its s6_addr16[1] being zero.
810 * What is worse, if the packet goes to the loopback interface
811 * by a default rejected route, the null pointer would be
812 * passed to looutput, and the kernel would hang.
813 * The following last resort would prevent such disaster.
821 * clear embedded scope identifiers if necessary.
822 * in6_clearscope will touch the addresses only when necessary.
824 in6_clearscope(&ip6->ip6_src);
825 in6_clearscope(&ip6->ip6_dst);
828 * Check with the firewall...
830 if (ip6_fw_enable && ip6_fw_chk_ptr) {
833 m->m_pkthdr.rcvif = NULL; /* XXX */
834 /* If ipfw says divert, we have to just drop packet */
835 if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) {
846 * If the outgoing packet contains a hop-by-hop options header,
847 * it must be examined and processed even by the source node.
848 * (RFC 2460, section 4.)
850 if (exthdrs.ip6e_hbh) {
851 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
852 u_int32_t dummy1; /* XXX unused */
853 u_int32_t dummy2; /* XXX unused */
856 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
857 panic("ip6e_hbh is not continuous");
860 * XXX: if we have to send an ICMPv6 error to the sender,
861 * we need the M_LOOP flag since icmp6_error() expects
862 * the IPv6 and the hop-by-hop options header are
863 * continuous unless the flag is set.
865 m->m_flags |= M_LOOP;
866 m->m_pkthdr.rcvif = ifp;
867 if (ip6_process_hopopts(m,
868 (u_int8_t *)(hbh + 1),
869 ((hbh->ip6h_len + 1) << 3) -
870 sizeof(struct ip6_hbh),
871 &dummy1, &dummy2) < 0) {
872 /* m was already freed at this point */
873 error = EINVAL;/* better error? */
876 m->m_flags &= ~M_LOOP; /* XXX */
877 m->m_pkthdr.rcvif = NULL;
881 * Run through list of hooks for output packets.
883 if (pfil_has_hooks(&inet6_pfil_hook)) {
884 error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT);
885 if (error != 0 || m == NULL)
887 ip6 = mtod(m, struct ip6_hdr *);
891 * Send the packet to the outgoing interface.
892 * If necessary, do IPv6 fragmentation before sending.
894 * the logic here is rather complex:
895 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
896 * 1-a: send as is if tlen <= path mtu
897 * 1-b: fragment if tlen > path mtu
899 * 2: if user asks us not to fragment (dontfrag == 1)
900 * 2-a: send as is if tlen <= interface mtu
901 * 2-b: error if tlen > interface mtu
903 * 3: if we always need to attach fragment header (alwaysfrag == 1)
906 * 4: if dontfrag == 1 && alwaysfrag == 1
907 * error, as we cannot handle this conflicting request
909 tlen = m->m_pkthdr.len;
911 if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
915 if (dontfrag && alwaysfrag) { /* case 4 */
916 /* conflicting request - can't transmit */
920 if (dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */
922 * Even if the DONTFRAG option is specified, we cannot send the
923 * packet when the data length is larger than the MTU of the
924 * outgoing interface.
925 * Notify the error by sending IPV6_PATHMTU ancillary data as
926 * well as returning an error code (the latter is not described
930 struct ip6ctlparam ip6cp;
932 mtu32 = (u_int32_t)mtu;
933 bzero(&ip6cp, sizeof(ip6cp));
934 ip6cp.ip6c_cmdarg = (void *)&mtu32;
935 kpfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
943 * transmit packet without fragmentation
945 if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */
946 struct in6_ifaddr *ia6;
948 ip6 = mtod(m, struct ip6_hdr *);
949 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
951 /* Record statistics for this interface address. */
952 ia6->ia_ifa.if_opackets++;
953 ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
956 /* clean ipsec history once it goes out of the node */
959 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
964 * try to fragment the packet. case 1-b and 3
966 if (mtu < IPV6_MMTU) {
968 * note that path MTU is never less than IPV6_MMTU
972 in6_ifstat_inc(ifp, ifs6_out_fragfail);
974 } else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */
976 in6_ifstat_inc(ifp, ifs6_out_fragfail);
979 struct mbuf **mnext, *m_frgpart;
980 struct ip6_frag *ip6f;
981 u_int32_t id = htonl(ip6_id++);
982 int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
986 * Too large for the destination or interface;
987 * fragment if possible.
988 * Must be able to put at least 8 bytes per fragment.
990 hlen = unfragpartlen;
991 if (mtu > IPV6_MAXPACKET)
992 mtu = IPV6_MAXPACKET;
994 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
997 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1002 * Verify that we have any chance at all of being able to queue
1003 * the packet or packet fragments
1005 if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
1006 < tlen /* - hlen */)) {
1008 ip6stat.ip6s_odropped++;
1012 mnext = &m->m_nextpkt;
1015 * Change the next header field of the last header in the
1016 * unfragmentable part.
1018 if (exthdrs.ip6e_rthdr) {
1019 nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1020 *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1021 } else if (exthdrs.ip6e_dest1) {
1022 nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1023 *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1024 } else if (exthdrs.ip6e_hbh) {
1025 nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1026 *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1028 nextproto = ip6->ip6_nxt;
1029 ip6->ip6_nxt = IPPROTO_FRAGMENT;
1033 * Loop through length of segment after first fragment,
1034 * make new header and copy data of each part and link onto
1038 for (off = hlen; off < tlen; off += len) {
1039 MGETHDR(m, MB_DONTWAIT, MT_HEADER);
1042 ip6stat.ip6s_odropped++;
1045 m->m_pkthdr.rcvif = NULL;
1046 m->m_flags = m0->m_flags & M_COPYFLAGS;
1048 mnext = &m->m_nextpkt;
1049 m->m_data += max_linkhdr;
1050 mhip6 = mtod(m, struct ip6_hdr *);
1052 m->m_len = sizeof(*mhip6);
1053 error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1055 ip6stat.ip6s_odropped++;
1058 ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1059 if (off + len >= tlen)
1062 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1063 mhip6->ip6_plen = htons((u_short)(len + hlen +
1065 sizeof(struct ip6_hdr)));
1066 if ((m_frgpart = m_copy(m0, off, len)) == NULL) {
1068 ip6stat.ip6s_odropped++;
1071 m_cat(m, m_frgpart);
1072 m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1073 m->m_pkthdr.rcvif = NULL;
1074 ip6f->ip6f_reserved = 0;
1075 ip6f->ip6f_ident = id;
1076 ip6f->ip6f_nxt = nextproto;
1077 ip6stat.ip6s_ofragments++;
1078 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1081 in6_ifstat_inc(ifp, ifs6_out_fragok);
1085 * Remove leading garbage.
1089 m0->m_nextpkt = NULL;
1091 for (m0 = m; m; m = m0) {
1093 m->m_nextpkt = NULL;
1095 /* Record statistics for this interface address. */
1097 ia->ia_ifa.if_opackets++;
1098 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1101 /* clean ipsec history once it goes out of the node */
1104 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1110 ip6stat.ip6s_fragmented++;
1113 if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1115 } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1116 RTFREE(ro_pmtu->ro_rt);
1131 m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */
1132 m_freem(exthdrs.ip6e_dest1);
1133 m_freem(exthdrs.ip6e_rthdr);
1134 m_freem(exthdrs.ip6e_dest2);
/*
 * copyexthdr: duplicate one IPv6 extension header into a fresh mbuf.
 *
 * h  - source extension header, interpreted through struct ip6_ext.
 * mp - output mbuf pointer supplied by the caller.
 *      NOTE(review): the store through mp and the NULL-input handling are
 *      not part of the visible lines; confirm against the full file.
 *
 * The number of bytes copied is derived from the header itself as
 * (ip6e_len + 1) * 8.  ENOBUFS is returned when that length is larger
 * than MCLBYTES; the remaining return paths are not visible here.
 */
1142 copyexthdr(void *h, struct mbuf **mp)
1144 struct ip6_ext *hdr = h;
/* byte length of the header as encoded in its own length field */
1151 hlen = (hdr->ip6e_len + 1) * 8;
/* refuse headers whose length exceeds MCLBYTES */
1152 if (hlen > MCLBYTES)
1153 return ENOBUFS; /* XXX */
/* obtain an mbuf sized for hlen bytes, without waiting (MB_DONTWAIT) */
1155 m = m_getb(hlen, MB_DONTWAIT, MT_DATA, 0);
/* copy the whole header into the data area of the new mbuf */
1160 bcopy(hdr, mtod(m, caddr_t), hlen);
1167 * Insert jumbo payload option.
/*
 * ip6_insert_jumboopt: add a jumbo payload option to the hop-by-hop
 * options mbuf held in exthdrs->ip6e_hbh.
 *
 * exthdrs - extension header set; ip6e_hbh is created or replaced as
 *           needed and ip6e_ip6->m_pkthdr.len is grown by JUMBOOPTLEN.
 * plen    - payload length before the option is added; the value written
 *           into the option is plen + JUMBOOPTLEN in network byte order.
 *
 * Three cases are handled by the visible code:
 *   - no hop-by-hop mbuf yet: a new mbuf of JUMBOOPTLEN bytes is used;
 *   - existing mbuf without JUMBOOPTLEN bytes of trailing space: the old
 *     options are copied into a cluster obtained with m_getcl and the
 *     option is placed after them;
 *   - existing mbuf with enough trailing space: the option is appended
 *     in place.
 * NOTE(review): allocation-failure handling and the return statements are
 * not part of the visible lines; confirm against the full file.
 */
1170 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1176 #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
1179 * If there is no hop-by-hop options header, allocate new one.
1180 * If there is one but it doesn't have enough space to store the
1181 * jumbo payload option, allocate a cluster to store the whole options.
1182 * Otherwise, use it to store the options.
1184 if (exthdrs->ip6e_hbh == NULL) {
1185 MGET(mopt, MB_DONTWAIT, MT_DATA);
1188 mopt->m_len = JUMBOOPTLEN;
1189 optbuf = mtod(mopt, u_char *);
1190 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
1191 exthdrs->ip6e_hbh = mopt;
1193 struct ip6_hbh *hbh;
1195 mopt = exthdrs->ip6e_hbh;
1196 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1199 * - exthdrs->ip6e_hbh is not referenced from places
1200 * other than exthdrs.
1201 * - exthdrs->ip6e_hbh is not an mbuf chain.
1203 int oldoptlen = mopt->m_len;
1207 * XXX: give up if the whole (new) hbh header does
1208 * not fit even in an mbuf cluster.
1210 if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1214 * As a consequence, we must always prepare a cluster
1217 n = m_getcl(MB_DONTWAIT, MT_DATA, 0);
1220 n->m_len = oldoptlen + JUMBOOPTLEN;
1221 bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), oldoptlen);
1222 optbuf = mtod(n, caddr_t) + oldoptlen;
1224 mopt = exthdrs->ip6e_hbh = n;
1226 optbuf = mtod(mopt, u_char *) + mopt->m_len;
1227 mopt->m_len += JUMBOOPTLEN;
1229 optbuf[0] = IP6OPT_PADN;
1233 * Adjust the header length according to the pad and
1234 * the jumbo payload option.
1236 hbh = mtod(mopt, struct ip6_hbh *);
/* ip6h_len grows by JUMBOOPTLEN / 8 */
1237 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1240 /* fill in the option. */
1241 optbuf[2] = IP6OPT_JUMBO;
/* the stored length includes the JUMBOOPTLEN bytes being added here */
1243 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
/* bcopy is used rather than a direct 32-bit store into optbuf */
1244 bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1246 /* finally, adjust the packet header length */
1247 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1254 * Insert fragment header and copy unfragmentable header portions.
/*
 * ip6_insertfraghdr: reserve room for a fragment header in the fragment
 * being built and hand its address back to the caller.
 *
 * m0      - original packet; when hlen is larger than the IPv6 header,
 *           the bytes between sizeof(struct ip6_hdr) and hlen are copied
 *           from it with m_copym.
 * m       - mbuf of the fragment under construction; its m_pkthdr.len is
 *           increased when the fragment header is placed in trailing space.
 * hlen    - length of the unfragmentable part, IPv6 header included.
 * frghdrp - output: receives the address of the fragment header area.
 *
 * NOTE(review): the linking of the copied chain to m, the handling of
 * allocation failures and the return statements are not part of the
 * visible lines; confirm against the full file.
 */
1257 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1258 struct ip6_frag **frghdrp)
1260 struct mbuf *n, *mlast;
/* copy only what lies beyond the fixed IPv6 header */
1262 if (hlen > sizeof(struct ip6_hdr)) {
1263 n = m_copym(m0, sizeof(struct ip6_hdr),
1264 hlen - sizeof(struct ip6_hdr), MB_DONTWAIT);
1271 /* Search for the last mbuf of unfragmentable part. */
1272 for (mlast = n; mlast->m_next; mlast = mlast->m_next)
/* trailing space is reused only when the last mbuf does not have M_EXT set */
1275 if (!(mlast->m_flags & M_EXT) &&
1276 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1277 /* use the trailing space of the last mbuf for the fragment hdr */
1279 (struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len);
1280 mlast->m_len += sizeof(struct ip6_frag);
1281 m->m_pkthdr.len += sizeof(struct ip6_frag);
1283 /* allocate a new mbuf for the fragment header */
1286 MGET(mfrg, MB_DONTWAIT, MT_DATA);
1289 mfrg->m_len = sizeof(struct ip6_frag);
1290 *frghdrp = mtod(mfrg, struct ip6_frag *);
/* append the new mbuf after the last mbuf found above */
1291 mlast->m_next = mfrg;
/*
 * ip6_getpmtu: work out the MTU to use toward `dst'.
 *
 * ro_pmtu     - route cache used for the lookup.  When it is a different
 *               object from `ro', its cached route is released if it is
 *               not RTF_UP or its stored destination differs from `dst',
 *               and a missing route is resolved with rtalloc().
 * ro          - only compared against ro_pmtu in the lines visible here.
 * ifp         - interface to fall back on; overwritten with the route's
 *               rt_ifp when ro_pmtu holds a route.
 * dst         - destination address.
 * mtup        - presumably receives the resulting MTU (the store is not
 *               visible in this excerpt - TODO confirm).
 * alwaysfragp - receives the local `alwaysfrag' value.  ip6_ctloutput()
 *               passes NULL for it, so the store is presumably guarded on
 *               a line not shown - confirm.
 *
 * With a route, the candidate MTU is rt_rmx.rmx_mtu; it is compared against
 * IPV6_MMTU and against the interface's IN6_LINKMTU().  Later branches use
 * IN6_LINKMTU(ifp) directly or assign EHOSTUNREACH; their conditions are on
 * lines not visible here.
 */
1298 ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
1299 struct ifnet *ifp, struct in6_addr *dst, u_long *mtup,
1306 if (ro_pmtu != ro) {
1307 /* The first hop and the final destination may differ. */
1308 struct sockaddr_in6 *sa6_dst =
1309 (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1310 if (ro_pmtu->ro_rt &&
1311 ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1312 !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1313 RTFREE(ro_pmtu->ro_rt);
1314 ro_pmtu->ro_rt = NULL;
1316 if (ro_pmtu->ro_rt == NULL) {
1317 bzero(sa6_dst, sizeof(*sa6_dst));
1318 sa6_dst->sin6_family = AF_INET6;
1319 sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1320 sa6_dst->sin6_addr = *dst;
1322 rtalloc((struct route *)ro_pmtu);
/* A route is available: take ifp, the link MTU and the route MTU from it. */
1325 if (ro_pmtu->ro_rt) {
1327 struct in_conninfo inc;
1329 bzero(&inc, sizeof(inc));
1330 inc.inc_flags = 1; /* IPv6 */
1331 inc.inc6_faddr = *dst;
1334 ifp = ro_pmtu->ro_rt->rt_ifp;
1335 ifmtu = IN6_LINKMTU(ifp);
1336 mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1339 else if (mtu < IPV6_MMTU) {
1341 * RFC2460 section 5, last paragraph:
1342 * if we record ICMPv6 too big message with
1343 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1344 * or smaller, with framgent header attached.
1345 * (fragment header is needed regardless from the
1346 * packet size, for translators to identify packets)
1350 } else if (mtu > ifmtu) {
1352 * The MTU on the route is larger than the MTU on
1353 * the interface! This shouldn't happen, unless the
1354 * MTU of the interface has been changed after the
1355 * interface was brought up. Change the MTU in the
1356 * route to match the interface MTU (as long as the
1357 * field isn't locked).
1360 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1363 mtu = IN6_LINKMTU(ifp);
1365 error = EHOSTUNREACH; /* XXX */
1369 *alwaysfragp = alwaysfrag;
1374 * IP6 socket option processing.
/*
 * ip6_ctloutput_dispatch: message-based entry point for ip6_ctloutput().
 * Takes the socket and the sockopt request out of the ctloutput message,
 * runs ip6_ctloutput() on them and replies to the message with the
 * resulting error code.
 */
1377 ip6_ctloutput_dispatch(netmsg_t msg)
1381 error = ip6_ctloutput(msg->ctloutput.base.nm_so,
1382 msg->ctloutput.nm_sopt);
1383 lwkt_replymsg(&msg->ctloutput.base.lmsg, error);
/*
 * ip6_ctloutput: IPv6-level socket option processing.
 *
 * so   - socket whose PCB (so->so_pcb) holds the option state.
 * sopt - option request; level, direction, name and value size are read
 *        from it up front.
 *
 * Requests at level IPPROTO_IPV6 are dispatched on the option name.  Values
 * are copied in with soopt_to_kbuf()/soopt_to_mbuf() and copied out with
 * soopt_from_kbuf()/soopt_from_mbuf().  ENOPROTOOPT is assigned on the
 * fall-back paths.
 *
 * NOTE(review): many short lines (labels, breaks, error assignments, the
 * final return) are not visible in this excerpt; the comments added below
 * describe only what the visible lines establish.
 */
1387 ip6_ctloutput(struct socket *so, struct sockopt *sopt)
1389 int optdatalen,uproto;
1391 struct inpcb *in6p = so->so_pcb;
1394 int level, op, optname;
1399 level = sopt->sopt_level;
1400 op = sopt->sopt_dir;
1401 optname = sopt->sopt_name;
1402 optlen = sopt->sopt_valsize;
1405 panic("ip6_ctloutput: arg soopt is NULL");
1411 uproto = (int)so->so_proto->pr_protocol;
/*
 * privileged is 1 only when td is non-NULL and priv_check(td, PRIV_ROOT)
 * returns 0; a NULL td counts as unprivileged.
 */
1412 privileged = (td == NULL || priv_check(td, PRIV_ROOT)) ? 0 : 1;
1414 if (level == IPPROTO_IPV6) {
1419 case IPV6_2292PKTOPTIONS:
1420 #ifdef IPV6_PKTOPTIONS
1421 case IPV6_PKTOPTIONS:
1426 error = soopt_getm(sopt, &m); /* XXX */
1429 soopt_to_mbuf(sopt, m); /* XXX */
1430 error = ip6_pcbopts(&in6p->in6p_outputopts,
1432 m_freem(m); /* XXX */
1437 * Use of some Hop-by-Hop options or some
1438 * Destination options, might require special
1439 * privilege. That is, normal applications
1440 * (without special privilege) might be forbidden
1441 * from setting certain options in outgoing packets,
1442 * and might never see certain options in received
1443 * packets. [RFC 2292 Section 6]
1444 * KAME specific note:
1445 * KAME prevents non-privileged users from sending or
1446 * receiving ANY hbh/dst options in order to avoid
1447 * overhead of parsing options in the kernel.
1449 case IPV6_RECVHOPOPTS:
1450 case IPV6_RECVDSTOPTS:
1451 case IPV6_RECVRTHDRDSTOPTS:
1454 case IPV6_RECVPKTINFO:
1455 case IPV6_RECVHOPLIMIT:
1456 case IPV6_RECVRTHDR:
1457 case IPV6_RECVPATHMTU:
1458 case IPV6_RECVTCLASS:
1459 case IPV6_AUTOFLOWLABEL:
1462 case IPV6_UNICAST_HOPS:
1466 if (optlen != sizeof(int)) {
1470 error = soopt_to_kbuf(sopt, &optval,
1471 sizeof optval, sizeof optval);
1476 case IPV6_UNICAST_HOPS:
1477 if (optval < -1 || optval >= 256)
1480 /* -1 = kernel default */
1481 in6p->in6p_hops = optval;
1483 if ((in6p->in6p_vflag &
1485 in6p->inp_ip_ttl = optval;
1488 #define OPTSET(bit) \
1491 in6p->in6p_flags |= (bit); \
1493 in6p->in6p_flags &= ~(bit); \
1495 #define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1497 * Although changed to RFC3542, It's better to also support RFC2292 API
1499 #define OPTSET2292(bit) \
1501 in6p->in6p_flags |= IN6P_RFC2292; \
1503 in6p->in6p_flags |= (bit); \
1505 in6p->in6p_flags &= ~(bit); \
1506 } while (/*CONSTCOND*/ 0)
1508 case IPV6_RECVPKTINFO:
1509 /* cannot mix with RFC2292 */
1510 if (OPTBIT(IN6P_RFC2292)) {
1514 OPTSET(IN6P_PKTINFO);
1519 struct ip6_pktopts **optp;
1521 /* cannot mix with RFC2292 */
1522 if (OPTBIT(IN6P_RFC2292)) {
1526 optp = &in6p->in6p_outputopts;
1527 error = ip6_pcbopt(IPV6_HOPLIMIT,
1528 (u_char *)&optval, sizeof(optval),
1533 case IPV6_RECVHOPLIMIT:
1534 /* cannot mix with RFC2292 */
1535 if (OPTBIT(IN6P_RFC2292)) {
1539 OPTSET(IN6P_HOPLIMIT);
1542 case IPV6_RECVHOPOPTS:
1543 /* cannot mix with RFC2292 */
1544 if (OPTBIT(IN6P_RFC2292)) {
1548 OPTSET(IN6P_HOPOPTS);
1551 case IPV6_RECVDSTOPTS:
1552 /* cannot mix with RFC2292 */
1553 if (OPTBIT(IN6P_RFC2292)) {
1557 OPTSET(IN6P_DSTOPTS);
1560 case IPV6_RECVRTHDRDSTOPTS:
1561 /* cannot mix with RFC2292 */
1562 if (OPTBIT(IN6P_RFC2292)) {
1566 OPTSET(IN6P_RTHDRDSTOPTS);
1569 case IPV6_RECVRTHDR:
1570 /* cannot mix with RFC2292 */
1571 if (OPTBIT(IN6P_RFC2292)) {
1578 case IPV6_RECVPATHMTU:
1580 * We ignore this option for TCP
1582 * (RFC3542 leaves this case
1585 if (uproto != IPPROTO_TCP)
1589 case IPV6_RECVTCLASS:
1590 /* cannot mix with RFC2292 XXX */
1591 if (OPTBIT(IN6P_RFC2292)) {
1595 OPTSET(IN6P_TCLASS);
1598 case IPV6_AUTOFLOWLABEL:
1599 OPTSET(IN6P_AUTOFLOWLABEL);
1608 * make setsockopt(IPV6_V6ONLY)
1609 * available only prior to bind(2).
1611 if (in6p->in6p_lport ||
1612 !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr))
1617 OPTSET(IN6P_IPV6_V6ONLY);
1619 in6p->in6p_vflag &= ~INP_IPV4;
1621 in6p->in6p_vflag |= INP_IPV4;
1628 case IPV6_USE_MIN_MTU:
1629 case IPV6_PREFER_TEMPADDR:
1630 if (optlen != sizeof(optval)) {
1634 error = soopt_to_kbuf(sopt, &optval,
1635 sizeof optval, sizeof optval);
1639 struct ip6_pktopts **optp;
1640 optp = &in6p->in6p_outputopts;
1641 error = ip6_pcbopt(optname,
1642 (u_char *)&optval, sizeof(optval),
1647 case IPV6_2292PKTINFO:
1648 case IPV6_2292HOPLIMIT:
1649 case IPV6_2292HOPOPTS:
1650 case IPV6_2292DSTOPTS:
1651 case IPV6_2292RTHDR:
1653 if (optlen != sizeof(int)) {
1657 error = soopt_to_kbuf(sopt, &optval,
1658 sizeof optval, sizeof optval);
1662 case IPV6_2292PKTINFO:
1663 OPTSET2292(IN6P_PKTINFO);
1665 case IPV6_2292HOPLIMIT:
1666 OPTSET2292(IN6P_HOPLIMIT);
1668 case IPV6_2292HOPOPTS:
1670 * Check super-user privilege.
1671 * See comments for IPV6_RECVHOPOPTS.
1675 OPTSET2292(IN6P_HOPOPTS);
1677 case IPV6_2292DSTOPTS:
1680 OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1682 case IPV6_2292RTHDR:
1683 OPTSET2292(IN6P_RTHDR);
1692 case IPV6_RTHDRDSTOPTS:
1696 * New advanced API (RFC3542)
1699 u_char optbuf_storage[MCLBYTES];
/*
 * NOTE(review): optbuf_storage is a local array of MCLBYTES bytes; confirm
 * that this much stack usage is acceptable on this path.
 */
1701 struct ip6_pktopts **optp;
1703 /* cannot mix with RFC2292 */
1704 if (OPTBIT(IN6P_RFC2292)) {
1710 * We only ensure valsize is not too large
1711 * here. Further validation will be done
1714 error = soopt_to_kbuf(sopt, optbuf_storage,
1715 sizeof(optbuf_storage), 0);
1718 optlen = sopt->sopt_valsize;
1719 optbuf = optbuf_storage;
1720 optp = &in6p->in6p_outputopts;
1721 error = ip6_pcbopt(optname, optbuf, optlen,
1727 case IPV6_MULTICAST_IF:
1728 case IPV6_MULTICAST_HOPS:
1729 case IPV6_MULTICAST_LOOP:
1730 case IPV6_JOIN_GROUP:
1731 case IPV6_LEAVE_GROUP:
1734 if (sopt->sopt_valsize > MLEN) {
1739 MGET(m, sopt->sopt_td ? MB_WAIT : MB_DONTWAIT, MT_HEADER);
1744 m->m_len = sopt->sopt_valsize;
1745 error = soopt_to_kbuf(sopt, mtod(m, char *),
1746 m->m_len, m->m_len);
/*
 * NOTE(review): the error stored by soopt_to_kbuf() just above is
 * overwritten by the ip6_setmoptions() call below without being checked.
 */
1747 error = ip6_setmoptions(sopt->sopt_name,
1748 &in6p->in6p_moptions,
1754 case IPV6_PORTRANGE:
1755 error = soopt_to_kbuf(sopt, &optval,
1756 sizeof optval, sizeof optval);
1761 case IPV6_PORTRANGE_DEFAULT:
1762 in6p->in6p_flags &= ~(IN6P_LOWPORT);
1763 in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1766 case IPV6_PORTRANGE_HIGH:
1767 in6p->in6p_flags &= ~(IN6P_LOWPORT);
1768 in6p->in6p_flags |= IN6P_HIGHPORT;
1771 case IPV6_PORTRANGE_LOW:
1772 in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1773 in6p->in6p_flags |= IN6P_LOWPORT;
1782 #if defined(IPSEC) || defined(FAST_IPSEC)
1783 case IPV6_IPSEC_POLICY:
1789 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1791 soopt_to_mbuf(sopt, m); /* XXX */
1793 req = mtod(m, caddr_t);
1796 error = ipsec6_set_policy(in6p, optname, req,
1801 #endif /* KAME IPSEC */
1809 struct mbuf **mp = &m;
1811 if (ip6_fw_ctl_ptr == NULL)
1814 if ((error = soopt_getm(sopt, &m)) != 0)
1817 soopt_to_mbuf(sopt, m);
1818 error = (*ip6_fw_ctl_ptr)(optname, mp);
1824 error = ENOPROTOOPT;
/*
 * The handlers from here on copy values out to the caller
 * (soopt_from_kbuf()/soopt_from_mbuf()).
 */
1831 case IPV6_2292PKTOPTIONS:
1832 #ifdef IPV6_PKTOPTIONS
1833 case IPV6_PKTOPTIONS:
1836 * RFC3542 (effectively) deprecated the
1837 * semantics of the 2292-style pktoptions.
1838 * Since it was not reliable in nature (i.e.,
1839 * applications had to expect the lack of some
1840 * information after all), it would make sense
1841 * to simplify this part by always returning
1844 if (in6p->in6p_options) {
1846 m = m_copym(in6p->in6p_options,
1847 0, M_COPYALL, MB_WAIT);
1848 error = soopt_from_mbuf(sopt, m);
1852 sopt->sopt_valsize = 0;
1855 case IPV6_RECVHOPOPTS:
1856 case IPV6_RECVDSTOPTS:
1857 case IPV6_RECVRTHDRDSTOPTS:
1858 case IPV6_UNICAST_HOPS:
1859 case IPV6_RECVPKTINFO:
1860 case IPV6_RECVHOPLIMIT:
1861 case IPV6_RECVRTHDR:
1862 case IPV6_RECVPATHMTU:
1863 case IPV6_RECVTCLASS:
1864 case IPV6_AUTOFLOWLABEL:
1867 case IPV6_PORTRANGE:
1870 case IPV6_RECVHOPOPTS:
1871 optval = OPTBIT(IN6P_HOPOPTS);
1874 case IPV6_RECVDSTOPTS:
1875 optval = OPTBIT(IN6P_DSTOPTS);
1878 case IPV6_RECVRTHDRDSTOPTS:
1879 optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1882 case IPV6_RECVPKTINFO:
1883 optval = OPTBIT(IN6P_PKTINFO);
1886 case IPV6_RECVHOPLIMIT:
1887 optval = OPTBIT(IN6P_HOPLIMIT);
1890 case IPV6_RECVRTHDR:
1891 optval = OPTBIT(IN6P_RTHDR);
1894 case IPV6_RECVPATHMTU:
1895 optval = OPTBIT(IN6P_MTU);
1898 case IPV6_RECVTCLASS:
1899 optval = OPTBIT(IN6P_TCLASS);
1902 case IPV6_AUTOFLOWLABEL:
1903 optval = OPTBIT(IN6P_AUTOFLOWLABEL);
1907 case IPV6_UNICAST_HOPS:
1908 optval = in6p->in6p_hops;
1912 optval = OPTBIT(IN6P_FAITH);
1916 optval = OPTBIT(IN6P_IPV6_V6ONLY);
1919 case IPV6_PORTRANGE:
/* Report the port range preference from the HIGHPORT/LOWPORT PCB flags. */
1922 flags = in6p->in6p_flags;
1923 if (flags & IN6P_HIGHPORT)
1924 optval = IPV6_PORTRANGE_HIGH;
1925 else if (flags & IN6P_LOWPORT)
1926 optval = IPV6_PORTRANGE_LOW;
1932 soopt_from_kbuf(sopt, &optval,
1939 struct ip6_mtuinfo mtuinfo;
1940 struct route_in6 sro;
1942 bzero(&sro, sizeof(sro));
/*
 * SS_ISCONNECTED is tested before the path MTU lookup; the lookup uses the
 * zeroed scratch route sro and the PCB's foreign address.
 */
1944 if (!(so->so_state & SS_ISCONNECTED))
1947 * XXX: we dot not consider the case of source
1948 * routing, or optional information to specify
1949 * the outgoing interface.
1951 error = ip6_getpmtu(&sro, NULL, NULL,
1952 &in6p->in6p_faddr, &pmtu, NULL);
1957 if (pmtu > IPV6_MAXPACKET)
1958 pmtu = IPV6_MAXPACKET;
1960 bzero(&mtuinfo, sizeof(mtuinfo));
1961 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
1962 optdata = (void *)&mtuinfo;
1963 optdatalen = sizeof(mtuinfo);
1964 soopt_from_kbuf(sopt, optdata,
1969 case IPV6_2292PKTINFO:
1970 case IPV6_2292HOPLIMIT:
1971 case IPV6_2292HOPOPTS:
1972 case IPV6_2292RTHDR:
1973 case IPV6_2292DSTOPTS:
1974 if (optname == IPV6_2292HOPOPTS ||
1975 optname == IPV6_2292DSTOPTS ||
1979 case IPV6_2292PKTINFO:
1980 optval = OPTBIT(IN6P_PKTINFO);
1982 case IPV6_2292HOPLIMIT:
1983 optval = OPTBIT(IN6P_HOPLIMIT);
1985 case IPV6_2292HOPOPTS:
1988 optval = OPTBIT(IN6P_HOPOPTS);
1990 case IPV6_2292RTHDR:
1991 optval = OPTBIT(IN6P_RTHDR);
1993 case IPV6_2292DSTOPTS:
1996 optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
1999 soopt_from_kbuf(sopt, &optval,
2007 case IPV6_RTHDRDSTOPTS:
2011 case IPV6_USE_MIN_MTU:
2012 case IPV6_PREFER_TEMPADDR:
2013 error = ip6_getpcbopt(in6p->in6p_outputopts,
2017 case IPV6_MULTICAST_IF:
2018 case IPV6_MULTICAST_HOPS:
2019 case IPV6_MULTICAST_LOOP:
2020 case IPV6_JOIN_GROUP:
2021 case IPV6_LEAVE_GROUP:
2024 error = ip6_getmoptions(sopt->sopt_name,
2025 in6p->in6p_moptions, &m);
2027 soopt_from_kbuf(sopt,
2028 mtod(m, char *), m->m_len);
2033 #if defined(IPSEC) || defined(FAST_IPSEC)
2034 case IPV6_IPSEC_POLICY:
2038 struct mbuf *m = NULL;
2039 struct mbuf **mp = &m;
2041 error = soopt_getm(sopt, &m); /* XXX */
2044 soopt_to_mbuf(sopt, m); /* XXX */
2046 req = mtod(m, caddr_t);
2049 error = ipsec6_get_policy(in6p, req, len, mp);
2051 error = soopt_from_mbuf(sopt, m); /*XXX*/
2052 if (error == 0 && m != NULL)
2056 #endif /* KAME IPSEC */
2061 struct mbuf **mp = &m;
2063 if (ip6_fw_ctl_ptr == NULL)
2067 error = (*ip6_fw_ctl_ptr)(optname, mp);
2069 error = soopt_from_mbuf(sopt, m); /* XXX */
2070 if (error == 0 && m != NULL)
2076 error = ENOPROTOOPT;
/*
 * ip6_raw_ctloutput: socket option processing for raw IPv6 sockets.
 *
 * so   - socket; its PCB is obtained with sotoin6pcb() and its protocol
 *        number decides whether the ICMPv6 restrictions apply.
 * sopt - option request (level, direction, name, value size).
 *
 * The visible code manages the checksum offset kept in in6p->in6p_cksum:
 * a new value must be sizeof(int) bytes and even, and on an ICMPv6 socket
 * it is compared against icmp6off, the offset of icmp6_cksum inside
 * struct icmp6_hdr.  ENOPROTOOPT is assigned on the fall-back path.
 *
 * NOTE(review): the case labels, the handling of levels other than
 * IPPROTO_IPV6 and the error values for rejected input are on lines not
 * visible in this excerpt.
 */
2088 ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
2090 int error = 0, optval, optlen;
2091 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2092 struct in6pcb *in6p = sotoin6pcb(so);
2093 int level, op, optname;
2096 level = sopt->sopt_level;
2097 op = sopt->sopt_dir;
2098 optname = sopt->sopt_name;
2099 optlen = sopt->sopt_valsize;
2101 panic("ip6_raw_ctloutput: arg soopt is NULL");
2103 if (level != IPPROTO_IPV6) {
2110 * For ICMPv6 sockets, no modification allowed for checksum
2111 * offset, permit "no change" values to help existing apps.
2113 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2114 * for an ICMPv6 socket will fail."
2115 * The current behavior does not meet RFC3542.
2119 if (optlen != sizeof(int)) {
2123 error = soopt_to_kbuf(sopt, &optval,
2124 sizeof optval, sizeof optval);
2127 if ((optval % 2) != 0) {
2128 /* the API assumes even offset values */
2130 } else if (so->so_proto->pr_protocol ==
2132 if (optval != icmp6off)
2135 in6p->in6p_cksum = optval;
/*
 * Copy-out side: the stored in6p_cksum is reported; the value used for an
 * ICMPv6 socket is set on a line not shown (presumably icmp6off - confirm).
 */
2139 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2142 optval = in6p->in6p_cksum;
2144 soopt_from_kbuf(sopt, &optval, sizeof(optval));
2154 error = ENOPROTOOPT;
2162 * Set up IP6 options in pcb for insertion in output packets or
2163 * specifying behavior of outgoing packets.
/*
 * ip6_pcbopts: replace the packet options attached to a PCB with the ones
 * described by mbuf m.
 *
 * pktopt - address of the PCB's option pointer; the current block is read
 *          from it.
 * m      - option data; a NULL mbuf or one with m_len == 0 means "only
 *          turn the old options off", in which case the block is freed.
 * so     - socket; its protocol number is handed to ip6_setpktoptions().
 * sopt   - not referenced in the lines visible here.
 *
 * An existing block is cleared with ip6_clearpktopts(opt, -1), after a
 * kprintf() notice if any of the listed options was set; otherwise a new
 * block is allocated.  If ip6_setpktoptions() fails, the block is cleared
 * and freed.
 *
 * NOTE(review): the updates of *pktopt and the return statements are on
 * lines not visible in this excerpt.
 */
2166 ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m,
2167 struct socket *so, struct sockopt *sopt)
2170 struct ip6_pktopts *opt = *pktopt;
2173 /* turn off any old options. */
2176 if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2177 opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2178 opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2179 kprintf("ip6_pcbopts: all specified options are cleared.\n");
2181 ip6_clearpktopts(opt, -1);
2183 opt = kmalloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2186 if (!m || m->m_len == 0) {
2188 * Only turning off any previous options, regardless of
2189 * whether the opt is just created or given.
2191 kfree(opt, M_IP6OPT);
2195 /* set options specified by user. */
2196 if ((error = ip6_setpktoptions(m, opt, NULL, so->so_proto->pr_protocol, priv)) != 0) {
2197 ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2198 kfree(opt, M_IP6OPT);
2207 * The three functions below were introduced by the merge to RFC3542
/*
 * ip6_getpcbopt: copy the current value of one sticky option out to the
 * caller.
 *
 * pktopt  - option block to read from.  It may be NULL; for the options
 *           that have a local default (zeroed pktinfo, traffic class 0,
 *           IP6PO_MINMTU_MCASTONLY, IP6PO_TEMPADDR_SYSTEM) that default is
 *           reported when pktopt is NULL or the field is unset.
 * optname - option to report.
 * sopt    - request through which the value is returned with
 *           soopt_from_kbuf().
 *
 * Extension-header options report (ip6e_len + 1) << 3 bytes, the length
 * encoded in the stored header itself.  The default label contains a
 * panic() call and a return of ENOPROTOOPT (any conditional compilation
 * around the panic is not visible in this excerpt).
 */
2211 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2213 void *optdata = NULL;
2215 struct ip6_ext *ip6e;
2217 struct in6_pktinfo null_pktinfo;
2218 int deftclass = 0, on;
2219 int defminmtu = IP6PO_MINMTU_MCASTONLY;
2220 int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2224 if (pktopt && pktopt->ip6po_pktinfo)
2225 optdata = (void *)pktopt->ip6po_pktinfo;
2227 /* XXX: we don't have to do this every time... */
2228 bzero(&null_pktinfo, sizeof(null_pktinfo));
2229 optdata = (void *)&null_pktinfo;
2231 optdatalen = sizeof(struct in6_pktinfo);
2234 if (pktopt && pktopt->ip6po_tclass >= 0)
2235 optdata = (void *)&pktopt->ip6po_tclass;
2237 optdata = (void *)&deftclass;
2238 optdatalen = sizeof(int);
2241 if (pktopt && pktopt->ip6po_hbh) {
2242 optdata = (void *)pktopt->ip6po_hbh;
2243 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2244 optdatalen = (ip6e->ip6e_len + 1) << 3;
2248 if (pktopt && pktopt->ip6po_rthdr) {
2249 optdata = (void *)pktopt->ip6po_rthdr;
2250 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2251 optdatalen = (ip6e->ip6e_len + 1) << 3;
2254 case IPV6_RTHDRDSTOPTS:
2255 if (pktopt && pktopt->ip6po_dest1) {
2256 optdata = (void *)pktopt->ip6po_dest1;
2257 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2258 optdatalen = (ip6e->ip6e_len + 1) << 3;
2262 if (pktopt && pktopt->ip6po_dest2) {
2263 optdata = (void *)pktopt->ip6po_dest2;
2264 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2265 optdatalen = (ip6e->ip6e_len + 1) << 3;
2269 if (pktopt && pktopt->ip6po_nexthop) {
2270 optdata = (void *)pktopt->ip6po_nexthop;
2271 optdatalen = pktopt->ip6po_nexthop->sa_len;
2274 case IPV6_USE_MIN_MTU:
2276 optdata = (void *)&pktopt->ip6po_minmtu;
2278 optdata = (void *)&defminmtu;
2279 optdatalen = sizeof(int);
2282 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2286 optdata = (void *)&on;
2287 optdatalen = sizeof(on);
2289 case IPV6_PREFER_TEMPADDR:
2291 optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2293 optdata = (void *)&defpreftemp;
2294 optdatalen = sizeof(int);
2296 default: /* should not happen */
2298 panic("ip6_getpcbopt: unexpected option\n");
2300 return (ENOPROTOOPT);
/* Hand the selected buffer and length back through the request. */
2303 soopt_from_kbuf(sopt, optdata, optdatalen);
2309 * initialize ip6_pktopts. beware that there are non-zero default values in
/*
 * ip6_pcbopt: set a single option on a PCB's option block, creating the
 * block on first use.
 *
 * optname - option to set.
 * buf/len - option value and its length.
 * pktopt  - address of the PCB's option pointer; when it is NULL a block is
 *           allocated with M_WAITOK and given defaults by
 *           init_ip6pktopts().
 * uproto  - upper-layer protocol number, passed through.
 *
 * Returns the result of ip6_setpktoption(), called with sticky = 1 and
 * cmsg = 0.
 *
 * NOTE(review): the assignments of `opt' and `priv' are on lines not
 * visible in this excerpt.
 */
2314 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, int uproto)
2316 struct ip6_pktopts *opt;
2318 if (*pktopt == NULL) {
2319 *pktopt = kmalloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2320 init_ip6pktopts(*pktopt);
2324 return (ip6_setpktoption(optname, buf, len, opt, 1, 0, uproto, priv));
2328 * initialize ip6_pktopts. beware that there are non-zero default values in
/*
 * init_ip6pktopts: put an option block into its default state.  The whole
 * structure is zeroed first, then the fields whose default is not zero are
 * set explicitly.
 */
2332 init_ip6pktopts(struct ip6_pktopts *opt)
2335 bzero(opt, sizeof(*opt));
2336 opt->ip6po_hlim = -1; /* -1 means default hop limit */
2337 opt->ip6po_tclass = -1; /* -1 means default traffic class */
2338 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2339 opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
/*
 * ip6_clearpktopts: reset one option, or all of them, in an option block.
 *
 * pktopt  - block to modify; the block itself is not freed here.
 * optname - option to clear, or -1 to clear every option handled below.
 *
 * Heap-allocated option data is released with kfree() and its pointer set
 * to NULL; the hop limit and traffic class go back to -1; routes cached for
 * the next-hop and routing-header options are released with RTFREE().
 */
2343 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2348 if (optname == -1 || optname == IPV6_PKTINFO) {
2349 if (pktopt->ip6po_pktinfo)
2350 kfree(pktopt->ip6po_pktinfo, M_IP6OPT);
2351 pktopt->ip6po_pktinfo = NULL;
2353 if (optname == -1 || optname == IPV6_HOPLIMIT)
2354 pktopt->ip6po_hlim = -1;
2355 if (optname == -1 || optname == IPV6_TCLASS)
2356 pktopt->ip6po_tclass = -1;
2357 if (optname == -1 || optname == IPV6_NEXTHOP) {
2358 if (pktopt->ip6po_nextroute.ro_rt) {
2359 RTFREE(pktopt->ip6po_nextroute.ro_rt);
2360 pktopt->ip6po_nextroute.ro_rt = NULL;
2362 if (pktopt->ip6po_nexthop)
2363 kfree(pktopt->ip6po_nexthop, M_IP6OPT);
2364 pktopt->ip6po_nexthop = NULL;
2366 if (optname == -1 || optname == IPV6_HOPOPTS) {
2367 if (pktopt->ip6po_hbh)
2368 kfree(pktopt->ip6po_hbh, M_IP6OPT);
2369 pktopt->ip6po_hbh = NULL;
2371 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2372 if (pktopt->ip6po_dest1)
2373 kfree(pktopt->ip6po_dest1, M_IP6OPT);
2374 pktopt->ip6po_dest1 = NULL;
2376 if (optname == -1 || optname == IPV6_RTHDR) {
2377 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2378 kfree(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2379 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2380 if (pktopt->ip6po_route.ro_rt) {
2381 RTFREE(pktopt->ip6po_route.ro_rt);
2382 pktopt->ip6po_route.ro_rt = NULL;
2385 if (optname == -1 || optname == IPV6_DSTOPTS) {
2386 if (pktopt->ip6po_dest2)
2387 kfree(pktopt->ip6po_dest2, M_IP6OPT);
2388 pktopt->ip6po_dest2 = NULL;
/*
 * PKTOPT_EXTHDRCPY(type): duplicate the extension header stored in
 * src->type into dst->type.  The size is taken from the header itself,
 * (ip6e_len + 1) << 3 bytes; the copy is allocated with kmalloc() using the
 * expansion site's `canwait' and filled with bcopy().  The macro relies on
 * `src', `dst' and `canwait' being in scope where it is expanded.
 * NOTE(review): the action taken when kmalloc() returns NULL is on a line
 * not visible in this excerpt.
 */
2392 #define PKTOPT_EXTHDRCPY(type) \
2396 (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2397 dst->type = kmalloc(hlen, M_IP6OPT, canwait);\
2398 if (dst->type == NULL)\
2400 bcopy(src->type, dst->type, hlen);\
/*
 * ip6_copypktopts: allocate a new option block and deep-copy src into it.
 *
 * src     - block to copy.
 * canwait - kmalloc() wait flag used for every allocation made here.
 *
 * The new block is allocated zeroed (M_ZERO); the hop limit is copied and
 * the pktinfo, next hop and extension-header options are duplicated into
 * freshly allocated buffers.  The cached routes are not copied.  The block
 * of kfree() calls near the end releases everything allocated so far,
 * including dst itself.
 *
 * The invalid-argument diagnostic names this function, so the message
 * points at the real call site.
 *
 * NOTE(review): in the lines visible here only ip6po_hlim is copied among
 * the scalar fields, whereas copypktopts() also copies ip6po_tclass and
 * ip6po_flags - confirm whether that difference is intended.
 */
2404 struct ip6_pktopts *
2405 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2407 struct ip6_pktopts *dst;
2410 kprintf("ip6_copypktopts: invalid argument\n");
2414 dst = kmalloc(sizeof(*dst), M_IP6OPT, canwait | M_ZERO);
2418 dst->ip6po_hlim = src->ip6po_hlim;
2419 if (src->ip6po_pktinfo) {
2420 dst->ip6po_pktinfo = kmalloc(sizeof(*dst->ip6po_pktinfo),
2422 if (dst->ip6po_pktinfo == NULL)
2424 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2426 if (src->ip6po_nexthop) {
2427 dst->ip6po_nexthop = kmalloc(src->ip6po_nexthop->sa_len,
2429 if (dst->ip6po_nexthop == NULL)
2431 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2432 src->ip6po_nexthop->sa_len);
2434 PKTOPT_EXTHDRCPY(ip6po_hbh);
2435 PKTOPT_EXTHDRCPY(ip6po_dest1);
2436 PKTOPT_EXTHDRCPY(ip6po_dest2);
2437 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2441 if (dst->ip6po_pktinfo) kfree(dst->ip6po_pktinfo, M_IP6OPT);
2442 if (dst->ip6po_nexthop) kfree(dst->ip6po_nexthop, M_IP6OPT);
2443 if (dst->ip6po_hbh) kfree(dst->ip6po_hbh, M_IP6OPT);
2444 if (dst->ip6po_dest1) kfree(dst->ip6po_dest1, M_IP6OPT);
2445 if (dst->ip6po_dest2) kfree(dst->ip6po_dest2, M_IP6OPT);
2446 if (dst->ip6po_rthdr) kfree(dst->ip6po_rthdr, M_IP6OPT);
2447 kfree(dst, M_IP6OPT);
/*
 * copypktopts: deep-copy the options of src into an existing block dst.
 *
 * dst     - destination block, supplied by the caller.
 * src     - block to copy from.
 * canwait - kmalloc() wait flag used for every allocation made here.
 *
 * The hop limit, traffic class and flags are copied by value; the pktinfo,
 * next hop and extension-header options are duplicated into freshly
 * allocated buffers.  The cached routes are not copied.  The
 * ip6_clearpktopts(dst, -1) call near the end discards whatever was copied
 * so far.
 *
 * The invalid-argument diagnostic names this function, so the message
 * points at the real call site.
 *
 * NOTE(review): the return statements and the label of the cleanup path
 * are on lines not visible in this excerpt.
 */
2452 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2454 if (dst == NULL || src == NULL) {
2456 kprintf("copypktopts: invalid argument\n");
2461 dst->ip6po_hlim = src->ip6po_hlim;
2462 dst->ip6po_tclass = src->ip6po_tclass;
2463 dst->ip6po_flags = src->ip6po_flags;
2464 if (src->ip6po_pktinfo) {
2465 dst->ip6po_pktinfo = kmalloc(sizeof(*dst->ip6po_pktinfo),
2467 if (dst->ip6po_pktinfo == NULL)
2469 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2471 if (src->ip6po_nexthop) {
2472 dst->ip6po_nexthop = kmalloc(src->ip6po_nexthop->sa_len,
2474 if (dst->ip6po_nexthop == NULL)
2476 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2477 src->ip6po_nexthop->sa_len);
2479 PKTOPT_EXTHDRCPY(ip6po_hbh);
2480 PKTOPT_EXTHDRCPY(ip6po_dest1);
2481 PKTOPT_EXTHDRCPY(ip6po_dest2);
2482 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2486 ip6_clearpktopts(dst, -1);
2489 #undef PKTOPT_EXTHDRCPY
/*
 * ip6_freepcbopts: release an option block.  Every option is cleared with
 * ip6_clearpktopts(pktopt, -1), which frees the per-option allocations, and
 * then the block itself is freed.
 * NOTE(review): any NULL check on pktopt is on a line not visible in this
 * excerpt.
 */
2492 ip6_freepcbopts(struct ip6_pktopts *pktopt)
2497 ip6_clearpktopts(pktopt, -1);
2499 kfree(pktopt, M_IP6OPT);
2503 * Set the IP6 multicast options in response to user setsockopt().
/*
 * ip6_setmoptions: apply one multicast socket option to a PCB's multicast
 * option block.
 *
 * optname - IPV6_MULTICAST_IF, IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP,
 *           IPV6_JOIN_GROUP or IPV6_LEAVE_GROUP.
 * im6op   - address of the option block pointer.  A block is allocated and
 *           filled with defaults when none is attached, and the block is
 *           freed again at the end if every field is at its default and no
 *           memberships remain.
 * m       - mbuf carrying the option value; its m_len is checked against
 *           the size expected for each option.
 *
 * NOTE(review): most error assignments, the breaks and the final return
 * are on lines not visible in this excerpt.
 */
2506 ip6_setmoptions(int optname, struct ip6_moptions **im6op, struct mbuf *m)
2509 u_int loop, ifindex;
2510 struct ipv6_mreq *mreq;
2512 struct ip6_moptions *im6o = *im6op;
2513 struct route_in6 ro;
2514 struct sockaddr_in6 *dst;
2515 struct in6_multi_mship *imm;
2516 struct thread *td = curthread; /* XXX */
2520 * No multicast option buffer attached to the pcb;
2521 * allocate one and initialize to default values.
2523 im6o = (struct ip6_moptions *)
2524 kmalloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
2527 im6o->im6o_multicast_ifp = NULL;
2528 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2529 im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2530 LIST_INIT(&im6o->im6o_memberships);
2535 case IPV6_MULTICAST_IF:
2537 * Select the interface for outgoing multicast packets.
2539 if (m == NULL || m->m_len != sizeof(u_int)) {
2543 bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
/*
 * NOTE(review): ifindex is declared u_int above, so the `ifindex < 0' half
 * of this test can never be true.
 */
2544 if (ifindex < 0 || if_index < ifindex) {
2545 error = ENXIO; /* XXX EINVAL? */
2548 ifp = ifindex2ifnet[ifindex];
2549 if (ifp == NULL || !(ifp->if_flags & IFF_MULTICAST)) {
2550 error = EADDRNOTAVAIL;
2553 im6o->im6o_multicast_ifp = ifp;
2556 case IPV6_MULTICAST_HOPS:
2559 * Set the IP6 hoplimit for outgoing multicast packets.
2562 if (m == NULL || m->m_len != sizeof(int)) {
2566 bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2567 if (optval < -1 || optval >= 256)
2569 else if (optval == -1)
2570 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2572 im6o->im6o_multicast_hlim = optval;
2576 case IPV6_MULTICAST_LOOP:
2578 * Set the loopback flag for outgoing multicast packets.
2579 * Must be zero or one.
2581 if (m == NULL || m->m_len != sizeof(u_int)) {
2585 bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2590 im6o->im6o_multicast_loop = loop;
2593 case IPV6_JOIN_GROUP:
2595 * Add a multicast group membership.
2596 * Group must be a valid IP6 multicast address.
2598 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2602 mreq = mtod(m, struct ipv6_mreq *);
2603 if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2605 * We use the unspecified address to specify to accept
2606 * all multicast addresses. Only super user is allowed
2609 if (priv_check(td, PRIV_ROOT))
2614 } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2620 * If the interface is specified, validate it.
2622 if (mreq->ipv6mr_interface < 0
2623 || if_index < mreq->ipv6mr_interface) {
2624 error = ENXIO; /* XXX EINVAL? */
2628 * If no interface was explicitly specified, choose an
2629 * appropriate one according to the given multicast address.
2631 if (mreq->ipv6mr_interface == 0) {
2633 * If the multicast address is in node-local scope,
2634 * the interface should be a loopback interface.
2635 * Otherwise, look up the routing table for the
2636 * address, and choose the outgoing interface.
2637 * XXX: is it a good approach?
2639 if (IN6_IS_ADDR_MC_NODELOCAL(&mreq->ipv6mr_multiaddr)) {
2643 dst = (struct sockaddr_in6 *)&ro.ro_dst;
2644 bzero(dst, sizeof(*dst));
2645 dst->sin6_len = sizeof(struct sockaddr_in6);
2646 dst->sin6_family = AF_INET6;
2647 dst->sin6_addr = mreq->ipv6mr_multiaddr;
2648 rtalloc((struct route *)&ro);
2649 if (ro.ro_rt == NULL) {
2650 error = EADDRNOTAVAIL;
2653 ifp = ro.ro_rt->rt_ifp;
/*
 * NOTE(review): the release of ro.ro_rt after this lookup is not visible in
 * this excerpt - confirm the route reference is dropped.
 */
2657 ifp = ifindex2ifnet[mreq->ipv6mr_interface];
2660 * See if we found an interface, and confirm that it
2661 * supports multicast
2663 if (ifp == NULL || !(ifp->if_flags & IFF_MULTICAST)) {
2664 error = EADDRNOTAVAIL;
2668 * Put interface index into the multicast address,
2669 * if the address has link-local scope.
2671 if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2672 mreq->ipv6mr_multiaddr.s6_addr16[1]
2673 = htons(mreq->ipv6mr_interface);
2676 * See if the membership already exists.
2678 for (imm = im6o->im6o_memberships.lh_first;
2679 imm != NULL; imm = imm->i6mm_chain.le_next)
2680 if (imm->i6mm_maddr->in6m_ifp == ifp &&
2681 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2682 &mreq->ipv6mr_multiaddr))
2689 * Everything looks good; add a new record to the multicast
2690 * address list for the given interface.
2692 imm = kmalloc(sizeof(*imm), M_IPMADDR, M_WAITOK);
2693 if ((imm->i6mm_maddr =
2694 in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) {
2695 kfree(imm, M_IPMADDR);
2698 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2701 case IPV6_LEAVE_GROUP:
2703 * Drop a multicast group membership.
2704 * Group must be a valid IP6 multicast address.
2706 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2710 mreq = mtod(m, struct ipv6_mreq *);
2711 if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2712 if (priv_check(td, PRIV_ROOT)) {
2716 } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2721 * If an interface address was specified, get a pointer
2722 * to its ifnet structure.
2724 if (mreq->ipv6mr_interface < 0
2725 || if_index < mreq->ipv6mr_interface) {
2726 error = ENXIO; /* XXX EINVAL? */
2729 ifp = ifindex2ifnet[mreq->ipv6mr_interface];
2731 * Put interface index into the multicast address,
2732 * if the address has link-local scope.
2734 if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2735 mreq->ipv6mr_multiaddr.s6_addr16[1]
2736 = htons(mreq->ipv6mr_interface);
2739 * Find the membership in the membership list.
2741 for (imm = im6o->im6o_memberships.lh_first;
2742 imm != NULL; imm = imm->i6mm_chain.le_next) {
2744 imm->i6mm_maddr->in6m_ifp == ifp) &&
2745 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2746 &mreq->ipv6mr_multiaddr))
2750 /* Unable to resolve interface */
2751 error = EADDRNOTAVAIL;
2755 * Give up the multicast address record to which the
2756 * membership points.
2758 LIST_REMOVE(imm, i6mm_chain);
2759 in6_delmulti(imm->i6mm_maddr);
2760 kfree(imm, M_IPMADDR);
2769 * If all options have default values, no need to keep the mbuf.
2771 if (im6o->im6o_multicast_ifp == NULL &&
2772 im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2773 im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2774 im6o->im6o_memberships.lh_first == NULL) {
2775 kfree(*im6op, M_IPMOPTS);
2783 * Return the IP6 multicast options in response to user getsockopt().
/*
 * ip6_getmoptions: report one multicast socket option.
 *
 * optname - IPV6_MULTICAST_IF, IPV6_MULTICAST_HOPS or IPV6_MULTICAST_LOOP;
 *           any other name returns EOPNOTSUPP.
 * im6o    - the PCB's multicast option block; when it is absent the
 *           system default for the requested option is reported.
 * mp      - out parameter; receives an mbuf obtained with m_get() that
 *           holds the value as a u_int, with m_len set to sizeof(u_int).
 *
 * The default reported for IPV6_MULTICAST_LOOP is
 * IPV6_DEFAULT_MULTICAST_LOOP, the same value ip6_setmoptions() installs in
 * a freshly allocated option block, not the default hop limit.
 *
 * NOTE(review): the switch statement, the conditions guarding the default
 * stores and the success returns are on lines not visible in this excerpt.
 */
2786 ip6_getmoptions(int optname, struct ip6_moptions *im6o, struct mbuf **mp)
2788 u_int *hlim, *loop, *ifindex;
2790 *mp = m_get(MB_WAIT, MT_HEADER); /* XXX */
2794 case IPV6_MULTICAST_IF:
2795 ifindex = mtod(*mp, u_int *);
2796 (*mp)->m_len = sizeof(u_int);
2797 if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2800 *ifindex = im6o->im6o_multicast_ifp->if_index;
2803 case IPV6_MULTICAST_HOPS:
2804 hlim = mtod(*mp, u_int *);
2805 (*mp)->m_len = sizeof(u_int);
2807 *hlim = ip6_defmcasthlim;
2809 *hlim = im6o->im6o_multicast_hlim;
2812 case IPV6_MULTICAST_LOOP:
2813 loop = mtod(*mp, u_int *);
2814 (*mp)->m_len = sizeof(u_int);
2816 *loop = IPV6_DEFAULT_MULTICAST_LOOP;
2818 *loop = im6o->im6o_multicast_loop;
2822 return (EOPNOTSUPP);
2827 * Discard the IP6 multicast options.
/*
 * ip6_freemoptions: release a multicast option block.  Each membership
 * record is unlinked from the list, its group is dropped with
 * in6_delmulti() when i6mm_maddr is set, and the record is freed; finally
 * the option block itself is freed.
 * NOTE(review): any NULL check on im6o is on a line not visible in this
 * excerpt.
 */
2830 ip6_freemoptions(struct ip6_moptions *im6o)
2832 struct in6_multi_mship *imm;
2837 while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2838 LIST_REMOVE(imm, i6mm_chain);
2839 if (imm->i6mm_maddr)
2840 in6_delmulti(imm->i6mm_maddr);
2841 kfree(imm, M_IPMADDR);
2843 kfree(im6o, M_IPMOPTS);
2847 * Set a particular packet option, as a sticky option or an ancillary data
2848 * item. "len" can be 0 only when it's a sticky option.
2849 * We have 4 cases of combination of "sticky" and "cmsg":
2850 * "sticky=0, cmsg=0": impossible
2851 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2852 * "sticky=1, cmsg=0": RFC3542 socket option
2853 * "sticky=1, cmsg=1": RFC2292 socket option
2856 ip6_setpktoption(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2857 int sticky, int cmsg, int uproto, int priv)
2859 int minmtupolicy, preftemp;
2862 if (!sticky && !cmsg) {
2863 kprintf("ip6_setpktoption: impossible case\n");
2868 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2869 * not be specified in the context of RFC3542. Conversely,
2870 * RFC3542 types should not be specified in the context of RFC2292.
2874 case IPV6_2292PKTINFO:
2875 case IPV6_2292HOPLIMIT:
2876 case IPV6_2292NEXTHOP:
2877 case IPV6_2292HOPOPTS:
2878 case IPV6_2292DSTOPTS:
2879 case IPV6_2292RTHDR:
2880 case IPV6_2292PKTOPTIONS:
2881 return (ENOPROTOOPT);
2884 if (sticky && cmsg) {
2891 case IPV6_RTHDRDSTOPTS:
2893 case IPV6_USE_MIN_MTU:
2896 case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
2897 return (ENOPROTOOPT);
2902 case IPV6_2292PKTINFO:
2905 struct in6_pktinfo *pktinfo;
2906 if (len != sizeof(struct in6_pktinfo))
2908 pktinfo = (struct in6_pktinfo *)buf;
2911 * An application can clear any sticky IPV6_PKTINFO option by
2912 * doing a "regular" setsockopt with ipi6_addr being
2913 * in6addr_any and ipi6_ifindex being zero.
2914 * [RFC 3542, Section 6]
2916 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2917 pktinfo->ipi6_ifindex == 0 &&
2918 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2919 ip6_clearpktopts(opt, optname);
2923 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2924 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2928 /* validate the interface index if specified. */
2929 if (pktinfo->ipi6_ifindex > if_index ||
2930 pktinfo->ipi6_ifindex < 0) {
2934 * Check if the requested source address is indeed a
2935 * unicast address assigned to the node, and can be
2936 * used as the packet's source address.
2938 if (opt->ip6po_pktinfo != NULL &&
2939 !IN6_IS_ADDR_UNSPECIFIED(&opt->ip6po_pktinfo->ipi6_addr)) {
2940 struct in6_ifaddr *ia6;
2941 struct sockaddr_in6 sin6;
2943 bzero(&sin6, sizeof(sin6));
2944 sin6.sin6_len = sizeof(sin6);
2945 sin6.sin6_family = AF_INET6;
2947 opt->ip6po_pktinfo->ipi6_addr;
2948 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr(sin6tosa(&sin6));
2950 (ia6->ia6_flags & (IN6_IFF_ANYCAST |
2951 IN6_IFF_NOTREADY)) != 0)
2952 return (EADDRNOTAVAIL);
2956 * We store the address anyway, and let in6_selectsrc()
2957 * validate the specified address. This is because ipi6_addr
2958 * may not have enough information about its scope zone, and
2959 * we may need additional information (such as outgoing
2960 * interface or the scope zone of a destination address) to
2961 * disambiguate the scope.
2962 * XXX: the delay of the validation may confuse the
2963 * application when it is used as a sticky option.
2965 if (opt->ip6po_pktinfo == NULL) {
2966 opt->ip6po_pktinfo = kmalloc(sizeof(*pktinfo),
2967 M_IP6OPT, M_NOWAIT);
2968 if (opt->ip6po_pktinfo == NULL)
2971 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
2975 case IPV6_2292HOPLIMIT:
2981 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2982 * to simplify the ordering among hoplimit options.
2984 if (optname == IPV6_HOPLIMIT && sticky)
2985 return (ENOPROTOOPT);
2987 if (len != sizeof(int))
2990 if (*hlimp < -1 || *hlimp > 255)
2993 opt->ip6po_hlim = *hlimp;
3001 if (len != sizeof(int))
3003 tclass = *(int *)buf;
3004 if (tclass < -1 || tclass > 255)
3007 opt->ip6po_tclass = tclass;
3011 case IPV6_2292NEXTHOP:
3016 if (len == 0) { /* just remove the option */
3017 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3021 /* check if cmsg_len is large enough for sa_len */
3022 if (len < sizeof(struct sockaddr) || len < *buf)
3025 switch (((struct sockaddr *)buf)->sa_family) {
3028 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3031 if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3034 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3035 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3040 case AF_LINK: /* should eventually be supported */
3042 return (EAFNOSUPPORT);
3045 /* turn off the previous option, then set the new option. */
3046 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3047 opt->ip6po_nexthop = kmalloc(*buf, M_IP6OPT, M_NOWAIT);
3048 if (opt->ip6po_nexthop == NULL)
3050 bcopy(buf, opt->ip6po_nexthop, *buf);
3053 case IPV6_2292HOPOPTS:
3056 struct ip6_hbh *hbh;
3060 * XXX: We don't allow a non-privileged user to set ANY HbH
3061 * options, since per-option restriction has too much
3067 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3068 break; /* just remove the option */
3071 /* message length validation */
3072 if (len < sizeof(struct ip6_hbh))
3074 hbh = (struct ip6_hbh *)buf;
3075 hbhlen = (hbh->ip6h_len + 1) << 3;
3079 /* turn off the previous option, then set the new option. */
3080 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3081 opt->ip6po_hbh = kmalloc(hbhlen, M_IP6OPT, M_NOWAIT);
3082 if (opt->ip6po_hbh == NULL)
3084 bcopy(hbh, opt->ip6po_hbh, hbhlen);
3089 case IPV6_2292DSTOPTS:
3091 case IPV6_RTHDRDSTOPTS:
3093 struct ip6_dest *dest, **newdest = NULL;
3099 ip6_clearpktopts(opt, optname);
3100 break; /* just remove the option */
3103 /* message length validation */
3104 if (len < sizeof(struct ip6_dest))
3106 dest = (struct ip6_dest *)buf;
3107 destlen = (dest->ip6d_len + 1) << 3;
3112 * Determine the position that the destination options header
3113 * should be inserted; before or after the routing header.
3116 case IPV6_2292DSTOPTS:
3118 * The old advanced API is ambiguous on this point.
3119 * Our approach is to determine the position based
3120 * according to the existence of a routing header.
3121 * Note, however, that this depends on the order of the
3122 * extension headers in the ancillary data; the 1st
3123 * part of the destination options header must appear
3124 * before the routing header in the ancillary data,
3126 * RFC3542 solved the ambiguity by introducing
3127 * separate ancillary data or option types.
3129 if (opt->ip6po_rthdr == NULL)
3130 newdest = &opt->ip6po_dest1;
3132 newdest = &opt->ip6po_dest2;
3134 case IPV6_RTHDRDSTOPTS:
3135 newdest = &opt->ip6po_dest1;
3138 newdest = &opt->ip6po_dest2;
3142 /* turn off the previous option, then set the new option. */
3143 ip6_clearpktopts(opt, optname);
3144 *newdest = kmalloc(destlen, M_IP6OPT, M_NOWAIT);
3145 if (*newdest == NULL)
3147 bcopy(dest, *newdest, destlen);
3152 case IPV6_2292RTHDR:
3155 struct ip6_rthdr *rth;
3159 ip6_clearpktopts(opt, IPV6_RTHDR);
3160 break; /* just remove the option */
3163 /* message length validation */
3164 if (len < sizeof(struct ip6_rthdr))
3166 rth = (struct ip6_rthdr *)buf;
3167 rthlen = (rth->ip6r_len + 1) << 3;
3171 switch (rth->ip6r_type) {
3173 return (EINVAL); /* not supported */
3176 /* turn off the previous option */
3177 ip6_clearpktopts(opt, IPV6_RTHDR);
3178 opt->ip6po_rthdr = kmalloc(rthlen, M_IP6OPT, M_NOWAIT);
3179 if (opt->ip6po_rthdr == NULL)
3181 bcopy(rth, opt->ip6po_rthdr, rthlen);
3186 case IPV6_USE_MIN_MTU:
3187 if (len != sizeof(int))
3189 minmtupolicy = *(int *)buf;
3190 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3191 minmtupolicy != IP6PO_MINMTU_DISABLE &&
3192 minmtupolicy != IP6PO_MINMTU_ALL) {
3195 opt->ip6po_minmtu = minmtupolicy;
3199 if (len != sizeof(int))
3202 if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3204 * we ignore this option for TCP sockets.
3205 * (RFC3542 leaves this case unspecified.)
3207 opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3209 opt->ip6po_flags |= IP6PO_DONTFRAG;
3212 case IPV6_PREFER_TEMPADDR:
3213 if (len != sizeof(int))
3215 preftemp = *(int *)buf;
3216 if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3217 preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3218 preftemp != IP6PO_TEMPADDR_PREFER) {
3221 opt->ip6po_prefer_tempaddr = preftemp;
3225 return (ENOPROTOOPT);
3226 } /* end of switch */
3233 * Set IPv6 outgoing packet options based on advanced API.
3236 ip6_setpktoptions(struct mbuf *control, struct ip6_pktopts *opt,
3237 struct ip6_pktopts *stickyopt, int uproto, int priv)
3239 struct cmsghdr *cm = NULL;
3241 if (control == NULL || opt == NULL)
3244 init_ip6pktopts(opt);
3247 * XXX: Currently, we assume all the optional information is stored
3254 * If stickyopt is provided, make a local copy of the options
3255 * for this particular packet, then override them by ancillary
3257 * XXX: copypktopts() does not copy the cached route to a next
3258 * hop (if any). This is not very good in terms of efficiency,
3259 * but we can allow this since this option should be rarely
3262 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
3267 * XXX: Currently, we assume all the optional information is stored
3270 if (control->m_next)
3273 for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
3274 control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
3277 if (control->m_len < CMSG_LEN(0))
3280 cm = mtod(control, struct cmsghdr *);
3281 if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
3283 if (cm->cmsg_level != IPPROTO_IPV6)
3286 error = ip6_setpktoption(cm->cmsg_type, CMSG_DATA(cm),
3287 cm->cmsg_len - CMSG_LEN(0), opt, 0, 1, uproto, priv);
3296 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3297 * packet to the input queue of a specified interface. Note that this
3298 * calls the output routine of the loopback "driver", but with an interface
3299 * pointer that might NOT be &loif -- easier than replicating that code here.
3302 ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
3305 struct ip6_hdr *ip6;
3307 copym = m_copy(m, 0, M_COPYALL);
3312 * Make sure to deep-copy IPv6 header portion in case the data
3313 * is in an mbuf cluster, so that we can safely override the IPv6
3314 * header portion later.
3316 if ((copym->m_flags & M_EXT) != 0 ||
3317 copym->m_len < sizeof(struct ip6_hdr)) {
3318 copym = m_pullup(copym, sizeof(struct ip6_hdr));
3324 if (copym->m_len < sizeof(*ip6)) {
3330 ip6 = mtod(copym, struct ip6_hdr *);
3332 * clear embedded scope identifiers if necessary.
3333 * in6_clearscope will touch the addresses only when necessary.
3335 in6_clearscope(&ip6->ip6_src);
3336 in6_clearscope(&ip6->ip6_dst);
3338 if_simloop(ifp, copym, dst->sin6_family, 0);
3342 * Separate the IPv6 header from the payload into its own mbuf.
3344 * Returns the new mbuf chain or the original mbuf if no payload.
3345 * Returns NULL if can't allocate new mbuf for header.
3347 static struct mbuf *
3348 ip6_splithdr(struct mbuf *m)
3352 if (m->m_len <= sizeof(struct ip6_hdr)) /* no payload */
3355 MGETHDR(mh, MB_DONTWAIT, MT_HEADER);
3358 mh->m_len = sizeof(struct ip6_hdr);
3359 M_MOVE_PKTHDR(mh, m);
3360 MH_ALIGN(mh, sizeof(struct ip6_hdr));
3361 bcopy(mtod(m, caddr_t), mtod(mh, caddr_t), sizeof(struct ip6_hdr));
3362 m->m_data += sizeof(struct ip6_hdr);
3363 m->m_len -= sizeof(struct ip6_hdr);
3369 * Compute IPv6 extension header length.
3372 ip6_optlen(struct in6pcb *in6p)
3376 if (!in6p->in6p_outputopts)
3381 (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3383 len += elen(in6p->in6p_outputopts->ip6po_hbh);
3384 if (in6p->in6p_outputopts->ip6po_rthdr)
3385 /* dest1 is valid with rthdr only */
3386 len += elen(in6p->in6p_outputopts->ip6po_dest1);
3387 len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3388 len += elen(in6p->in6p_outputopts->ip6po_dest2);