1 /* $FreeBSD: src/sys/netinet6/ip6_output.c,v 1.13.2.18 2003/01/24 05:11:35 sam Exp $ */
2 /* $DragonFly: src/sys/netinet6/ip6_output.c,v 1.26 2006/10/24 06:18:42 hsu Exp $ */
3 /* $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */
6 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the project nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * Copyright (c) 1982, 1986, 1988, 1990, 1993
36 * The Regents of the University of California. All rights reserved.
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
69 #include "opt_ip6fw.h"
71 #include "opt_inet6.h"
72 #include "opt_ipsec.h"
74 #include <sys/param.h>
75 #include <sys/malloc.h>
77 #include <sys/errno.h>
78 #include <sys/protosw.h>
79 #include <sys/socket.h>
80 #include <sys/socketvar.h>
81 #include <sys/systm.h>
82 #include <sys/kernel.h>
86 #include <net/route.h>
89 #include <netinet/in.h>
90 #include <netinet/in_var.h>
91 #include <netinet6/in6_var.h>
92 #include <netinet/ip6.h>
93 #include <netinet/icmp6.h>
94 #include <netinet6/ip6_var.h>
95 #include <netinet/in_pcb.h>
96 #include <netinet6/nd6.h>
99 #include <netinet6/ipsec.h>
101 #include <netinet6/ipsec6.h>
103 #include <netproto/key/key.h>
107 #include <netproto/ipsec/ipsec.h>
108 #include <netproto/ipsec/ipsec6.h>
109 #include <netproto/ipsec/key.h>
110 #endif /* FAST_IPSEC */
112 #include <net/ip6fw/ip6_fw.h>
114 #include <net/net_osdep.h>
/* Declares the M_IPMOPTS malloc type, described as "internet multicast options". */
116 static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");
/*
 * One mbuf pointer per separately held header: the IPv6 header itself,
 * hop-by-hop options, first destination options, routing header and second
 * destination options.
 * NOTE(review): the enclosing struct line is not visible in this listing;
 * ip6_output() accesses these as members of its "struct ip6_exthdrs exthdrs".
 */
119 struct mbuf *ip6e_ip6;
120 struct mbuf *ip6e_hbh;
121 struct mbuf *ip6e_dest1;
122 struct mbuf *ip6e_rthdr;
123 struct mbuf *ip6e_dest2;
/* Prototypes of the file-local (static) helpers. */
126 static int ip6_pcbopts (struct ip6_pktopts **, struct mbuf *,
127 struct socket *, struct sockopt *sopt);
128 static int ip6_setmoptions (int, struct ip6_moptions **, struct mbuf *);
129 static int ip6_getmoptions (int, struct ip6_moptions *, struct mbuf **);
130 static int copyexthdr (void *, struct mbuf **);
131 static int ip6_insertfraghdr (struct mbuf *, struct mbuf *, int,
133 static int ip6_insert_jumboopt (struct ip6_exthdrs *, u_int32_t);
134 static struct mbuf *ip6_splithdr (struct mbuf *);
137 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
138 * header (with pri, len, nxt, hlim, src, dst).
139 * This function may modify ver and hlim only.
140 * The mbuf chain containing the packet will be freed.
141 * The mbuf opt, if present, will not be freed.
143 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
144 * nd_ifinfo.linkmtu is u_int32_t. So we use u_long to hold the largest one,
145 * which is rt_rmx.rmx_mtu.
/*
 * ip6_output: build the final IPv6 packet from m0 and hand it to
 * nd6_output(), fragmenting first when needed.
 *
 * Stages visible in this listing, in order: copy the option headers from
 * opt into exthdrs, look up the IPsec policy, total the extension header
 * lengths, split the IPv6 header from the payload, insert a jumbo payload
 * option for payloads above IPV6_MAXPACKET, chain the extension headers,
 * rewrite the destination from a type 0 routing header, check the source
 * address, select the route and outgoing interface (unicast or multicast),
 * determine the path MTU, run the ip6fw and pfil hooks, then send or
 * fragment.
 *
 * m0    - packet to send.
 * opt   - source of the option headers, hop limit and pktinfo overrides.
 * ro    - route cache; replaced by opt->ip6po_route when opt carries a
 *         routing header.
 * flags - tested below for IPV6_DADOUTPUT, IPV6_FORWARDING and IPV6_MINMTU.
 * im6o  - multicast options (hop limit, outgoing interface, loopback).
 *
 * NOTE(review): the embedded original line numbers skip, so some statements
 * (several declarations, braces and conditions) are not visible here; the
 * comments added below describe only what the visible lines show.
 */
148 ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
149 int flags, struct ip6_moptions *im6o,
150 struct ifnet **ifpp, /* XXX: just for statistics */
153 struct ip6_hdr *ip6, *mhip6;
154 struct ifnet *ifp, *origifp;
156 int hlen, tlen, len, off;
157 struct route_in6 ip6route;
158 struct sockaddr_in6 *dst;
160 struct in6_ifaddr *ia = NULL;
162 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
163 struct ip6_exthdrs exthdrs;
164 struct in6_addr finaldst;
165 struct route_in6 *ro_pmtu = NULL;
166 boolean_t hdrsplit = FALSE;
169 int needipsectun = 0;
170 struct secpolicy *sp = NULL;
171 struct socket *so = inp ? inp->inp_socket : NULL;
173 ip6 = mtod(m, struct ip6_hdr *);
176 int needipsectun = 0;
177 struct secpolicy *sp = NULL;
179 ip6 = mtod(m, struct ip6_hdr *);
180 #endif /* FAST_IPSEC */
/* Start with no extension headers, then copy each one supplied in opt into its own mbuf. */
182 bzero(&exthdrs, sizeof(exthdrs));
185 if ((error = copyexthdr(opt->ip6po_hbh, &exthdrs.ip6e_hbh)))
187 if ((error = copyexthdr(opt->ip6po_dest1, &exthdrs.ip6e_dest1)))
189 if ((error = copyexthdr(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr)))
191 if ((error = copyexthdr(opt->ip6po_dest2, &exthdrs.ip6e_dest2)))
196 /* get a security policy for this packet */
198 sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
200 sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
203 ipsec6stat.out_inval++;
210 switch (sp->policy) {
211 case IPSEC_POLICY_DISCARD:
213 * This packet is just discarded.
215 ipsec6stat.out_polvio++;
218 case IPSEC_POLICY_BYPASS:
219 case IPSEC_POLICY_NONE:
220 /* no need to do IPsec. */
224 case IPSEC_POLICY_IPSEC:
225 if (sp->req == NULL) {
226 error = key_spdacquire(sp); /* acquire a policy */
232 case IPSEC_POLICY_ENTRUST:
234 printf("ip6_output: Invalid policy found. %d\n", sp->policy);
238 /* get a security policy for this packet */
240 sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
242 sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error);
245 newipsecstat.ips_out_inval++;
252 switch (sp->policy) {
253 case IPSEC_POLICY_DISCARD:
255 * This packet is just discarded.
257 newipsecstat.ips_out_polvio++;
260 case IPSEC_POLICY_BYPASS:
261 case IPSEC_POLICY_NONE:
262 /* no need to do IPsec. */
266 case IPSEC_POLICY_IPSEC:
267 if (sp->req == NULL) {
268 /* acquire a policy */
269 error = key_spdacquire(sp);
275 case IPSEC_POLICY_ENTRUST:
277 printf("ip6_output: Invalid policy found. %d\n", sp->policy);
279 #endif /* FAST_IPSEC */
282 * Calculate the total length of the extension header chain.
283 * Keep the length of the unfragmentable part for fragmentation.
286 if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
287 if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
288 if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
289 unfragpartlen = optlen + sizeof(struct ip6_hdr);
290 /* NOTE: we don't add AH/ESP length here. do that later. */
/* dest2 is added to optlen only after unfragpartlen has been fixed above. */
291 if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
294 * If we need IPsec, or there is at least one extension header,
295 * separate IP6 header from the payload.
297 if ((needipsec || optlen) && !hdrsplit) {
298 exthdrs.ip6e_ip6 = ip6_splithdr(m);
299 if (exthdrs.ip6e_ip6 == NULL) {
303 m = exthdrs.ip6e_ip6;
308 ip6 = mtod(m, struct ip6_hdr *);
310 /* adjust mbuf packet header length */
311 m->m_pkthdr.len += optlen;
312 plen = m->m_pkthdr.len - sizeof(*ip6);
314 /* If this is a jumbo payload, insert a jumbo payload option. */
315 if (plen > IPV6_MAXPACKET) {
317 exthdrs.ip6e_ip6 = ip6_splithdr(m);
318 if (exthdrs.ip6e_ip6 == NULL) {
322 m = exthdrs.ip6e_ip6;
326 ip6 = mtod(m, struct ip6_hdr *);
327 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
331 ip6->ip6_plen = htons(plen);
334 * Concatenate headers and fill in next header fields.
335 * Here we have, on "m"
337 * and we insert headers accordingly. Finally, we should be getting:
338 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
340 * during the header composing process, "m" points to IPv6 header.
341 * "mprev" points to an extension header prior to esp.
344 u_char *nexthdrp = &ip6->ip6_nxt;
345 struct mbuf *mprev = m;
348 * we treat dest2 specially. this makes IPsec processing
349 * much easier. the goal here is to make mprev point the
350 * mbuf prior to dest2.
352 * result: IPv6 dest2 payload
353 * m and mprev will point to IPv6 header.
355 if (exthdrs.ip6e_dest2) {
357 panic("assumption failed: hdr not split");
358 exthdrs.ip6e_dest2->m_next = m->m_next;
359 m->m_next = exthdrs.ip6e_dest2;
360 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
361 ip6->ip6_nxt = IPPROTO_DSTOPTS;
365 panic("assumption failed: hdr not split"); \
368 * Place m1 after mprev.
370 #define MAKE_CHAIN(m1, mprev, nexthdrp, i)\
373 *mtod(m1, u_char *) = *nexthdrp;\
375 nexthdrp = mtod(m1, u_char *);\
376 m1->m_next = mprev->m_next;\
382 * result: IPv6 hbh dest1 rthdr dest2 payload
383 * m will point to IPv6 header. mprev will point to the
384 * extension header prior to dest2 (rthdr in the above case).
386 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
387 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS);
388 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING);
390 #if defined(IPSEC) || defined(FAST_IPSEC)
395 * pointers after IPsec headers are not valid any more.
396 * other pointers need a great care too.
397 * (IPsec routines should not mangle mbufs prior to AH/ESP)
399 exthdrs.ip6e_dest2 = NULL;
402 struct ip6_rthdr *rh = NULL;
404 struct ipsec_output_state state;
/*
 * Segments-left is saved and zeroed before ipsec6_output_trans() runs and
 * is written back from segleft_org afterwards.
 */
406 if (exthdrs.ip6e_rthdr) {
407 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
408 segleft_org = rh->ip6r_segleft;
409 rh->ip6r_segleft = 0;
412 bzero(&state, sizeof(state));
414 error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
418 /* mbuf is already reclaimed in ipsec6_output_trans. */
428 printf("ip6_output (ipsec): error code %d\n", error);
431 /* don't show these error codes to the user */
437 if (exthdrs.ip6e_rthdr) {
438 /* ah6_output doesn't modify mbuf chain */
439 rh->ip6r_segleft = segleft_org;
447 * If there is a routing header, replace destination address field
448 * with the first hop of the routing header.
450 if (exthdrs.ip6e_rthdr) {
451 struct ip6_rthdr *rh =
452 (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
453 struct ip6_rthdr *));
454 struct ip6_rthdr0 *rh0;
456 finaldst = ip6->ip6_dst;
457 switch (rh->ip6r_type) {
458 case IPV6_RTHDR_TYPE_0:
459 rh0 = (struct ip6_rthdr0 *)rh;
460 ip6->ip6_dst = rh0->ip6r0_addr[0];
/*
 * Shift the remaining (segleft - 1) addresses down one slot and store the
 * original destination in the last slot.
 */
461 bcopy(&rh0->ip6r0_addr[1], &rh0->ip6r0_addr[0],
462 sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1));
463 rh0->ip6r0_addr[rh0->ip6r0_segleft - 1] = finaldst;
465 default: /* is it possible? */
471 /* Source address validation */
472 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
473 !(flags & IPV6_DADOUTPUT)) {
475 ip6stat.ip6s_badscope++;
478 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
480 ip6stat.ip6s_badscope++;
484 ip6stat.ip6s_localout++;
491 bzero(ro, sizeof(*ro));
/* When the options carry a routing header, the route stored in the options is used. */
494 if (opt && opt->ip6po_rthdr)
495 ro = &opt->ip6po_route;
496 dst = (struct sockaddr_in6 *)&ro->ro_dst;
498 * If there is a cached route,
499 * check that it is to the same destination
500 * and is still up. If not, free it and try again.
502 if (ro->ro_rt != NULL &&
503 (!(ro->ro_rt->rt_flags & RTF_UP) || dst->sin6_family != AF_INET6 ||
504 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
508 if (ro->ro_rt == NULL) {
509 bzero(dst, sizeof(*dst));
510 dst->sin6_family = AF_INET6;
511 dst->sin6_len = sizeof(struct sockaddr_in6);
512 dst->sin6_addr = ip6->ip6_dst;
514 #if defined(IPSEC) || defined(FAST_IPSEC)
515 if (needipsec && needipsectun) {
516 struct ipsec_output_state state;
519 * All the extension headers will become inaccessible
520 * (since they can be encrypted).
521 * Don't panic, we need no more updates to extension headers
522 * on inner IPv6 packet (since they are now encapsulated).
524 * IPv6 [ESP|AH] IPv6 [extension headers] payload
526 bzero(&exthdrs, sizeof(exthdrs));
527 exthdrs.ip6e_ip6 = m;
529 bzero(&state, sizeof(state));
531 state.ro = (struct route *)ro;
532 state.dst = (struct sockaddr *)dst;
534 error = ipsec6_output_tunnel(&state, sp, flags);
537 ro = (struct route_in6 *)state.ro;
538 dst = (struct sockaddr_in6 *)state.dst;
540 /* mbuf is already reclaimed in ipsec6_output_tunnel. */
551 printf("ip6_output (ipsec): error code %d\n", error);
554 /* don't show these error codes to the user */
561 exthdrs.ip6e_ip6 = m;
565 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
568 #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa))
569 #define sin6tosa(sin6) ((struct sockaddr *)(sin6))
571 * interface selection comes here
572 * if an interface is specified from an upper layer,
575 if (ro->ro_rt == NULL) {
577 * non-bsdi always clone routes, if parent is
580 rtalloc((struct route *)ro);
582 if (ro->ro_rt == NULL) {
583 ip6stat.ip6s_noroute++;
584 error = EHOSTUNREACH;
585 /* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */
588 ia = ifatoia6(ro->ro_rt->rt_ifa);
589 ifp = ro->ro_rt->rt_ifp;
/* For a gateway route, dst is switched to the route's gateway address. */
591 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
592 dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway;
593 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
595 in6_ifstat_inc(ifp, ifs6_out_request);
598 * Check if the outgoing interface conflicts with
599 * the interface specified by ifi6_ifindex (if specified).
600 * Note that loopback interface is always okay.
601 * (this may happen when we are sending a packet to one of
602 * our own addresses.)
604 if (opt && opt->ip6po_pktinfo
605 && opt->ip6po_pktinfo->ipi6_ifindex) {
606 if (!(ifp->if_flags & IFF_LOOPBACK)
607 && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) {
608 ip6stat.ip6s_noroute++;
609 in6_ifstat_inc(ifp, ifs6_out_discard);
610 error = EHOSTUNREACH;
/* A hop limit other than -1 in the options replaces the header's hop limit (low 8 bits). */
615 if (opt && opt->ip6po_hlim != -1)
616 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
619 struct in6_multi *in6m;
621 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
624 * See if the caller provided any multicast options
628 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
629 if (im6o->im6o_multicast_ifp != NULL)
630 ifp = im6o->im6o_multicast_ifp;
632 ip6->ip6_hlim = ip6_defmcasthlim;
635 * See if the caller provided the outgoing interface
636 * as an ancillary data.
637 * Boundary check for ifindex is assumed to be already done.
639 if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex)
640 ifp = ifindex2ifnet[opt->ip6po_pktinfo->ipi6_ifindex];
643 * If the destination is a node-local scope multicast,
644 * the packet should be loop-backed only.
646 if (IN6_IS_ADDR_MC_NODELOCAL(&ip6->ip6_dst)) {
648 * If the outgoing interface is already specified,
649 * it should be a loopback interface.
651 if (ifp && !(ifp->if_flags & IFF_LOOPBACK)) {
652 ip6stat.ip6s_badscope++;
653 error = ENETUNREACH; /* XXX: better error? */
654 /* XXX correct ifp? */
655 in6_ifstat_inc(ifp, ifs6_out_discard);
662 if (opt && opt->ip6po_hlim != -1)
663 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
666 * If caller did not provide an interface lookup a
667 * default in the routing table. This is either a
668 * default for the speicfied group (i.e. a host
669 * route), or a multicast default (a route for the
673 if (ro->ro_rt == NULL) {
675 rtpurelookup((struct sockaddr *)&ro->ro_dst);
677 if (ro->ro_rt == NULL) {
678 ip6stat.ip6s_noroute++;
679 error = EHOSTUNREACH;
680 /* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */
683 ia = ifatoia6(ro->ro_rt->rt_ifa);
684 ifp = ro->ro_rt->rt_ifp;
688 if (!(flags & IPV6_FORWARDING))
689 in6_ifstat_inc(ifp, ifs6_out_request);
690 in6_ifstat_inc(ifp, ifs6_out_mcast);
693 * Confirm that the outgoing interface supports multicast.
695 if (!(ifp->if_flags & IFF_MULTICAST)) {
696 ip6stat.ip6s_noroute++;
697 in6_ifstat_inc(ifp, ifs6_out_discard);
701 IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
703 (im6o == NULL || im6o->im6o_multicast_loop)) {
705 * If we belong to the destination multicast group
706 * on the outgoing interface, and the caller did not
707 * forbid loopback, loop back a copy.
709 ip6_mloopback(ifp, m, dst);
712 * If we are acting as a multicast router, perform
713 * multicast forwarding as if the packet had just
714 * arrived on the interface to which we are about
715 * to send. The multicast forwarding function
716 * recursively calls this function, using the
717 * IPV6_FORWARDING flag to prevent infinite recursion.
719 * Multicasts that are looped back by ip6_mloopback(),
720 * above, will be forwarded by the ip6_input() routine,
723 if (ip6_mrouter && !(flags & IPV6_FORWARDING)) {
724 if (ip6_mforward(ip6, ifp, m) != 0) {
731 * Multicasts with a hoplimit of zero may be looped back,
732 * above, but must not be transmitted on a network.
733 * Also, multicasts addressed to the loopback interface
734 * are not sent -- the above call to ip6_mloopback() will
735 * loop back a copy if this host actually belongs to the
736 * destination group on the loopback interface.
738 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK)) {
745 * Fill the outgoing inteface to tell the upper layer
746 * to increment per-interface statistics.
752 * Determine path MTU.
755 /* The first hop and the final destination may differ. */
756 struct sockaddr_in6 *sin6_fin =
757 (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
759 if (ro_pmtu->ro_rt != NULL &&
760 (!(ro->ro_rt->rt_flags & RTF_UP) ||
761 !IN6_ARE_ADDR_EQUAL(&sin6_fin->sin6_addr, &finaldst))) {
762 RTFREE(ro_pmtu->ro_rt);
763 ro_pmtu->ro_rt = NULL;
765 if (ro_pmtu->ro_rt == NULL) {
766 bzero(sin6_fin, sizeof(*sin6_fin));
767 sin6_fin->sin6_family = AF_INET6;
768 sin6_fin->sin6_len = sizeof(struct sockaddr_in6);
769 sin6_fin->sin6_addr = finaldst;
771 rtalloc((struct route *)ro_pmtu);
/* The route's MTU is compared against the interface link MTU read via ND_IFINFO(). */
774 if (ro_pmtu->ro_rt != NULL) {
775 u_int32_t ifmtu = ND_IFINFO(ifp)->linkmtu;
777 mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
778 if (mtu > ifmtu || mtu == 0) {
780 * The MTU on the route is larger than the MTU on
781 * the interface! This shouldn't happen, unless the
782 * MTU of the interface has been changed after the
783 * interface was brought up. Change the MTU in the
784 * route to match the interface MTU (as long as the
785 * field isn't locked).
787 * if MTU on the route is 0, we need to fix the MTU.
788 * this case happens with path MTU discovery timeouts.
791 if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU))
792 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
795 mtu = ND_IFINFO(ifp)->linkmtu;
799 * advanced API (IPV6_USE_MIN_MTU) overrides mtu setting
801 if ((flags & IPV6_MINMTU) != 0 && mtu > IPV6_MMTU)
804 /* Fake scoped addresses */
805 if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
807 * If source or destination address is a scoped address, and
808 * the packet is going to be sent to a loopback interface,
809 * we should keep the original interface.
813 * XXX: this is a very experimental and temporary solution.
814 * We eventually have sockaddr_in6 and use the sin6_scope_id
815 * field of the structure here.
816 * We rely on the consistency between two scope zone ids
817 * of source and destination, which should already be assured.
818 * Larger scopes than link will be supported in the future.
821 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
822 origifp = ifindex2ifnet[ntohs(ip6->ip6_src.s6_addr16[1])];
823 else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
824 origifp = ifindex2ifnet[ntohs(ip6->ip6_dst.s6_addr16[1])];
826 * XXX: origifp can be NULL even in those two cases above.
827 * For example, if we remove the (only) link-local address
828 * from the loopback interface, and try to send a link-local
829 * address without link-id information. Then the source
830 * address is ::1, and the destination address is the
831 * link-local address with its s6_addr16[1] being zero.
832 * What is worse, if the packet goes to the loopback interface
833 * by a default rejected route, the null pointer would be
834 * passed to looutput, and the kernel would hang.
835 * The following last resort would prevent such disaster.
843 * clear embedded scope identifiers if necessary.
844 * in6_clearscope will touch the addresses only when necessary.
846 in6_clearscope(&ip6->ip6_src);
847 in6_clearscope(&ip6->ip6_dst);
850 * Check with the firewall...
852 if (ip6_fw_enable && ip6_fw_chk_ptr) {
854 m->m_pkthdr.rcvif = NULL; /* XXX */
855 /* If ipfw says divert, we have to just drop packet */
856 if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) {
867 * If the outgoing packet contains a hop-by-hop options header,
868 * it must be examined and processed even by the source node.
869 * (RFC 2460, section 4.)
871 if (exthdrs.ip6e_hbh) {
872 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
873 u_int32_t dummy1; /* XXX unused */
874 u_int32_t dummy2; /* XXX unused */
877 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
878 panic("ip6e_hbh is not continuous");
881 * XXX: if we have to send an ICMPv6 error to the sender,
882 * we need the M_LOOP flag since icmp6_error() expects
883 * the IPv6 and the hop-by-hop options header are
884 * continuous unless the flag is set.
886 m->m_flags |= M_LOOP;
887 m->m_pkthdr.rcvif = ifp;
888 if (ip6_process_hopopts(m,
889 (u_int8_t *)(hbh + 1),
890 ((hbh->ip6h_len + 1) << 3) -
891 sizeof(struct ip6_hbh),
892 &dummy1, &dummy2) < 0) {
893 /* m was already freed at this point */
894 error = EINVAL;/* better error? */
897 m->m_flags &= ~M_LOOP; /* XXX */
898 m->m_pkthdr.rcvif = NULL;
902 * Run through list of hooks for output packets.
904 if (pfil_has_hooks(&inet6_pfil_hook)) {
905 error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT);
906 if (error != 0 || m == NULL)
908 ip6 = mtod(m, struct ip6_hdr *);
912 * Send the packet to the outgoing interface.
913 * If necessary, do IPv6 fragmentation before sending.
915 tlen = m->m_pkthdr.len;
919 * On any link that cannot convey a 1280-octet packet in one piece,
920 * link-specific fragmentation and reassembly must be provided at
921 * a layer below IPv6. [RFC 2460, sec.5]
922 * Thus if the interface has ability of link-level fragmentation,
923 * we can just send the packet even if the packet size is
924 * larger than the link's MTU.
925 * XXX: IFF_FRAGMENTABLE (or such) flag has not been defined yet...
928 || ifp->if_flags & IFF_FRAGMENTABLE
932 /* Record statistics for this interface address. */
933 if (ia && !(flags & IPV6_FORWARDING)) {
934 ia->ia_ifa.if_opackets++;
935 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
938 /* clean ipsec history once it goes out of the node */
941 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
943 } else if (mtu < IPV6_MMTU) {
945 * note that path MTU is never less than IPV6_MMTU
949 in6_ifstat_inc(ifp, ifs6_out_fragfail);
951 } else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */
953 in6_ifstat_inc(ifp, ifs6_out_fragfail);
956 struct mbuf **mnext, *m_frgpart;
957 struct ip6_frag *ip6f;
958 u_int32_t id = htonl(ip6_id++);
962 * Too large for the destination or interface;
963 * fragment if possible.
964 * Must be able to put at least 8 bytes per fragment.
966 hlen = unfragpartlen;
967 if (mtu > IPV6_MAXPACKET)
968 mtu = IPV6_MAXPACKET;
/* Data bytes per fragment: what fits after the unfragmentable part and the fragment header, rounded down to a multiple of 8. */
970 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
973 in6_ifstat_inc(ifp, ifs6_out_fragfail);
977 mnext = &m->m_nextpkt;
980 * Change the next header field of the last header in the
981 * unfragmentable part.
983 if (exthdrs.ip6e_rthdr) {
984 nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
985 *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
986 } else if (exthdrs.ip6e_dest1) {
987 nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
988 *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
989 } else if (exthdrs.ip6e_hbh) {
990 nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
991 *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
993 nextproto = ip6->ip6_nxt;
994 ip6->ip6_nxt = IPPROTO_FRAGMENT;
998 * Loop through length of segment after first fragment,
999 * make new header and copy data of each part and link onto
1003 for (off = hlen; off < tlen; off += len) {
1004 MGETHDR(m, MB_DONTWAIT, MT_HEADER);
1007 ip6stat.ip6s_odropped++;
1010 m->m_pkthdr.rcvif = NULL;
1011 m->m_flags = m0->m_flags & M_COPYFLAGS;
1013 mnext = &m->m_nextpkt;
1014 m->m_data += max_linkhdr;
1015 mhip6 = mtod(m, struct ip6_hdr *);
1017 m->m_len = sizeof(*mhip6);
1018 error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1020 ip6stat.ip6s_odropped++;
/* The fragment offset is measured from the end of the unfragmentable part, low three bits cleared. */
1023 ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1024 if (off + len >= tlen)
1027 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1028 mhip6->ip6_plen = htons((u_short)(len + hlen +
1030 sizeof(struct ip6_hdr)));
1031 if ((m_frgpart = m_copy(m0, off, len)) == NULL) {
1033 ip6stat.ip6s_odropped++;
1036 m_cat(m, m_frgpart);
1037 m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1038 m->m_pkthdr.rcvif = (struct ifnet *)0;
1039 ip6f->ip6f_reserved = 0;
1040 ip6f->ip6f_ident = id;
1041 ip6f->ip6f_nxt = nextproto;
1042 ip6stat.ip6s_ofragments++;
1043 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1046 in6_ifstat_inc(ifp, ifs6_out_fragok);
1050 * Remove leading garbages.
1056 for (m0 = m; m; m = m0) {
1060 /* Record statistics for this interface address. */
1062 ia->ia_ifa.if_opackets++;
1063 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1066 /* clean ipsec history once it goes out of the node */
1069 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1075 ip6stat.ip6s_fragmented++;
/* Routes held in the function-local ip6route are handled here (RTFREE is visible for the ro_pmtu case). */
1078 if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1080 } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1081 RTFREE(ro_pmtu->ro_rt);
1091 #endif /* FAST_IPSEC */
/* Free the extension header mbufs referenced from exthdrs. */
1096 m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */
1097 m_freem(exthdrs.ip6e_dest1);
1098 m_freem(exthdrs.ip6e_rthdr);
1099 m_freem(exthdrs.ip6e_dest2);
/*
 * copyexthdr: copy the extension header at h into an mbuf obtained from
 * m_getb().
 *
 * The header length is computed from its ip6e_len field as
 * (ip6e_len + 1) * 8 bytes; a header longer than MCLBYTES is refused with
 * ENOBUFS.
 *
 * NOTE(review): the statements that use mp are not visible in this listing;
 * presumably the new mbuf is handed back through *mp, as ip6_output() passes
 * &exthdrs.ip6e_* here - confirm against the full source.
 */
1107 copyexthdr(void *h, struct mbuf **mp)
1109 struct ip6_ext *hdr = h;
1116 hlen = (hdr->ip6e_len + 1) * 8;
1117 if (hlen > MCLBYTES)
1118 return ENOBUFS; /* XXX */
/* Non-blocking allocation sized for the whole header, then a flat copy into it. */
1120 m = m_getb(hlen, MB_DONTWAIT, MT_DATA, 0);
1125 bcopy(hdr, mtod(m, caddr_t), hlen);
1132 * Insert jumbo payload option.
/*
 * ip6_insert_jumboopt: add JUMBOOPTLEN (8) bytes holding a jumbo payload
 * option to the hop-by-hop options header kept in exthdrs->ip6e_hbh.
 *
 * exthdrs - header set; ip6e_hbh is created or replaced as needed and the
 *           packet header length of ip6e_ip6 grows by JUMBOOPTLEN.
 * plen    - payload length; the value written into the option is
 *           plen + JUMBOOPTLEN in network byte order.
 *
 * Three cases are visible: no hop-by-hop header yet (a new mbuf is
 * allocated), an existing header without JUMBOOPTLEN bytes of trailing
 * space (its contents are copied into a fresh cluster), and an existing
 * header with room (the option is appended in place).
 */
1135 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1141 #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
1144 * If there is no hop-by-hop options header, allocate new one.
1145 * If there is one but it doesn't have enough space to store the
1146 * jumbo payload option, allocate a cluster to store the whole options.
1147 * Otherwise, use it to store the options.
1149 if (exthdrs->ip6e_hbh == NULL) {
1150 MGET(mopt, MB_DONTWAIT, MT_DATA);
1153 mopt->m_len = JUMBOOPTLEN;
1154 optbuf = mtod(mopt, u_char *);
1155 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
1156 exthdrs->ip6e_hbh = mopt;
1158 struct ip6_hbh *hbh;
1160 mopt = exthdrs->ip6e_hbh;
1161 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1164 * - exthdrs->ip6e_hbh is not referenced from places
1165 * other than exthdrs.
1166 * - exthdrs->ip6e_hbh is not an mbuf chain.
1168 int oldoptlen = mopt->m_len;
1172 * XXX: give up if the whole (new) hbh header does
1173 * not fit even in an mbuf cluster.
1175 if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1179 * As a consequence, we must always prepare a cluster
1182 n = m_getcl(MB_DONTWAIT, MT_DATA, 0);
/* The cluster receives the old options, and optbuf points just past them where the new 8 bytes go. */
1185 n->m_len = oldoptlen + JUMBOOPTLEN;
1186 bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), oldoptlen);
1187 optbuf = mtod(n, caddr_t) + oldoptlen;
1189 mopt = exthdrs->ip6e_hbh = n;
1191 optbuf = mtod(mopt, u_char *) + mopt->m_len;
1192 mopt->m_len += JUMBOOPTLEN;
1194 optbuf[0] = IP6OPT_PADN;
1198 * Adjust the header length according to the pad and
1199 * the jumbo payload option.
1201 hbh = mtod(mopt, struct ip6_hbh *);
1202 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1205 /* fill in the option. */
1206 optbuf[2] = IP6OPT_JUMBO;
/* The 32-bit length is converted with htonl() and copied bytewise into optbuf[4..7]. */
1208 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1209 bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1211 /* finally, adjust the packet header length */
1212 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1219 * Insert fragment header and copy unfragmentable header portions.
/*
 * ip6_insertfraghdr: prepare the header part of one fragment.
 *
 * m0      - original packet; when hlen exceeds sizeof(struct ip6_hdr), the
 *           bytes between the fixed IPv6 header and offset hlen are copied
 *           from it with m_copym().
 * m       - mbuf of the fragment being built; its packet header length is
 *           increased when the fragment header is placed in existing
 *           trailing space.
 * hlen    - length of the unfragmentable part, fixed header included.
 * frghdrp - receives a pointer to the fragment header (the assignment is
 *           visible for the newly allocated mbuf case).
 *
 * Room for the fragment header is taken from the trailing space of the last
 * mbuf when that mbuf has no M_EXT flag and enough space; otherwise a new
 * mbuf is allocated and linked after it.
 */
1222 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1223 struct ip6_frag **frghdrp)
1225 struct mbuf *n, *mlast;
1227 if (hlen > sizeof(struct ip6_hdr)) {
1228 n = m_copym(m0, sizeof(struct ip6_hdr),
1229 hlen - sizeof(struct ip6_hdr), MB_DONTWAIT);
1236 /* Search for the last mbuf of unfragmentable part. */
1237 for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1240 if (!(mlast->m_flags & M_EXT) &&
1241 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1242 /* use the trailing space of the last mbuf for the fragment hdr */
1244 (struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len);
1245 mlast->m_len += sizeof(struct ip6_frag);
1246 m->m_pkthdr.len += sizeof(struct ip6_frag);
1248 /* allocate a new mbuf for the fragment header */
/* The new mbuf holds only the fragment header and is linked after mlast. */
1251 MGET(mfrg, MB_DONTWAIT, MT_DATA);
1254 mfrg->m_len = sizeof(struct ip6_frag);
1255 *frghdrp = mtod(mfrg, struct ip6_frag *);
1256 mlast->m_next = mfrg;
1263 * IP6 socket option processing.
/*
 * ip6_ctloutput: process an IPv6-level socket option request.
 *
 * so   - socket whose protocol control block (so->so_pcb) is read or
 *        updated.
 * sopt - the request; its level, direction (sopt_dir), option name and
 *        value size are latched into locals before dispatch.
 *
 * Options are dispatched on when the level is IPPROTO_IPV6.  Integer
 * options are transferred with sooptcopyin()/sooptcopyout(); mbuf based
 * options (IPV6_PKTOPTIONS, the multicast options, IPsec policy and the
 * firewall hook) go through soopt_getm()/soopt_mcopyin()/
 * soopt_mcopyout().  Unknown option names set the error to ENOPROTOOPT.
 */
1266 ip6_ctloutput(struct socket *so, struct sockopt *sopt)
1269 struct inpcb *in6p = so->so_pcb;
1271 int level, op, optname;
1276 level = sopt->sopt_level;
1277 op = sopt->sopt_dir;
1278 optname = sopt->sopt_name;
1279 optlen = sopt->sopt_valsize;
1282 panic("ip6_ctloutput: arg soopt is NULL");
/* privileged is 1 only when a thread is attached and suser() returns 0 */
1288 privileged = (td == NULL || suser(td)) ? 0 : 1;
1290 if (level == IPPROTO_IPV6) {
1295 case IPV6_PKTOPTIONS:
1299 error = soopt_getm(sopt, &m); /* XXX */
1302 error = soopt_mcopyin(sopt, m); /* XXX */
1305 error = ip6_pcbopts(&in6p->in6p_outputopts,
1307 m_freem(m); /* XXX */
1312 * Use of some Hop-by-Hop options or some
1313 * Destination options, might require special
1314 * privilege. That is, normal applications
1315 * (without special privilege) might be forbidden
1316 * from setting certain options in outgoing packets,
1317 * and might never see certain options in received
1318 * packets. [RFC 2292 Section 6]
1319 * KAME specific note:
1320 * KAME prevents non-privileged users from sending or
1321 * receiving ANY hbh/dst options in order to avoid
1322 * overhead of parsing options in the kernel.
1324 case IPV6_UNICAST_HOPS:
1329 if (optlen != sizeof(int)) {
1333 error = sooptcopyin(sopt, &optval,
1334 sizeof optval, sizeof optval);
1339 case IPV6_UNICAST_HOPS:
1340 if (optval < -1 || optval >= 256)
1343 /* -1 = kernel default */
1344 in6p->in6p_hops = optval;
1346 if ((in6p->in6p_vflag &
1348 in6p->inp_ip_ttl = optval;
1351 #define OPTSET(bit) \
1354 in6p->in6p_flags |= (bit); \
1356 in6p->in6p_flags &= ~(bit); \
1358 #define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1361 in6p->in6p_cksum = optval;
1370 * make setsockopt(IPV6_V6ONLY)
1371 * available only prior to bind(2).
1372 * see ipng mailing list, Jun 22 2001.
1374 if (in6p->in6p_lport ||
1375 !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr))
1380 OPTSET(IN6P_IPV6_V6ONLY);
1382 in6p->in6p_vflag &= ~INP_IPV4;
1384 in6p->in6p_vflag |= INP_IPV4;
1395 if (optlen != sizeof(int)) {
1399 error = sooptcopyin(sopt, &optval,
1400 sizeof optval, sizeof optval);
1405 OPTSET(IN6P_PKTINFO);
1408 OPTSET(IN6P_HOPLIMIT);
1412 * Check super-user privilege.
1413 * See comments for IPV6_RECVHOPOPTS.
1417 OPTSET(IN6P_HOPOPTS);
1422 OPTSET(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1431 case IPV6_MULTICAST_IF:
1432 case IPV6_MULTICAST_HOPS:
1433 case IPV6_MULTICAST_LOOP:
1434 case IPV6_JOIN_GROUP:
1435 case IPV6_LEAVE_GROUP:
/* the option value has to fit into the data area of a single mbuf */
1438 if (sopt->sopt_valsize > MLEN) {
/* a blocking allocation is requested only when sopt carries a thread */
1443 MGET(m, sopt->sopt_td ? MB_WAIT : MB_DONTWAIT, MT_HEADER);
1448 m->m_len = sopt->sopt_valsize;
1449 error = sooptcopyin(sopt, mtod(m, char *),
1450 m->m_len, m->m_len);
/*
 * Hand the mbuf to ip6_setmoptions() only when the copy-in succeeded.
 * The sooptcopyin() result used to be overwritten unconditionally, so
 * a failed copy still had its uninitialized buffer parsed and the
 * copy-in error was lost.
 */
if (error == 0)
1451 error = ip6_setmoptions(sopt->sopt_name,
1452 &in6p->in6p_moptions,
1458 case IPV6_PORTRANGE:
1459 error = sooptcopyin(sopt, &optval,
1460 sizeof optval, sizeof optval);
1465 case IPV6_PORTRANGE_DEFAULT:
1466 in6p->in6p_flags &= ~(IN6P_LOWPORT);
1467 in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1470 case IPV6_PORTRANGE_HIGH:
1471 in6p->in6p_flags &= ~(IN6P_LOWPORT);
1472 in6p->in6p_flags |= IN6P_HIGHPORT;
1475 case IPV6_PORTRANGE_LOW:
1476 in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1477 in6p->in6p_flags |= IN6P_LOWPORT;
1486 #if defined(IPSEC) || defined(FAST_IPSEC)
1487 case IPV6_IPSEC_POLICY:
1493 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1495 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1498 req = mtod(m, caddr_t);
1501 error = ipsec6_set_policy(in6p, optname, req,
1506 #endif /* KAME IPSEC */
1514 struct mbuf **mp = &m;
/* the firewall control hook is optional; it is tested before use */
1516 if (ip6_fw_ctl_ptr == NULL)
1519 if ((error = soopt_getm(sopt, &m)) != 0)
1522 if ((error = soopt_mcopyin(sopt, m)) != 0)
1524 error = (*ip6_fw_ctl_ptr)(optname, mp);
1530 error = ENOPROTOOPT;
1538 case IPV6_PKTOPTIONS:
1539 if (in6p->in6p_options) {
1541 m = m_copym(in6p->in6p_options,
1542 0, M_COPYALL, MB_WAIT);
1543 error = soopt_mcopyout(sopt, m);
1547 sopt->sopt_valsize = 0;
1550 case IPV6_UNICAST_HOPS:
1555 case IPV6_PORTRANGE:
1558 case IPV6_UNICAST_HOPS:
1559 optval = in6p->in6p_hops;
1563 optval = in6p->in6p_cksum;
1567 optval = OPTBIT(IN6P_FAITH);
1571 optval = OPTBIT(IN6P_IPV6_V6ONLY);
1574 case IPV6_PORTRANGE:
1577 flags = in6p->in6p_flags;
/* IN6P_HIGHPORT is tested first, so it wins if both flags are set */
1578 if (flags & IN6P_HIGHPORT)
1579 optval = IPV6_PORTRANGE_HIGH;
1580 else if (flags & IN6P_LOWPORT)
1581 optval = IPV6_PORTRANGE_LOW;
1587 error = sooptcopyout(sopt, &optval,
1596 if (optname == IPV6_HOPOPTS ||
1597 optname == IPV6_DSTOPTS ||
1602 optval = OPTBIT(IN6P_PKTINFO);
1605 optval = OPTBIT(IN6P_HOPLIMIT);
1610 optval = OPTBIT(IN6P_HOPOPTS);
1613 optval = OPTBIT(IN6P_RTHDR);
1618 optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
1621 error = sooptcopyout(sopt, &optval,
1625 case IPV6_MULTICAST_IF:
1626 case IPV6_MULTICAST_HOPS:
1627 case IPV6_MULTICAST_LOOP:
1628 case IPV6_JOIN_GROUP:
1629 case IPV6_LEAVE_GROUP:
1632 error = ip6_getmoptions(sopt->sopt_name,
1633 in6p->in6p_moptions, &m);
1635 error = sooptcopyout(sopt,
1636 mtod(m, char *), m->m_len);
1641 #if defined(IPSEC) || defined(FAST_IPSEC)
1642 case IPV6_IPSEC_POLICY:
1646 struct mbuf *m = NULL;
1647 struct mbuf **mp = &m;
1649 error = soopt_getm(sopt, &m); /* XXX */
1652 error = soopt_mcopyin(sopt, m); /* XXX */
1656 req = mtod(m, caddr_t);
1659 error = ipsec6_get_policy(in6p, req, len, mp);
1661 error = soopt_mcopyout(sopt, m); /*XXX*/
1662 if (error == 0 && m != NULL)
1666 #endif /* KAME IPSEC */
1671 struct mbuf **mp = &m;
1673 if (ip6_fw_ctl_ptr == NULL)
1677 error = (*ip6_fw_ctl_ptr)(optname, mp);
1679 error = soopt_mcopyout(sopt, m); /* XXX */
1680 if (error == 0 && m != NULL)
1686 error = ENOPROTOOPT;
1698 * Set up IP6 options in pcb for insertion in output packets or
1699 * specifying behavior of outgoing packets.
/*
 * ip6_pcbopts: replace the outgoing packet options attached to a pcb.
 *
 * pktopt - in/out pointer to the current option block (read into opt).
 * m      - control data describing the new options; NULL or an empty
 *          mbuf is tested for explicitly (see the check on m below).
 * so     - owning socket.
 * sopt   - originating request; supplies the calling thread (sopt_td).
 *
 * Options already present are reported with a console message and
 * dropped with ip6_clearpktopts().  New options are parsed by
 * ip6_setpktoptions(); if that fails, everything parsed so far is
 * discarded, the block is freed, and the error is kept in "error".
 *
 * NOTE(review): the kmalloc() below looks like the path taken when no
 * block existed yet -- confirm against the complete function.
 */
1702 ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so,
1703 struct sockopt *sopt)
1705 struct ip6_pktopts *opt = *pktopt;
1707 struct thread *td = sopt->sopt_td;
1710 /* turn off any old options. */
1713 if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
1714 opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
1715 opt->ip6po_rhinfo.ip6po_rhi_rthdr)
1716 printf("ip6_pcbopts: all specified options are cleared.\n");
1718 ip6_clearpktopts(opt, 1, -1);
1720 opt = kmalloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
1723 if (m == NULL || m->m_len == 0) {
1725 * Only turning off any previous options, regardless of
1726 * whether the opt is just created or given.
1728 kfree(opt, M_IP6OPT);
1732 /* set options specified by user. */
1735 if ((error = ip6_setpktoptions(m, opt, priv, 1)) != 0) {
1736 ip6_clearpktopts(opt, 1, -1); /* XXX: discard all options */
1737 kfree(opt, M_IP6OPT);
1745 * initialize ip6_pktopts. beware that there are non-zero default values in
/*
 * init_ip6pktopts: put an option block into its initial state -- every
 * byte zeroed, then the hop limit set to -1.
 */
1749 init_ip6pktopts(struct ip6_pktopts *opt)
1752 bzero(opt, sizeof(*opt));
1753 opt->ip6po_hlim = -1; /* -1 means default hop limit */
/*
 * ip6_clearpktopts: drop the options stored in an option block.
 *
 * pktopt   - block to clear.
 * needfree - when non-zero, the buffer behind each pointer-valued option
 *            is released with kfree() before the pointer is reset.
 * optname  - -1 selects the option in each of the sections below.
 *
 * For every option (pktinfo, nexthop, hbh, dest1, rthdr, dest2) the
 * pointer is set to NULL, and the hop limit is put back to -1.  Clearing
 * the routing header also releases the cached route with RTFREE().
 */
1757 ip6_clearpktopts(struct ip6_pktopts *pktopt, int needfree, int optname)
1762 if (optname == -1) {
1763 if (needfree && pktopt->ip6po_pktinfo)
1764 kfree(pktopt->ip6po_pktinfo, M_IP6OPT);
1765 pktopt->ip6po_pktinfo = NULL;
1768 pktopt->ip6po_hlim = -1;
1769 if (optname == -1) {
1770 if (needfree && pktopt->ip6po_nexthop)
1771 kfree(pktopt->ip6po_nexthop, M_IP6OPT);
1772 pktopt->ip6po_nexthop = NULL;
1774 if (optname == -1) {
1775 if (needfree && pktopt->ip6po_hbh)
1776 kfree(pktopt->ip6po_hbh, M_IP6OPT);
1777 pktopt->ip6po_hbh = NULL;
1779 if (optname == -1) {
1780 if (needfree && pktopt->ip6po_dest1)
1781 kfree(pktopt->ip6po_dest1, M_IP6OPT);
1782 pktopt->ip6po_dest1 = NULL;
1784 if (optname == -1) {
1785 if (needfree && pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
1786 kfree(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
1787 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
/* the route cached for the routing header is released even if needfree is 0 */
1788 if (pktopt->ip6po_route.ro_rt) {
1789 RTFREE(pktopt->ip6po_route.ro_rt);
1790 pktopt->ip6po_route.ro_rt = NULL;
1793 if (optname == -1) {
1794 if (needfree && pktopt->ip6po_dest2)
1795 kfree(pktopt->ip6po_dest2, M_IP6OPT);
1796 pktopt->ip6po_dest2 = NULL;
/*
 * PKTOPT_EXTHDRCPY(type): duplicate one extension header from src into
 * dst.  The byte length is taken from the header itself,
 * (ip6e_len + 1) << 3, the copy is allocated with kmalloc() using the
 * caller's canwait flag, and the allocation result is checked before the
 * bcopy().
 */
1800 #define PKTOPT_EXTHDRCPY(type) \
1804 (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
1805 dst->type = kmalloc(hlen, M_IP6OPT, canwait);\
1806 if (dst->type == NULL)\
1808 bcopy(src->type, dst->type, hlen);\
/*
 * ip6_copypktopts: make a deep copy of an option block.
 *
 * src     - block to duplicate.
 * canwait - allocation flag handed unchanged to every kmalloc().
 *
 * The new block is zeroed, takes over the hop limit, and receives
 * private copies of pktinfo, nexthop (sa_len bytes) and the hbh, dest1,
 * dest2 and rthdr extension headers; the cached route is not copied.
 * The cleanup sequence at the end frees whatever had been copied and
 * then the block itself.
 *
 * NOTE(review): presumably returns the copy, or NULL on failure -- the
 * return statements are not part of this excerpt; confirm.
 */
1812 struct ip6_pktopts *
1813 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
1815 struct ip6_pktopts *dst;
/* the diagnostic now names this function; it used to say ip6_clearpktopts */
1818 printf("ip6_copypktopts: invalid argument\n");
1822 dst = kmalloc(sizeof(*dst), M_IP6OPT, canwait);
1825 bzero(dst, sizeof(*dst));
1827 dst->ip6po_hlim = src->ip6po_hlim;
1828 if (src->ip6po_pktinfo) {
1829 dst->ip6po_pktinfo = kmalloc(sizeof(*dst->ip6po_pktinfo),
1831 if (dst->ip6po_pktinfo == NULL)
1833 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
1835 if (src->ip6po_nexthop) {
1836 dst->ip6po_nexthop = kmalloc(src->ip6po_nexthop->sa_len,
1838 if (dst->ip6po_nexthop == NULL)
1840 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
1841 src->ip6po_nexthop->sa_len);
1843 PKTOPT_EXTHDRCPY(ip6po_hbh);
1844 PKTOPT_EXTHDRCPY(ip6po_dest1);
1845 PKTOPT_EXTHDRCPY(ip6po_dest2);
1846 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
1850 if (dst->ip6po_pktinfo) kfree(dst->ip6po_pktinfo, M_IP6OPT);
1851 if (dst->ip6po_nexthop) kfree(dst->ip6po_nexthop, M_IP6OPT);
1852 if (dst->ip6po_hbh) kfree(dst->ip6po_hbh, M_IP6OPT);
1853 if (dst->ip6po_dest1) kfree(dst->ip6po_dest1, M_IP6OPT);
1854 if (dst->ip6po_dest2) kfree(dst->ip6po_dest2, M_IP6OPT);
1855 if (dst->ip6po_rthdr) kfree(dst->ip6po_rthdr, M_IP6OPT);
1856 kfree(dst, M_IP6OPT);
1859 #undef PKTOPT_EXTHDRCPY
/*
 * ip6_freepcbopts: dispose of an option block -- every option it holds
 * is cleared and freed via ip6_clearpktopts(pktopt, 1, -1), then the
 * block itself is released.
 */
1862 ip6_freepcbopts(struct ip6_pktopts *pktopt)
1867 ip6_clearpktopts(pktopt, 1, -1);
1869 kfree(pktopt, M_IP6OPT);
1873 * Set the IP6 multicast options in response to user setsockopt().
/*
 * ip6_setmoptions: apply one multicast socket option to a pcb.
 *
 * optname - IPV6_MULTICAST_IF, IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP,
 *           IPV6_JOIN_GROUP or IPV6_LEAVE_GROUP.
 * im6op   - in/out pointer to the pcb's multicast option block.  A block
 *           initialized to the default values is allocated when needed,
 *           and the block is freed again at the end once the interface,
 *           hop limit and loopback flag are all at their defaults and
 *           the membership list is empty.
 * m       - mbuf holding the option value; each case compares m_len with
 *           the exact size it expects (u_int, int or struct ipv6_mreq).
 *
 * Errors assigned in the code below include ENXIO for an interface index
 * beyond if_index and EADDRNOTAVAIL when the interface is missing or not
 * multicast capable, or when no route to the group is found.
 */
1876 ip6_setmoptions(int optname, struct ip6_moptions **im6op, struct mbuf *m)
1879 u_int loop, ifindex;
1880 struct ipv6_mreq *mreq;
1882 struct ip6_moptions *im6o = *im6op;
1883 struct route_in6 ro;
1884 struct sockaddr_in6 *dst;
1885 struct in6_multi_mship *imm;
1886 struct thread *td = curthread; /* XXX */
1890 * No multicast option buffer attached to the pcb;
1891 * allocate one and initialize to default values.
1893 im6o = (struct ip6_moptions *)
1894 kmalloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
1899 im6o->im6o_multicast_ifp = NULL;
1900 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
1901 im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
1902 LIST_INIT(&im6o->im6o_memberships);
1907 case IPV6_MULTICAST_IF:
1909 * Select the interface for outgoing multicast packets.
1911 if (m == NULL || m->m_len != sizeof(u_int)) {
1915 bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
/* ifindex is a u_int, so only the upper-bound half of this test can fire */
1916 if (ifindex < 0 || if_index < ifindex) {
1917 error = ENXIO; /* XXX EINVAL? */
1920 ifp = ifindex2ifnet[ifindex];
1921 if (ifp == NULL || !(ifp->if_flags & IFF_MULTICAST)) {
1922 error = EADDRNOTAVAIL;
1925 im6o->im6o_multicast_ifp = ifp;
1928 case IPV6_MULTICAST_HOPS:
1931 * Set the IP6 hoplimit for outgoing multicast packets.
1934 if (m == NULL || m->m_len != sizeof(int)) {
1938 bcopy(mtod(m, u_int *), &optval, sizeof(optval));
/* -1 selects the system default; other accepted values are 0..255 */
1939 if (optval < -1 || optval >= 256)
1941 else if (optval == -1)
1942 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
1944 im6o->im6o_multicast_hlim = optval;
1948 case IPV6_MULTICAST_LOOP:
1950 * Set the loopback flag for outgoing multicast packets.
1951 * Must be zero or one.
1953 if (m == NULL || m->m_len != sizeof(u_int)) {
1957 bcopy(mtod(m, u_int *), &loop, sizeof(loop));
1962 im6o->im6o_multicast_loop = loop;
1965 case IPV6_JOIN_GROUP:
1967 * Add a multicast group membership.
1968 * Group must be a valid IP6 multicast address.
1970 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
1974 mreq = mtod(m, struct ipv6_mreq *);
1975 if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
1977 * We use the unspecified address to specify to accept
1978 * all multicast addresses. Only super user is allowed
1986 } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
1992 * If the interface is specified, validate it.
1994 if (mreq->ipv6mr_interface < 0
1995 || if_index < mreq->ipv6mr_interface) {
1996 error = ENXIO; /* XXX EINVAL? */
2000 * If no interface was explicitly specified, choose an
2001 * appropriate one according to the given multicast address.
2003 if (mreq->ipv6mr_interface == 0) {
2005 * If the multicast address is in node-local scope,
2006 * the interface should be a loopback interface.
2007 * Otherwise, look up the routing table for the
2008 * address, and choose the outgoing interface.
2009 * XXX: is it a good approach?
2011 if (IN6_IS_ADDR_MC_NODELOCAL(&mreq->ipv6mr_multiaddr)) {
/* build a sockaddr_in6 for the group inside ro and let rtalloc() find a route */
2015 dst = (struct sockaddr_in6 *)&ro.ro_dst;
2016 bzero(dst, sizeof(*dst));
2017 dst->sin6_len = sizeof(struct sockaddr_in6);
2018 dst->sin6_family = AF_INET6;
2019 dst->sin6_addr = mreq->ipv6mr_multiaddr;
2020 rtalloc((struct route *)&ro);
2021 if (ro.ro_rt == NULL) {
2022 error = EADDRNOTAVAIL;
2025 ifp = ro.ro_rt->rt_ifp;
2029 ifp = ifindex2ifnet[mreq->ipv6mr_interface];
2032 * See if we found an interface, and confirm that it
2033 * supports multicast
2035 if (ifp == NULL || !(ifp->if_flags & IFF_MULTICAST)) {
2036 error = EADDRNOTAVAIL;
2040 * Put interface index into the multicast address,
2041 * if the address has link-local scope.
2043 if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2044 mreq->ipv6mr_multiaddr.s6_addr16[1]
2045 = htons(mreq->ipv6mr_interface);
2048 * See if the membership already exists.
2050 for (imm = im6o->im6o_memberships.lh_first;
2051 imm != NULL; imm = imm->i6mm_chain.le_next)
2052 if (imm->i6mm_maddr->in6m_ifp == ifp &&
2053 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2054 &mreq->ipv6mr_multiaddr))
2061 * Everything looks good; add a new record to the multicast
2062 * address list for the given interface.
2064 imm = kmalloc(sizeof(*imm), M_IPMADDR, M_WAITOK);
2069 if ((imm->i6mm_maddr =
2070 in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) {
2071 kfree(imm, M_IPMADDR);
2074 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2077 case IPV6_LEAVE_GROUP:
2079 * Drop a multicast group membership.
2080 * Group must be a valid IP6 multicast address.
2082 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2086 mreq = mtod(m, struct ipv6_mreq *);
2087 if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2092 } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2097 * If an interface address was specified, get a pointer
2098 * to its ifnet structure.
2100 if (mreq->ipv6mr_interface < 0
2101 || if_index < mreq->ipv6mr_interface) {
2102 error = ENXIO; /* XXX EINVAL? */
2105 ifp = ifindex2ifnet[mreq->ipv6mr_interface];
2107 * Put interface index into the multicast address,
2108 * if the address has link-local scope.
2110 if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2111 mreq->ipv6mr_multiaddr.s6_addr16[1]
2112 = htons(mreq->ipv6mr_interface);
2115 * Find the membership in the membership list.
2117 for (imm = im6o->im6o_memberships.lh_first;
2118 imm != NULL; imm = imm->i6mm_chain.le_next) {
2120 imm->i6mm_maddr->in6m_ifp == ifp) &&
2121 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2122 &mreq->ipv6mr_multiaddr))
2126 /* Unable to resolve interface */
2127 error = EADDRNOTAVAIL;
2131 * Give up the multicast address record to which the
2132 * membership points.
2134 LIST_REMOVE(imm, i6mm_chain);
2135 in6_delmulti(imm->i6mm_maddr);
2136 kfree(imm, M_IPMADDR);
2145 * If all options have default values, no need to keep the mbuf.
2147 if (im6o->im6o_multicast_ifp == NULL &&
2148 im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2149 im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2150 im6o->im6o_memberships.lh_first == NULL) {
2151 kfree(*im6op, M_IPMOPTS);
2159 * Return the IP6 multicast options in response to user getsockopt().
/*
 * ip6_getmoptions: produce the value of one multicast socket option.
 *
 * optname - IPV6_MULTICAST_IF, IPV6_MULTICAST_HOPS or
 *           IPV6_MULTICAST_LOOP; anything else returns EOPNOTSUPP.
 * im6o    - the pcb's multicast option block; the interface case tests
 *           it for NULL, and default values are reported in place of
 *           fields that cannot be read from it.
 * mp      - receives a newly allocated mbuf whose data is one u_int (the
 *           outgoing interface index, hop limit or loopback flag) and
 *           whose m_len is sizeof(u_int).
 */
2162 ip6_getmoptions(int optname, struct ip6_moptions *im6o, struct mbuf **mp)
2164 u_int *hlim, *loop, *ifindex;
2166 *mp = m_get(MB_WAIT, MT_HEADER); /* XXX */
2170 case IPV6_MULTICAST_IF:
2171 ifindex = mtod(*mp, u_int *);
2172 (*mp)->m_len = sizeof(u_int);
2173 if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2176 *ifindex = im6o->im6o_multicast_ifp->if_index;
2179 case IPV6_MULTICAST_HOPS:
2180 hlim = mtod(*mp, u_int *);
2181 (*mp)->m_len = sizeof(u_int);
2183 *hlim = ip6_defmcasthlim;
2185 *hlim = im6o->im6o_multicast_hlim;
2188 case IPV6_MULTICAST_LOOP:
2189 loop = mtod(*mp, u_int *);
2190 (*mp)->m_len = sizeof(u_int);
/*
 * The fallback for the loopback flag is the loopback default, the same
 * value ip6_setmoptions() installs in a fresh option block.  The default
 * hop limit (ip6_defmcasthlim) was reported here by mistake.
 */
2192 *loop = IPV6_DEFAULT_MULTICAST_LOOP;
2194 *loop = im6o->im6o_multicast_loop;
2198 return (EOPNOTSUPP);
2203 * Discard the IP6 multicast options.
/*
 * ip6_freemoptions: release a multicast option block.  Each membership
 * record is unlinked from the list, its group is left through
 * in6_delmulti() when the record points at one, and the record is freed;
 * finally the block itself is freed.
 */
2206 ip6_freemoptions(struct ip6_moptions *im6o)
2208 struct in6_multi_mship *imm;
/* always take the current list head; the loop ends when the list is empty */
2213 while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2214 LIST_REMOVE(imm, i6mm_chain);
2215 if (imm->i6mm_maddr)
2216 in6_delmulti(imm->i6mm_maddr);
2217 kfree(imm, M_IPMADDR);
2219 kfree(im6o, M_IPMOPTS);
2223 * Set IPv6 outgoing packet options based on advanced API.
/*
 * ip6_setpktoptions: parse ancillary data into an option block.
 *
 * control - mbuf holding a sequence of cmsghdr records.  The parser
 *           consumes it in place, advancing m_data and shrinking m_len
 *           by CMSG_ALIGN(cmsg_len) per record; a chained mbuf (m_next)
 *           is tested for before the walk starts.
 * opt     - option block to fill; it is reset with init_ip6pktopts()
 *           before parsing.
 * priv    - privilege indicator supplied by the caller.
 *
 * Each record's level is compared with IPPROTO_IPV6 and its cmsg_type
 * selects the option.  Per-option length checks use CMSG_LEN(); the hop
 * limit must lie in -1..255 and a routing header must be type 0 with an
 * even, non-zero length matching its segments-left count.  Unknown types
 * return ENOPROTOOPT.
 *
 * NOTE(review): the remainder of the parameter list is not part of this
 * excerpt; the buffers are either copied with kmalloc()/bcopy() or
 * referenced in place, presumably selected by that remaining parameter.
 */
2226 ip6_setpktoptions(struct mbuf *control, struct ip6_pktopts *opt, int priv,
2229 struct cmsghdr *cm = NULL;
2231 if (control == NULL || opt == NULL)
2234 init_ip6pktopts(opt);
2237 * XXX: Currently, we assume all the optional information is stored
2240 if (control->m_next)
/*
 * Stop as soon as m_len is no longer positive: subtracting the aligned
 * record length can take m_len below zero when the final record is not
 * padded, and a plain non-zero test would then keep walking past the
 * end of the data.
 */
2243 for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2244 control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
/* a complete cmsghdr must be present before any of its fields is read */
if (control->m_len < CMSG_LEN(0))
return (EINVAL);
2245 cm = mtod(control, struct cmsghdr *);
2246 if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2248 if (cm->cmsg_level != IPPROTO_IPV6)
2252 * XXX should check if RFC2292 API is mixed with 2292bis API
2254 switch (cm->cmsg_type) {
2256 if (cm->cmsg_len != CMSG_LEN(sizeof(struct in6_pktinfo)))
2259 /* XXX: Is it really WAITOK? */
2260 opt->ip6po_pktinfo =
2261 kmalloc(sizeof(struct in6_pktinfo),
2262 M_IP6OPT, M_WAITOK);
2263 bcopy(CMSG_DATA(cm), opt->ip6po_pktinfo,
2264 sizeof(struct in6_pktinfo));
2266 opt->ip6po_pktinfo =
2267 (struct in6_pktinfo *)CMSG_DATA(cm);
2268 if (opt->ip6po_pktinfo->ipi6_ifindex &&
2269 IN6_IS_ADDR_LINKLOCAL(&opt->ip6po_pktinfo->ipi6_addr))
2270 opt->ip6po_pktinfo->ipi6_addr.s6_addr16[1] =
2271 htons(opt->ip6po_pktinfo->ipi6_ifindex);
2273 if (opt->ip6po_pktinfo->ipi6_ifindex > if_index
2274 || opt->ip6po_pktinfo->ipi6_ifindex < 0) {
2279 * Check if the requested source address is indeed a
2280 * unicast address assigned to the node, and can be
2281 * used as the packet's source address.
2283 if (!IN6_IS_ADDR_UNSPECIFIED(&opt->ip6po_pktinfo->ipi6_addr)) {
2284 struct in6_ifaddr *ia6;
2285 struct sockaddr_in6 sin6;
2287 bzero(&sin6, sizeof(sin6));
2288 sin6.sin6_len = sizeof(sin6);
2289 sin6.sin6_family = AF_INET6;
2291 opt->ip6po_pktinfo->ipi6_addr;
2292 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr(sin6tosa(&sin6));
2294 (ia6->ia6_flags & (IN6_IFF_ANYCAST |
2295 IN6_IFF_NOTREADY)) != 0)
2296 return (EADDRNOTAVAIL);
2301 if (cm->cmsg_len != CMSG_LEN(sizeof(int)))
2304 opt->ip6po_hlim = *(int *)CMSG_DATA(cm);
2305 if (opt->ip6po_hlim < -1 || opt->ip6po_hlim > 255)
/*
 * cmsg_len counts the cmsghdr as well, so the record must span at least
 * CMSG_LEN(1) before the first data byte (sa_len) may be read.
 */
2313 if (cm->cmsg_len < CMSG_LEN(sizeof(u_char)) ||
2314 /* check if cmsg_len is large enough for sa_len */
2315 cm->cmsg_len < CMSG_LEN(*CMSG_DATA(cm)))
2319 opt->ip6po_nexthop =
2320 kmalloc(*CMSG_DATA(cm),
2321 M_IP6OPT, M_WAITOK);
2322 bcopy(CMSG_DATA(cm),
2326 opt->ip6po_nexthop =
2327 (struct sockaddr *)CMSG_DATA(cm);
2332 struct ip6_hbh *hbh;
2335 if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_hbh)))
2337 hbh = (struct ip6_hbh *)CMSG_DATA(cm);
2338 hbhlen = (hbh->ip6h_len + 1) << 3;
2339 if (cm->cmsg_len != CMSG_LEN(hbhlen))
2344 kmalloc(hbhlen, M_IP6OPT, M_WAITOK);
2345 bcopy(hbh, opt->ip6po_hbh, hbhlen);
2347 opt->ip6po_hbh = hbh;
2353 struct ip6_dest *dest, **newdest;
2356 if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_dest)))
2358 dest = (struct ip6_dest *)CMSG_DATA(cm);
2359 destlen = (dest->ip6d_len + 1) << 3;
2360 if (cm->cmsg_len != CMSG_LEN(destlen))
2364 * The old advanced API is ambiguous on this
2365 * point. Our approach is to determine the
2366 * position based according to the existence
2367 * of a routing header. Note, however, that
2368 * this depends on the order of the extension
2369 * headers in the ancillary data; the 1st part
2370 * of the destination options header must
2371 * appear before the routing header in the
2372 * ancillary data, too.
2373 * RFC2292bis solved the ambiguity by
2374 * introducing separate cmsg types.
2376 if (opt->ip6po_rthdr == NULL)
2377 newdest = &opt->ip6po_dest1;
2379 newdest = &opt->ip6po_dest2;
2382 *newdest = kmalloc(destlen, M_IP6OPT, M_WAITOK);
2383 bcopy(dest, *newdest, destlen);
2392 struct ip6_rthdr *rth;
2395 if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_rthdr)))
2397 rth = (struct ip6_rthdr *)CMSG_DATA(cm);
2398 rthlen = (rth->ip6r_len + 1) << 3;
2399 if (cm->cmsg_len != CMSG_LEN(rthlen))
2402 switch (rth->ip6r_type) {
2403 case IPV6_RTHDR_TYPE_0:
2404 /* must contain one addr */
2405 if (rth->ip6r_len == 0)
2407 /* length must be even */
2408 if (rth->ip6r_len % 2)
2410 if (rth->ip6r_len / 2 != rth->ip6r_segleft)
2414 return (EINVAL); /* not supported */
2418 opt->ip6po_rthdr = kmalloc(rthlen, M_IP6OPT,
2420 bcopy(rth, opt->ip6po_rthdr, rthlen);
2422 opt->ip6po_rthdr = rth;
2428 return (ENOPROTOOPT);
2436 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
2437 * packet to the input queue of a specified interface. Note that this
2438 * calls the output routine of the loopback "driver", but with an interface
2439 * pointer that might NOT be &loif -- easier than replicating that code here.
/*
 * ip6_mloopback: deliver a copy of an outgoing multicast packet back to
 * the local input path.
 *
 * ifp - interface handed to if_simloop().
 * m   - packet to copy; the copy is taken with m_copy(m, 0, M_COPYALL).
 * dst - destination address; only its sin6_family is passed on.
 *
 * Before delivery the copy's IPv6 header is run through m_pullup() when
 * the first mbuf uses external storage (M_EXT) or is shorter than the
 * header, and in6_clearscope() is applied to the source and destination
 * addresses.
 */
2442 ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
2445 struct ip6_hdr *ip6;
2447 copym = m_copy(m, 0, M_COPYALL);
2452 * Make sure to deep-copy IPv6 header portion in case the data
2453 * is in an mbuf cluster, so that we can safely override the IPv6
2454 * header portion later.
2456 if ((copym->m_flags & M_EXT) != 0 ||
2457 copym->m_len < sizeof(struct ip6_hdr)) {
2458 copym = m_pullup(copym, sizeof(struct ip6_hdr));
2464 if (copym->m_len < sizeof(*ip6)) {
2470 ip6 = mtod(copym, struct ip6_hdr *);
2472 * clear embedded scope identifiers if necessary.
2473 * in6_clearscope will touch the addresses only when necessary.
2475 in6_clearscope(&ip6->ip6_src);
2476 in6_clearscope(&ip6->ip6_dst);
2478 if_simloop(ifp, copym, dst->sin6_family, NULL);
2482 * Separate the IPv6 header from the payload into its own mbuf.
2484 * Returns the new mbuf chain or the original mbuf if no payload.
2485 * Returns NULL if can't allocate new mbuf for header.
/*
 * ip6_splithdr: give the IPv6 header an mbuf of its own.  A new packet
 * header mbuf is allocated without waiting, takes over m's packet header
 * and receives a copy of the first sizeof(struct ip6_hdr) bytes; m is
 * then advanced past those bytes.
 */
2487 static struct mbuf *
2488 ip6_splithdr(struct mbuf *m)
2492 if (m->m_len <= sizeof(struct ip6_hdr)) /* no payload */
2495 MGETHDR(mh, MB_DONTWAIT, MT_HEADER);
2498 mh->m_len = sizeof(struct ip6_hdr);
/* move the pkthdr first, then align the data area for the header copy */
2499 M_MOVE_PKTHDR(mh, m);
2500 MH_ALIGN(mh, sizeof(struct ip6_hdr));
2501 bcopy(mtod(m, caddr_t), mtod(mh, caddr_t), sizeof(struct ip6_hdr));
/* strip the header bytes from the original mbuf; only payload stays in m */
2502 m->m_data += sizeof(struct ip6_hdr);
2503 m->m_len -= sizeof(struct ip6_hdr);
2509 * Compute IPv6 extension header length.
2512 ip6_optlen(struct in6pcb *in6p)
2516 if (!in6p->in6p_outputopts)
2521 (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
2523 len += elen(in6p->in6p_outputopts->ip6po_hbh);
2524 if (in6p->in6p_outputopts->ip6po_rthdr)
2525 /* dest1 is valid with rthdr only */
2526 len += elen(in6p->in6p_outputopts->ip6po_dest1);
2527 len += elen(in6p->in6p_outputopts->ip6po_rthdr);
2528 len += elen(in6p->in6p_outputopts->ip6po_dest2);