network - Major netmsg retooling, part 1
[dragonfly.git] / sys / netinet6 / ip6_output.c
1 /*      $FreeBSD: src/sys/netinet6/ip6_output.c,v 1.13.2.18 2003/01/24 05:11:35 sam Exp $       */
2 /*      $DragonFly: src/sys/netinet6/ip6_output.c,v 1.37 2008/09/04 09:08:22 hasso Exp $        */
3 /*      $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $    */
4
5 /*
6  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the project nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33
34 /*
35  * Copyright (c) 1982, 1986, 1988, 1990, 1993
36  *      The Regents of the University of California.  All rights reserved.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. All advertising materials mentioning features or use of this software
47  *    must display the following acknowledgement:
48  *      This product includes software developed by the University of
49  *      California, Berkeley and its contributors.
50  * 4. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *      @(#)ip_output.c 8.3 (Berkeley) 1/21/94
67  */
68
69 #include "opt_ip6fw.h"
70 #include "opt_inet.h"
71 #include "opt_inet6.h"
72 #include "opt_ipsec.h"
73
74 #include <sys/param.h>
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/errno.h>
78 #include <sys/protosw.h>
79 #include <sys/socket.h>
80 #include <sys/socketvar.h>
81 #include <sys/systm.h>
82 #include <sys/kernel.h>
83 #include <sys/proc.h>
84 #include <sys/priv.h>
85
86 #include <sys/thread2.h>
87 #include <sys/msgport2.h>
88
89 #include <net/if.h>
90 #include <net/route.h>
91 #include <net/pfil.h>
92
93 #include <netinet/in.h>
94 #include <netinet/in_var.h>
95 #include <netinet6/in6_var.h>
96 #include <netinet/ip6.h>
97 #include <netinet/icmp6.h>
98 #include <netinet6/ip6_var.h>
99 #include <netinet/in_pcb.h>
100 #include <netinet6/nd6.h>
101 #include <netinet6/ip6protosw.h>
102
103 #ifdef IPSEC
104 #include <netinet6/ipsec.h>
105 #ifdef INET6
106 #include <netinet6/ipsec6.h>
107 #endif
108 #include <netproto/key/key.h>
109 #endif /* IPSEC */
110
111 #ifdef FAST_IPSEC
112 #include <netproto/ipsec/ipsec.h>
113 #include <netproto/ipsec/ipsec6.h>
114 #include <netproto/ipsec/key.h>
115 #endif
116
117 #include <net/ip6fw/ip6_fw.h>
118
119 #include <net/net_osdep.h>
120
121 static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");      /* malloc type tag described as holding IPv6 multicast options */
122
123 struct ip6_exthdrs {        /* mbufs for the headers ip6_output() assembles; zeroed with bzero() before use */
124         struct mbuf *ip6e_ip6;      /* IPv6 header mbuf; set from ip6_splithdr() once header and payload are separated */
125         struct mbuf *ip6e_hbh;      /* filled from opt->ip6po_hbh; chained as IPPROTO_HOPOPTS */
126         struct mbuf *ip6e_dest1;    /* filled from opt->ip6po_dest1; chained as IPPROTO_DSTOPTS ahead of rthdr */
127         struct mbuf *ip6e_rthdr;    /* filled from opt->ip6po_rthdr; chained as IPPROTO_ROUTING */
128         struct mbuf *ip6e_dest2;    /* filled from opt->ip6po_dest2; linked right after the IPv6 header first, so it ends up after rthdr and ahead of the payload */
129 };
130
131 static int ip6_pcbopt (int, u_char *, int, struct ip6_pktopts **, int);
132 static int ip6_setpktoption (int, u_char *, int, struct ip6_pktopts *,
133          int, int, int, int);
134 static int ip6_pcbopts (struct ip6_pktopts **, struct mbuf *,
135                             struct socket *, struct sockopt *);
136 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
137 static int ip6_setmoptions (int, struct ip6_moptions **, struct mbuf *);
138 static int ip6_getmoptions (int, struct ip6_moptions *, struct mbuf **);
139 static int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
140         struct ifnet *, struct in6_addr *, u_long *, int *);    /* used by ip6_output() to determine the path MTU; nonzero return is treated as an error */
141 static int copyexthdr (void *, struct mbuf **); /* used by ip6_output() to fill one ip6_exthdrs slot from an ip6po_* option; nonzero return is an error */
142 static int ip6_insertfraghdr (struct mbuf *, struct mbuf *, int,
143                                   struct ip6_frag **);
144 static int ip6_insert_jumboopt (struct ip6_exthdrs *, u_int32_t);       /* called when plen > IPV6_MAXPACKET to insert the jumbo payload option; nonzero return is an error */
145 static struct mbuf *ip6_splithdr (struct mbuf *);       /* separates the IPv6 header from the payload; a NULL return is reported by the caller as ENOBUFS */
146 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
147
148 /*
149  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
150  * header (with pri, len, nxt, hlim, src, dst).
151  * This function may modify ver and hlim only.
152  * The mbuf chain containing the packet will be freed.
153  * The mbuf opt, if present, will not be freed.
154  *
155  * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
156  * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
157  * which is rt_rmx.rmx_mtu.
158  */
159 int
160 ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
161            int flags, struct ip6_moptions *im6o,
162            struct ifnet **ifpp,         /* XXX: just for statistics */
163            struct inpcb *inp)
164 {
165         struct ip6_hdr *ip6, *mhip6;
166         struct ifnet *ifp, *origifp;
167         struct mbuf *m = m0;
168         struct mbuf *mprev;
169         u_char *nexthdrp;
170         int hlen, tlen, len, off;
171         struct route_in6 ip6route;
172         struct sockaddr_in6 *dst;
173         int error = 0;
174         struct in6_ifaddr *ia = NULL;
175         u_long mtu;
176         int alwaysfrag, dontfrag;
177         u_int32_t optlen, plen = 0, unfragpartlen;
178         struct ip6_exthdrs exthdrs;
179         struct in6_addr finaldst;
180         struct route_in6 *ro_pmtu = NULL;
181         boolean_t hdrsplit = FALSE;
182         boolean_t needipsec = FALSE;
183 #ifdef IPSEC
184         boolean_t needipsectun = FALSE;
185         struct secpolicy *sp = NULL;
186         struct socket *so = inp ? inp->inp_socket : NULL;
187
188         ip6 = mtod(m, struct ip6_hdr *);
189 #endif
190 #ifdef FAST_IPSEC
191         boolean_t needipsectun = FALSE;
192         struct secpolicy *sp = NULL;
193
194         ip6 = mtod(m, struct ip6_hdr *);
195 #endif
196
197         bzero(&exthdrs, sizeof exthdrs);
198
199         if (opt) {
200                 if ((error = copyexthdr(opt->ip6po_hbh, &exthdrs.ip6e_hbh)))
201                         goto freehdrs;
202                 if ((error = copyexthdr(opt->ip6po_dest1, &exthdrs.ip6e_dest1)))
203                         goto freehdrs;
204                 if ((error = copyexthdr(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr)))
205                         goto freehdrs;
206                 if ((error = copyexthdr(opt->ip6po_dest2, &exthdrs.ip6e_dest2)))
207                         goto freehdrs;
208         }
209
210 #ifdef IPSEC
211         /* get a security policy for this packet */
212         if (so == NULL)
213                 sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
214         else
215                 sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
216
217         if (sp == NULL) {
218                 ipsec6stat.out_inval++;
219                 goto freehdrs;
220         }
221
222         error = 0;
223
224         /* check policy */
225         switch (sp->policy) {
226         case IPSEC_POLICY_DISCARD:
227                 /*
228                  * This packet is just discarded.
229                  */
230                 ipsec6stat.out_polvio++;
231                 goto freehdrs;
232
233         case IPSEC_POLICY_BYPASS:
234         case IPSEC_POLICY_NONE:
235                 /* no need to do IPsec. */
236                 needipsec = FALSE;
237                 break;
238
239         case IPSEC_POLICY_IPSEC:
240                 if (sp->req == NULL) {
241                         error = key_spdacquire(sp);     /* acquire a policy */
242                         goto freehdrs;
243                 }
244                 needipsec = TRUE;
245                 break;
246
247         case IPSEC_POLICY_ENTRUST:
248         default:
249                 kprintf("ip6_output: Invalid policy found. %d\n", sp->policy);
250         }
251 #endif /* IPSEC */
252 #ifdef FAST_IPSEC
253         /* get a security policy for this packet */
254         if (inp == NULL)
255                 sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
256         else
257                 sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error);
258
259         if (sp == NULL) {
260                 newipsecstat.ips_out_inval++;
261                 goto freehdrs;
262         }
263
264         error = 0;
265
266         /* check policy */
267         switch (sp->policy) {
268         case IPSEC_POLICY_DISCARD:
269                 /*
270                  * This packet is just discarded.
271                  */
272                 newipsecstat.ips_out_polvio++;
273                 goto freehdrs;
274
275         case IPSEC_POLICY_BYPASS:
276         case IPSEC_POLICY_NONE:
277                 /* no need to do IPsec. */
278                 needipsec = FALSE;
279                 break;
280
281         case IPSEC_POLICY_IPSEC:
282                 if (sp->req == NULL) {
283                         error = key_spdacquire(sp);     /* acquire a policy */
284                         goto freehdrs;
285                 }
286                 needipsec = TRUE;
287                 break;
288
289         case IPSEC_POLICY_ENTRUST:
290         default:
291                 kprintf("ip6_output: Invalid policy found. %d\n", sp->policy);
292         }
293 #endif /* FAST_IPSEC */
294
295         /*
296          * Calculate the total length of the extension header chain.
297          * Keep the length of the unfragmentable part for fragmentation.
298          */
299         optlen = m_lengthm(exthdrs.ip6e_hbh, NULL) +
300             m_lengthm(exthdrs.ip6e_dest1, NULL) +
301             m_lengthm(exthdrs.ip6e_rthdr, NULL);
302
303         unfragpartlen = optlen + sizeof(struct ip6_hdr);
304
305         /* NOTE: we don't add AH/ESP length here. do that later. */
306         optlen += m_lengthm(exthdrs.ip6e_dest2, NULL);
307
308         /*
309          * If we need IPsec, or there is at least one extension header,
310          * separate IP6 header from the payload.
311          */
312         if ((needipsec || optlen) && !hdrsplit) {
313                 exthdrs.ip6e_ip6 = ip6_splithdr(m);
314                 if (exthdrs.ip6e_ip6 == NULL) {
315                         error = ENOBUFS;
316                         goto freehdrs;
317                 }
318                 m = exthdrs.ip6e_ip6;
319                 hdrsplit = TRUE;
320         }
321
322         /* adjust pointer */
323         ip6 = mtod(m, struct ip6_hdr *);
324
325         /* adjust mbuf packet header length */
326         m->m_pkthdr.len += optlen;
327         plen = m->m_pkthdr.len - sizeof(*ip6);
328
329         /* If this is a jumbo payload, insert a jumbo payload option. */
330         if (plen > IPV6_MAXPACKET) {
331                 if (!hdrsplit) {
332                         exthdrs.ip6e_ip6 = ip6_splithdr(m);
333                         if (exthdrs.ip6e_ip6 == NULL) {
334                                 error = ENOBUFS;
335                                 goto freehdrs;
336                         }
337                         m = exthdrs.ip6e_ip6;
338                         hdrsplit = TRUE;
339                 }
340                 /* adjust pointer */
341                 ip6 = mtod(m, struct ip6_hdr *);
342                 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
343                         goto freehdrs;
344                 ip6->ip6_plen = 0;
345         } else
346                 ip6->ip6_plen = htons(plen);
347
348         /*
349          * Concatenate headers and fill in next header fields.
350          * Here we have, on "m"
351          *      IPv6 payload
352          * and we insert headers accordingly.  Finally, we should be getting:
353          *      IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
354          *
355          * during the header composing process, "m" points to IPv6 header.
356          * "mprev" points to an extension header prior to esp.
357          */
358
359         nexthdrp = &ip6->ip6_nxt;
360         mprev = m;
361
362         /*
363          * we treat dest2 specially.  this makes IPsec processing
364          * much easier.  the goal here is to make mprev point the
365          * mbuf prior to dest2.
366          *
367          * result: IPv6 dest2 payload
368          * m and mprev will point to IPv6 header.
369          */
370         if (exthdrs.ip6e_dest2) {
371                 if (!hdrsplit)
372                         panic("assumption failed: hdr not split");
373                 exthdrs.ip6e_dest2->m_next = m->m_next;
374                 m->m_next = exthdrs.ip6e_dest2;
375                 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
376                 ip6->ip6_nxt = IPPROTO_DSTOPTS;
377         }
378
379 /*
380  * Place m1 after mprev.
381  */
382 #define MAKE_CHAIN(m1, mprev, nexthdrp, i)\
383     do {\
384         if (m1) {\
385                 if (!hdrsplit)\
386                         panic("assumption failed: hdr not split");\
387                 *mtod(m1, u_char *) = *nexthdrp;\
388                 *nexthdrp = (i);\
389                 nexthdrp = mtod(m1, u_char *);\
390                 m1->m_next = mprev->m_next;\
391                 mprev->m_next = m1;\
392                 mprev = m1;\
393         }\
394     } while (0)
395
396         /*
397          * result: IPv6 hbh dest1 rthdr dest2 payload
398          * m will point to IPv6 header.  mprev will point to the
399          * extension header prior to dest2 (rthdr in the above case).
400          */
401         MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
402         MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS);
403         MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING);
404
405 #if defined(IPSEC) || defined(FAST_IPSEC)
406         if (needipsec) {
407                 struct ipsec_output_state state;
408                 int segleft_org = 0;
409                 struct ip6_rthdr *rh = NULL;
410
411                 /*
412                  * pointers after IPsec headers are not valid any more.
413                  * other pointers need a great care too.
414                  * (IPsec routines should not mangle mbufs prior to AH/ESP)
415                  */
416                 exthdrs.ip6e_dest2 = NULL;
417
418                 if (exthdrs.ip6e_rthdr) {
419                         rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
420                         segleft_org = rh->ip6r_segleft;
421                         rh->ip6r_segleft = 0;
422                 }
423
424                 bzero(&state, sizeof state);
425                 state.m = m;
426                 error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
427                                             &needipsectun);
428                 m = state.m;
429                 if (error) {
430                         /* mbuf is already reclaimed in ipsec6_output_trans. */
431                         m = NULL;
432                         switch (error) {
433                         case EHOSTUNREACH:
434                         case ENETUNREACH:
435                         case EMSGSIZE:
436                         case ENOBUFS:
437                         case ENOMEM:
438                                 break;
439                         default:
440                                 kprintf("ip6_output (ipsec): error code %d\n",
441                                        error);
442                                 /* fall through */
443                         case ENOENT:
444                                 /* don't show these error codes to the user */
445                                 error = 0;
446                                 break;
447                         }
448                         goto bad;
449                 }
450                 if (exthdrs.ip6e_rthdr) {
451                         /* ah6_output doesn't modify mbuf chain */
452                         rh->ip6r_segleft = segleft_org;
453                 }
454         }
455 #endif
456
457         /*
458          * If there is a routing header, replace destination address field
459          * with the first hop of the routing header.
460          */
461         if (exthdrs.ip6e_rthdr) {
462                 struct ip6_rthdr *rh;
463
464                 finaldst = ip6->ip6_dst;
465                 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
466                 switch (rh->ip6r_type) {
467                 default:        /* is it possible? */
468                          error = EINVAL;
469                          goto bad;
470                 }
471         }
472
473         /* Source address validation */
474         if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
475             !(flags & IPV6_DADOUTPUT)) {
476                 error = EOPNOTSUPP;
477                 ip6stat.ip6s_badscope++;
478                 goto bad;
479         }
480         if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
481                 error = EOPNOTSUPP;
482                 ip6stat.ip6s_badscope++;
483                 goto bad;
484         }
485
486         ip6stat.ip6s_localout++;
487
488         /*
489          * Route packet.
490          */
491         if (ro == NULL) {
492                 ro = &ip6route;
493                 bzero(ro, sizeof(*ro));
494         }
495         ro_pmtu = ro;
496         if (opt && opt->ip6po_rthdr)
497                 ro = &opt->ip6po_route;
498         dst = (struct sockaddr_in6 *)&ro->ro_dst;
499         /*
500          * If there is a cached route,
501          * check that it is to the same destination
502          * and is still up. If not, free it and try again.
503          */
504         if (ro->ro_rt != NULL &&
505             (!(ro->ro_rt->rt_flags & RTF_UP) || dst->sin6_family != AF_INET6 ||
506              !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
507                 RTFREE(ro->ro_rt);
508                 ro->ro_rt = NULL;
509         }
510         if (ro->ro_rt == NULL) {
511                 bzero(dst, sizeof(*dst));
512                 dst->sin6_family = AF_INET6;
513                 dst->sin6_len = sizeof(struct sockaddr_in6);
514                 dst->sin6_addr = ip6->ip6_dst;
515         }
516 #if defined(IPSEC) || defined(FAST_IPSEC)
517         if (needipsec && needipsectun) {
518                 struct ipsec_output_state state;
519
520                 /*
521                  * All the extension headers will become inaccessible
522                  * (since they can be encrypted).
523                  * Don't panic, we need no more updates to extension headers
524                  * on inner IPv6 packet (since they are now encapsulated).
525                  *
526                  * IPv6 [ESP|AH] IPv6 [extension headers] payload
527                  */
528                 bzero(&exthdrs, sizeof(exthdrs));
529                 exthdrs.ip6e_ip6 = m;
530
531                 bzero(&state, sizeof(state));
532                 state.m = m;
533                 state.ro = (struct route *)ro;
534                 state.dst = (struct sockaddr *)dst;
535
536                 error = ipsec6_output_tunnel(&state, sp, flags);
537
538                 m = state.m;
539                 ro = (struct route_in6 *)state.ro;
540                 dst = (struct sockaddr_in6 *)state.dst;
541                 if (error) {
542                         /* mbuf is already reclaimed in ipsec6_output_tunnel. */
543                         m0 = m = NULL;
544                         m = NULL;
545                         switch (error) {
546                         case EHOSTUNREACH:
547                         case ENETUNREACH:
548                         case EMSGSIZE:
549                         case ENOBUFS:
550                         case ENOMEM:
551                                 break;
552                         default:
553                                 kprintf("ip6_output (ipsec): error code %d\n", error);
554                                 /* fall through */
555                         case ENOENT:
556                                 /* don't show these error codes to the user */
557                                 error = 0;
558                                 break;
559                         }
560                         goto bad;
561                 }
562
563                 exthdrs.ip6e_ip6 = m;
564         }
565 #endif
566
567         if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
568                 /* Unicast */
569
570 #define ifatoia6(ifa)   ((struct in6_ifaddr *)(ifa))
571 #define sin6tosa(sin6)  ((struct sockaddr *)(sin6))
572                 /* xxx
573                  * interface selection comes here
574                  * if an interface is specified from an upper layer,
575                  * ifp must point it.
576                  */
577                 if (ro->ro_rt == NULL) {
578                         /*
579                          * non-bsdi always clone routes, if parent is
580                          * PRF_CLONING.
581                          */
582                         rtalloc((struct route *)ro);
583                 }
584                 if (ro->ro_rt == NULL) {
585                         ip6stat.ip6s_noroute++;
586                         error = EHOSTUNREACH;
587                         /* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */
588                         goto bad;
589                 }
590                 ia = ifatoia6(ro->ro_rt->rt_ifa);
591                 ifp = ro->ro_rt->rt_ifp;
592                 ro->ro_rt->rt_use++;
593                 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
594                         dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway;
595                 m->m_flags &= ~(M_BCAST | M_MCAST);     /* just in case */
596
597                 in6_ifstat_inc(ifp, ifs6_out_request);
598
599                 /*
600                  * Check if the outgoing interface conflicts with
601                  * the interface specified by ifi6_ifindex (if specified).
602                  * Note that loopback interface is always okay.
603                  * (this may happen when we are sending a packet to one of
604                  *  our own addresses.)
605                  */
606                 if (opt && opt->ip6po_pktinfo
607                  && opt->ip6po_pktinfo->ipi6_ifindex) {
608                         if (!(ifp->if_flags & IFF_LOOPBACK)
609                          && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) {
610                                 ip6stat.ip6s_noroute++;
611                                 in6_ifstat_inc(ifp, ifs6_out_discard);
612                                 error = EHOSTUNREACH;
613                                 goto bad;
614                         }
615                 }
616
617                 if (opt && opt->ip6po_hlim != -1)
618                         ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
619         } else {
620                 /* Multicast */
621                 struct  in6_multi *in6m;
622
623                 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
624
625                 /*
626                  * See if the caller provided any multicast options
627                  */
628                 ifp = NULL;
629                 if (im6o != NULL) {
630                         ip6->ip6_hlim = im6o->im6o_multicast_hlim;
631                         if (im6o->im6o_multicast_ifp != NULL)
632                                 ifp = im6o->im6o_multicast_ifp;
633                 } else
634                         ip6->ip6_hlim = ip6_defmcasthlim;
635
636                 /*
637                  * See if the caller provided the outgoing interface
638                  * as an ancillary data.
639                  * Boundary check for ifindex is assumed to be already done.
640                  */
641                 if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex)
642                         ifp = ifindex2ifnet[opt->ip6po_pktinfo->ipi6_ifindex];
643
644                 /*
645                  * If the destination is a node-local scope multicast,
646                  * the packet should be loop-backed only.
647                  */
648                 if (IN6_IS_ADDR_MC_NODELOCAL(&ip6->ip6_dst)) {
649                         /*
650                          * If the outgoing interface is already specified,
651                          * it should be a loopback interface.
652                          */
653                         if (ifp && !(ifp->if_flags & IFF_LOOPBACK)) {
654                                 ip6stat.ip6s_badscope++;
655                                 error = ENETUNREACH; /* XXX: better error? */
656                                 /* XXX correct ifp? */
657                                 in6_ifstat_inc(ifp, ifs6_out_discard);
658                                 goto bad;
659                         } else {
660                                 ifp = &loif[0];
661                         }
662                 }
663
664                 if (opt && opt->ip6po_hlim != -1)
665                         ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
666
667                 /*
668                  * If caller did not provide an interface lookup a
669                  * default in the routing table.  This is either a
670                  * default for the specified group (i.e. a host
671                  * route), or a multicast default (a route for the
672                  * ``net'' ff00::/8).
673                  */
674                 if (ifp == NULL) {
675                         if (ro->ro_rt == NULL) {
676                                 ro->ro_rt =
677                                   rtpurelookup((struct sockaddr *)&ro->ro_dst);
678                         }
679                         if (ro->ro_rt == NULL) {
680                                 ip6stat.ip6s_noroute++;
681                                 error = EHOSTUNREACH;
682                                 /* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */
683                                 goto bad;
684                         }
685                         ia = ifatoia6(ro->ro_rt->rt_ifa);
686                         ifp = ro->ro_rt->rt_ifp;
687                         ro->ro_rt->rt_use++;
688                 }
689
690                 if (!(flags & IPV6_FORWARDING))
691                         in6_ifstat_inc(ifp, ifs6_out_request);
692                 in6_ifstat_inc(ifp, ifs6_out_mcast);
693
694                 /*
695                  * Confirm that the outgoing interface supports multicast.
696                  */
697                 if (!(ifp->if_flags & IFF_MULTICAST)) {
698                         ip6stat.ip6s_noroute++;
699                         in6_ifstat_inc(ifp, ifs6_out_discard);
700                         error = ENETUNREACH;
701                         goto bad;
702                 }
703                 IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
704                 if (in6m != NULL &&
705                    (im6o == NULL || im6o->im6o_multicast_loop)) {
706                         /*
707                          * If we belong to the destination multicast group
708                          * on the outgoing interface, and the caller did not
709                          * forbid loopback, loop back a copy.
710                          */
711                         ip6_mloopback(ifp, m, dst);
712                 } else {
713                         /*
714                          * If we are acting as a multicast router, perform
715                          * multicast forwarding as if the packet had just
716                          * arrived on the interface to which we are about
717                          * to send.  The multicast forwarding function
718                          * recursively calls this function, using the
719                          * IPV6_FORWARDING flag to prevent infinite recursion.
720                          *
721                          * Multicasts that are looped back by ip6_mloopback(),
722                          * above, will be forwarded by the ip6_input() routine,
723                          * if necessary.
724                          */
725                         if (ip6_mrouter && !(flags & IPV6_FORWARDING)) {
726                                 if (ip6_mforward(ip6, ifp, m) != 0) {
727                                         m_freem(m);
728                                         goto done;
729                                 }
730                         }
731                 }
732                 /*
733                  * Multicasts with a hoplimit of zero may be looped back,
734                  * above, but must not be transmitted on a network.
735                  * Also, multicasts addressed to the loopback interface
736                  * are not sent -- the above call to ip6_mloopback() will
737                  * loop back a copy if this host actually belongs to the
738                  * destination group on the loopback interface.
739                  */
740                 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK)) {
741                         m_freem(m);
742                         goto done;
743                 }
744         }
745
746         /*
747          * Fill the outgoing interface to tell the upper layer
748          * to increment per-interface statistics.
749          */
750         if (ifpp)
751                 *ifpp = ifp;
752
753         /* Determine path MTU. */
754         if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
755             &alwaysfrag)) != 0)
756                 goto bad;
757
758         /*
759          * The caller of this function may specify to use the minimum MTU
760          * in some cases.
761          * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
762          * setting.  The logic is a bit complicated; by default, unicast
763          * packets will follow path MTU while multicast packets will be sent at
764          * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
765          * including unicast ones will be sent at the minimum MTU.  Multicast
766          * packets will always be sent at the minimum MTU unless
767          * IP6PO_MINMTU_DISABLE is explicitly specified.
768          * See RFC 3542 for more details.
769          */
770         if (mtu > IPV6_MMTU) {
771                 if ((flags & IPV6_MINMTU))
772                         mtu = IPV6_MMTU;
773                 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
774                         mtu = IPV6_MMTU;
775                 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
776                          (opt == NULL ||
777                           opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
778                         mtu = IPV6_MMTU;
779                 }
780         }
781
782         /* Fake scoped addresses */
783         if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
784                 /*
785                  * If source or destination address is a scoped address, and
786                  * the packet is going to be sent to a loopback interface,
787                  * we should keep the original interface.
788                  */
789
790                 /*
791                  * XXX: this is a very experimental and temporary solution.
792                  * We eventually have sockaddr_in6 and use the sin6_scope_id
793                  * field of the structure here.
794                  * We rely on the consistency between two scope zone ids
795                  * of source and destination, which should already be assured.
796                  * Larger scopes than link will be supported in the future.
797                  */
798                 origifp = NULL;
799                 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
800                         origifp = ifindex2ifnet[ntohs(ip6->ip6_src.s6_addr16[1])];
801                 else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
802                         origifp = ifindex2ifnet[ntohs(ip6->ip6_dst.s6_addr16[1])];
803                 /*
804                  * XXX: origifp can be NULL even in those two cases above.
805                  * For example, if we remove the (only) link-local address
806                  * from the loopback interface, and try to send a link-local
807                  * address without link-id information.  Then the source
808                  * address is ::1, and the destination address is the
809                  * link-local address with its s6_addr16[1] being zero.
810                  * What is worse, if the packet goes to the loopback interface
811                  * by a default rejected route, the null pointer would be
812                  * passed to looutput, and the kernel would hang.
813                  * The following last resort would prevent such disaster.
814                  */
815                 if (origifp == NULL)
816                         origifp = ifp;
817         }
818         else
819                 origifp = ifp;
820         /*
821          * clear embedded scope identifiers if necessary.
822          * in6_clearscope will touch the addresses only when necessary.
823          */
824         in6_clearscope(&ip6->ip6_src);
825         in6_clearscope(&ip6->ip6_dst);
826
827         /*
828          * Check with the firewall...
829          */
830         if (ip6_fw_enable && ip6_fw_chk_ptr) {
831                 u_short port = 0;
832
833                 m->m_pkthdr.rcvif = NULL;       /* XXX */
834                 /* If ipfw says divert, we have to just drop packet */
835                 if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) {
836                         m_freem(m);
837                         goto done;
838                 }
839                 if (!m) {
840                         error = EACCES;
841                         goto done;
842                 }
843         }
844
845         /*
846          * If the outgoing packet contains a hop-by-hop options header,
847          * it must be examined and processed even by the source node.
848          * (RFC 2460, section 4.)
849          */
850         if (exthdrs.ip6e_hbh) {
851                 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
852                 u_int32_t dummy1; /* XXX unused */
853                 u_int32_t dummy2; /* XXX unused */
854
855 #ifdef DIAGNOSTIC
856                 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
857                         panic("ip6e_hbh is not continuous");
858 #endif
859                 /*
860                  *  XXX: if we have to send an ICMPv6 error to the sender,
861                  *       we need the M_LOOP flag since icmp6_error() expects
862                  *       the IPv6 and the hop-by-hop options header are
863                  *       continuous unless the flag is set.
864                  */
865                 m->m_flags |= M_LOOP;
866                 m->m_pkthdr.rcvif = ifp;
867                 if (ip6_process_hopopts(m,
868                                         (u_int8_t *)(hbh + 1),
869                                         ((hbh->ip6h_len + 1) << 3) -
870                                         sizeof(struct ip6_hbh),
871                                         &dummy1, &dummy2) < 0) {
872                         /* m was already freed at this point */
873                         error = EINVAL;/* better error? */
874                         goto done;
875                 }
876                 m->m_flags &= ~M_LOOP; /* XXX */
877                 m->m_pkthdr.rcvif = NULL;
878         }
879
880         /*
881          * Run through list of hooks for output packets.
882          */
883         if (pfil_has_hooks(&inet6_pfil_hook)) {
884                 error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT);
885                 if (error != 0 || m == NULL)
886                         goto done;
887                 ip6 = mtod(m, struct ip6_hdr *);
888         }
889
890         /*
891          * Send the packet to the outgoing interface.
892          * If necessary, do IPv6 fragmentation before sending.
893          *
894          * the logic here is rather complex:
895          * 1: normal case (dontfrag == 0, alwaysfrag == 0)
896          * 1-a: send as is if tlen <= path mtu
897          * 1-b: fragment if tlen > path mtu
898          *
899          * 2: if user asks us not to fragment (dontfrag == 1)
900          * 2-a: send as is if tlen <= interface mtu
901          * 2-b: error if tlen > interface mtu
902          *
903          * 3: if we always need to attach fragment header (alwaysfrag == 1)
904          *      always fragment
905          *
906          * 4: if dontfrag == 1 && alwaysfrag == 1
907          *      error, as we cannot handle this conflicting request
908          */
909         tlen = m->m_pkthdr.len;
910
911         if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
912                 dontfrag = 1;
913         else
914                 dontfrag = 0;
915         if (dontfrag && alwaysfrag) {   /* case 4 */
916                 /* conflicting request - can't transmit */
917                 error = EMSGSIZE;
918                 goto bad;
919         }
920         if (dontfrag && tlen > IN6_LINKMTU(ifp)) {      /* case 2-b */
921                 /*
922                  * Even if the DONTFRAG option is specified, we cannot send the
923                  * packet when the data length is larger than the MTU of the
924                  * outgoing interface.
925                  * Notify the error by sending IPV6_PATHMTU ancillary data as
926                  * well as returning an error code (the latter is not described
927                  * in the API spec.)
928                  */
929                 u_int32_t mtu32;
930                 struct ip6ctlparam ip6cp;
931
932                 mtu32 = (u_int32_t)mtu;
933                 bzero(&ip6cp, sizeof(ip6cp));
934                 ip6cp.ip6c_cmdarg = (void *)&mtu32;
935                 kpfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
936                     (void *)&ip6cp);
937
938                 error = EMSGSIZE;
939                 goto bad;
940         }
941
942         /*
943          * transmit packet without fragmentation
944          */
945         if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */
946                 struct in6_ifaddr *ia6;
947
948                 ip6 = mtod(m, struct ip6_hdr *);
949                 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
950                 if (ia6) {
951                         /* Record statistics for this interface address. */
952                         ia6->ia_ifa.if_opackets++;
953                         ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
954                 }
955 #ifdef IPSEC
956                 /* clean ipsec history once it goes out of the node */
957                 ipsec_delaux(m);
958 #endif
959                 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
960                 goto done;
961         } 
962
963         /*
964          * try to fragment the packet.  case 1-b and 3
965          */
966         if (mtu < IPV6_MMTU) {
967                 /*
968                  * note that path MTU is never less than IPV6_MMTU
969                  * (see icmp6_input).
970                  */
971                 error = EMSGSIZE;
972                 in6_ifstat_inc(ifp, ifs6_out_fragfail);
973                 goto bad;
974         } else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */
975                 error = EMSGSIZE;
976                 in6_ifstat_inc(ifp, ifs6_out_fragfail);
977                 goto bad;
978         } else {
979                 struct mbuf **mnext, *m_frgpart;
980                 struct ip6_frag *ip6f;
981                 u_int32_t id = htonl(ip6_id++);
982                 int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
983                 u_char nextproto;
984
985                 /*
986                  * Too large for the destination or interface;
987                  * fragment if possible.
988                  * Must be able to put at least 8 bytes per fragment.
989                  */
990                 hlen = unfragpartlen;
991                 if (mtu > IPV6_MAXPACKET)
992                         mtu = IPV6_MAXPACKET;
993
994                 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
995                 if (len < 8) {
996                         error = EMSGSIZE;
997                         in6_ifstat_inc(ifp, ifs6_out_fragfail);
998                         goto bad;
999                 }
1000
1001                 /*
1002                  * Verify that we have any chance at all of being able to queue
1003                  *      the packet or packet fragments
1004                  */
1005                 if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
1006                     < tlen  /* - hlen */)) {
1007                         error = ENOBUFS;
1008                         ip6stat.ip6s_odropped++;
1009                         goto bad;
1010                 }
1011
1012                 mnext = &m->m_nextpkt;
1013
1014                 /*
1015                  * Change the next header field of the last header in the
1016                  * unfragmentable part.
1017                  */
1018                 if (exthdrs.ip6e_rthdr) {
1019                         nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1020                         *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1021                 } else if (exthdrs.ip6e_dest1) {
1022                         nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1023                         *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1024                 } else if (exthdrs.ip6e_hbh) {
1025                         nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1026                         *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1027                 } else {
1028                         nextproto = ip6->ip6_nxt;
1029                         ip6->ip6_nxt = IPPROTO_FRAGMENT;
1030                 }
1031
1032                 /*
1033                  * Loop through length of segment after first fragment,
1034                  * make new header and copy data of each part and link onto
1035                  * chain.
1036                  */
1037                 m0 = m;
1038                 for (off = hlen; off < tlen; off += len) {
1039                         MGETHDR(m, MB_DONTWAIT, MT_HEADER);
1040                         if (!m) {
1041                                 error = ENOBUFS;
1042                                 ip6stat.ip6s_odropped++;
1043                                 goto sendorfree;
1044                         }
1045                         m->m_pkthdr.rcvif = NULL;
1046                         m->m_flags = m0->m_flags & M_COPYFLAGS;
1047                         *mnext = m;
1048                         mnext = &m->m_nextpkt;
1049                         m->m_data += max_linkhdr;
1050                         mhip6 = mtod(m, struct ip6_hdr *);
1051                         *mhip6 = *ip6;
1052                         m->m_len = sizeof(*mhip6);
1053                         error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1054                         if (error) {
1055                                 ip6stat.ip6s_odropped++;
1056                                 goto sendorfree;
1057                         }
1058                         ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1059                         if (off + len >= tlen)
1060                                 len = tlen - off;
1061                         else
1062                                 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1063                         mhip6->ip6_plen = htons((u_short)(len + hlen +
1064                                                           sizeof(*ip6f) -
1065                                                           sizeof(struct ip6_hdr)));
1066                         if ((m_frgpart = m_copy(m0, off, len)) == NULL) {
1067                                 error = ENOBUFS;
1068                                 ip6stat.ip6s_odropped++;
1069                                 goto sendorfree;
1070                         }
1071                         m_cat(m, m_frgpart);
1072                         m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1073                         m->m_pkthdr.rcvif = NULL;
1074                         ip6f->ip6f_reserved = 0;
1075                         ip6f->ip6f_ident = id;
1076                         ip6f->ip6f_nxt = nextproto;
1077                         ip6stat.ip6s_ofragments++;
1078                         in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1079                 }
1080
1081                 in6_ifstat_inc(ifp, ifs6_out_fragok);
1082         }
1083
1084         /*
1085          * Remove leading garbages.
1086          */
1087 sendorfree:
1088         m = m0->m_nextpkt;
1089         m0->m_nextpkt = NULL;
1090         m_freem(m0);
1091         for (m0 = m; m; m = m0) {
1092                 m0 = m->m_nextpkt;
1093                 m->m_nextpkt = NULL;
1094                 if (error == 0) {
1095                         /* Record statistics for this interface address. */
1096                         if (ia) {
1097                                 ia->ia_ifa.if_opackets++;
1098                                 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1099                         }
1100 #ifdef IPSEC
1101                         /* clean ipsec history once it goes out of the node */
1102                         ipsec_delaux(m);
1103 #endif
1104                         error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1105                 } else
1106                         m_freem(m);
1107         }
1108
1109         if (error == 0)
1110                 ip6stat.ip6s_fragmented++;
1111
1112 done:
1113         if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1114                 RTFREE(ro->ro_rt);
1115         } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1116                 RTFREE(ro_pmtu->ro_rt);
1117         }
1118
1119 #ifdef IPSEC
1120         if (sp != NULL)
1121                 key_freesp(sp);
1122 #endif
1123 #ifdef FAST_IPSEC
1124         if (sp != NULL)
1125                 KEY_FREESP(&sp);
1126 #endif
1127
1128         return (error);
1129
1130 freehdrs:
1131         m_freem(exthdrs.ip6e_hbh);      /* m_freem will check if mbuf is 0 */
1132         m_freem(exthdrs.ip6e_dest1);
1133         m_freem(exthdrs.ip6e_rthdr);
1134         m_freem(exthdrs.ip6e_dest2);
1135         /* fall through */
1136 bad:
1137         m_freem(m);
1138         goto done;
1139 }
1140
1141 static int
1142 copyexthdr(void *h, struct mbuf **mp)
1143 {
1144         struct ip6_ext *hdr = h;
1145         int hlen;
1146         struct mbuf *m;
1147
1148         if (hdr == NULL)
1149                 return 0;
1150
1151         hlen = (hdr->ip6e_len + 1) * 8;
1152         if (hlen > MCLBYTES)
1153                 return ENOBUFS; /* XXX */
1154
1155         m = m_getb(hlen, MB_DONTWAIT, MT_DATA, 0);
1156         if (!m)
1157                 return ENOBUFS;
1158         m->m_len = hlen;
1159
1160         bcopy(hdr, mtod(m, caddr_t), hlen);
1161
1162         *mp = m;
1163         return 0;
1164 }
1165
1166 /*
1167  * Insert jumbo payload option.
1168  */
1169 static int
1170 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1171 {
1172         struct mbuf *mopt;
1173         u_char *optbuf;
1174         u_int32_t v;
1175
1176 #define JUMBOOPTLEN     8       /* length of jumbo payload option and padding */
1177
1178         /*
1179          * If there is no hop-by-hop options header, allocate new one.
1180          * If there is one but it doesn't have enough space to store the
1181          * jumbo payload option, allocate a cluster to store the whole options.
1182          * Otherwise, use it to store the options.
1183          */
1184         if (exthdrs->ip6e_hbh == NULL) {
1185                 MGET(mopt, MB_DONTWAIT, MT_DATA);
1186                 if (mopt == NULL)
1187                         return (ENOBUFS);
1188                 mopt->m_len = JUMBOOPTLEN;
1189                 optbuf = mtod(mopt, u_char *);
1190                 optbuf[1] = 0;  /* = ((JUMBOOPTLEN) >> 3) - 1 */
1191                 exthdrs->ip6e_hbh = mopt;
1192         } else {
1193                 struct ip6_hbh *hbh;
1194
1195                 mopt = exthdrs->ip6e_hbh;
1196                 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1197                         /*
1198                          * XXX assumption:
1199                          * - exthdrs->ip6e_hbh is not referenced from places
1200                          *   other than exthdrs.
1201                          * - exthdrs->ip6e_hbh is not an mbuf chain.
1202                          */
1203                         int oldoptlen = mopt->m_len;
1204                         struct mbuf *n;
1205
1206                         /*
1207                          * XXX: give up if the whole (new) hbh header does
1208                          * not fit even in an mbuf cluster.
1209                          */
1210                         if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1211                                 return (ENOBUFS);
1212
1213                         /*
1214                          * As a consequence, we must always prepare a cluster
1215                          * at this point.
1216                          */
1217                         n = m_getcl(MB_DONTWAIT, MT_DATA, 0);
1218                         if (!n)
1219                                 return (ENOBUFS);
1220                         n->m_len = oldoptlen + JUMBOOPTLEN;
1221                         bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), oldoptlen);
1222                         optbuf = mtod(n, caddr_t) + oldoptlen;
1223                         m_freem(mopt);
1224                         mopt = exthdrs->ip6e_hbh = n;
1225                 } else {
1226                         optbuf = mtod(mopt, u_char *) + mopt->m_len;
1227                         mopt->m_len += JUMBOOPTLEN;
1228                 }
1229                 optbuf[0] = IP6OPT_PADN;
1230                 optbuf[1] = 1;
1231
1232                 /*
1233                  * Adjust the header length according to the pad and
1234                  * the jumbo payload option.
1235                  */
1236                 hbh = mtod(mopt, struct ip6_hbh *);
1237                 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1238         }
1239
1240         /* fill in the option. */
1241         optbuf[2] = IP6OPT_JUMBO;
1242         optbuf[3] = 4;
1243         v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1244         bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1245
1246         /* finally, adjust the packet header length */
1247         exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1248
1249         return (0);
1250 #undef JUMBOOPTLEN
1251 }
1252
1253 /*
1254  * Insert fragment header and copy unfragmentable header portions.
1255  */
1256 static int
1257 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1258                   struct ip6_frag **frghdrp)
1259 {
1260         struct mbuf *n, *mlast;
1261
1262         if (hlen > sizeof(struct ip6_hdr)) {
1263                 n = m_copym(m0, sizeof(struct ip6_hdr),
1264                             hlen - sizeof(struct ip6_hdr), MB_DONTWAIT);
1265                 if (n == NULL)
1266                         return (ENOBUFS);
1267                 m->m_next = n;
1268         } else
1269                 n = m;
1270
1271         /* Search for the last mbuf of unfragmentable part. */
1272         for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1273                 ;
1274
1275         if (!(mlast->m_flags & M_EXT) &&
1276             M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1277                 /* use the trailing space of the last mbuf for the fragment hdr */
1278                 *frghdrp =
1279                         (struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len);
1280                 mlast->m_len += sizeof(struct ip6_frag);
1281                 m->m_pkthdr.len += sizeof(struct ip6_frag);
1282         } else {
1283                 /* allocate a new mbuf for the fragment header */
1284                 struct mbuf *mfrg;
1285
1286                 MGET(mfrg, MB_DONTWAIT, MT_DATA);
1287                 if (mfrg == NULL)
1288                         return (ENOBUFS);
1289                 mfrg->m_len = sizeof(struct ip6_frag);
1290                 *frghdrp = mtod(mfrg, struct ip6_frag *);
1291                 mlast->m_next = mfrg;
1292         }
1293
1294         return (0);
1295 }
1296
1297 static int
1298 ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
1299     struct ifnet *ifp, struct in6_addr *dst, u_long *mtup,
1300     int *alwaysfragp)
1301 {
1302         u_int32_t mtu = 0;
1303         int alwaysfrag = 0;
1304         int error = 0;
1305
1306         if (ro_pmtu != ro) {
1307                 /* The first hop and the final destination may differ. */
1308                 struct sockaddr_in6 *sa6_dst =
1309                     (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1310                 if (ro_pmtu->ro_rt &&
1311                     ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1312                      !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1313                         RTFREE(ro_pmtu->ro_rt);
1314                         ro_pmtu->ro_rt = NULL;
1315                 }
1316                 if (ro_pmtu->ro_rt == NULL) {
1317                         bzero(sa6_dst, sizeof(*sa6_dst));
1318                         sa6_dst->sin6_family = AF_INET6;
1319                         sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1320                         sa6_dst->sin6_addr = *dst;
1321
1322                         rtalloc((struct route *)ro_pmtu);
1323                 }
1324         }
1325         if (ro_pmtu->ro_rt) {
1326                 u_int32_t ifmtu;
1327                 struct in_conninfo inc;
1328
1329                 bzero(&inc, sizeof(inc));
1330                 inc.inc_flags = 1; /* IPv6 */
1331                 inc.inc6_faddr = *dst;
1332
1333                 if (ifp == NULL)
1334                         ifp = ro_pmtu->ro_rt->rt_ifp;
1335                 ifmtu = IN6_LINKMTU(ifp);
1336                 mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1337                 if (mtu == 0)
1338                         mtu = ifmtu;
1339                 else if (mtu < IPV6_MMTU) {
1340                         /*
1341                          * RFC2460 section 5, last paragraph:
1342                          * if we record ICMPv6 too big message with
1343                          * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1344                          * or smaller, with framgent header attached.
1345                          * (fragment header is needed regardless from the
1346                          * packet size, for translators to identify packets)
1347                          */
1348                         alwaysfrag = 1;
1349                         mtu = IPV6_MMTU;
1350                 } else if (mtu > ifmtu) {
1351                         /*
1352                          * The MTU on the route is larger than the MTU on
1353                          * the interface!  This shouldn't happen, unless the
1354                          * MTU of the interface has been changed after the
1355                          * interface was brought up.  Change the MTU in the
1356                          * route to match the interface MTU (as long as the
1357                          * field isn't locked).
1358                          */
1359                         mtu = ifmtu;
1360                         ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1361                 }
1362         } else if (ifp) {
1363                 mtu = IN6_LINKMTU(ifp);
1364         } else
1365                 error = EHOSTUNREACH; /* XXX */
1366
1367         *mtup = mtu;
1368         if (alwaysfragp)
1369                 *alwaysfragp = alwaysfrag;
1370         return (error);
1371 }
1372
1373 /*
1374  * IP6 socket option processing.
1375  */
1376 void
1377 ip6_ctloutput_dispatch(netmsg_t msg)
1378 {
1379         int error;
1380
1381         error = ip6_ctloutput(msg->ctloutput.base.nm_so,
1382                               msg->ctloutput.nm_sopt);
1383         lwkt_replymsg(&msg->ctloutput.base.lmsg, error);
1384 }
1385
1386 int
1387 ip6_ctloutput(struct socket *so, struct sockopt *sopt)
1388 {
1389         int optdatalen,uproto;
1390         int privileged;
1391         struct inpcb *in6p = so->so_pcb;
1392         void *optdata;
1393         int error, optval;
1394         int level, op, optname;
1395         int optlen;
1396         struct thread *td;
1397
1398         if (sopt) {
1399                 level = sopt->sopt_level;
1400                 op = sopt->sopt_dir;
1401                 optname = sopt->sopt_name;
1402                 optlen = sopt->sopt_valsize;
1403                 td = sopt->sopt_td;
1404         } else {
1405                 panic("ip6_ctloutput: arg soopt is NULL");
1406                 /* NOT REACHED */
1407                 td = NULL;
1408         }
1409         error = optval = 0;
1410
1411         uproto = (int)so->so_proto->pr_protocol;
1412         privileged = (td == NULL || priv_check(td, PRIV_ROOT)) ? 0 : 1;
1413
1414         if (level == IPPROTO_IPV6) {
1415                 switch (op) {
1416
1417                 case SOPT_SET:
1418                         switch (optname) {
1419                         case IPV6_2292PKTOPTIONS:
1420 #ifdef IPV6_PKTOPTIONS
1421                         case IPV6_PKTOPTIONS:
1422 #endif
1423                         {
1424                                 struct mbuf *m;
1425
1426                                 error = soopt_getm(sopt, &m); /* XXX */
1427                                 if (error != 0)
1428                                         break;
1429                                 soopt_to_mbuf(sopt, m); /* XXX */
1430                                 error = ip6_pcbopts(&in6p->in6p_outputopts,
1431                                                     m, so, sopt);
1432                                 m_freem(m); /* XXX */
1433                                 break;
1434                         }
1435
1436                         /*
1437                          * Use of some Hop-by-Hop options or some
1438                          * Destination options, might require special
1439                          * privilege.  That is, normal applications
1440                          * (without special privilege) might be forbidden
1441                          * from setting certain options in outgoing packets,
1442                          * and might never see certain options in received
1443                          * packets. [RFC 2292 Section 6]
1444                          * KAME specific note:
1445                          *  KAME prevents non-privileged users from sending or
1446                          *  receiving ANY hbh/dst options in order to avoid
1447                          *  overhead of parsing options in the kernel.
1448                          */
1449                         case IPV6_RECVHOPOPTS:
1450                         case IPV6_RECVDSTOPTS:
1451                         case IPV6_RECVRTHDRDSTOPTS:
1452                                 if (!privileged)
1453                                         return (EPERM);
1454                         case IPV6_RECVPKTINFO:
1455                         case IPV6_RECVHOPLIMIT:
1456                         case IPV6_RECVRTHDR:
1457                         case IPV6_RECVPATHMTU:
1458                         case IPV6_RECVTCLASS:
1459                         case IPV6_AUTOFLOWLABEL:
1460                         case IPV6_HOPLIMIT:
1461                         /* FALLTHROUGH */
1462                         case IPV6_UNICAST_HOPS:
1463                         case IPV6_FAITH:
1464
1465                         case IPV6_V6ONLY:
1466                                 if (optlen != sizeof(int)) {
1467                                         error = EINVAL;
1468                                         break;
1469                                 }
1470                                 error = soopt_to_kbuf(sopt, &optval,
1471                                         sizeof optval, sizeof optval);
1472                                 if (error)
1473                                         break;
1474                                 switch (optname) {
1475
1476                                 case IPV6_UNICAST_HOPS:
1477                                         if (optval < -1 || optval >= 256)
1478                                                 error = EINVAL;
1479                                         else {
1480                                                 /* -1 = kernel default */
1481                                                 in6p->in6p_hops = optval;
1482
1483                                                 if ((in6p->in6p_vflag &
1484                                                      INP_IPV4) != 0)
1485                                                         in6p->inp_ip_ttl = optval;
1486                                         }
1487                                         break;
1488 #define OPTSET(bit) \
1489 do { \
1490         if (optval) \
1491                 in6p->in6p_flags |= (bit); \
1492         else \
1493                 in6p->in6p_flags &= ~(bit); \
1494 } while (0)
1495 #define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1496 /* 
1497  * Although changed to RFC3542, it's better to also support the RFC2292 API
1498  */
1499 #define OPTSET2292(bit) \
1500 do { \
1501         in6p->in6p_flags |= IN6P_RFC2292; \
1502         if (optval) \
1503                 in6p->in6p_flags |= (bit); \
1504         else \
1505                 in6p->in6p_flags &= ~(bit); \
1506 } while (/*CONSTCOND*/ 0)
1507
1508                                 case IPV6_RECVPKTINFO:
1509                                         /* cannot mix with RFC2292 */
1510                                         if (OPTBIT(IN6P_RFC2292)) {
1511                                                 error = EINVAL;
1512                                                 break;
1513                                         }
1514                                         OPTSET(IN6P_PKTINFO);
1515                                         break;
1516
1517                                 case IPV6_HOPLIMIT:
1518                                 {
1519                                         struct ip6_pktopts **optp;
1520
1521                                         /* cannot mix with RFC2292 */
1522                                         if (OPTBIT(IN6P_RFC2292)) {
1523                                                 error = EINVAL;
1524                                                 break;
1525                                         }
1526                                         optp = &in6p->in6p_outputopts;
1527                                         error = ip6_pcbopt(IPV6_HOPLIMIT,
1528                                             (u_char *)&optval, sizeof(optval),
1529                                             optp, uproto);
1530                                         break;
1531                                 }
1532
1533                                 case IPV6_RECVHOPLIMIT:
1534                                         /* cannot mix with RFC2292 */
1535                                         if (OPTBIT(IN6P_RFC2292)) {
1536                                                 error = EINVAL;
1537                                                 break;
1538                                         }
1539                                         OPTSET(IN6P_HOPLIMIT);
1540                                         break;
1541
1542                                 case IPV6_RECVHOPOPTS:
1543                                         /* cannot mix with RFC2292 */
1544                                         if (OPTBIT(IN6P_RFC2292)) {
1545                                                 error = EINVAL;
1546                                                 break;
1547                                         }
1548                                         OPTSET(IN6P_HOPOPTS);
1549                                         break;
1550
1551                                 case IPV6_RECVDSTOPTS:
1552                                         /* cannot mix with RFC2292 */
1553                                         if (OPTBIT(IN6P_RFC2292)) {
1554                                                 error = EINVAL;
1555                                                 break;
1556                                         }
1557                                         OPTSET(IN6P_DSTOPTS);
1558                                         break;
1559
1560                                 case IPV6_RECVRTHDRDSTOPTS:
1561                                         /* cannot mix with RFC2292 */
1562                                         if (OPTBIT(IN6P_RFC2292)) {
1563                                                 error = EINVAL;
1564                                                 break;
1565                                         }
1566                                         OPTSET(IN6P_RTHDRDSTOPTS);
1567                                         break;
1568
1569                                 case IPV6_RECVRTHDR:
1570                                         /* cannot mix with RFC2292 */
1571                                         if (OPTBIT(IN6P_RFC2292)) {
1572                                                 error = EINVAL;
1573                                                 break;
1574                                         }
1575                                         OPTSET(IN6P_RTHDR);
1576                                         break;
1577
1578                                 case IPV6_RECVPATHMTU:
1579                                         /*
1580                                          * We ignore this option for TCP
1581                                          * sockets.
1582                                          * (RFC3542 leaves this case
1583                                          * unspecified.)
1584                                          */
1585                                         if (uproto != IPPROTO_TCP)
1586                                                 OPTSET(IN6P_MTU);
1587                                         break;
1588
1589                                 case IPV6_RECVTCLASS:
1590                                         /* cannot mix with RFC2292 XXX */
1591                                         if (OPTBIT(IN6P_RFC2292)) {
1592                                                 error = EINVAL;
1593                                                 break;
1594                                         }
1595                                         OPTSET(IN6P_TCLASS);
1596                                         break;
1597
1598                                 case IPV6_AUTOFLOWLABEL:
1599                                         OPTSET(IN6P_AUTOFLOWLABEL);
1600                                         break;
1601
1602                                 case IPV6_FAITH:
1603                                         OPTSET(IN6P_FAITH);
1604                                         break;
1605
1606                                 case IPV6_V6ONLY:
1607                                         /*
1608                                          * make setsockopt(IPV6_V6ONLY)
1609                                          * available only prior to bind(2).
1610                                          */
1611                                         if (in6p->in6p_lport ||
1612                                             !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr))
1613                                         {
1614                                                 error = EINVAL;
1615                                                 break;
1616                                         }
1617                                         OPTSET(IN6P_IPV6_V6ONLY);
1618                                         if (optval)
1619                                                 in6p->in6p_vflag &= ~INP_IPV4;
1620                                         else
1621                                                 in6p->in6p_vflag |= INP_IPV4;
1622                                         break;
1623                                 }
1624                                 break;
1625
1626                         case IPV6_TCLASS:
1627                         case IPV6_DONTFRAG:
1628                         case IPV6_USE_MIN_MTU:
1629                         case IPV6_PREFER_TEMPADDR:
1630                                 if (optlen != sizeof(optval)) {
1631                                         error = EINVAL;
1632                                         break;
1633                                 }
1634                                 error = soopt_to_kbuf(sopt, &optval,
1635                                         sizeof optval, sizeof optval);
1636                                 if (error)
1637                                         break;
1638                                 {
1639                                         struct ip6_pktopts **optp;
1640                                         optp = &in6p->in6p_outputopts;
1641                                         error = ip6_pcbopt(optname,
1642                                             (u_char *)&optval, sizeof(optval),
1643                                             optp, uproto);
1644                                         break;
1645                                 }
1646
1647                         case IPV6_2292PKTINFO:
1648                         case IPV6_2292HOPLIMIT:
1649                         case IPV6_2292HOPOPTS:
1650                         case IPV6_2292DSTOPTS:
1651                         case IPV6_2292RTHDR:
1652                                 /* RFC 2292 */
1653                                 if (optlen != sizeof(int)) {
1654                                         error = EINVAL;
1655                                         break;
1656                                 }
1657                                 error = soopt_to_kbuf(sopt, &optval,
1658                                         sizeof optval, sizeof optval);
1659                                 if (error)
1660                                         break;
1661                                 switch (optname) {
1662                                 case IPV6_2292PKTINFO:
1663                                         OPTSET2292(IN6P_PKTINFO);
1664                                         break;
1665                                 case IPV6_2292HOPLIMIT:
1666                                         OPTSET2292(IN6P_HOPLIMIT);
1667                                         break;
1668                                 case IPV6_2292HOPOPTS:
1669                                         /*
1670                                          * Check super-user privilege.
1671                                          * See comments for IPV6_RECVHOPOPTS.
1672                                          */
1673                                         if (!privileged)
1674                                                 return (EPERM);
1675                                         OPTSET2292(IN6P_HOPOPTS);
1676                                         break;
1677                                 case IPV6_2292DSTOPTS:
1678                                         if (!privileged)
1679                                                 return (EPERM);
1680                                         OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1681                                         break;
1682                                 case IPV6_2292RTHDR:
1683                                         OPTSET2292(IN6P_RTHDR);
1684                                         break;
1685                                 }
1686                                 break;
1687
1688                         case IPV6_PKTINFO:
1689                         case IPV6_HOPOPTS:
1690                         case IPV6_RTHDR:
1691                         case IPV6_DSTOPTS:
1692                         case IPV6_RTHDRDSTOPTS:
1693                         case IPV6_NEXTHOP:
1694                         {
1695                                 /* 
1696                                  * New advanced API (RFC3542) 
1697                                  */
1698                                 u_char *optbuf;
1699                                 u_char optbuf_storage[MCLBYTES];
1700                                 int optlen;
1701                                 struct ip6_pktopts **optp;
1702
1703                                 /* cannot mix with RFC2292 */
1704                                 if (OPTBIT(IN6P_RFC2292)) {
1705                                         error = EINVAL;
1706                                         break;
1707                                 }
1708
1709                                 /*
1710                                  * We only ensure valsize is not too large
1711                                  * here.  Further validation will be done
1712                                  * later.
1713                                  */
1714                                 error = soopt_to_kbuf(sopt, optbuf_storage,
1715                                     sizeof(optbuf_storage), 0);
1716                                 if (error)
1717                                         break;
1718                                 optlen = sopt->sopt_valsize;
1719                                 optbuf = optbuf_storage;
1720                                 optp = &in6p->in6p_outputopts;
1721                                 error = ip6_pcbopt(optname, optbuf, optlen,
1722                                     optp, uproto);
1723                                 break;
1724                         }       
1725 #undef OPTSET
1726
1727                         case IPV6_MULTICAST_IF:
1728                         case IPV6_MULTICAST_HOPS:
1729                         case IPV6_MULTICAST_LOOP:
1730                         case IPV6_JOIN_GROUP:
1731                         case IPV6_LEAVE_GROUP:
1732                             {
1733                                 struct mbuf *m;
1734                                 if (sopt->sopt_valsize > MLEN) {
1735                                         error = EMSGSIZE;
1736                                         break;
1737                                 }
1738                                 /* XXX */
1739                                 MGET(m, sopt->sopt_td ? MB_WAIT : MB_DONTWAIT, MT_HEADER);
1740                                 if (m == NULL) {
1741                                         error = ENOBUFS;
1742                                         break;
1743                                 }
1744                                 m->m_len = sopt->sopt_valsize;
1745                                 error = soopt_to_kbuf(sopt, mtod(m, char *),
1746                                                     m->m_len, m->m_len);
1747                                 error = ip6_setmoptions(sopt->sopt_name,
1748                                                         &in6p->in6p_moptions,
1749                                                         m);
1750                                 m_free(m);
1751                             }
1752                                 break;
1753
1754                         case IPV6_PORTRANGE:
1755                                 error = soopt_to_kbuf(sopt, &optval,
1756                                     sizeof optval, sizeof optval);
1757                                 if (error)
1758                                         break;
1759
1760                                 switch (optval) {
1761                                 case IPV6_PORTRANGE_DEFAULT:
1762                                         in6p->in6p_flags &= ~(IN6P_LOWPORT);
1763                                         in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1764                                         break;
1765
1766                                 case IPV6_PORTRANGE_HIGH:
1767                                         in6p->in6p_flags &= ~(IN6P_LOWPORT);
1768                                         in6p->in6p_flags |= IN6P_HIGHPORT;
1769                                         break;
1770
1771                                 case IPV6_PORTRANGE_LOW:
1772                                         in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1773                                         in6p->in6p_flags |= IN6P_LOWPORT;
1774                                         break;
1775
1776                                 default:
1777                                         error = EINVAL;
1778                                         break;
1779                                 }
1780                                 break;
1781
1782 #if defined(IPSEC) || defined(FAST_IPSEC)
1783                         case IPV6_IPSEC_POLICY:
1784                             {
1785                                 caddr_t req = NULL;
1786                                 size_t len = 0;
1787                                 struct mbuf *m;
1788
1789                                 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1790                                         break;
1791                                 soopt_to_mbuf(sopt, m);         /* XXX */
1792                                 if (m) {
1793                                         req = mtod(m, caddr_t);
1794                                         len = m->m_len;
1795                                 }
1796                                 error = ipsec6_set_policy(in6p, optname, req,
1797                                                           len, privileged);
1798                                 m_freem(m);
1799                             }
1800                                 break;
1801 #endif /* KAME IPSEC */
1802
1803                         case IPV6_FW_ADD:
1804                         case IPV6_FW_DEL:
1805                         case IPV6_FW_FLUSH:
1806                         case IPV6_FW_ZERO:
1807                             {
1808                                 struct mbuf *m;
1809                                 struct mbuf **mp = &m;
1810
1811                                 if (ip6_fw_ctl_ptr == NULL)
1812                                         return EINVAL;
1813                                 /* XXX */
1814                                 if ((error = soopt_getm(sopt, &m)) != 0)
1815                                         break;
1816                                 /* XXX */
1817                                 soopt_to_mbuf(sopt, m);
1818                                 error = (*ip6_fw_ctl_ptr)(optname, mp);
1819                                 m = *mp;
1820                             }
1821                                 break;
1822
1823                         default:
1824                                 error = ENOPROTOOPT;
1825                                 break;
1826                         }
1827                         break;
1828
1829                 case SOPT_GET:
1830                         switch (optname) {
1831                         case IPV6_2292PKTOPTIONS:
1832 #ifdef IPV6_PKTOPTIONS
1833                         case IPV6_PKTOPTIONS:
1834 #endif
1835                                 /*
1836                                  * RFC3542 (effectively) deprecated the
1837                                  * semantics of the 2292-style pktoptions.
1838                                  * Since it was not reliable in nature (i.e.,
1839                                  * applications had to expect the lack of some
1840                                  * information after all), it would make sense
1841                                  * to simplify this part by always returning
1842                                  * empty data.
1843                                  */
1844                                 if (in6p->in6p_options) {
1845                                         struct mbuf *m;
1846                                         m = m_copym(in6p->in6p_options,
1847                                             0, M_COPYALL, MB_WAIT);
1848                                         error = soopt_from_mbuf(sopt, m);
1849                                         if (error == 0)
1850                                                 m_freem(m);
1851                                 } else
1852                                         sopt->sopt_valsize = 0;
1853                                 break;
1854                 
1855                         case IPV6_RECVHOPOPTS:
1856                         case IPV6_RECVDSTOPTS:
1857                         case IPV6_RECVRTHDRDSTOPTS:
1858                         case IPV6_UNICAST_HOPS:
1859                         case IPV6_RECVPKTINFO:
1860                         case IPV6_RECVHOPLIMIT:
1861                         case IPV6_RECVRTHDR:
1862                         case IPV6_RECVPATHMTU:
1863                         case IPV6_RECVTCLASS:
1864                         case IPV6_AUTOFLOWLABEL:
1865                         case IPV6_FAITH:
1866                         case IPV6_V6ONLY:
1867                         case IPV6_PORTRANGE:
1868                                 switch (optname) {
1869
1870                                 case IPV6_RECVHOPOPTS:
1871                                         optval = OPTBIT(IN6P_HOPOPTS);
1872                                         break;
1873
1874                                 case IPV6_RECVDSTOPTS:
1875                                         optval = OPTBIT(IN6P_DSTOPTS);
1876                                         break;
1877
1878                                 case IPV6_RECVRTHDRDSTOPTS:
1879                                         optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1880                                         break;
1881
1882                                 case IPV6_RECVPKTINFO:
1883                                         optval = OPTBIT(IN6P_PKTINFO);
1884                                         break;
1885
1886                                 case IPV6_RECVHOPLIMIT:
1887                                         optval = OPTBIT(IN6P_HOPLIMIT);
1888                                         break;
1889
1890                                 case IPV6_RECVRTHDR:
1891                                         optval = OPTBIT(IN6P_RTHDR);
1892                                         break;
1893
1894                                 case IPV6_RECVPATHMTU:
1895                                         optval = OPTBIT(IN6P_MTU);
1896                                         break;
1897
1898                                 case IPV6_RECVTCLASS:
1899                                         optval = OPTBIT(IN6P_TCLASS);
1900                                         break;
1901
1902                                 case IPV6_AUTOFLOWLABEL:
1903                                         optval = OPTBIT(IN6P_AUTOFLOWLABEL);
1904                                         break;
1905
1906
1907                                 case IPV6_UNICAST_HOPS:
1908                                         optval = in6p->in6p_hops;
1909                                         break;
1910
1911                                 case IPV6_FAITH:
1912                                         optval = OPTBIT(IN6P_FAITH);
1913                                         break;
1914
1915                                 case IPV6_V6ONLY:
1916                                         optval = OPTBIT(IN6P_IPV6_V6ONLY);
1917                                         break;
1918
1919                                 case IPV6_PORTRANGE:
1920                                     {
1921                                         int flags;
1922                                         flags = in6p->in6p_flags;
1923                                         if (flags & IN6P_HIGHPORT)
1924                                                 optval = IPV6_PORTRANGE_HIGH;
1925                                         else if (flags & IN6P_LOWPORT)
1926                                                 optval = IPV6_PORTRANGE_LOW;
1927                                         else
1928                                                 optval = 0;
1929                                         break;
1930                                     }
1931                                 }
1932                                 soopt_from_kbuf(sopt, &optval,
1933                                         sizeof optval);
1934                                 break;
1935
1936                         case IPV6_PATHMTU:
1937                         {
1938                                 u_long pmtu = 0;
1939                                 struct ip6_mtuinfo mtuinfo;
1940                                 struct route_in6 sro;
1941
1942                                 bzero(&sro, sizeof(sro));
1943
1944                                 if (!(so->so_state & SS_ISCONNECTED))
1945                                         return (ENOTCONN);
1946                                 /*
1947                                  * XXX: we do not consider the case of source
1948                                  * routing, or optional information to specify
1949                                  * the outgoing interface.
1950                                  */
1951                                 error = ip6_getpmtu(&sro, NULL, NULL,
1952                                     &in6p->in6p_faddr, &pmtu, NULL);
1953                                 if (sro.ro_rt)
1954                                         RTFREE(sro.ro_rt);
1955                                 if (error)
1956                                         break;
1957                                 if (pmtu > IPV6_MAXPACKET)
1958                                         pmtu = IPV6_MAXPACKET;
1959
1960                                 bzero(&mtuinfo, sizeof(mtuinfo));
1961                                 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
1962                                 optdata = (void *)&mtuinfo;
1963                                 optdatalen = sizeof(mtuinfo);
1964                                 soopt_from_kbuf(sopt, optdata,
1965                                     optdatalen);
1966                                 break;
1967                         }
1968
1969                         case IPV6_2292PKTINFO:
1970                         case IPV6_2292HOPLIMIT:
1971                         case IPV6_2292HOPOPTS:
1972                         case IPV6_2292RTHDR:
1973                         case IPV6_2292DSTOPTS:
1974                                 if (optname == IPV6_2292HOPOPTS ||
1975                                     optname == IPV6_2292DSTOPTS ||
1976                                     !privileged)
1977                                         return (EPERM);
1978                                 switch (optname) {
1979                                 case IPV6_2292PKTINFO:
1980                                         optval = OPTBIT(IN6P_PKTINFO);
1981                                         break;
1982                                 case IPV6_2292HOPLIMIT:
1983                                         optval = OPTBIT(IN6P_HOPLIMIT);
1984                                         break;
1985                                 case IPV6_2292HOPOPTS:
1986                                         if (!privileged)
1987                                                 return (EPERM);
1988                                         optval = OPTBIT(IN6P_HOPOPTS);
1989                                         break;
1990                                 case IPV6_2292RTHDR:
1991                                         optval = OPTBIT(IN6P_RTHDR);
1992                                         break;
1993                                 case IPV6_2292DSTOPTS:
1994                                         if (!privileged)
1995                                                 return (EPERM);
1996                                         optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
1997                                         break;
1998                                 }
1999                                 soopt_from_kbuf(sopt, &optval,
2000                                         sizeof optval);
2001                                 break;
2002
2003                         case IPV6_PKTINFO:
2004                         case IPV6_HOPOPTS:
2005                         case IPV6_RTHDR:
2006                         case IPV6_DSTOPTS:
2007                         case IPV6_RTHDRDSTOPTS:
2008                         case IPV6_NEXTHOP:
2009                         case IPV6_TCLASS:
2010                         case IPV6_DONTFRAG:
2011                         case IPV6_USE_MIN_MTU:
2012                         case IPV6_PREFER_TEMPADDR:
2013                                 error = ip6_getpcbopt(in6p->in6p_outputopts,
2014                                     optname, sopt);
2015                                 break;
2016
2017                         case IPV6_MULTICAST_IF:
2018                         case IPV6_MULTICAST_HOPS:
2019                         case IPV6_MULTICAST_LOOP:
2020                         case IPV6_JOIN_GROUP:
2021                         case IPV6_LEAVE_GROUP:
2022                             {
2023                                 struct mbuf *m;
2024                                 error = ip6_getmoptions(sopt->sopt_name,
2025                                                 in6p->in6p_moptions, &m);
2026                                 if (error == 0)
2027                                         soopt_from_kbuf(sopt,
2028                                                 mtod(m, char *), m->m_len);
2029                                 m_freem(m);
2030                             }
2031                                 break;
2032
2033 #if defined(IPSEC) || defined(FAST_IPSEC)
2034                         case IPV6_IPSEC_POLICY:
2035                           {
2036                                 caddr_t req = NULL;
2037                                 size_t len = 0;
2038                                 struct mbuf *m = NULL;
2039                                 struct mbuf **mp = &m;
2040
2041                                 error = soopt_getm(sopt, &m); /* XXX */
2042                                 if (error != 0)
2043                                         break;
2044                                 soopt_to_mbuf(sopt, m); /* XXX */
2045                                 if (m) {
2046                                         req = mtod(m, caddr_t);
2047                                         len = m->m_len;
2048                                 }
2049                                 error = ipsec6_get_policy(in6p, req, len, mp);
2050                                 if (error == 0)
2051                                         error = soopt_from_mbuf(sopt, m); /*XXX*/
2052                                 if (error == 0 && m != NULL)
2053                                         m_freem(m);
2054                                 break;
2055                           }
2056 #endif /* KAME IPSEC */
2057
2058                         case IPV6_FW_GET:
2059                           {
2060                                 struct mbuf *m;
2061                                 struct mbuf **mp = &m;
2062
2063                                 if (ip6_fw_ctl_ptr == NULL)
2064                                 {
2065                                         return EINVAL;
2066                                 }
2067                                 error = (*ip6_fw_ctl_ptr)(optname, mp);
2068                                 if (error == 0)
2069                                         error = soopt_from_mbuf(sopt, m); /* XXX */
2070                                 if (error == 0 && m != NULL)
2071                                         m_freem(m);
2072                           }
2073                                 break;
2074
2075                         default:
2076                                 error = ENOPROTOOPT;
2077                                 break;
2078                         }
2079                         break;
2080                 }
2081         } else {
2082                 error = EINVAL;
2083         }
2084         return (error);
2085 }
2086
2087 int
2088 ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
2089 {
2090         int error = 0, optval, optlen;
2091         const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2092         struct in6pcb *in6p = sotoin6pcb(so);
2093         int level, op, optname;
2094
2095         if (sopt) {
2096                 level = sopt->sopt_level;
2097                 op = sopt->sopt_dir;
2098                 optname = sopt->sopt_name;
2099                 optlen = sopt->sopt_valsize;
2100         } else
2101                 panic("ip6_raw_ctloutput: arg soopt is NULL");
2102
2103         if (level != IPPROTO_IPV6) {
2104                 return (EINVAL);
2105         }
2106
2107         switch (optname) {
2108         case IPV6_CHECKSUM:
2109                 /*
2110                  * For ICMPv6 sockets, no modification allowed for checksum
2111                  * offset, permit "no change" values to help existing apps.
2112                  *
2113                  * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2114                  * for an ICMPv6 socket will fail."
2115                  * The current behavior does not meet RFC3542.
2116                  */
2117                 switch (op) {
2118                 case SOPT_SET:
2119                         if (optlen != sizeof(int)) {
2120                                 error = EINVAL;
2121                                 break;
2122                         }
2123                         error = soopt_to_kbuf(sopt, &optval,
2124                                     sizeof optval, sizeof optval);
2125                         if (error)
2126                                 break;
2127                         if ((optval % 2) != 0) {
2128                                 /* the API assumes even offset values */
2129                                 error = EINVAL;
2130                         } else if (so->so_proto->pr_protocol ==
2131                             IPPROTO_ICMPV6) {
2132                                 if (optval != icmp6off)
2133                                         error = EINVAL;
2134                         } else
2135                                 in6p->in6p_cksum = optval;
2136                         break;
2137
2138                 case SOPT_GET:
2139                         if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2140                                 optval = icmp6off;
2141                         else
2142                                 optval = in6p->in6p_cksum;
2143
2144                         soopt_from_kbuf(sopt, &optval, sizeof(optval));
2145                         break;
2146
2147                 default:
2148                         error = EINVAL;
2149                         break;
2150                 }
2151                 break;
2152
2153         default:
2154                 error = ENOPROTOOPT;
2155                 break;
2156         }
2157
2158         return (error);
2159 }
2160
2161 /*
2162  * Set up IP6 options in pcb for insertion in output packets or
2163  * specifying behavior of outgoing packets.
2164  */
2165 static int
2166 ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m,
2167     struct socket *so, struct sockopt *sopt)
2168 {
2169         int priv = 0;
2170         struct ip6_pktopts *opt = *pktopt;
2171         int error = 0;
2172
2173         /* turn off any old options. */
2174         if (opt) {
2175 #ifdef DIAGNOSTIC
2176                 if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2177                     opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2178                     opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2179                         kprintf("ip6_pcbopts: all specified options are cleared.\n");
2180 #endif
2181                 ip6_clearpktopts(opt, -1);
2182         } else
2183                 opt = kmalloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2184         *pktopt = NULL;
2185
2186         if (!m || m->m_len == 0) {
2187                 /*
2188                  * Only turning off any previous options, regardless of
2189                  * whether the opt is just created or given.
2190                  */
2191                 kfree(opt, M_IP6OPT);
2192                 return (0);
2193         }
2194
2195         /*  set options specified by user. */
2196         if ((error = ip6_setpktoptions(m, opt, NULL, so->so_proto->pr_protocol, priv)) != 0) {
2197                 ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2198                 kfree(opt, M_IP6OPT);
2199                 return (error);
2200         }
2201         *pktopt = opt;
2202         return (0);
2203 }
2204
2205
/*
 * The following three functions were introduced by the merge to RFC3542.
 */
2209
2210 static int
2211 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2212 {
2213         void *optdata = NULL;
2214         int optdatalen = 0;
2215         struct ip6_ext *ip6e;
2216         int error = 0;
2217         struct in6_pktinfo null_pktinfo;
2218         int deftclass = 0, on;
2219         int defminmtu = IP6PO_MINMTU_MCASTONLY;
2220         int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2221
2222         switch (optname) {
2223         case IPV6_PKTINFO:
2224                 if (pktopt && pktopt->ip6po_pktinfo)
2225                         optdata = (void *)pktopt->ip6po_pktinfo;
2226                 else {
2227                         /* XXX: we don't have to do this every time... */
2228                         bzero(&null_pktinfo, sizeof(null_pktinfo));
2229                         optdata = (void *)&null_pktinfo;
2230                 }
2231                 optdatalen = sizeof(struct in6_pktinfo);
2232                 break;
2233         case IPV6_TCLASS:
2234                 if (pktopt && pktopt->ip6po_tclass >= 0)
2235                         optdata = (void *)&pktopt->ip6po_tclass;
2236                 else
2237                         optdata = (void *)&deftclass;
2238                 optdatalen = sizeof(int);
2239                 break;
2240         case IPV6_HOPOPTS:
2241                 if (pktopt && pktopt->ip6po_hbh) {
2242                         optdata = (void *)pktopt->ip6po_hbh;
2243                         ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2244                         optdatalen = (ip6e->ip6e_len + 1) << 3;
2245                 }
2246                 break;
2247         case IPV6_RTHDR:
2248                 if (pktopt && pktopt->ip6po_rthdr) {
2249                         optdata = (void *)pktopt->ip6po_rthdr;
2250                         ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2251                         optdatalen = (ip6e->ip6e_len + 1) << 3;
2252                 }
2253                 break;
2254         case IPV6_RTHDRDSTOPTS:
2255                 if (pktopt && pktopt->ip6po_dest1) {
2256                         optdata = (void *)pktopt->ip6po_dest1;
2257                         ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2258                         optdatalen = (ip6e->ip6e_len + 1) << 3;
2259                 }
2260                 break;
2261         case IPV6_DSTOPTS:
2262                 if (pktopt && pktopt->ip6po_dest2) {
2263                         optdata = (void *)pktopt->ip6po_dest2;
2264                         ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2265                         optdatalen = (ip6e->ip6e_len + 1) << 3;
2266                 }
2267                 break;
2268         case IPV6_NEXTHOP:
2269                 if (pktopt && pktopt->ip6po_nexthop) {
2270                         optdata = (void *)pktopt->ip6po_nexthop;
2271                         optdatalen = pktopt->ip6po_nexthop->sa_len;
2272                 }
2273                 break;
2274         case IPV6_USE_MIN_MTU:
2275                 if (pktopt)
2276                         optdata = (void *)&pktopt->ip6po_minmtu;
2277                 else
2278                         optdata = (void *)&defminmtu;
2279                 optdatalen = sizeof(int);
2280                 break;
2281         case IPV6_DONTFRAG:
2282                 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2283                         on = 1;
2284                 else
2285                         on = 0;
2286                 optdata = (void *)&on;
2287                 optdatalen = sizeof(on);
2288                 break;
2289         case IPV6_PREFER_TEMPADDR:
2290                 if (pktopt)
2291                         optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2292                 else
2293                         optdata = (void *)&defpreftemp;
2294                 optdatalen = sizeof(int);
2295                 break;
2296         default:                /* should not happen */
2297 #ifdef DIAGNOSTIC
2298                 panic("ip6_getpcbopt: unexpected option\n");
2299 #endif
2300                 return (ENOPROTOOPT);
2301         }
2302
2303         soopt_from_kbuf(sopt, optdata, optdatalen);
2304
2305         return (error);
2306 }
2307
2308 /*
2309  * initialize ip6_pktopts.  beware that there are non-zero default values in
2310  * the struct.
2311  */
2312
2313 static int
2314 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, int uproto)
2315 {
2316         struct ip6_pktopts *opt;
2317         int priv =0;
2318         if (*pktopt == NULL) {
2319                 *pktopt = kmalloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
2320                 init_ip6pktopts(*pktopt);
2321         }
2322         opt = *pktopt;
2323
2324         return (ip6_setpktoption(optname, buf, len, opt, 1, 0, uproto, priv));
2325 }
2326
2327 /*
2328  * initialize ip6_pktopts.  beware that there are non-zero default values in
2329  * the struct.
2330  */
2331 void
2332 init_ip6pktopts(struct ip6_pktopts *opt)
2333 {
2334
2335         bzero(opt, sizeof(*opt));
2336         opt->ip6po_hlim = -1;   /* -1 means default hop limit */
2337         opt->ip6po_tclass = -1; /* -1 means default traffic class */
2338         opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2339         opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2340 }
2341
2342 void
2343 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2344 {
2345         if (pktopt == NULL)
2346                 return;
2347
2348         if (optname == -1 || optname == IPV6_PKTINFO) {
2349                 if (pktopt->ip6po_pktinfo)
2350                         kfree(pktopt->ip6po_pktinfo, M_IP6OPT);
2351                 pktopt->ip6po_pktinfo = NULL;
2352         }
2353         if (optname == -1 || optname == IPV6_HOPLIMIT)
2354                 pktopt->ip6po_hlim = -1;
2355         if (optname == -1 || optname == IPV6_TCLASS)
2356                 pktopt->ip6po_tclass = -1;
2357         if (optname == -1 || optname == IPV6_NEXTHOP) {
2358                 if (pktopt->ip6po_nextroute.ro_rt) {
2359                         RTFREE(pktopt->ip6po_nextroute.ro_rt);
2360                         pktopt->ip6po_nextroute.ro_rt = NULL;
2361                 }
2362                 if (pktopt->ip6po_nexthop)
2363                         kfree(pktopt->ip6po_nexthop, M_IP6OPT);
2364                 pktopt->ip6po_nexthop = NULL;
2365         }
2366         if (optname == -1 || optname == IPV6_HOPOPTS) {
2367                 if (pktopt->ip6po_hbh)
2368                         kfree(pktopt->ip6po_hbh, M_IP6OPT);
2369                 pktopt->ip6po_hbh = NULL;
2370         }
2371         if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2372                 if (pktopt->ip6po_dest1)
2373                         kfree(pktopt->ip6po_dest1, M_IP6OPT);
2374                 pktopt->ip6po_dest1 = NULL;
2375         }
2376         if (optname == -1 || optname == IPV6_RTHDR) {
2377                 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2378                         kfree(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2379                 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2380                 if (pktopt->ip6po_route.ro_rt) {
2381                         RTFREE(pktopt->ip6po_route.ro_rt);
2382                         pktopt->ip6po_route.ro_rt = NULL;
2383                 }
2384         }
2385         if (optname == -1 || optname == IPV6_DSTOPTS) {
2386                 if (pktopt->ip6po_dest2)
2387                         kfree(pktopt->ip6po_dest2, M_IP6OPT);
2388                 pktopt->ip6po_dest2 = NULL;
2389         }
2390 }
2391
/*
 * Duplicate one extension header member ("type") from src into dst.
 *
 * This macro is not self-contained: the function expanding it must have
 * variables named "src", "dst" and "canwait" in scope and must provide a
 * "bad" label, which is jumped to when kmalloc() returns NULL.  The number
 * of bytes copied is (ip6e_len + 1) * 8, taken from the source header.
 * Nothing is done when the source member is NULL.
 */
#define PKTOPT_EXTHDRCPY(type) \
do {\
        if (src->type) {\
                int hlen =\
                        (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
                dst->type = kmalloc(hlen, M_IP6OPT, canwait);\
                if (dst->type == NULL)\
                        goto bad;\
                bcopy(src->type, dst->type, hlen);\
        }\
} while (0)
2403
2404 struct ip6_pktopts *
2405 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2406 {
2407         struct ip6_pktopts *dst;
2408
2409         if (src == NULL) {
2410                 kprintf("ip6_clearpktopts: invalid argument\n");
2411                 return (NULL);
2412         }
2413
2414         dst = kmalloc(sizeof(*dst), M_IP6OPT, canwait | M_ZERO);
2415         if (dst == NULL)
2416                 return (NULL);
2417
2418         dst->ip6po_hlim = src->ip6po_hlim;
2419         if (src->ip6po_pktinfo) {
2420                 dst->ip6po_pktinfo = kmalloc(sizeof(*dst->ip6po_pktinfo),
2421                                             M_IP6OPT, canwait);
2422                 if (dst->ip6po_pktinfo == NULL)
2423                         goto bad;
2424                 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2425         }
2426         if (src->ip6po_nexthop) {
2427                 dst->ip6po_nexthop = kmalloc(src->ip6po_nexthop->sa_len,
2428                                             M_IP6OPT, canwait);
2429                 if (dst->ip6po_nexthop == NULL)
2430                         goto bad;
2431                 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2432                       src->ip6po_nexthop->sa_len);
2433         }
2434         PKTOPT_EXTHDRCPY(ip6po_hbh);
2435         PKTOPT_EXTHDRCPY(ip6po_dest1);
2436         PKTOPT_EXTHDRCPY(ip6po_dest2);
2437         PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2438         return (dst);
2439
2440 bad:
2441         if (dst->ip6po_pktinfo) kfree(dst->ip6po_pktinfo, M_IP6OPT);
2442         if (dst->ip6po_nexthop) kfree(dst->ip6po_nexthop, M_IP6OPT);
2443         if (dst->ip6po_hbh) kfree(dst->ip6po_hbh, M_IP6OPT);
2444         if (dst->ip6po_dest1) kfree(dst->ip6po_dest1, M_IP6OPT);
2445         if (dst->ip6po_dest2) kfree(dst->ip6po_dest2, M_IP6OPT);
2446         if (dst->ip6po_rthdr) kfree(dst->ip6po_rthdr, M_IP6OPT);
2447         kfree(dst, M_IP6OPT);
2448         return (NULL);
2449 }
2450
2451 static int
2452 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2453 {
2454         if (dst == NULL || src == NULL)  {
2455 #ifdef DIAGNOSTIC
2456                 kprintf("ip6_clearpktopts: invalid argument\n");
2457 #endif
2458                 return (EINVAL);
2459         }
2460
2461         dst->ip6po_hlim = src->ip6po_hlim;
2462         dst->ip6po_tclass = src->ip6po_tclass;
2463         dst->ip6po_flags = src->ip6po_flags;
2464         if (src->ip6po_pktinfo) {
2465                 dst->ip6po_pktinfo = kmalloc(sizeof(*dst->ip6po_pktinfo),
2466                     M_IP6OPT, canwait);
2467                 if (dst->ip6po_pktinfo == NULL)
2468                         goto bad;
2469                 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2470         }
2471         if (src->ip6po_nexthop) {
2472                 dst->ip6po_nexthop = kmalloc(src->ip6po_nexthop->sa_len,
2473                     M_IP6OPT, canwait);
2474                 if (dst->ip6po_nexthop == NULL)
2475                         goto bad;
2476                 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2477                     src->ip6po_nexthop->sa_len);
2478         }
2479         PKTOPT_EXTHDRCPY(ip6po_hbh);
2480         PKTOPT_EXTHDRCPY(ip6po_dest1);
2481         PKTOPT_EXTHDRCPY(ip6po_dest2);
2482         PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2483         return (0);
2484
2485   bad:
2486         ip6_clearpktopts(dst, -1);
2487         return (ENOBUFS);
2488 }
2489 #undef PKTOPT_EXTHDRCPY
2490
2491 void
2492 ip6_freepcbopts(struct ip6_pktopts *pktopt)
2493 {
2494         if (pktopt == NULL)
2495                 return;
2496
2497         ip6_clearpktopts(pktopt, -1);
2498
2499         kfree(pktopt, M_IP6OPT);
2500 }
2501
2502 /*
2503  * Set the IP6 multicast options in response to user setsockopt().
2504  */
2505 static int
2506 ip6_setmoptions(int optname, struct ip6_moptions **im6op, struct mbuf *m)
2507 {
2508         int error = 0;
2509         u_int loop, ifindex;
2510         struct ipv6_mreq *mreq;
2511         struct ifnet *ifp;
2512         struct ip6_moptions *im6o = *im6op;
2513         struct route_in6 ro;
2514         struct sockaddr_in6 *dst;
2515         struct in6_multi_mship *imm;
2516         struct thread *td = curthread;  /* XXX */
2517
2518         if (im6o == NULL) {
2519                 /*
2520                  * No multicast option buffer attached to the pcb;
2521                  * allocate one and initialize to default values.
2522                  */
2523                 im6o = (struct ip6_moptions *)
2524                         kmalloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
2525
2526                 *im6op = im6o;
2527                 im6o->im6o_multicast_ifp = NULL;
2528                 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2529                 im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2530                 LIST_INIT(&im6o->im6o_memberships);
2531         }
2532
2533         switch (optname) {
2534
2535         case IPV6_MULTICAST_IF:
2536                 /*
2537                  * Select the interface for outgoing multicast packets.
2538                  */
2539                 if (m == NULL || m->m_len != sizeof(u_int)) {
2540                         error = EINVAL;
2541                         break;
2542                 }
2543                 bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
2544                 if (ifindex < 0 || if_index < ifindex) {
2545                         error = ENXIO;  /* XXX EINVAL? */
2546                         break;
2547                 }
2548                 ifp = ifindex2ifnet[ifindex];
2549                 if (ifp == NULL || !(ifp->if_flags & IFF_MULTICAST)) {
2550                         error = EADDRNOTAVAIL;
2551                         break;
2552                 }
2553                 im6o->im6o_multicast_ifp = ifp;
2554                 break;
2555
2556         case IPV6_MULTICAST_HOPS:
2557             {
2558                 /*
2559                  * Set the IP6 hoplimit for outgoing multicast packets.
2560                  */
2561                 int optval;
2562                 if (m == NULL || m->m_len != sizeof(int)) {
2563                         error = EINVAL;
2564                         break;
2565                 }
2566                 bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2567                 if (optval < -1 || optval >= 256)
2568                         error = EINVAL;
2569                 else if (optval == -1)
2570                         im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2571                 else
2572                         im6o->im6o_multicast_hlim = optval;
2573                 break;
2574             }
2575
2576         case IPV6_MULTICAST_LOOP:
2577                 /*
2578                  * Set the loopback flag for outgoing multicast packets.
2579                  * Must be zero or one.
2580                  */
2581                 if (m == NULL || m->m_len != sizeof(u_int)) {
2582                         error = EINVAL;
2583                         break;
2584                 }
2585                 bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2586                 if (loop > 1) {
2587                         error = EINVAL;
2588                         break;
2589                 }
2590                 im6o->im6o_multicast_loop = loop;
2591                 break;
2592
2593         case IPV6_JOIN_GROUP:
2594                 /*
2595                  * Add a multicast group membership.
2596                  * Group must be a valid IP6 multicast address.
2597                  */
2598                 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2599                         error = EINVAL;
2600                         break;
2601                 }
2602                 mreq = mtod(m, struct ipv6_mreq *);
2603                 if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2604                         /*
2605                          * We use the unspecified address to specify to accept
2606                          * all multicast addresses. Only super user is allowed
2607                          * to do this.
2608                          */
2609                         if (priv_check(td, PRIV_ROOT))
2610                         {
2611                                 error = EACCES;
2612                                 break;
2613                         }
2614                 } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2615                         error = EINVAL;
2616                         break;
2617                 }
2618
2619                 /*
2620                  * If the interface is specified, validate it.
2621                  */
2622                 if (mreq->ipv6mr_interface < 0
2623                  || if_index < mreq->ipv6mr_interface) {
2624                         error = ENXIO;  /* XXX EINVAL? */
2625                         break;
2626                 }
2627                 /*
2628                  * If no interface was explicitly specified, choose an
2629                  * appropriate one according to the given multicast address.
2630                  */
2631                 if (mreq->ipv6mr_interface == 0) {
2632                         /*
2633                          * If the multicast address is in node-local scope,
2634                          * the interface should be a loopback interface.
2635                          * Otherwise, look up the routing table for the
2636                          * address, and choose the outgoing interface.
2637                          *   XXX: is it a good approach?
2638                          */
2639                         if (IN6_IS_ADDR_MC_NODELOCAL(&mreq->ipv6mr_multiaddr)) {
2640                                 ifp = &loif[0];
2641                         } else {
2642                                 ro.ro_rt = NULL;
2643                                 dst = (struct sockaddr_in6 *)&ro.ro_dst;
2644                                 bzero(dst, sizeof(*dst));
2645                                 dst->sin6_len = sizeof(struct sockaddr_in6);
2646                                 dst->sin6_family = AF_INET6;
2647                                 dst->sin6_addr = mreq->ipv6mr_multiaddr;
2648                                 rtalloc((struct route *)&ro);
2649                                 if (ro.ro_rt == NULL) {
2650                                         error = EADDRNOTAVAIL;
2651                                         break;
2652                                 }
2653                                 ifp = ro.ro_rt->rt_ifp;
2654                                 rtfree(ro.ro_rt);
2655                         }
2656                 } else
2657                         ifp = ifindex2ifnet[mreq->ipv6mr_interface];
2658
2659                 /*
2660                  * See if we found an interface, and confirm that it
2661                  * supports multicast
2662                  */
2663                 if (ifp == NULL || !(ifp->if_flags & IFF_MULTICAST)) {
2664                         error = EADDRNOTAVAIL;
2665                         break;
2666                 }
2667                 /*
2668                  * Put interface index into the multicast address,
2669                  * if the address has link-local scope.
2670                  */
2671                 if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2672                         mreq->ipv6mr_multiaddr.s6_addr16[1]
2673                                 = htons(mreq->ipv6mr_interface);
2674                 }
2675                 /*
2676                  * See if the membership already exists.
2677                  */
2678                 for (imm = im6o->im6o_memberships.lh_first;
2679                      imm != NULL; imm = imm->i6mm_chain.le_next)
2680                         if (imm->i6mm_maddr->in6m_ifp == ifp &&
2681                             IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2682                                                &mreq->ipv6mr_multiaddr))
2683                                 break;
2684                 if (imm != NULL) {
2685                         error = EADDRINUSE;
2686                         break;
2687                 }
2688                 /*
2689                  * Everything looks good; add a new record to the multicast
2690                  * address list for the given interface.
2691                  */
2692                 imm = kmalloc(sizeof(*imm), M_IPMADDR, M_WAITOK);
2693                 if ((imm->i6mm_maddr =
2694                      in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) {
2695                         kfree(imm, M_IPMADDR);
2696                         break;
2697                 }
2698                 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2699                 break;
2700
2701         case IPV6_LEAVE_GROUP:
2702                 /*
2703                  * Drop a multicast group membership.
2704                  * Group must be a valid IP6 multicast address.
2705                  */
2706                 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2707                         error = EINVAL;
2708                         break;
2709                 }
2710                 mreq = mtod(m, struct ipv6_mreq *);
2711                 if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2712                         if (priv_check(td, PRIV_ROOT)) {
2713                                 error = EACCES;
2714                                 break;
2715                         }
2716                 } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2717                         error = EINVAL;
2718                         break;
2719                 }
2720                 /*
2721                  * If an interface address was specified, get a pointer
2722                  * to its ifnet structure.
2723                  */
2724                 if (mreq->ipv6mr_interface < 0
2725                  || if_index < mreq->ipv6mr_interface) {
2726                         error = ENXIO;  /* XXX EINVAL? */
2727                         break;
2728                 }
2729                 ifp = ifindex2ifnet[mreq->ipv6mr_interface];
2730                 /*
2731                  * Put interface index into the multicast address,
2732                  * if the address has link-local scope.
2733                  */
2734                 if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
2735                         mreq->ipv6mr_multiaddr.s6_addr16[1]
2736                                 = htons(mreq->ipv6mr_interface);
2737                 }
2738                 /*
2739                  * Find the membership in the membership list.
2740                  */
2741                 for (imm = im6o->im6o_memberships.lh_first;
2742                      imm != NULL; imm = imm->i6mm_chain.le_next) {
2743                         if ((ifp == NULL ||
2744                              imm->i6mm_maddr->in6m_ifp == ifp) &&
2745                             IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2746                                                &mreq->ipv6mr_multiaddr))
2747                                 break;
2748                 }
2749                 if (imm == NULL) {
2750                         /* Unable to resolve interface */
2751                         error = EADDRNOTAVAIL;
2752                         break;
2753                 }
2754                 /*
2755                  * Give up the multicast address record to which the
2756                  * membership points.
2757                  */
2758                 LIST_REMOVE(imm, i6mm_chain);
2759                 in6_delmulti(imm->i6mm_maddr);
2760                 kfree(imm, M_IPMADDR);
2761                 break;
2762
2763         default:
2764                 error = EOPNOTSUPP;
2765                 break;
2766         }
2767
2768         /*
2769          * If all options have default values, no need to keep the mbuf.
2770          */
2771         if (im6o->im6o_multicast_ifp == NULL &&
2772             im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2773             im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2774             im6o->im6o_memberships.lh_first == NULL) {
2775                 kfree(*im6op, M_IPMOPTS);
2776                 *im6op = NULL;
2777         }
2778
2779         return (error);
2780 }
2781
2782 /*
2783  * Return the IP6 multicast options in response to user getsockopt().
2784  */
2785 static int
2786 ip6_getmoptions(int optname, struct ip6_moptions *im6o, struct mbuf **mp)
2787 {
2788         u_int *hlim, *loop, *ifindex;
2789
2790         *mp = m_get(MB_WAIT, MT_HEADER);                /* XXX */
2791
2792         switch (optname) {
2793
2794         case IPV6_MULTICAST_IF:
2795                 ifindex = mtod(*mp, u_int *);
2796                 (*mp)->m_len = sizeof(u_int);
2797                 if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2798                         *ifindex = 0;
2799                 else
2800                         *ifindex = im6o->im6o_multicast_ifp->if_index;
2801                 return (0);
2802
2803         case IPV6_MULTICAST_HOPS:
2804                 hlim = mtod(*mp, u_int *);
2805                 (*mp)->m_len = sizeof(u_int);
2806                 if (im6o == NULL)
2807                         *hlim = ip6_defmcasthlim;
2808                 else
2809                         *hlim = im6o->im6o_multicast_hlim;
2810                 return (0);
2811
2812         case IPV6_MULTICAST_LOOP:
2813                 loop = mtod(*mp, u_int *);
2814                 (*mp)->m_len = sizeof(u_int);
2815                 if (im6o == NULL)
2816                         *loop = ip6_defmcasthlim;
2817                 else
2818                         *loop = im6o->im6o_multicast_loop;
2819                 return (0);
2820
2821         default:
2822                 return (EOPNOTSUPP);
2823         }
2824 }
2825
2826 /*
2827  * Discard the IP6 multicast options.
2828  */
2829 void
2830 ip6_freemoptions(struct ip6_moptions *im6o)
2831 {
2832         struct in6_multi_mship *imm;
2833
2834         if (im6o == NULL)
2835                 return;
2836
2837         while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2838                 LIST_REMOVE(imm, i6mm_chain);
2839                 if (imm->i6mm_maddr)
2840                         in6_delmulti(imm->i6mm_maddr);
2841                 kfree(imm, M_IPMADDR);
2842         }
2843         kfree(im6o, M_IPMOPTS);
2844 }
2845
2846 /*
2847  * Set a particular packet option, as a sticky option or an ancillary data
2848  * item.  "len" can be 0 only when it's a sticky option.
2849  * We have 4 cases of combination of "sticky" and "cmsg":
2850  * "sticky=0, cmsg=0": impossible
2851  * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2852  * "sticky=1, cmsg=0": RFC3542 socket option
2853  * "sticky=1, cmsg=1": RFC2292 socket option
2854  */
2855 static int
2856 ip6_setpktoption(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2857      int sticky, int cmsg, int uproto, int priv)
2858 {
2859         int minmtupolicy, preftemp;
2860         //int error;
2861
2862         if (!sticky && !cmsg) {
2863                 kprintf("ip6_setpktoption: impossible case\n");
2864                 return (EINVAL);
2865         }
2866
2867         /*
2868          * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2869          * not be specified in the context of RFC3542.  Conversely,
2870          * RFC3542 types should not be specified in the context of RFC2292.
2871          */
2872         if (!cmsg) {
2873                 switch (optname) {
2874                 case IPV6_2292PKTINFO:
2875                 case IPV6_2292HOPLIMIT:
2876                 case IPV6_2292NEXTHOP:
2877                 case IPV6_2292HOPOPTS:
2878                 case IPV6_2292DSTOPTS:
2879                 case IPV6_2292RTHDR:
2880                 case IPV6_2292PKTOPTIONS:
2881                         return (ENOPROTOOPT);
2882                 }
2883         }
2884         if (sticky && cmsg) {
2885                 switch (optname) {
2886                 case IPV6_PKTINFO:
2887                 case IPV6_HOPLIMIT:
2888                 case IPV6_NEXTHOP:
2889                 case IPV6_HOPOPTS:
2890                 case IPV6_DSTOPTS:
2891                 case IPV6_RTHDRDSTOPTS:
2892                 case IPV6_RTHDR:
2893                 case IPV6_USE_MIN_MTU:
2894                 case IPV6_DONTFRAG:
2895                 case IPV6_TCLASS:
2896                 case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
2897                         return (ENOPROTOOPT);
2898                 }
2899         }
2900
2901         switch (optname) {
2902         case IPV6_2292PKTINFO:
2903         case IPV6_PKTINFO:
2904         {
2905                 struct in6_pktinfo *pktinfo;
2906                 if (len != sizeof(struct in6_pktinfo))
2907                         return (EINVAL);
2908                 pktinfo = (struct in6_pktinfo *)buf;
2909
2910                 /*
2911                  * An application can clear any sticky IPV6_PKTINFO option by
2912                  * doing a "regular" setsockopt with ipi6_addr being
2913                  * in6addr_any and ipi6_ifindex being zero.
2914                  * [RFC 3542, Section 6]
2915                  */
2916                 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2917                     pktinfo->ipi6_ifindex == 0 &&
2918                     IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2919                         ip6_clearpktopts(opt, optname);
2920                         break;
2921                 }
2922
2923                 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2924                     sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2925                         return (EINVAL);
2926                 }
2927
2928                 /* validate the interface index if specified. */
2929                 if (pktinfo->ipi6_ifindex > if_index ||
2930                     pktinfo->ipi6_ifindex < 0) {
2931                          return (ENXIO);
2932                 }
2933                 /*
2934                  * Check if the requested source address is indeed a
2935                  * unicast address assigned to the node, and can be
2936                  * used as the packet's source address.
2937                  */
2938                 if (opt->ip6po_pktinfo != NULL &&
2939                     !IN6_IS_ADDR_UNSPECIFIED(&opt->ip6po_pktinfo->ipi6_addr)) {
2940                         struct in6_ifaddr *ia6;
2941                         struct sockaddr_in6 sin6;
2942
2943                         bzero(&sin6, sizeof(sin6));
2944                         sin6.sin6_len = sizeof(sin6);
2945                         sin6.sin6_family = AF_INET6;
2946                         sin6.sin6_addr =
2947                         opt->ip6po_pktinfo->ipi6_addr;
2948                         ia6 = (struct in6_ifaddr *)ifa_ifwithaddr(sin6tosa(&sin6));
2949                         if (ia6 == NULL ||
2950                                 (ia6->ia6_flags & (IN6_IFF_ANYCAST |
2951                                         IN6_IFF_NOTREADY)) != 0)
2952                         return (EADDRNOTAVAIL);
2953                 }
2954
2955                 /*
2956                  * We store the address anyway, and let in6_selectsrc()
2957                  * validate the specified address.  This is because ipi6_addr
2958                  * may not have enough information about its scope zone, and
2959                  * we may need additional information (such as outgoing
2960                  * interface or the scope zone of a destination address) to
2961                  * disambiguate the scope.
2962                  * XXX: the delay of the validation may confuse the
2963                  * application when it is used as a sticky option.
2964                  */
2965                 if (opt->ip6po_pktinfo == NULL) {
2966                         opt->ip6po_pktinfo = kmalloc(sizeof(*pktinfo),
2967                             M_IP6OPT, M_NOWAIT);
2968                         if (opt->ip6po_pktinfo == NULL)
2969                                 return (ENOBUFS);
2970                 }
2971                 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
2972                 break;
2973         }
2974
2975         case IPV6_2292HOPLIMIT:
2976         case IPV6_HOPLIMIT:
2977         {
2978                 int *hlimp;
2979
2980                 /*
2981                  * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2982                  * to simplify the ordering among hoplimit options.
2983                  */
2984                 if (optname == IPV6_HOPLIMIT && sticky)
2985                         return (ENOPROTOOPT);
2986
2987                 if (len != sizeof(int))
2988                         return (EINVAL);
2989                 hlimp = (int *)buf;
2990                 if (*hlimp < -1 || *hlimp > 255)
2991                         return (EINVAL);
2992
2993                 opt->ip6po_hlim = *hlimp;
2994                 break;
2995         }
2996
2997         case IPV6_TCLASS:
2998         {
2999                 int tclass;
3000
3001                 if (len != sizeof(int))
3002                         return (EINVAL);
3003                 tclass = *(int *)buf;
3004                 if (tclass < -1 || tclass > 255)
3005                         return (EINVAL);
3006
3007                 opt->ip6po_tclass = tclass;
3008                 break;
3009         }
3010
3011         case IPV6_2292NEXTHOP:
3012         case IPV6_NEXTHOP:
3013                 if (!priv)
3014                         return (EPERM);
3015
3016                 if (len == 0) { /* just remove the option */
3017                         ip6_clearpktopts(opt, IPV6_NEXTHOP);
3018                         break;
3019                 }
3020
3021                 /* check if cmsg_len is large enough for sa_len */
3022                 if (len < sizeof(struct sockaddr) || len < *buf)
3023                         return (EINVAL);
3024
3025                 switch (((struct sockaddr *)buf)->sa_family) {
3026                 case AF_INET6:
3027                 {
3028                         struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3029                         //int error;
3030
3031                         if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3032                                 return (EINVAL);
3033
3034                         if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3035                             IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3036                                 return (EINVAL);
3037                         }
3038                         break;
3039                 }
3040                 case AF_LINK:   /* should eventually be supported */
3041                 default:
3042                         return (EAFNOSUPPORT);
3043                 }
3044
3045                 /* turn off the previous option, then set the new option. */
3046                 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3047                 opt->ip6po_nexthop = kmalloc(*buf, M_IP6OPT, M_NOWAIT);
3048                 if (opt->ip6po_nexthop == NULL)
3049                         return (ENOBUFS);
3050                 bcopy(buf, opt->ip6po_nexthop, *buf);
3051                 break;
3052
3053         case IPV6_2292HOPOPTS:
3054         case IPV6_HOPOPTS:
3055         {
3056                 struct ip6_hbh *hbh;
3057                 int hbhlen;
3058
3059                 /*
3060                  * XXX: We don't allow a non-privileged user to set ANY HbH
3061                  * options, since per-option restriction has too much
3062                  * overhead.
3063                  */
3064                 if (!priv)
3065                         return (EPERM);
3066                 if (len == 0) {
3067                         ip6_clearpktopts(opt, IPV6_HOPOPTS);
3068                         break;  /* just remove the option */
3069                 }
3070
3071                 /* message length validation */
3072                 if (len < sizeof(struct ip6_hbh))
3073                         return (EINVAL);
3074                 hbh = (struct ip6_hbh *)buf;
3075                 hbhlen = (hbh->ip6h_len + 1) << 3;
3076                 if (len != hbhlen)
3077                         return (EINVAL);
3078
3079                 /* turn off the previous option, then set the new option. */
3080                 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3081                 opt->ip6po_hbh = kmalloc(hbhlen, M_IP6OPT, M_NOWAIT);
3082                 if (opt->ip6po_hbh == NULL)
3083                         return (ENOBUFS);
3084                 bcopy(hbh, opt->ip6po_hbh, hbhlen);
3085
3086                 break;
3087         }
3088
3089         case IPV6_2292DSTOPTS:
3090         case IPV6_DSTOPTS:
3091         case IPV6_RTHDRDSTOPTS:
3092         {
3093                 struct ip6_dest *dest, **newdest = NULL;
3094                 int destlen;
3095                 if (!priv)
3096                         return (EPERM);
3097
3098                 if (len == 0) {
3099                         ip6_clearpktopts(opt, optname);
3100                         break;  /* just remove the option */
3101                 }
3102
3103                 /* message length validation */
3104                 if (len < sizeof(struct ip6_dest))
3105                         return (EINVAL);
3106                 dest = (struct ip6_dest *)buf;
3107                 destlen = (dest->ip6d_len + 1) << 3;
3108                 if (len != destlen)
3109                         return (EINVAL);
3110
3111                 /*
3112                  * Determine the position that the destination options header
3113                  * should be inserted; before or after the routing header.
3114                  */
3115                 switch (optname) {
3116                 case IPV6_2292DSTOPTS:
3117                         /*
3118                          * The old advacned API is ambiguous on this point.
3119                          * Our approach is to determine the position based
3120                          * according to the existence of a routing header.
3121                          * Note, however, that this depends on the order of the
3122                          * extension headers in the ancillary data; the 1st
3123                          * part of the destination options header must appear
3124                          * before the routing header in the ancillary data,
3125                          * too.
3126                          * RFC3542 solved the ambiguity by introducing
3127                          * separate ancillary data or option types.
3128                          */
3129                         if (opt->ip6po_rthdr == NULL)
3130                                 newdest = &opt->ip6po_dest1;
3131                         else
3132                                 newdest = &opt->ip6po_dest2;
3133                         break;
3134                 case IPV6_RTHDRDSTOPTS:
3135                         newdest = &opt->ip6po_dest1;
3136                         break;
3137                 case IPV6_DSTOPTS:
3138                         newdest = &opt->ip6po_dest2;
3139                         break;
3140                 }
3141
3142                 /* turn off the previous option, then set the new option. */
3143                 ip6_clearpktopts(opt, optname);
3144                 *newdest = kmalloc(destlen, M_IP6OPT, M_NOWAIT);
3145                 if (*newdest == NULL)
3146                         return (ENOBUFS);
3147                 bcopy(dest, *newdest, destlen);
3148
3149                 break;
3150         }
3151
3152         case IPV6_2292RTHDR:
3153         case IPV6_RTHDR:
3154         {
3155                 struct ip6_rthdr *rth;
3156                 int rthlen;
3157
3158                 if (len == 0) {
3159                         ip6_clearpktopts(opt, IPV6_RTHDR);
3160                         break;  /* just remove the option */
3161                 }
3162
3163                 /* message length validation */
3164                 if (len < sizeof(struct ip6_rthdr))
3165                         return (EINVAL);
3166                 rth = (struct ip6_rthdr *)buf;
3167                 rthlen = (rth->ip6r_len + 1) << 3;
3168                 if (len != rthlen)
3169                         return (EINVAL);
3170
3171                 switch (rth->ip6r_type) {
3172                 default:
3173                         return (EINVAL);        /* not supported */
3174                 }
3175
3176                 /* turn off the previous option */
3177                 ip6_clearpktopts(opt, IPV6_RTHDR);
3178                 opt->ip6po_rthdr = kmalloc(rthlen, M_IP6OPT, M_NOWAIT);
3179                 if (opt->ip6po_rthdr == NULL)
3180                         return (ENOBUFS);
3181                 bcopy(rth, opt->ip6po_rthdr, rthlen);
3182
3183                 break;
3184         }
3185
3186         case IPV6_USE_MIN_MTU:
3187                 if (len != sizeof(int))
3188                         return (EINVAL);
3189                 minmtupolicy = *(int *)buf;
3190                 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3191                     minmtupolicy != IP6PO_MINMTU_DISABLE &&
3192                     minmtupolicy != IP6PO_MINMTU_ALL) {
3193                         return (EINVAL);
3194                 }
3195                 opt->ip6po_minmtu = minmtupolicy;
3196                 break;
3197
3198         case IPV6_DONTFRAG:
3199                 if (len != sizeof(int))
3200                         return (EINVAL);
3201
3202                 if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3203                         /*
3204                          * we ignore this option for TCP sockets.
3205                          * (RFC3542 leaves this case unspecified.)
3206                          */
3207                         opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3208                 } else
3209                         opt->ip6po_flags |= IP6PO_DONTFRAG;
3210                 break;
3211
3212         case IPV6_PREFER_TEMPADDR:
3213                 if (len != sizeof(int))
3214                         return (EINVAL);
3215                 preftemp = *(int *)buf;
3216                 if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3217                     preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3218                     preftemp != IP6PO_TEMPADDR_PREFER) {
3219                         return (EINVAL);
3220                 }
3221                 opt->ip6po_prefer_tempaddr = preftemp;
3222                 break;
3223
3224         default:
3225                 return (ENOPROTOOPT);
3226         } /* end of switch */
3227
3228         return (0);
3229 }
3230
3231
3232 /*
3233  * Set IPv6 outgoing packet options based on advanced API.
3234  */
3235 int
3236 ip6_setpktoptions(struct mbuf *control, struct ip6_pktopts *opt,
3237     struct ip6_pktopts *stickyopt, int uproto, int priv)
3238 {
3239         struct cmsghdr *cm = NULL;
3240
3241         if (control == NULL || opt == NULL)
3242                 return (EINVAL);
3243
3244         init_ip6pktopts(opt);
3245
3246         /*
3247          * XXX: Currently, we assume all the optional information is stored
3248          * in a single mbuf.
3249          */
3250         if (stickyopt) {
3251                 int error;
3252
3253                 /*
3254                  * If stickyopt is provided, make a local copy of the options
3255                  * for this particular packet, then override them by ancillary
3256                  * objects.
3257                  * XXX: copypktopts() does not copy the cached route to a next
3258                  * hop (if any).  This is not very good in terms of efficiency,
3259                  * but we can allow this since this option should be rarely
3260                  * used.
3261                  */
3262                 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
3263                         return (error);
3264         }
3265
3266         /*
3267          * XXX: Currently, we assume all the optional information is stored
3268          * in a single mbuf.
3269          */
3270         if (control->m_next)
3271                 return (EINVAL);
3272
3273         for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
3274             control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
3275                 int error;
3276
3277                 if (control->m_len < CMSG_LEN(0))
3278                         return (EINVAL);
3279
3280                 cm = mtod(control, struct cmsghdr *);
3281                 if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
3282                         return (EINVAL);
3283                 if (cm->cmsg_level != IPPROTO_IPV6)
3284                         continue;
3285
3286                 error = ip6_setpktoption(cm->cmsg_type, CMSG_DATA(cm),
3287                     cm->cmsg_len - CMSG_LEN(0), opt, 0, 1, uproto, priv);
3288                 if (error)
3289                         return (error);
3290         }
3291
3292         return (0);
3293 }
3294
3295 /*
3296  * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3297  * packet to the input queue of a specified interface.  Note that this
3298  * calls the output routine of the loopback "driver", but with an interface
3299  * pointer that might NOT be &loif -- easier than replicating that code here.
3300  */
3301 void
3302 ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
3303 {
3304         struct mbuf *copym;
3305         struct ip6_hdr *ip6;
3306
3307         copym = m_copy(m, 0, M_COPYALL);
3308         if (copym == NULL)
3309                 return;
3310
3311         /*
3312          * Make sure to deep-copy IPv6 header portion in case the data
3313          * is in an mbuf cluster, so that we can safely override the IPv6
3314          * header portion later.
3315          */
3316         if ((copym->m_flags & M_EXT) != 0 ||
3317             copym->m_len < sizeof(struct ip6_hdr)) {
3318                 copym = m_pullup(copym, sizeof(struct ip6_hdr));
3319                 if (copym == NULL)
3320                         return;
3321         }
3322
3323 #ifdef DIAGNOSTIC
3324         if (copym->m_len < sizeof(*ip6)) {
3325                 m_freem(copym);
3326                 return;
3327         }
3328 #endif
3329
3330         ip6 = mtod(copym, struct ip6_hdr *);
3331         /*
3332          * clear embedded scope identifiers if necessary.
3333          * in6_clearscope will touch the addresses only when necessary.
3334          */
3335         in6_clearscope(&ip6->ip6_src);
3336         in6_clearscope(&ip6->ip6_dst);
3337
3338         if_simloop(ifp, copym, dst->sin6_family, 0);
3339 }
3340
3341 /*
3342  * Separate the IPv6 header from the payload into its own mbuf.
3343  *
3344  * Returns the new mbuf chain or the original mbuf if no payload.
3345  * Returns NULL if can't allocate new mbuf for header.
3346  */
3347 static struct mbuf *
3348 ip6_splithdr(struct mbuf *m)
3349 {
3350         struct mbuf *mh;
3351
3352         if (m->m_len <= sizeof(struct ip6_hdr))         /* no payload */
3353                 return (m);
3354
3355         MGETHDR(mh, MB_DONTWAIT, MT_HEADER);
3356         if (mh == NULL)
3357                 return (NULL);
3358         mh->m_len = sizeof(struct ip6_hdr);
3359         M_MOVE_PKTHDR(mh, m);
3360         MH_ALIGN(mh, sizeof(struct ip6_hdr));
3361         bcopy(mtod(m, caddr_t), mtod(mh, caddr_t), sizeof(struct ip6_hdr));
3362         m->m_data += sizeof(struct ip6_hdr);
3363         m->m_len -= sizeof(struct ip6_hdr);
3364         mh->m_next = m;
3365         return (mh);
3366 }
3367
3368 /*
3369  * Compute IPv6 extension header length.
3370  */
3371 int
3372 ip6_optlen(struct in6pcb *in6p)
3373 {
3374         int len;
3375
3376         if (!in6p->in6p_outputopts)
3377                 return 0;
3378
3379         len = 0;
3380 #define elen(x) \
3381     (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3382
3383         len += elen(in6p->in6p_outputopts->ip6po_hbh);
3384         if (in6p->in6p_outputopts->ip6po_rthdr)
3385                 /* dest1 is valid with rthdr only */
3386                 len += elen(in6p->in6p_outputopts->ip6po_dest1);
3387         len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3388         len += elen(in6p->in6p_outputopts->ip6po_dest2);
3389         return len;
3390 #undef elen
3391 }