Bring in the parallel route table code and clean up ARP. The
[dragonfly.git] / sys / netinet6 / in6_pcb.c
1 /*      $FreeBSD: src/sys/netinet6/in6_pcb.c,v 1.10.2.9 2003/01/24 05:11:35 sam Exp $   */
2 /*      $DragonFly: src/sys/netinet6/in6_pcb.c,v 1.28 2006/01/31 19:05:42 dillon Exp $  */
3 /*      $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $        */
4   
5 /*
6  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the project nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  */
34
35 /*
36  * Copyright (c) 1982, 1986, 1991, 1993
37  *      The Regents of the University of California.  All rights reserved.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. All advertising materials mentioning features or use of this software
48  *    must display the following acknowledgement:
49  *      This product includes software developed by the University of
50  *      California, Berkeley and its contributors.
51  * 4. Neither the name of the University nor the names of its contributors
52  *    may be used to endorse or promote products derived from this software
53  *    without specific prior written permission.
54  *
55  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
56  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
59  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65  * SUCH DAMAGE.
66  *
67  *      @(#)in_pcb.c    8.2 (Berkeley) 1/4/94
68  */
69
70 #include "opt_inet.h"
71 #include "opt_inet6.h"
72 #include "opt_ipsec.h"
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/malloc.h>
77 #include <sys/mbuf.h>
78 #include <sys/domain.h>
79 #include <sys/protosw.h>
80 #include <sys/socket.h>
81 #include <sys/socketvar.h>
82 #include <sys/sockio.h>
83 #include <sys/errno.h>
84 #include <sys/time.h>
85 #include <sys/proc.h>
86 #include <sys/jail.h>
87 #include <sys/thread2.h>
88
89 #include <vm/vm_zone.h>
90
91 #include <net/if.h>
92 #include <net/if_types.h>
93 #include <net/route.h>
94
95 #include <netinet/in.h>
96 #include <netinet/in_var.h>
97 #include <netinet/in_systm.h>
98 #include <netinet/ip6.h>
99 #include <netinet/ip_var.h>
100 #include <netinet6/ip6_var.h>
101 #include <netinet6/nd6.h>
102 #include <netinet/in_pcb.h>
103 #include <netinet6/in6_pcb.h>
104
105 #ifdef IPSEC
106 #include <netinet6/ipsec.h>
107 #ifdef INET6
108 #include <netinet6/ipsec6.h>
109 #endif
110 #include <netinet6/ah.h>
111 #ifdef INET6
112 #include <netinet6/ah6.h>
113 #endif
114 #include <netproto/key/key.h>
115 #endif /* IPSEC */
116
117 #ifdef FAST_IPSEC
118 #include <netproto/ipsec/ipsec.h>
119 #include <netproto/ipsec/ipsec6.h>
120 #include <netproto/ipsec/key.h>
121 #define IPSEC
122 #endif /* FAST_IPSEC */
123
/*
 * File-scope IPv6 address object defined without an initializer, so the
 * C rules for static storage leave it all zeros.  It is not referenced
 * anywhere in the portion of this file visible here.
 */
124 struct  in6_addr zeroin6_addr;
125
126 int
127 in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct thread *td)
128 {
129         struct socket *so = inp->inp_socket;
130         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL;
131         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
132         u_short lport = 0;
133         int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
134
135         if (!in6_ifaddr) /* XXX broken! */
136                 return (EADDRNOTAVAIL);
137         if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
138                 return(EINVAL);
139         if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
140                 wild = 1;
141         if (nam) {
142                 sin6 = (struct sockaddr_in6 *)nam;
143                 if (nam->sa_len != sizeof(*sin6))
144                         return(EINVAL);
145                 /*
146                  * family check.
147                  */
148                 if (nam->sa_family != AF_INET6)
149                         return(EAFNOSUPPORT);
150
151                 /* KAME hack: embed scopeid */
152                 if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL) != 0)
153                         return EINVAL;
154                 /* this must be cleared for ifa_ifwithaddr() */
155                 sin6->sin6_scope_id = 0;
156
157                 lport = sin6->sin6_port;
158                 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
159                         /*
160                          * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
161                          * allow compepte duplication of binding if
162                          * SO_REUSEPORT is set, or if SO_REUSEADDR is set
163                          * and a multicast address is bound on both
164                          * new and duplicated sockets.
165                          */
166                         if (so->so_options & SO_REUSEADDR)
167                                 reuseport = SO_REUSEADDR|SO_REUSEPORT;
168                 } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
169                         struct ifaddr *ia = NULL;
170
171                         sin6->sin6_port = 0;            /* yech... */
172                         if ((ia = ifa_ifwithaddr((struct sockaddr *)sin6)) == 0)
173                                 return(EADDRNOTAVAIL);
174
175                         /*
176                          * XXX: bind to an anycast address might accidentally
177                          * cause sending a packet with anycast source address.
178                          * We should allow to bind to a deprecated address, since
179                          * the application dare to use it.
180                          */
181                         if (ia &&
182                             ((struct in6_ifaddr *)ia)->ia6_flags &
183                             (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) {
184                                 return(EADDRNOTAVAIL);
185                         }
186                 }
187                 if (lport) {
188                         struct inpcb *t;
189                         struct proc *p = td->td_proc; /* may be NULL */
190
191                         /* GROSS */
192                         if (ntohs(lport) < IPV6PORT_RESERVED && p &&
193                             suser_cred(p->p_ucred, PRISON_ROOT))
194                                 return(EACCES);
195                         if (so->so_cred->cr_uid != 0 &&
196                             !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
197                                 t = in6_pcblookup_local(pcbinfo,
198                                     &sin6->sin6_addr, lport,
199                                     INPLOOKUP_WILDCARD);
200                                 if (t &&
201                                     (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
202                                      !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
203                                      (t->inp_socket->so_options &
204                                       SO_REUSEPORT) == 0) &&
205                                     (so->so_cred->cr_uid !=
206                                      t->inp_socket->so_cred->cr_uid))
207                                         return (EADDRINUSE);
208                                 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
209                                     IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
210                                         struct sockaddr_in sin;
211
212                                         in6_sin6_2_sin(&sin, sin6);
213                                         t = in_pcblookup_local(pcbinfo,
214                                                 sin.sin_addr, lport,
215                                                 INPLOOKUP_WILDCARD);
216                                         if (t &&
217                                             (so->so_cred->cr_uid !=
218                                              t->inp_socket->so_cred->cr_uid) &&
219                                             (ntohl(t->inp_laddr.s_addr) !=
220                                              INADDR_ANY ||
221                                              INP_SOCKAF(so) ==
222                                              INP_SOCKAF(t->inp_socket)))
223                                                 return (EADDRINUSE);
224                                 }
225                         }
226                         t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr,
227                                                 lport, wild);
228                         if (t && (reuseport & t->inp_socket->so_options) == 0)
229                                 return(EADDRINUSE);
230                         if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
231                             IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
232                                 struct sockaddr_in sin;
233
234                                 in6_sin6_2_sin(&sin, sin6);
235                                 t = in_pcblookup_local(pcbinfo, sin.sin_addr,
236                                                        lport, wild);
237                                 if (t &&
238                                     (reuseport & t->inp_socket->so_options)
239                                     == 0 &&
240                                     (ntohl(t->inp_laddr.s_addr)
241                                      != INADDR_ANY ||
242                                      INP_SOCKAF(so) ==
243                                      INP_SOCKAF(t->inp_socket)))
244                                         return (EADDRINUSE);
245                         }
246                 }
247                 inp->in6p_laddr = sin6->sin6_addr;
248         }
249         if (lport == 0) {
250                 int e;
251                 if ((e = in6_pcbsetport(&inp->in6p_laddr, inp, td)) != 0)
252                         return(e);
253         }
254         else {
255                 inp->inp_lport = lport;
256                 if (in_pcbinsporthash(inp) != 0) {
257                         inp->in6p_laddr = in6addr_any;
258                         inp->inp_lport = 0;
259                         return (EAGAIN);
260                 }
261         }
262         return(0);
263 }
264
265 /*
266  *   Transform old in6_pcbconnect() into an inner subroutine for new
267  *   in6_pcbconnect(): Do some validity-checking on the remote
268  *   address (in mbuf 'nam') and then determine local host address
269  *   (i.e., which interface) to use to access that remote host.
270  *
271  *   This preserves definition of in6_pcbconnect(), while supporting a
272  *   slightly different version for T/TCP.  (This is more than
273  *   a bit of a kludge, but cleaning up the internal interfaces would
274  *   have forced minor changes in every protocol).
275  */
276
277 int
278 in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
279              struct in6_addr **plocal_addr6)
280 {
281         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
282         struct ifnet *ifp = NULL;
283         int error = 0;
284
285         if (nam->sa_len != sizeof (*sin6))
286                 return (EINVAL);
287         if (sin6->sin6_family != AF_INET6)
288                 return (EAFNOSUPPORT);
289         if (sin6->sin6_port == 0)
290                 return (EADDRNOTAVAIL);
291
292         /* KAME hack: embed scopeid */
293         if (in6_embedscope(&sin6->sin6_addr, sin6, inp, &ifp) != 0)
294                 return EINVAL;
295
296         if (in6_ifaddr) {
297                 /*
298                  * If the destination address is UNSPECIFIED addr,
299                  * use the loopback addr, e.g ::1.
300                  */
301                 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
302                         sin6->sin6_addr = in6addr_loopback;
303         }
304         {
305                 /*
306                  * XXX: in6_selectsrc might replace the bound local address
307                  * with the address specified by setsockopt(IPV6_PKTINFO).
308                  * Is it the intended behavior?
309                  */
310                 *plocal_addr6 = in6_selectsrc(sin6, inp->in6p_outputopts,
311                                               inp->in6p_moptions,
312                                               &inp->in6p_route,
313                                               &inp->in6p_laddr, &error);
314                 if (*plocal_addr6 == 0) {
315                         if (error == 0)
316                                 error = EADDRNOTAVAIL;
317                         return(error);
318                 }
319                 /*
320                  * Don't do pcblookup call here; return interface in
321                  * plocal_addr6
322                  * and exit to caller, that will do the lookup.
323                  */
324         }
325
326         if (inp->in6p_route.ro_rt)
327                 ifp = inp->in6p_route.ro_rt->rt_ifp;
328
329         return(0);
330 }
331
332 /*
333  * Outer subroutine:
334  * Connect from a socket to a specified address.
335  * Both address and port must be specified in argument sin.
336  * If don't have a local address for this socket yet,
337  * then pick one.
338  */
339 int
340 in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct thread *td)
341 {
342         struct in6_addr *addr6;
343         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
344         int error;
345
346         /*
347          * Call inner routine, to assign local interface address.
348          * in6_pcbladdr() may automatically fill in sin6_scope_id.
349          */
350         if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0)
351                 return(error);
352
353         if (in6_pcblookup_hash(inp->inp_cpcbinfo, &sin6->sin6_addr,
354                                sin6->sin6_port,
355                               IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
356                               ? addr6 : &inp->in6p_laddr,
357                               inp->inp_lport, 0, NULL) != NULL) {
358                 return (EADDRINUSE);
359         }
360         if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
361                 if (inp->inp_lport == 0) {
362                         error = in6_pcbbind(inp, (struct sockaddr *)0, td);
363                         if (error)
364                                 return (error);
365                 }
366                 inp->in6p_laddr = *addr6;
367         }
368         inp->in6p_faddr = sin6->sin6_addr;
369         inp->inp_fport = sin6->sin6_port;
370         /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
371         inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK;
372         if (inp->in6p_flags & IN6P_AUTOFLOWLABEL)
373                 inp->in6p_flowinfo |=
374                     (htonl(ip6_flow_seq++) & IPV6_FLOWLABEL_MASK);
375
376         in_pcbinsconnhash(inp);
377         return (0);
378 }
379
380 #if 0
381 /*
382  * Return an IPv6 address, which is the most appropriate for given
383  * destination and user specified options.
384  * If necessary, this function lookups the routing table and return
385  * an entry to the caller for later use.
386  */
387 struct in6_addr *
388 in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
389               struct ip6_moptions *mopts, struct route_in6 *ro,
390               struct in6_addr *laddr, int *errorp)
391 {
392         struct in6_addr *dst;
393         struct in6_ifaddr *ia6 = 0;
394         struct in6_pktinfo *pi = NULL;
395
396         dst = &dstsock->sin6_addr;
397         *errorp = 0;
398
399         /*
400          * If the source address is explicitly specified by the caller,
401          * use it.
402          */
403         if (opts && (pi = opts->ip6po_pktinfo) &&
404             !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr))
405                 return(&pi->ipi6_addr);
406
407         /*
408          * If the source address is not specified but the socket(if any)
409          * is already bound, use the bound address.
410          */
411         if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr))
412                 return(laddr);
413
414         /*
415          * If the caller doesn't specify the source address but
416          * the outgoing interface, use an address associated with
417          * the interface.
418          */
419         if (pi && pi->ipi6_ifindex) {
420                 /* XXX boundary check is assumed to be already done. */
421                 ia6 = in6_ifawithscope(ifindex2ifnet[pi->ipi6_ifindex],
422                                        dst);
423                 if (ia6 == 0) {
424                         *errorp = EADDRNOTAVAIL;
425                         return(0);
426                 }
427                 return(&satosin6(&ia6->ia_addr)->sin6_addr);
428         }
429
430         /*
431          * If the destination address is a link-local unicast address or
432          * a multicast address, and if the outgoing interface is specified
433          * by the sin6_scope_id filed, use an address associated with the
434          * interface.
435          * XXX: We're now trying to define more specific semantics of
436          *      sin6_scope_id field, so this part will be rewritten in
437          *      the near future.
438          */
439         if ((IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst)) &&
440             dstsock->sin6_scope_id) {
441                 /*
442                  * I'm not sure if boundary check for scope_id is done
443                  * somewhere...
444                  */
445                 if (dstsock->sin6_scope_id < 0 ||
446                     if_index < dstsock->sin6_scope_id) {
447                         *errorp = ENXIO; /* XXX: better error? */
448                         return(0);
449                 }
450                 ia6 = in6_ifawithscope(ifindex2ifnet[dstsock->sin6_scope_id],
451                                        dst);
452                 if (ia6 == 0) {
453                         *errorp = EADDRNOTAVAIL;
454                         return(0);
455                 }
456                 return(&satosin6(&ia6->ia_addr)->sin6_addr);
457         }
458
459         /*
460          * If the destination address is a multicast address and
461          * the outgoing interface for the address is specified
462          * by the caller, use an address associated with the interface.
463          * There is a sanity check here; if the destination has node-local
464          * scope, the outgoing interfacde should be a loopback address.
465          * Even if the outgoing interface is not specified, we also
466          * choose a loopback interface as the outgoing interface.
467          */
468         if (IN6_IS_ADDR_MULTICAST(dst)) {
469                 struct ifnet *ifp = mopts ? mopts->im6o_multicast_ifp : NULL;
470
471                 if (ifp == NULL && IN6_IS_ADDR_MC_NODELOCAL(dst)) {
472                         ifp = &loif[0];
473                 }
474
475                 if (ifp) {
476                         ia6 = in6_ifawithscope(ifp, dst);
477                         if (ia6 == 0) {
478                                 *errorp = EADDRNOTAVAIL;
479                                 return(0);
480                         }
481                         return(&ia6->ia_addr.sin6_addr);
482                 }
483         }
484
485         /*
486          * If the next hop address for the packet is specified
487          * by caller, use an address associated with the route
488          * to the next hop.
489          */
490         {
491                 struct sockaddr_in6 *sin6_next;
492                 struct rtentry *rt;
493
494                 if (opts && opts->ip6po_nexthop) {
495                         sin6_next = satosin6(opts->ip6po_nexthop);
496                         rt = nd6_lookup(&sin6_next->sin6_addr, 1, NULL);
497                         if (rt) {
498                                 ia6 = in6_ifawithscope(rt->rt_ifp, dst);
499                                 if (ia6 == 0)
500                                         ia6 = ifatoia6(rt->rt_ifa);
501                         }
502                         if (ia6 == 0) {
503                                 *errorp = EADDRNOTAVAIL;
504                                 return(0);
505                         }
506                         return(&satosin6(&ia6->ia_addr)->sin6_addr);
507                 }
508         }
509
510         /*
511          * If route is known or can be allocated now,
512          * our src addr is taken from the i/f, else punt.
513          */
514         if (ro) {
515                 if (ro->ro_rt &&
516                     !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst)) {
517                         RTFREE(ro->ro_rt);
518                         ro->ro_rt = NULL;
519                 }
520                 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL) {
521                         struct sockaddr_in6 *dst6;
522
523                         /* No route yet, so try to acquire one */
524                         bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
525                         dst6 = &ro->ro_dst;
526                         dst6->sin6_family = AF_INET6;
527                         dst6->sin6_len = sizeof(struct sockaddr_in6);
528                         dst6->sin6_addr = *dst;
529                         if (IN6_IS_ADDR_MULTICAST(dst)) {
530                                 ro->ro_rt =
531                                   rtpurelookup((struct sockaddr *)&ro->ro_dst);
532                         } else {
533                                 rtalloc((struct route *)ro);
534                         }
535                 }
536
537                 /*
538                  * in_pcbconnect() checks out IFF_LOOPBACK to skip using
539                  * the address. But we don't know why it does so.
540                  * It is necessary to ensure the scope even for lo0
541                  * so doesn't check out IFF_LOOPBACK.
542                  */
543
544                 if (ro->ro_rt) {
545                         ia6 = in6_ifawithscope(ro->ro_rt->rt_ifa->ifa_ifp, dst);
546                         if (ia6 == 0) /* xxx scope error ?*/
547                                 ia6 = ifatoia6(ro->ro_rt->rt_ifa);
548                 }
549                 if (ia6 == 0) {
550                         *errorp = EHOSTUNREACH; /* no route */
551                         return(0);
552                 }
553                 return(&satosin6(&ia6->ia_addr)->sin6_addr);
554         }
555
556         *errorp = EADDRNOTAVAIL;
557         return(0);
558 }
559
560 /*
561  * Default hop limit selection. The precedence is as follows:
562  * 1. Hoplimit valued specified via ioctl.
563  * 2. (If the outgoing interface is detected) the current
564  *     hop limit of the interface specified by router advertisement.
565  * 3. The system default hoplimit.
566 */
567 int
568 in6_selecthlim(struct in6pcb *in6p, struct ifnet *ifp)
569 {
570         if (in6p && in6p->in6p_hops >= 0)
571                 return(in6p->in6p_hops);
572         else if (ifp)
573                 return(ND_IFINFO(ifp)->chlim);
574         else
575                 return(ip6_defhlim);
576 }
577 #endif
578
579 void
580 in6_pcbdisconnect(struct inpcb *inp)
581 {
582         bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr));
583         inp->inp_fport = 0;
584         /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
585         inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK;
586         in_pcbremconnhash(inp);
587         if (inp->inp_socket->so_state & SS_NOFDREF)
588                 in6_pcbdetach(inp);
589 }
590
591 void
592 in6_pcbdetach(struct inpcb *inp)
593 {
594         struct socket *so = inp->inp_socket;
595         struct inpcbinfo *ipi = inp->inp_pcbinfo;
596
597 #ifdef IPSEC
598         if (inp->in6p_sp != NULL)
599                 ipsec6_delete_pcbpolicy(inp);
600 #endif /* IPSEC */
601         inp->inp_gencnt = ++ipi->ipi_gencnt;
602         in_pcbremlists(inp);
603         so->so_pcb = NULL;
604         sofree(so);
605
606         if (inp->in6p_options)
607                 m_freem(inp->in6p_options);
608         ip6_freepcbopts(inp->in6p_outputopts);
609         ip6_freemoptions(inp->in6p_moptions);
610         if (inp->in6p_route.ro_rt)
611                 rtfree(inp->in6p_route.ro_rt);
612         /* Check and free IPv4 related resources in case of mapped addr */
613         if (inp->inp_options)
614                 m_free(inp->inp_options);
615         ip_freemoptions(inp->inp_moptions);
616
617         inp->inp_vflag = 0;
618         zfree(ipi->ipi_zone, inp);
619 }
620
621 /*
622  * The calling convention of in6_setsockaddr() and in6_setpeeraddr() was
623  * modified to match the pru_sockaddr() and pru_peeraddr() entry points
624  * in struct pr_usrreqs, so that protocols can just reference then directly
625  * without the need for a wrapper function.  The socket must have a valid
626  * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
627  * except through a kernel programming error, so it is acceptable to panic
628  * (or in this case trap) if the PCB is invalid.  (Actually, we don't trap
629  * because there actually /is/ a programming error somewhere... XXX)
630  */
631 int
632 in6_setsockaddr(struct socket *so, struct sockaddr **nam)
633 {
634         struct inpcb *inp;
635         struct sockaddr_in6 *sin6;
636
637         /*
638          * Do the malloc first in case it blocks.
639          */
640         MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6, M_SONAME, M_WAITOK);
641         bzero(sin6, sizeof *sin6);
642         sin6->sin6_family = AF_INET6;
643         sin6->sin6_len = sizeof(*sin6);
644
645         crit_enter();
646         inp = so->so_pcb;
647         if (!inp) {
648                 crit_exit();
649                 free(sin6, M_SONAME);
650                 return EINVAL;
651         }
652         sin6->sin6_port = inp->inp_lport;
653         sin6->sin6_addr = inp->in6p_laddr;
654         crit_exit();
655         if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
656                 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
657         else
658                 sin6->sin6_scope_id = 0;        /*XXX*/
659         if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
660                 sin6->sin6_addr.s6_addr16[1] = 0;
661
662         *nam = (struct sockaddr *)sin6;
663         return 0;
664 }
665
666 int
667 in6_setpeeraddr(struct socket *so, struct sockaddr **nam)
668 {
669         struct inpcb *inp;
670         struct sockaddr_in6 *sin6;
671
672         /*
673          * Do the malloc first in case it blocks.
674          */
675         MALLOC(sin6, struct sockaddr_in6 *, sizeof(*sin6), M_SONAME, M_WAITOK);
676         bzero((caddr_t)sin6, sizeof (*sin6));
677         sin6->sin6_family = AF_INET6;
678         sin6->sin6_len = sizeof(struct sockaddr_in6);
679
680         crit_enter();
681         inp = so->so_pcb;
682         if (!inp) {
683                 crit_exit();
684                 free(sin6, M_SONAME);
685                 return EINVAL;
686         }
687         sin6->sin6_port = inp->inp_fport;
688         sin6->sin6_addr = inp->in6p_faddr;
689         crit_exit();
690         if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
691                 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
692         else
693                 sin6->sin6_scope_id = 0;        /*XXX*/
694         if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
695                 sin6->sin6_addr.s6_addr16[1] = 0;
696
697         *nam = (struct sockaddr *)sin6;
698         return 0;
699 }
700
701 int
702 in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam)
703 {
704         struct  inpcb *inp = so->so_pcb;
705         int     error;
706
707         if (inp == NULL)
708                 return EINVAL;
709         if (inp->inp_vflag & INP_IPV4) {
710                 error = in_setsockaddr(so, nam);
711                 if (error == 0)
712                         in6_sin_2_v4mapsin6_in_sock(nam);
713         } else
714         /* scope issues will be handled in in6_setsockaddr(). */
715         error = in6_setsockaddr(so, nam);
716
717         return error;
718 }
719
720 int
721 in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam)
722 {
723         struct  inpcb *inp = so->so_pcb;
724         int     error;
725
726         if (inp == NULL)
727                 return EINVAL;
728         if (inp->inp_vflag & INP_IPV4) {
729                 error = in_setpeeraddr(so, nam);
730                 if (error == 0)
731                         in6_sin_2_v4mapsin6_in_sock(nam);
732         } else
733         /* scope issues will be handled in in6_setpeeraddr(). */
734         error = in6_setpeeraddr(so, nam);
735
736         return error;
737 }
738
739 /*
740  * Pass some notification to all connections of a protocol
741  * associated with address dst.  The local address and/or port numbers
742  * may be specified to limit the search.  The "usual action" will be
743  * taken, depending on the ctlinput cmd.  The caller must filter any
744  * cmds that are uninteresting (e.g., no error in the map).
745  * Call the protocol specific routine (if any) to report
746  * any errors for each matching socket.
747  *
748  * Must be called under crit_enter().
749  */
750 void
751 in6_pcbnotify(struct inpcbhead *head, struct sockaddr *dst, in_port_t fport,
752               const struct sockaddr *src, in_port_t lport, int cmd, int arg,
753               void (*notify) (struct inpcb *, int))
754 {
755         struct inpcb *inp, *ninp;
756         struct sockaddr_in6 sa6_src, *sa6_dst;
757         u_int32_t flowinfo;
758
759         if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6)
760                 return;
761
762         sa6_dst = (struct sockaddr_in6 *)dst;
763         if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr))
764                 return;
765
766         /*
767          * note that src can be NULL when we get notify by local fragmentation.
768          */
769         sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src;
770         flowinfo = sa6_src.sin6_flowinfo;
771
772         /*
773          * Redirects go to all references to the destination,
774          * and use in6_rtchange to invalidate the route cache.
775          * Dead host indications: also use in6_rtchange to invalidate
776          * the cache, and deliver the error to all the sockets.
777          * Otherwise, if we have knowledge of the local port and address,
778          * deliver only to that socket.
779          */
780         if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
781                 fport = 0;
782                 lport = 0;
783                 bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr));
784
785                 if (cmd != PRC_HOSTDEAD)
786                         notify = in6_rtchange;
787         }
788         if (cmd != PRC_MSGSIZE)
789                 arg = inet6ctlerrmap[cmd];
790         crit_enter();
791         for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
792                 ninp = LIST_NEXT(inp, inp_list);
793
794                 if (inp->inp_flags & INP_PLACEMARKER)
795                         continue;
796
797                 if ((inp->inp_vflag & INP_IPV6) == 0)
798                         continue;
799
800                 /*
801                  * Detect if we should notify the error. If no source and
802                  * destination ports are specifed, but non-zero flowinfo and
803                  * local address match, notify the error. This is the case
804                  * when the error is delivered with an encrypted buffer
805                  * by ESP. Otherwise, just compare addresses and ports
806                  * as usual.
807                  */
808                 if (lport == 0 && fport == 0 && flowinfo &&
809                     inp->inp_socket != NULL &&
810                     flowinfo == (inp->in6p_flowinfo & IPV6_FLOWLABEL_MASK) &&
811                     IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr))
812                         goto do_notify;
813                 else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
814                                              &sa6_dst->sin6_addr) ||
815                          inp->inp_socket == 0 ||
816                          (lport && inp->inp_lport != lport) ||
817                          (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) &&
818                           !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
819                                               &sa6_src.sin6_addr)) ||
820                          (fport && inp->inp_fport != fport))
821                         continue;
822
823 do_notify:
824                 if (notify)
825                         (*notify)(inp, arg);
826         }
827         crit_exit();
828 }
829
830 /*
831  * Lookup a PCB based on the local address and port.
832  */
833 struct inpcb *
834 in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
835                     u_int lport_arg, int wild_okay)
836 {
837         struct inpcb *inp;
838         int matchwild = 3, wildcard;
839         u_short lport = lport_arg;
840         struct inpcbporthead *porthash;
841         struct inpcbport *phd;
842         struct inpcb *match = NULL;
843
844         /*
845          * Best fit PCB lookup.
846          *
847          * First see if this local port is in use by looking on the
848          * port hash list.
849          */
850         porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
851             pcbinfo->porthashmask)];
852         LIST_FOREACH(phd, porthash, phd_hash) {
853                 if (phd->phd_port == lport)
854                         break;
855         }
856         if (phd != NULL) {
857                 /*
858                  * Port is in use by one or more PCBs. Look for best
859                  * fit.
860                  */
861                 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
862                         wildcard = 0;
863                         if ((inp->inp_vflag & INP_IPV6) == 0)
864                                 continue;
865                         if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))
866                                 wildcard++;
867                         if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
868                                 if (IN6_IS_ADDR_UNSPECIFIED(laddr))
869                                         wildcard++;
870                                 else if (!IN6_ARE_ADDR_EQUAL(
871                                         &inp->in6p_laddr, laddr))
872                                         continue;
873                         } else {
874                                 if (!IN6_IS_ADDR_UNSPECIFIED(laddr))
875                                         wildcard++;
876                         }
877                         if (wildcard && !wild_okay)
878                                 continue;
879                         if (wildcard < matchwild) {
880                                 match = inp;
881                                 if (wildcard == 0)
882                                         break;
883                                 else
884                                         matchwild = wildcard;
885                         }
886                 }
887         }
888         return (match);
889 }
890
891 void
892 in6_pcbpurgeif0(struct in6pcb *head, struct ifnet *ifp)
893 {
894         struct in6pcb *in6p;
895         struct ip6_moptions *im6o;
896         struct in6_multi_mship *imm, *nimm;
897
898         for (in6p = head; in6p != NULL; in6p = LIST_NEXT(in6p, inp_list)) {
899                 if (in6p->in6p_flags & INP_PLACEMARKER)
900                         continue;
901                 im6o = in6p->in6p_moptions;
902                 if ((in6p->inp_vflag & INP_IPV6) &&
903                     im6o) {
904                         /*
905                          * Unselect the outgoing interface if it is being
906                          * detached.
907                          */
908                         if (im6o->im6o_multicast_ifp == ifp)
909                                 im6o->im6o_multicast_ifp = NULL;
910
911                         /*
912                          * Drop multicast group membership if we joined
913                          * through the interface being detached.
914                          * XXX controversial - is it really legal for kernel
915                          * to force this?
916                          */
917                         for (imm = im6o->im6o_memberships.lh_first;
918                              imm != NULL; imm = nimm) {
919                                 nimm = imm->i6mm_chain.le_next;
920                                 if (imm->i6mm_maddr->in6m_ifp == ifp) {
921                                         LIST_REMOVE(imm, i6mm_chain);
922                                         in6_delmulti(imm->i6mm_maddr);
923                                         free(imm, M_IPMADDR);
924                                 }
925                         }
926                 }
927         }
928 }
929
930 /*
931  * Check for alternatives when higher level complains
932  * about service problems.  For now, invalidate cached
933  * routing information.  If the route was created dynamically
934  * (by a redirect), time to try a default gateway again.
935  */
936 void
937 in6_losing(struct inpcb *in6p)
938 {
939         struct rtentry *rt;
940         struct rt_addrinfo info;
941
942         if ((rt = in6p->in6p_route.ro_rt) != NULL) {
943                 bzero((caddr_t)&info, sizeof(info));
944                 info.rti_flags = rt->rt_flags;
945                 info.rti_info[RTAX_DST] = rt_key(rt);
946                 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
947                 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
948                 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
949                 if (rt->rt_flags & RTF_DYNAMIC)
950                         rtrequest1_global(RTM_DELETE, &info, NULL, NULL);
951                 in6p->in6p_route.ro_rt = NULL;
952                 rtfree(rt);
953                 /*
954                  * A new route can be allocated
955                  * the next time output is attempted.
956                  */
957         }
958 }
959
960 /*
961  * After a routing change, flush old routing
962  * and allocate a (hopefully) better one.
963  */
964 void
965 in6_rtchange(struct inpcb *inp, int errno)
966 {
967         if (inp->in6p_route.ro_rt) {
968                 rtfree(inp->in6p_route.ro_rt);
969                 inp->in6p_route.ro_rt = 0;
970                 /*
971                  * A new route can be allocated the next time
972                  * output is attempted.
973                  */
974         }
975 }
976
977 /*
978  * Lookup PCB in hash list.
979  */
980 struct inpcb *
981 in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
982                    u_int fport_arg, struct in6_addr *laddr, u_int lport_arg,
983                    int wildcard, struct ifnet *ifp)
984 {
985         struct inpcbhead *head;
986         struct inpcb *inp;
987         u_short fport = fport_arg, lport = lport_arg;
988         int faith;
989
990         if (faithprefix_p != NULL)
991                 faith = (*faithprefix_p)(laddr);
992         else
993                 faith = 0;
994
995         /*
996          * First look for an exact match.
997          */
998         head = &pcbinfo->hashbase[INP_PCBCONNHASH(faddr->s6_addr32[3] /* XXX */,
999                                               fport,
1000                                               laddr->s6_addr32[3], /* XXX JH */
1001                                               lport,
1002                                               pcbinfo->hashmask)];
1003         LIST_FOREACH(inp, head, inp_hash) {
1004                 if ((inp->inp_vflag & INP_IPV6) == 0)
1005                         continue;
1006                 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
1007                     IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
1008                     inp->inp_fport == fport &&
1009                     inp->inp_lport == lport) {
1010                         /*
1011                          * Found.
1012                          */
1013                         return (inp);
1014                 }
1015         }
1016         if (wildcard) {
1017                 struct inpcontainerhead *chead;
1018                 struct inpcontainer *ic;
1019                 struct inpcb *local_wild = NULL;
1020
1021                 chead = &pcbinfo->wildcardhashbase[INP_PCBWILDCARDHASH(lport,
1022                     pcbinfo->wildcardhashmask)];
1023                 LIST_FOREACH(ic, chead, ic_list) {
1024                         inp = ic->ic_inp;
1025
1026                         if (!(inp->inp_vflag & INP_IPV6))
1027                                 continue;
1028                         if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
1029                             inp->inp_lport == lport) {
1030                                 if (faith && (inp->inp_flags & INP_FAITH) == 0)
1031                                         continue;
1032                                 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
1033                                                        laddr))
1034                                         return (inp);
1035                                 else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
1036                                         local_wild = inp;
1037                         }
1038                 }
1039                 return (local_wild);
1040         }
1041
1042         /*
1043          * Not found.
1044          */
1045         return (NULL);
1046 }
1047
1048 void
1049 init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m)
1050 {
1051         struct ip6_hdr *ip;
1052
1053         ip = mtod(m, struct ip6_hdr *);
1054         bzero(sin6, sizeof(*sin6));
1055         sin6->sin6_len = sizeof(*sin6);
1056         sin6->sin6_family = AF_INET6;
1057         sin6->sin6_addr = ip->ip6_src;
1058         if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
1059                 sin6->sin6_addr.s6_addr16[1] = 0;
1060         sin6->sin6_scope_id =
1061                 (m->m_pkthdr.rcvif && IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
1062                 ? m->m_pkthdr.rcvif->if_index : 0;
1063
1064         return;
1065 }