Merge from vendor branch LESS:
[dragonfly.git] / sys / netinet / in_pcb.c
1 /*
2  * Copyright (c) 2004 Jeffrey Hsu.  All rights reserved.
3  * Copyright (c) 1982, 1986, 1991, 1993, 1995
4  *      The Regents of the University of California.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *      This product includes software developed by the University of
17  *      California, Berkeley and its contributors.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *      @(#)in_pcb.c    8.4 (Berkeley) 5/24/95
35  * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.27 2004/01/02 04:06:42 ambrisko Exp $
36  * $DragonFly: src/sys/netinet/in_pcb.c,v 1.18 2004/04/18 20:05:09 hsu Exp $
37  */
38
39 #include "opt_ipsec.h"
40 #include "opt_inet6.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/domain.h>
47 #include <sys/protosw.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/proc.h>
51 #include <sys/jail.h>
52 #include <sys/kernel.h>
53 #include <sys/sysctl.h>
54
55 #include <machine/limits.h>
56
57 #include <vm/vm_zone.h>
58
59 #include <net/if.h>
60 #include <net/if_types.h>
61 #include <net/route.h>
62
63 #include <netinet/in.h>
64 #include <netinet/in_pcb.h>
65 #include <netinet/in_var.h>
66 #include <netinet/ip_var.h>
67 #ifdef INET6
68 #include <netinet/ip6.h>
69 #include <netinet6/ip6_var.h>
70 #endif /* INET6 */
71
72 #ifdef IPSEC
73 #include <netinet6/ipsec.h>
74 #include <netproto/key/key.h>
75 #endif
76
77 #ifdef FAST_IPSEC
78 #if defined(IPSEC) || defined(IPSEC_ESP)
79 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!"
80 #endif
81
82 #include <netipsec/ipsec.h>
83 #include <netipsec/key.h>
84 #define IPSEC
85 #endif /* FAST_IPSEC */
86
/* File-scope in_addr with no initializer, so it starts out all zeroes. */
87 struct in_addr zeroin_addr;
88
89 /*
90  * These configure the range of local port addresses assigned to
91  * "unspecified" outgoing connections/packets/whatever.
92  */
/*
 * Range in_pcbbind() uses when INP_LOWPORT is set.  With the defaults
 * noted here "first" is above "last", which makes in_pcbbind() walk this
 * range downward.
 */
93 int ipport_lowfirstauto = IPPORT_RESERVED - 1;  /* 1023 */
94 int ipport_lowlastauto = IPPORT_RESERVEDSTART;  /* 600 */
95
/* Range in_pcbbind() uses when neither INP_HIGHPORT nor INP_LOWPORT is set. */
96 int ipport_firstauto = IPPORT_RESERVED;         /* 1024 */
97 int ipport_lastauto = IPPORT_USERRESERVED;      /* 5000 */
98
/* Range in_pcbbind() uses when INP_HIGHPORT is set. */
99 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO;    /* 49152 */
100 int ipport_hilastauto = IPPORT_HILASTAUTO;      /* 65535 */
101
/*
 * Clamp the integer lvalue `var' into the inclusive range [lo, hi] and
 * store the result back into `var'.
 *
 * This must be a macro rather than a function taking `var' by value:
 * a by-value parameter is a private copy, so assigning to it would leave
 * the caller's variable untouched and the clamp would silently do nothing.
 *
 * `var' must be a side-effect-free lvalue; all three arguments may be
 * evaluated more than once.
 */
#define RANGECHK(var, lo, hi)					\
do {								\
	if ((var) < (lo))					\
		(var) = (lo);					\
	else if ((var) > (hi))					\
		(var) = (hi);					\
} while (0)
110
111 static int
112 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
113 {
114         int error;
115
116         error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
117         if (!error) {
118                 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
119                 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
120
121                 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
122                 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
123
124                 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
125                 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
126         }
127         return (error);
128 }
129
/*
 * sysctl declarations for the automatic port ranges: a "portrange" node
 * under _net_inet_ip, plus one read/write integer entry per ipport_*
 * variable.  All six entries name sysctl_net_ipport_check() as their
 * handler and carry an empty description string.
 */
130 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
131
132 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
133            &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
134 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
135            &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
136 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
137            &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
138 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
139            &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
140 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
141            &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
142 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
143            &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
144
145 /*
146  * in_pcb.c: manage the Protocol Control Blocks.
147  *
148  * NOTE: It is assumed that most of these functions will be called at
149  * splnet(). XXX - There are, unfortunately, a few exceptions to this
150  * rule that should be fixed.
151  */
152
153 /*
154  * Allocate a PCB and associate it with the socket.
155  */
156 int
157 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
158 {
159         struct inpcb *inp;
160 #ifdef IPSEC
161         int error;
162 #endif
163
164         inp = zalloc(pcbinfo->ipi_zone);
165         if (inp == NULL)
166                 return (ENOBUFS);
167         bzero((caddr_t)inp, sizeof *inp);
168         inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
169         inp->inp_pcbinfo = inp->inp_cpcbinfo = pcbinfo;
170         inp->inp_socket = so;
171 #ifdef IPSEC
172         error = ipsec_init_policy(so, &inp->inp_sp);
173         if (error != 0) {
174                 zfree(pcbinfo->ipi_zone, inp);
175                 return (error);
176         }
177 #endif
178 #ifdef INET6
179         if (INP_SOCKAF(so) == AF_INET6 && ip6_v6only)
180                 inp->inp_flags |= IN6P_IPV6_V6ONLY;
181         if (ip6_auto_flowlabel)
182                 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
183 #endif
184         so->so_pcb = (caddr_t)inp;
185         LIST_INSERT_HEAD(&pcbinfo->listhead, inp, inp_list);
186         pcbinfo->ipi_count++;
187         return (0);
188 }
189
/*
 * Bind a local address and/or local port to inp.
 *
 * nam, when non-NULL, is treated as a struct sockaddr_in carrying the
 * requested address and port.  With a NULL nam, or a zero port in nam,
 * a port is picked automatically from one of the ipport_* ranges
 * (selected by INP_HIGHPORT / INP_LOWPORT in inp_flags).
 *
 * Returns 0 on success, otherwise an errno value (EADDRNOTAVAIL, EINVAL,
 * EACCES, EADDRINUSE, EAGAIN, or the error from suser_cred()).  Every
 * failure return taken after inp_laddr has been stored resets inp_laddr
 * (and, where it was already set, inp_lport) before returning.
 *
 * td->td_proc must be non-NULL; this is asserted with KKASSERT().
 */
190 int
191 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct thread *td)
192 {
193         struct socket *so = inp->inp_socket;
194         struct proc *p = td->td_proc;
195         unsigned short *lastport;
196         struct sockaddr_in *sin;
197         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
198         u_short lport = 0;
199         int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
200         int error, prison = 0;
201
202         KKASSERT(p);
203
204         if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
205                 return (EADDRNOTAVAIL);
206         if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
207                 return (EINVAL);        /* already bound */
        /* wild is later passed as the wild_okay argument of in_pcblookup_local(). */
208         if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
209                 wild = 1;    /* neither SO_REUSEADDR nor SO_REUSEPORT is set */
210         if (nam != NULL) {
211                 sin = (struct sockaddr_in *)nam;
212                 if (nam->sa_len != sizeof *sin)
213                         return (EINVAL);
214 #ifdef notdef
215                 /*
216                  * We should check the family, but old programs
217                  * incorrectly fail to initialize it.
218                  */
219                 if (sin->sin_family != AF_INET)
220                         return (EAFNOSUPPORT);
221 #endif
222                 if (sin->sin_addr.s_addr != INADDR_ANY &&
223                     prison_ip(td, 0, &sin->sin_addr.s_addr))
224                                 return (EINVAL);
225                 lport = sin->sin_port;
226                 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
227                         /*
228                          * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
229                          * allow complete duplication of binding if
230                          * SO_REUSEPORT is set, or if SO_REUSEADDR is set
231                          * and a multicast address is bound on both
232                          * new and duplicated sockets.
233                          */
234                         if (so->so_options & SO_REUSEADDR)
235                                 reuseport = SO_REUSEADDR | SO_REUSEPORT;
236                 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
                        /*
                         * The caller's sockaddr is modified in place here
                         * (port and sin_zero cleared) before the interface
                         * address lookup; the port was saved in lport above.
                         */
237                         sin->sin_port = 0;              /* yech... */
238                         bzero(&sin->sin_zero, sizeof sin->sin_zero);
239                         if (ifa_ifwithaddr((struct sockaddr *)sin) == NULL)
240                                 return (EADDRNOTAVAIL);
241                 }
242                 if (lport != 0) {
243                         struct inpcb *t;
244
245                         /* GROSS */
246                         if (ntohs(lport) < IPPORT_RESERVED &&
247                             p && suser_cred(p->p_ucred, PRISON_ROOT))
248                                 return (EACCES);
249                         if (p && p->p_ucred->cr_prison)
250                                 prison = 1;
                        /*
                         * First conflict check, only for sockets whose owner
                         * is not uid 0 and only for non-multicast addresses:
                         * refuse a binding that clashes with a PCB owned by
                         * a different uid.
                         */
251                         if (so->so_cred->cr_uid != 0 &&
252                             !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
253                                 t = in_pcblookup_local(inp->inp_pcbinfo,
254                                     sin->sin_addr, lport,
255                                     prison ? 0 : INPLOOKUP_WILDCARD);
256                                 if (t &&
257                                     (!in_nullhost(sin->sin_addr) ||
258                                      !in_nullhost(t->inp_laddr) ||
259                                      (t->inp_socket->so_options &
260                                          SO_REUSEPORT) == 0) &&
261                                     (so->so_cred->cr_uid !=
262                                      t->inp_socket->so_cred->cr_uid)) {
263 #ifdef INET6
264                                         if (!in_nullhost(sin->sin_addr) ||
265                                             !in_nullhost(t->inp_laddr) ||
266                                             INP_SOCKAF(so) ==
267                                             INP_SOCKAF(t->inp_socket))
268 #endif
269                                         return (EADDRINUSE);
270                                 }
271                         }
272                         if (prison && prison_ip(td, 0, &sin->sin_addr.s_addr))
273                                 return (EADDRNOTAVAIL);
                        /*
                         * Second conflict check, for every caller: an existing
                         * PCB blocks the bind unless it shares one of the
                         * reuse options collected in reuseport.
                         */
274                         t = in_pcblookup_local(pcbinfo, sin->sin_addr,
275                             lport, prison ? 0 : wild);
276                         if (t && !(reuseport & t->inp_socket->so_options)) {
277 #ifdef INET6
278                                 if (!in_nullhost(sin->sin_addr) ||
279                                     !in_nullhost(t->inp_laddr) ||
280                                     INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket))
281 #endif
282                                 return (EADDRINUSE);
283                         }
284                 }
285                 inp->inp_laddr = sin->sin_addr;
286         }
        /* No explicit port requested: choose one automatically. */
287         if (lport == 0) {
288                 ushort first, last;
289                 int count;
290
291                 if (inp->inp_laddr.s_addr != INADDR_ANY &&
292                     prison_ip(td, 0, &inp->inp_laddr.s_addr )) {
293                         inp->inp_laddr.s_addr = INADDR_ANY;
294                         return (EINVAL);
295                 }
296                 inp->inp_flags |= INP_ANONPORT;
297
                /*
                 * Pick the range and the per-pcbinfo "last port handed out"
                 * cursor that goes with it.
                 */
298                 if (inp->inp_flags & INP_HIGHPORT) {
299                         first = ipport_hifirstauto;     /* sysctl */
300                         last  = ipport_hilastauto;
301                         lastport = &pcbinfo->lasthi;
302                 } else if (inp->inp_flags & INP_LOWPORT) {
303                         if (p &&
304                             (error = suser_cred(p->p_ucred, PRISON_ROOT))) {
305                                 inp->inp_laddr.s_addr = INADDR_ANY;
306                                 return (error);
307                         }
308                         first = ipport_lowfirstauto;    /* 1023 */
309                         last  = ipport_lowlastauto;     /* 600 */
310                         lastport = &pcbinfo->lastlow;
311                 } else {
312                         first = ipport_firstauto;       /* sysctl */
313                         last  = ipport_lastauto;
314                         lastport = &pcbinfo->lastport;
315                 }
316                 /*
317                  * Simple check to ensure all ports are not used up causing
318                  * a deadlock here.
319                  *
320                  * We split the two cases (up and down) so that the direction
321                  * is not being tested on each round of the loop.
322                  */
323                 if (first > last) {
324                         /*
325                          * counting down
326                          */
327                         count = first - last;
328
329                         do {
330                                 if (count-- < 0) {      /* completely used? */
331                                         inp->inp_laddr.s_addr = INADDR_ANY;
332                                         return (EADDRNOTAVAIL);
333                                 }
334                                 --*lastport;
                                /* Cursor left the range: wrap back to first. */
335                                 if (*lastport > first || *lastport < last)
336                                         *lastport = first;
337                                 lport = htons(*lastport);
338                         } while (in_pcblookup_local(pcbinfo,
339                                  inp->inp_laddr, lport, wild));
340                 } else {
341                         /*
342                          * counting up
343                          */
344                         count = last - first;
345
346                         do {
347                                 if (count-- < 0) {      /* completely used? */
348                                         inp->inp_laddr.s_addr = INADDR_ANY;
349                                         return (EADDRNOTAVAIL);
350                                 }
351                                 ++*lastport;
                                /* Cursor left the range: wrap back to first. */
352                                 if (*lastport < first || *lastport > last)
353                                         *lastport = first;
354                                 lport = htons(*lastport);
355                         } while (in_pcblookup_local(pcbinfo,
356                                  inp->inp_laddr, lport, wild));
357                 }
358         }
359         inp->inp_lport = lport;
360         if (prison_ip(td, 0, &inp->inp_laddr.s_addr)) {
361                 inp->inp_laddr.s_addr = INADDR_ANY;
362                 inp->inp_lport = 0;
363                 return (EINVAL);
364         }
        /* If in_pcbinsporthash() fails, undo the binding and report EAGAIN. */
365         if (in_pcbinsporthash(inp) != 0) {
366                 inp->inp_laddr.s_addr = INADDR_ANY;
367                 inp->inp_lport = 0;
368                 return (EAGAIN);
369         }
370         return (0);
371 }
372
373 /*
374  *   Transform old in_pcbconnect() into an inner subroutine for new
375  *   in_pcbconnect(): Do some validity-checking on the remote
376  *   address (in mbuf 'nam') and then determine local host address
377  *   (i.e., which interface) to use to access that remote host.
378  *
379  *   This preserves definition of in_pcbconnect(), while supporting a
380  *   slightly different version for T/TCP.  (This is more than
381  *   a bit of a kludge, but cleaning up the internal interfaces would
382  *   have forced minor changes in every protocol).
383  */
384 int
385 in_pcbladdr(inp, nam, plocal_sin)
386         struct inpcb *inp;
387         struct sockaddr *nam;
388         struct sockaddr_in **plocal_sin;
389 {
390         struct in_ifaddr *ia;
391         struct sockaddr_in *sin = (struct sockaddr_in *)nam;
392
393         if (nam->sa_len != sizeof *sin)
394                 return (EINVAL);
395         if (sin->sin_family != AF_INET)
396                 return (EAFNOSUPPORT);
397         if (sin->sin_port == 0)
398                 return (EADDRNOTAVAIL);
399         if (!TAILQ_EMPTY(&in_ifaddrhead)) {
400                 ia = TAILQ_FIRST(&in_ifaddrhead);
401                 /*
402                  * If the destination address is INADDR_ANY,
403                  * use the primary local address.
404                  * If the supplied address is INADDR_BROADCAST,
405                  * and the primary interface supports broadcast,
406                  * choose the broadcast address for that interface.
407                  */
408                 if (sin->sin_addr.s_addr == INADDR_ANY)
409                         sin->sin_addr = IA_SIN(ia)->sin_addr;
410                 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
411                     (ia->ia_ifp->if_flags & IFF_BROADCAST))
412                         sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr;
413         }
414         if (inp->inp_laddr.s_addr == INADDR_ANY) {
415                 struct route *ro;
416
417                 ia = (struct in_ifaddr *)NULL;
418                 /*
419                  * If route is known or can be allocated now,
420                  * our src addr is taken from the i/f, else punt.
421                  * Note that we should check the address family of the cached
422                  * destination, in case of sharing the cache with IPv6.
423                  */
424                 ro = &inp->inp_route;
425                 if (ro->ro_rt &&
426                     (!(ro->ro_rt->rt_flags & RTF_UP) ||
427                      ro->ro_dst.sa_family != AF_INET ||
428                      satosin(&ro->ro_dst)->sin_addr.s_addr !=
429                          sin->sin_addr.s_addr ||
430                      inp->inp_socket->so_options & SO_DONTROUTE)) {
431                         RTFREE(ro->ro_rt);
432                         ro->ro_rt = (struct rtentry *)NULL;
433                 }
434                 if (!(inp->inp_socket->so_options & SO_DONTROUTE) && /*XXX*/
435                     (ro->ro_rt == (struct rtentry *)NULL ||
436                     ro->ro_rt->rt_ifp == (struct ifnet *)NULL)) {
437                         /* No route yet, so try to acquire one */
438                         bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
439                         ro->ro_dst.sa_family = AF_INET;
440                         ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
441                         ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
442                                 sin->sin_addr;
443                         rtalloc(ro);
444                 }
445                 /*
446                  * If we found a route, use the address
447                  * corresponding to the outgoing interface
448                  * unless it is the loopback (in case a route
449                  * to our address on another net goes to loopback).
450                  */
451                 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
452                         ia = ifatoia(ro->ro_rt->rt_ifa);
453                 if (ia == NULL) {
454                         u_short fport = sin->sin_port;
455
456                         sin->sin_port = 0;
457                         ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
458                         if (ia == NULL)
459                                 ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
460                         sin->sin_port = fport;
461                         if (ia == NULL)
462                                 ia = TAILQ_FIRST(&in_ifaddrhead);
463                         if (ia == NULL)
464                                 return (EADDRNOTAVAIL);
465                 }
466                 /*
467                  * If the destination address is multicast and an outgoing
468                  * interface has been set as a multicast option, use the
469                  * address of that interface as our source address.
470                  */
471                 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
472                     inp->inp_moptions != NULL) {
473                         struct ip_moptions *imo;
474                         struct ifnet *ifp;
475
476                         imo = inp->inp_moptions;
477                         if (imo->imo_multicast_ifp != NULL) {
478                                 ifp = imo->imo_multicast_ifp;
479                                 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link)
480                                         if (ia->ia_ifp == ifp)
481                                                 break;
482                                 if (ia == NULL)
483                                         return (EADDRNOTAVAIL);
484                         }
485                 }
486                 /*
487                  * Don't do pcblookup call here; return interface in plocal_sin
488                  * and exit to caller, that will do the lookup.
489                  */
490                 *plocal_sin = &ia->ia_addr;
491
492         }
493         return (0);
494 }
495
496 /*
497  * Outer subroutine:
498  * Connect from a socket to a specified address.
499  * Both address and port must be specified in argument sin.
500  * If don't have a local address for this socket yet,
501  * then pick one.
502  */
503 int
504 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct thread *td)
505 {
506         struct sockaddr_in *if_sin;
507         struct sockaddr_in *sin = (struct sockaddr_in *)nam;
508         struct sockaddr_in sa;
509         struct ucred *cr = td->td_proc ? td->td_proc->p_ucred : NULL;
510         int error;
511
512         if (cr && cr->cr_prison != NULL && in_nullhost(inp->inp_laddr)) {
513                 bzero(&sa, sizeof sa);
514                 sa.sin_addr.s_addr = htonl(cr->cr_prison->pr_ip);
515                 sa.sin_len = sizeof sa;
516                 sa.sin_family = AF_INET;
517                 error = in_pcbbind(inp, (struct sockaddr *)&sa, td);
518                 if (error)
519                         return (error);
520         }
521
522         /* Call inner routine to assign local interface address. */
523         if ((error = in_pcbladdr(inp, nam, &if_sin)) != 0)
524                 return (error);
525
526         if (in_pcblookup_hash(inp->inp_cpcbinfo, sin->sin_addr, sin->sin_port,
527             inp->inp_laddr.s_addr ? inp->inp_laddr : if_sin->sin_addr,
528             inp->inp_lport, FALSE, NULL) != NULL) {
529                 return (EADDRINUSE);
530         }
531         if (inp->inp_laddr.s_addr == INADDR_ANY) {
532                 if (inp->inp_lport == 0) {
533                         error = in_pcbbind(inp, (struct sockaddr *)NULL, td);
534                         if (error)
535                                 return (error);
536                 }
537                 inp->inp_laddr = if_sin->sin_addr;
538         }
539         inp->inp_faddr = sin->sin_addr;
540         inp->inp_fport = sin->sin_port;
541         in_pcbinsconnhash(inp);
542         return (0);
543 }
544
545 void
546 in_pcbdisconnect(inp)
547         struct inpcb *inp;
548 {
549
550         inp->inp_faddr.s_addr = INADDR_ANY;
551         inp->inp_fport = 0;
552         in_pcbremconnhash(inp);
553         if (inp->inp_socket->so_state & SS_NOFDREF)
554                 in_pcbdetach(inp);
555 }
556
557 void
558 in_pcbdetach(inp)
559         struct inpcb *inp;
560 {
561         struct socket *so = inp->inp_socket;
562         struct inpcbinfo *ipi = inp->inp_pcbinfo;
563
564 #ifdef IPSEC
565         ipsec4_delete_pcbpolicy(inp);
566 #endif /*IPSEC*/
567         inp->inp_gencnt = ++ipi->ipi_gencnt;
568         in_pcbremlists(inp);
569         so->so_pcb = 0;
570         sofree(so);
571         if (inp->inp_options)
572                 (void)m_free(inp->inp_options);
573         if (inp->inp_route.ro_rt)
574                 rtfree(inp->inp_route.ro_rt);
575         ip_freemoptions(inp->inp_moptions);
576         inp->inp_vflag = 0;
577         zfree(ipi->ipi_zone, inp);
578 }
579
580 /*
581  * The calling convention of in_setsockaddr() and in_setpeeraddr() was
582  * modified to match the pru_sockaddr() and pru_peeraddr() entry points
583  * in struct pr_usrreqs, so that protocols can just reference then directly
584  * without the need for a wrapper function.  The socket must have a valid
585  * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
586  * except through a kernel programming error, so it is acceptable to panic
587  * (or in this case trap) if the PCB is invalid.  (Actually, we don't trap
588  * because there actually /is/ a programming error somewhere... XXX)
589  */
590 int
591 in_setsockaddr(so, nam)
592         struct socket *so;
593         struct sockaddr **nam;
594 {
595         int s;
596         struct inpcb *inp;
597         struct sockaddr_in *sin;
598
599         /*
600          * Do the malloc first in case it blocks.
601          */
602         MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
603                 M_WAITOK | M_ZERO);
604         sin->sin_family = AF_INET;
605         sin->sin_len = sizeof *sin;
606
607         s = splnet();
608         inp = sotoinpcb(so);
609         if (!inp) {
610                 splx(s);
611                 free(sin, M_SONAME);
612                 return (ECONNRESET);
613         }
614         sin->sin_port = inp->inp_lport;
615         sin->sin_addr = inp->inp_laddr;
616         splx(s);
617
618         *nam = (struct sockaddr *)sin;
619         return (0);
620 }
621
622 int
623 in_setpeeraddr(so, nam)
624         struct socket *so;
625         struct sockaddr **nam;
626 {
627         int s;
628         struct inpcb *inp;
629         struct sockaddr_in *sin;
630
631         /*
632          * Do the malloc first in case it blocks.
633          */
634         MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
635                 M_WAITOK | M_ZERO);
636         sin->sin_family = AF_INET;
637         sin->sin_len = sizeof *sin;
638
639         s = splnet();
640         inp = sotoinpcb(so);
641         if (!inp) {
642                 splx(s);
643                 free(sin, M_SONAME);
644                 return (ECONNRESET);
645         }
646         sin->sin_port = inp->inp_fport;
647         sin->sin_addr = inp->inp_faddr;
648         splx(s);
649
650         *nam = (struct sockaddr *)sin;
651         return (0);
652 }
653
654 void
655 in_pcbnotifyall(head, faddr, errno, notify)
656         struct inpcbhead *head;
657         struct in_addr faddr;
658         void (*notify) (struct inpcb *, int);
659 {
660         struct inpcb *inp, *ninp;
661         int s;
662
663         s = splnet();
664         for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
665                 ninp = LIST_NEXT(inp, inp_list);
666 #ifdef INET6
667                 if (!(inp->inp_vflag & INP_IPV4))
668                         continue;
669 #endif
670                 if (inp->inp_faddr.s_addr != faddr.s_addr ||
671                     inp->inp_socket == NULL)
672                         continue;
673                 (*notify)(inp, errno);
674         }
675         splx(s);
676 }
677
678 void
679 in_pcbpurgeif0(head, ifp)
680         struct inpcb *head;
681         struct ifnet *ifp;
682 {
683         struct inpcb *inp;
684         struct ip_moptions *imo;
685         int i, gap;
686
687         for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) {
688                 imo = inp->inp_moptions;
689                 if ((inp->inp_vflag & INP_IPV4) && imo != NULL) {
690                         /*
691                          * Unselect the outgoing interface if it is being
692                          * detached.
693                          */
694                         if (imo->imo_multicast_ifp == ifp)
695                                 imo->imo_multicast_ifp = NULL;
696
697                         /*
698                          * Drop multicast group membership if we joined
699                          * through the interface being detached.
700                          */
701                         for (i = 0, gap = 0; i < imo->imo_num_memberships;
702                             i++) {
703                                 if (imo->imo_membership[i]->inm_ifp == ifp) {
704                                         in_delmulti(imo->imo_membership[i]);
705                                         gap++;
706                                 } else if (gap != 0)
707                                         imo->imo_membership[i - gap] =
708                                             imo->imo_membership[i];
709                         }
710                         imo->imo_num_memberships -= gap;
711                 }
712         }
713 }
714
715 /*
716  * Check for alternatives when higher level complains
717  * about service problems.  For now, invalidate cached
718  * routing information.  If the route was created dynamically
719  * (by a redirect), time to try a default gateway again.
720  */
721 void
722 in_losing(inp)
723         struct inpcb *inp;
724 {
725         struct rtentry *rt;
726         struct rt_addrinfo info;
727
728         if ((rt = inp->inp_route.ro_rt)) {
729                 bzero((caddr_t)&info, sizeof info);
730                 info.rti_flags = rt->rt_flags;
731                 info.rti_info[RTAX_DST] = rt_key(rt);
732                 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
733                 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
734                 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
735                 if (rt->rt_flags & RTF_DYNAMIC)
736                         (void) rtrequest1(RTM_DELETE, &info, NULL);
737                 inp->inp_route.ro_rt = NULL;
738                 rtfree(rt);
739                 /*
740                  * A new route can be allocated
741                  * the next time output is attempted.
742                  */
743         }
744 }
745
746 /*
747  * After a routing change, flush old routing
748  * and allocate a (hopefully) better one.
749  */
750 void
751 in_rtchange(inp, errno)
752         struct inpcb *inp;
753         int errno;
754 {
755         if (inp->inp_route.ro_rt) {
756                 rtfree(inp->inp_route.ro_rt);
757                 inp->inp_route.ro_rt = 0;
758                 /*
759                  * A new route can be allocated the next time
760                  * output is attempted.
761                  */
762         }
763 }
764
765 /*
766  * Lookup a PCB based on the local address and port.
767  */
768 struct inpcb *
769 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
770         struct inpcbinfo *pcbinfo;
771         struct in_addr laddr;
772         u_int lport_arg;
773         int wild_okay;
774 {
775         struct inpcb *inp;
776         int matchwild = 3, wildcard;
777         u_short lport = lport_arg;
778
779         struct inpcbporthead *porthash;
780         struct inpcbport *phd;
781         struct inpcb *match = NULL;
782
783         /*
784          * Best fit PCB lookup.
785          *
786          * First see if this local port is in use by looking on the
787          * port hash list.
788          */
789         porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
790             pcbinfo->porthashmask)];
791         LIST_FOREACH(phd, porthash, phd_hash) {
792                 if (phd->phd_port == lport)
793                         break;
794         }
795         if (phd != NULL) {
796                 /*
797                  * Port is in use by one or more PCBs. Look for best
798                  * fit.
799                  */
800                 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
801                         wildcard = 0;
802 #ifdef INET6
803                         if ((inp->inp_vflag & INP_IPV4) == 0)
804                                 continue;
805 #endif
806                         if (inp->inp_faddr.s_addr != INADDR_ANY)
807                                 wildcard++;
808                         if (inp->inp_laddr.s_addr != INADDR_ANY) {
809                                 if (laddr.s_addr == INADDR_ANY)
810                                         wildcard++;
811                                 else if (inp->inp_laddr.s_addr != laddr.s_addr)
812                                         continue;
813                         } else {
814                                 if (laddr.s_addr != INADDR_ANY)
815                                         wildcard++;
816                         }
817                         if (wildcard && !wild_okay)
818                                 continue;
819                         if (wildcard < matchwild) {
820                                 match = inp;
821                                 matchwild = wildcard;
822                                 if (matchwild == 0) {
823                                         break;
824                                 }
825                         }
826                 }
827         }
828         return (match);
829 }
830
831 /*
832  * Lookup PCB in hash list.
833  */
834 struct inpcb *
835 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp)
836         struct inpcbinfo *pcbinfo;
837         struct in_addr faddr, laddr;
838         u_int fport_arg, lport_arg;
839         boolean_t wildcard;
840         struct ifnet *ifp;
841 {
842         struct inpcbhead *head;
843         struct inpcb *inp;
844         u_short fport = fport_arg, lport = lport_arg;
845
846         /*
847          * First look for an exact match.
848          */
849         head = &pcbinfo->hashbase[INP_PCBCONNHASH(faddr.s_addr, fport,
850             laddr.s_addr, lport, pcbinfo->hashmask)];
851         LIST_FOREACH(inp, head, inp_hash) {
852 #ifdef INET6
853                 if (!(inp->inp_vflag & INP_IPV4))
854                         continue;
855 #endif
856                 if (in_hosteq(inp->inp_faddr, faddr) &&
857                     in_hosteq(inp->inp_laddr, laddr) &&
858                     inp->inp_fport == fport && inp->inp_lport == lport) {
859                         /* found */
860                         return (inp);
861                 }
862         }
863
864         if (wildcard) {
865                 struct inpcb *local_wild = NULL;
866 #ifdef INET6
867                 struct inpcb *local_wild_mapped = NULL;
868 #endif
869                 struct inpcontainer *ic;
870                 struct inpcontainerhead *chead;
871
872                 chead = &pcbinfo->wildcardhashbase[
873                     INP_PCBWILDCARDHASH(lport, pcbinfo->wildcardhashmask)];
874                 LIST_FOREACH(ic, chead, ic_list) {
875                         inp = ic->ic_inp;
876 #ifdef INET6
877                         if (!(inp->inp_vflag & INP_IPV4))
878                                 continue;
879 #endif
880                         if (inp->inp_lport == lport) {
881                                 if (ifp && ifp->if_type == IFT_FAITH &&
882                                     !(inp->inp_flags & INP_FAITH))
883                                         continue;
884                                 if (inp->inp_laddr.s_addr == laddr.s_addr)
885                                         return (inp);
886                                 if (inp->inp_laddr.s_addr == INADDR_ANY) {
887 #ifdef INET6
888                                         if (INP_CHECK_SOCKAF(inp->inp_socket,
889                                                              AF_INET6))
890                                                 local_wild_mapped = inp;
891                                         else
892 #endif
893                                                 local_wild = inp;
894                                 }
895                         }
896                 }
897 #ifdef INET6
898                 if (local_wild == NULL)
899                         return (local_wild_mapped);
900 #endif
901                 return (local_wild);
902         }
903
904         /*
905          * Not found.
906          */
907         return (NULL);
908 }
909
910 /*
911  * Insert PCB into connection hash table.
912  */
913 void
914 in_pcbinsconnhash(struct inpcb *inp)
915 {
916         struct inpcbinfo *pcbinfo = inp->inp_cpcbinfo;
917         struct inpcbhead *bucket;
918         u_int32_t hashkey_faddr, hashkey_laddr;
919
920 #ifdef INET6
921         if (inp->inp_vflag & INP_IPV6) {
922                 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX JH */;
923                 hashkey_laddr = inp->in6p_laddr.s6_addr32[3] /* XXX JH */;
924         } else {
925 #endif
926                 hashkey_faddr = inp->inp_faddr.s_addr;
927                 hashkey_laddr = inp->inp_laddr.s_addr;
928 #ifdef INET6
929         }
930 #endif
931
932         KASSERT(!(inp->inp_flags & (INP_WILDCARD | INP_CONNECTED)),
933             ("already on hash list"));
934         inp->inp_flags |= INP_CONNECTED;
935
936         /*
937          * Insert into the connection hash table.
938          */
939         bucket = &pcbinfo->hashbase[INP_PCBCONNHASH(hashkey_faddr,
940             inp->inp_fport, hashkey_laddr, inp->inp_lport, pcbinfo->hashmask)];
941         LIST_INSERT_HEAD(bucket, inp, inp_hash);
942 }
943
944 /*
945  * Remove PCB from connection hash table.
946  */
947 void
948 in_pcbremconnhash(struct inpcb *inp)
949 {
950         KASSERT(inp->inp_flags & INP_CONNECTED, ("inp not connected"));
951         LIST_REMOVE(inp, inp_hash);
952         inp->inp_flags &= ~INP_CONNECTED;
953 }
954
955 /*
956  * Insert PCB into port hash table.
957  */
958 int
959 in_pcbinsporthash(struct inpcb *inp)
960 {
961         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
962         struct inpcbporthead *pcbporthash;
963         struct inpcbport *phd;
964
965         /*
966          * Insert into the port hash table.
967          */
968         pcbporthash = &pcbinfo->porthashbase[
969             INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)];
970
971         /* Go through port list and look for a head for this lport. */
972         LIST_FOREACH(phd, pcbporthash, phd_hash)
973                 if (phd->phd_port == inp->inp_lport)
974                         break;
975
976         /* If none exists, malloc one and tack it on. */
977         if (phd == NULL) {
978                 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport),
979                     M_PCB, M_NOWAIT);
980                 if (phd == NULL)
981                         return (ENOBUFS); /* XXX */
982                 phd->phd_port = inp->inp_lport;
983                 LIST_INIT(&phd->phd_pcblist);
984                 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
985         }
986
987         inp->inp_phd = phd;
988         LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
989
990         return (0);
991 }
992
993 /*
994  * Insert PCB into wildcard hash table.
995  */
996 void
997 in_pcbinswildcardhash(struct inpcb *inp)
998 {
999         struct inpcontainer *ic;
1000         struct inpcontainerhead *bucket;
1001         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1002
1003         bucket = &pcbinfo->wildcardhashbase[
1004             INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)];
1005
1006         ic = malloc(sizeof(struct inpcontainer), M_TEMP, M_WAITOK);
1007         ic->ic_inp = inp;
1008         LIST_INSERT_HEAD(bucket, ic, ic_list);
1009
1010         inp->inp_flags |= INP_WILDCARD;
1011 }
1012
1013 /*
1014  * Remove PCB from wildcard hash table.
1015  */
1016 void
1017 in_pcbremwildcardhash(struct inpcb *inp)
1018 {
1019         struct inpcontainer *ic;
1020         struct inpcontainerhead *head;
1021         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1022
1023         KASSERT(inp->inp_flags & INP_WILDCARD, ("inp not wildcard"));
1024
1025         /* find bucket */
1026         head = &pcbinfo->wildcardhashbase[
1027             INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)];
1028         LIST_FOREACH(ic, head, ic_list) {
1029                 if (ic->ic_inp == inp)
1030                         goto found;
1031         }
1032         return;                 /* not found! */
1033
1034 found:
1035         /* remove container from bucket chain */
1036         LIST_REMOVE(ic, ic_list);
1037
1038         /* deallocate container */
1039         free(ic, M_TEMP);
1040
1041         inp->inp_flags &= ~INP_WILDCARD;
1042 }
1043
1044 /*
1045  * Remove PCB from various lists.
1046  */
1047 void
1048 in_pcbremlists(inp)
1049         struct inpcb *inp;
1050 {
1051         if (inp->inp_lport) {
1052                 struct inpcbport *phd = inp->inp_phd;
1053
1054                 LIST_REMOVE(inp, inp_portlist);
1055                 if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
1056                         LIST_REMOVE(phd, phd_hash);
1057                         free(phd, M_PCB);
1058                 }
1059         }
1060         if (inp->inp_flags & INP_WILDCARD) {
1061                 in_pcbremwildcardhash(inp);
1062         } else if (inp->inp_flags & INP_CONNECTED) {
1063                 in_pcbremconnhash(inp);
1064         }
1065         LIST_REMOVE(inp, inp_list);
1066         inp->inp_pcbinfo->ipi_count--;
1067 }
1068
1069 int
1070 prison_xinpcb(struct thread *td, struct inpcb *inp)
1071 {
1072         struct ucred *cr;
1073
1074         if (td->td_proc == NULL)
1075                 return (0);
1076         cr = td->td_proc->p_ucred;
1077         if (cr->cr_prison == NULL)
1078                 return (0);
1079         if (ntohl(inp->inp_laddr.s_addr) == cr->cr_prison->pr_ip)
1080                 return (0);
1081         return (1);
1082 }