The cam_sim structure was being deallocated unconditionally by device
[dragonfly.git] / sys / netinet / in_pcb.c
1 /*
2  * Copyright (c) 2004 Jeffrey Hsu.  All rights reserved.
3  * Copyright (c) 1982, 1986, 1991, 1993, 1995
4  *      The Regents of the University of California.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *      This product includes software developed by the University of
17  *      California, Berkeley and its contributors.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *      @(#)in_pcb.c    8.4 (Berkeley) 5/24/95
35  * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.27 2004/01/02 04:06:42 ambrisko Exp $
36  * $DragonFly: src/sys/netinet/in_pcb.c,v 1.14 2004/03/06 05:00:41 hsu Exp $
37  */
38
39 #include "opt_ipsec.h"
40 #include "opt_inet6.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/domain.h>
47 #include <sys/protosw.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/proc.h>
51 #include <sys/jail.h>
52 #include <sys/kernel.h>
53 #include <sys/sysctl.h>
54
55 #include <machine/limits.h>
56
57 #include <vm/vm_zone.h>
58
59 #include <net/if.h>
60 #include <net/if_types.h>
61 #include <net/route.h>
62
63 #include <netinet/in.h>
64 #include <netinet/in_pcb.h>
65 #include <netinet/in_var.h>
66 #include <netinet/ip_var.h>
67 #ifdef INET6
68 #include <netinet/ip6.h>
69 #include <netinet6/ip6_var.h>
70 #endif /* INET6 */
71
72 #ifdef IPSEC
73 #include <netinet6/ipsec.h>
74 #include <netproto/key/key.h>
75 #endif
76
77 #ifdef FAST_IPSEC
78 #if defined(IPSEC) || defined(IPSEC_ESP)
79 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!"
80 #endif
81
82 #include <netipsec/ipsec.h>
83 #include <netipsec/key.h>
84 #define IPSEC
85 #endif /* FAST_IPSEC */
86
/* File-scope in_addr with no initializer, so it starts out all-zero. */
87 struct in_addr zeroin_addr;
88
89 /*
90  * These configure the range of local port addresses assigned to
91  * "unspecified" outgoing connections/packets/whatever.
92  */
/*
 * Range used by in_pcbbind() for INP_LOWPORT sockets.  By default
 * first > last, which makes in_pcbbind() search this range downward.
 */
93 int ipport_lowfirstauto = IPPORT_RESERVED - 1;  /* 1023 */
94 int ipport_lowlastauto = IPPORT_RESERVEDSTART;  /* 600 */
95
/* Range used by in_pcbbind() when neither INP_LOWPORT nor INP_HIGHPORT is set. */
96 int ipport_firstauto = IPPORT_RESERVED;         /* 1024 */
97 int ipport_lastauto = IPPORT_USERRESERVED;      /* 5000 */
98
/* Range used by in_pcbbind() for INP_HIGHPORT sockets. */
99 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO;    /* 49152 */
100 int ipport_hilastauto = IPPORT_HILASTAUTO;      /* 65535 */
101
/*
 * Clamp the int that 'var' points at into the inclusive range [min, max].
 * Backing helper for RANGECHK(); it takes a pointer so that the caller's
 * variable is the object that gets modified.
 */
static __inline void
rangechk_clamp(int *var, int min, int max)
{
        if (*var < min)
                *var = min;
        else if (*var > max)
                *var = max;
}

/*
 * RANGECHK(var, min, max): force the int lvalue 'var' into [min, max].
 *
 * A plain function taking 'var' by value cannot do this: it would clamp
 * only its private copy and leave the caller's variable untouched.  The
 * macro keeps the RANGECHK(x, lo, hi) call syntax while handing the
 * address of the lvalue to the helper above.
 */
#define RANGECHK(var, min, max) rangechk_clamp(&(var), (min), (max))
110
111 static int
112 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
113 {
114         int error;
115
116         error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
117         if (!error) {
118                 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
119                 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
120
121                 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
122                 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
123
124                 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
125                 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
126         }
127         return (error);
128 }
129
/*
 * sysctl declarations: a "portrange" node under _net_inet_ip, followed by
 * one read/write integer entry per ipport_* variable.  Every entry names
 * sysctl_net_ipport_check() as its handler and passes the address of the
 * variable it controls.
 */
130 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
131
132 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
133            &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
134 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
135            &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
136 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
137            &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
138 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
139            &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
140 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
141            &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
142 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
143            &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
144
145 /*
146  * in_pcb.c: manage the Protocol Control Blocks.
147  *
148  * NOTE: It is assumed that most of these functions will be called at
149  * splnet(). XXX - There are, unfortunately, a few exceptions to this
150  * rule that should be fixed.
151  */
152
153 /*
154  * Allocate a PCB and associate it with the socket.
155  */
156 int
157 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
158 {
159         struct inpcb *inp;
160 #ifdef IPSEC
161         int error;
162 #endif
163
164         inp = zalloc(pcbinfo->ipi_zone);
165         if (inp == NULL)
166                 return (ENOBUFS);
167         bzero((caddr_t)inp, sizeof *inp);
168         inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
169         inp->inp_pcbinfo = pcbinfo;
170         inp->inp_socket = so;
171 #ifdef IPSEC
172         error = ipsec_init_policy(so, &inp->inp_sp);
173         if (error != 0) {
174                 zfree(pcbinfo->ipi_zone, inp);
175                 return (error);
176         }
177 #endif
178 #ifdef INET6
179         if (INP_SOCKAF(so) == AF_INET6 && ip6_v6only)
180                 inp->inp_flags |= IN6P_IPV6_V6ONLY;
181         if (ip6_auto_flowlabel)
182                 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
183 #endif
184         so->so_pcb = (caddr_t)inp;
185         LIST_INSERT_HEAD(&pcbinfo->listhead, inp, inp_list);
186         pcbinfo->ipi_count++;
187         return (0);
188 }
189
/*
 * Bind a local address and/or local port to 'inp'.
 *
 * inp: PCB to bind; it must not already have a local port or address.
 * nam: optional sockaddr_in giving the requested address and port.  When
 *      it is NULL, or its port is 0, a port is chosen automatically.
 * td:  calling thread; td->td_proc is passed to KKASSERT() below.
 *
 * Returns 0 on success.  The failure values visible in this function are
 * EADDRNOTAVAIL, EINVAL, EACCES, EADDRINUSE, EAGAIN, and whatever
 * suser_cred() returns on the INP_LOWPORT path.
 */
190 int
191 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct thread *td)
192 {
193         struct socket *so = inp->inp_socket;
194         struct proc *p = td->td_proc;
195         unsigned short *lastport;
196         struct sockaddr_in *sin;
197         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
198         u_short lport = 0;
199         int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
200         int error, prison = 0;
201
202         KKASSERT(p);
203
204         if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
205                 return (EADDRNOTAVAIL);
206         if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
207                 return (EINVAL);        /* already bound */
208         if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
209                 wild = 1;    /* neither SO_REUSEADDR nor SO_REUSEPORT is set */
210         if (nam != NULL) {
211                 sin = (struct sockaddr_in *)nam;
212                 if (nam->sa_len != sizeof *sin)
213                         return (EINVAL);
214 #ifdef notdef
215                 /*
216                  * We should check the family, but old programs
217                  * incorrectly fail to initialize it.
218                  */
219                 if (sin->sin_family != AF_INET)
220                         return (EAFNOSUPPORT);
221 #endif
222                 if (sin->sin_addr.s_addr != INADDR_ANY &&
223                     prison_ip(td, 0, &sin->sin_addr.s_addr))
224                                 return (EINVAL);
                /*
                 * lport is kept exactly as supplied in sin_port; it is
                 * converted with ntohs() only for the reserved-port test.
                 */
225                 lport = sin->sin_port;
226                 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
227                         /*
228                          * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
229                          * allow complete duplication of binding if
230                          * SO_REUSEPORT is set, or if SO_REUSEADDR is set
231                          * and a multicast address is bound on both
232                          * new and duplicated sockets.
233                          */
234                         if (so->so_options & SO_REUSEADDR)
235                                 reuseport = SO_REUSEADDR | SO_REUSEPORT;
236                 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
                        /*
                         * The caller's sockaddr is modified in place: port
                         * and sin_zero are cleared before the lookup.
                         * NOTE(review): presumably ifa_ifwithaddr() compares
                         * the whole sockaddr -- confirm.
                         */
237                         sin->sin_port = 0;              /* yech... */
238                         bzero(&sin->sin_zero, sizeof sin->sin_zero);
239                         if (ifa_ifwithaddr((struct sockaddr *)sin) == NULL)
240                                 return (EADDRNOTAVAIL);
241                 }
242                 if (lport != 0) {
243                         struct inpcb *t;
244
245                         /* GROSS */
246                         if (ntohs(lport) < IPPORT_RESERVED &&
247                             p && suser_cred(p->p_ucred, PRISON_ROOT))
248                                 return (EACCES);
249                         if (p && p->p_ucred->cr_prison)
250                                 prison = 1;
                        /*
                         * Socket owner is not uid 0 and the address is not
                         * multicast: refuse the bind when a PCB found for
                         * this port belongs to a different uid and the
                         * address / SO_REUSEPORT conditions below hold.
                         */
251                         if (so->so_cred->cr_uid != 0 &&
252                             !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
253                                 t = in_pcblookup_local(inp->inp_pcbinfo,
254                                     sin->sin_addr, lport,
255                                     prison ? 0 : INPLOOKUP_WILDCARD);
256                                 if (t &&
257                                     (!in_nullhost(sin->sin_addr) ||
258                                      !in_nullhost(t->inp_laddr) ||
259                                      (t->inp_socket->so_options &
260                                          SO_REUSEPORT) == 0) &&
261                                     (so->so_cred->cr_uid !=
262                                      t->inp_socket->so_cred->cr_uid)) {
263 #ifdef INET6
264                                         if (!in_nullhost(sin->sin_addr) ||
265                                             !in_nullhost(t->inp_laddr) ||
266                                             INP_SOCKAF(so) ==
267                                             INP_SOCKAF(t->inp_socket))
268 #endif
269                                         return (EADDRINUSE);
270                                 }
271                         }
272                         if (prison && prison_ip(td, 0, &sin->sin_addr.s_addr))
273                                 return (EADDRNOTAVAIL);
                        /*
                         * General conflict check: an existing PCB on this
                         * port blocks the bind unless it shares one of the
                         * option bits held in 'reuseport'.
                         */
274                         t = in_pcblookup_local(pcbinfo, sin->sin_addr,
275                             lport, prison ? 0 : wild);
276                         if (t && !(reuseport & t->inp_socket->so_options)) {
277 #ifdef INET6
278                                 if (!in_nullhost(sin->sin_addr) ||
279                                     !in_nullhost(t->inp_laddr) ||
280                                     INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket))
281 #endif
282                                 return (EADDRINUSE);
283                         }
284                 }
285                 inp->inp_laddr = sin->sin_addr;
286         }
        /*
         * No port was requested: choose one from the range selected by the
         * INP_HIGHPORT / INP_LOWPORT flags.  Every failure exit below first
         * resets inp_laddr to INADDR_ANY.
         */
287         if (lport == 0) {
288                 ushort first, last;
289                 int count;
290
291                 if (inp->inp_laddr.s_addr != INADDR_ANY &&
292                     prison_ip(td, 0, &inp->inp_laddr.s_addr )) {
293                         inp->inp_laddr.s_addr = INADDR_ANY;
294                         return (EINVAL);
295                 }
296                 inp->inp_flags |= INP_ANONPORT;
297
298                 if (inp->inp_flags & INP_HIGHPORT) {
299                         first = ipport_hifirstauto;     /* sysctl */
300                         last  = ipport_hilastauto;
301                         lastport = &pcbinfo->lasthi;
302                 } else if (inp->inp_flags & INP_LOWPORT) {
303                         if (p &&
304                             (error = suser_cred(p->p_ucred, PRISON_ROOT))) {
305                                 inp->inp_laddr.s_addr = INADDR_ANY;
306                                 return (error);
307                         }
308                         first = ipport_lowfirstauto;    /* 1023 */
309                         last  = ipport_lowlastauto;     /* 600 */
310                         lastport = &pcbinfo->lastlow;
311                 } else {
312                         first = ipport_firstauto;       /* sysctl */
313                         last  = ipport_lastauto;
314                         lastport = &pcbinfo->lastport;
315                 }
316                 /*
317                  * Simple check to ensure all ports are not used up causing
318                  * a deadlock here.
319                  *
320                  * We split the two cases (up and down) so that the direction
321                  * is not being tested on each round of the loop.
322                  */
                /*
                 * In both loops 'count' allows (range size + 1) probes, and
                 * *lastport is a cursor stored in pcbinfo that is reset to
                 * 'first' whenever it steps outside the range.
                 */
323                 if (first > last) {
324                         /*
325                          * counting down
326                          */
327                         count = first - last;
328
329                         do {
330                                 if (count-- < 0) {      /* completely used? */
331                                         inp->inp_laddr.s_addr = INADDR_ANY;
332                                         return (EADDRNOTAVAIL);
333                                 }
334                                 --*lastport;
335                                 if (*lastport > first || *lastport < last)
336                                         *lastport = first;
337                                 lport = htons(*lastport);
338                         } while (in_pcblookup_local(pcbinfo,
339                                  inp->inp_laddr, lport, wild));
340                 } else {
341                         /*
342                          * counting up
343                          */
344                         count = last - first;
345
346                         do {
347                                 if (count-- < 0) {      /* completely used? */
348                                         inp->inp_laddr.s_addr = INADDR_ANY;
349                                         return (EADDRNOTAVAIL);
350                                 }
351                                 ++*lastport;
352                                 if (*lastport < first || *lastport > last)
353                                         *lastport = first;
354                                 lport = htons(*lastport);
355                         } while (in_pcblookup_local(pcbinfo,
356                                  inp->inp_laddr, lport, wild));
357                 }
358         }
        /*
         * Commit the port, then run the final prison check and the port-hash
         * insertion; either failure undoes both inp_laddr and inp_lport.
         */
359         inp->inp_lport = lport;
360         if (prison_ip(td, 0, &inp->inp_laddr.s_addr)) {
361                 inp->inp_laddr.s_addr = INADDR_ANY;
362                 inp->inp_lport = 0;
363                 return (EINVAL);
364         }
365         if (in_pcbinsporthash(inp) != 0) {
366                 inp->inp_laddr.s_addr = INADDR_ANY;
367                 inp->inp_lport = 0;
368                 return (EAGAIN);
369         }
370         in_pcbinsbindhash(inp);
371         return (0);
372 }
373
374 /*
375  *   Transform old in_pcbconnect() into an inner subroutine for new
376  *   in_pcbconnect(): Do some validity-checking on the remote
377  *   address (in mbuf 'nam') and then determine local host address
378  *   (i.e., which interface) to use to access that remote host.
379  *
380  *   This preserves definition of in_pcbconnect(), while supporting a
381  *   slightly different version for T/TCP.  (This is more than
382  *   a bit of a kludge, but cleaning up the internal interfaces would
383  *   have forced minor changes in every protocol).
384  */
385
/*
 * inp:        PCB whose local (source) address is being selected.
 * nam:        destination; must be a sockaddr_in of family AF_INET with a
 *             non-zero port.  Its address may be rewritten in place for
 *             the INADDR_ANY and INADDR_BROADCAST cases.
 * plocal_sin: receives a pointer to the chosen interface address, but only
 *             when inp->inp_laddr is INADDR_ANY; otherwise it is not
 *             written.
 *
 * Returns 0 on success, or EINVAL, EAFNOSUPPORT or EADDRNOTAVAIL.
 */
386 int
387 in_pcbladdr(inp, nam, plocal_sin)
388         struct inpcb *inp;
389         struct sockaddr *nam;
390         struct sockaddr_in **plocal_sin;
391 {
392         struct in_ifaddr *ia;
393         struct sockaddr_in *sin = (struct sockaddr_in *)nam;
394
395         if (nam->sa_len != sizeof *sin)
396                 return (EINVAL);
397         if (sin->sin_family != AF_INET)
398                 return (EAFNOSUPPORT);
399         if (sin->sin_port == 0)
400                 return (EADDRNOTAVAIL);
401         if (!TAILQ_EMPTY(&in_ifaddrhead)) {
402                 ia = TAILQ_FIRST(&in_ifaddrhead);
403                 /*
404                  * If the destination address is INADDR_ANY,
405                  * use the primary local address.
406                  * If the supplied address is INADDR_BROADCAST,
407                  * and the primary interface supports broadcast,
408                  * choose the broadcast address for that interface.
409                  */
410                 if (sin->sin_addr.s_addr == INADDR_ANY)
411                         sin->sin_addr = IA_SIN(ia)->sin_addr;
412                 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
413                     (ia->ia_ifp->if_flags & IFF_BROADCAST))
414                         sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr;
415         }
        /* A source address is chosen only if the PCB has none yet. */
416         if (inp->inp_laddr.s_addr == INADDR_ANY) {
417                 struct route *ro;
418
419                 ia = (struct in_ifaddr *)NULL;
420                 /*
421                  * If route is known or can be allocated now,
422                  * our src addr is taken from the i/f, else punt.
423                  * Note that we should check the address family of the cached
424                  * destination, in case of sharing the cache with IPv6.
425                  */
426                 ro = &inp->inp_route;
                /*
                 * Release the cached route when it is not RTF_UP, is not for
                 * AF_INET, names a different destination, or the socket has
                 * SO_DONTROUTE set.
                 */
427                 if (ro->ro_rt &&
428                     (!(ro->ro_rt->rt_flags & RTF_UP) ||
429                      ro->ro_dst.sa_family != AF_INET ||
430                      satosin(&ro->ro_dst)->sin_addr.s_addr !=
431                          sin->sin_addr.s_addr ||
432                      inp->inp_socket->so_options & SO_DONTROUTE)) {
433                         RTFREE(ro->ro_rt);
434                         ro->ro_rt = (struct rtentry *)NULL;
435                 }
436                 if (!(inp->inp_socket->so_options & SO_DONTROUTE) && /*XXX*/
437                     (ro->ro_rt == (struct rtentry *)NULL ||
438                     ro->ro_rt->rt_ifp == (struct ifnet *)NULL)) {
439                         /* No route yet, so try to acquire one */
440                         bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
441                         ro->ro_dst.sa_family = AF_INET;
442                         ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
443                         ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
444                                 sin->sin_addr;
445                         rtalloc(ro);
446                 }
447                 /*
448                  * If we found a route, use the address
449                  * corresponding to the outgoing interface
450                  * unless it is the loopback (in case a route
451                  * to our address on another net goes to loopback).
452                  */
453                 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
454                         ia = ifatoia(ro->ro_rt->rt_ifa);
                /*
                 * Fallbacks, in order: interface whose destination address
                 * matches, interface on the same network, first configured
                 * address.  The port is zeroed for the two ifa_ifwith*()
                 * lookups and restored right after them.
                 */
455                 if (ia == NULL) {
456                         u_short fport = sin->sin_port;
457
458                         sin->sin_port = 0;
459                         ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
460                         if (ia == NULL)
461                                 ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
462                         sin->sin_port = fport;
463                         if (ia == NULL)
464                                 ia = TAILQ_FIRST(&in_ifaddrhead);
465                         if (ia == NULL)
466                                 return (EADDRNOTAVAIL);
467                 }
468                 /*
469                  * If the destination address is multicast and an outgoing
470                  * interface has been set as a multicast option, use the
471                  * address of that interface as our source address.
472                  */
473                 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
474                     inp->inp_moptions != NULL) {
475                         struct ip_moptions *imo;
476                         struct ifnet *ifp;
477
478                         imo = inp->inp_moptions;
479                         if (imo->imo_multicast_ifp != NULL) {
480                                 ifp = imo->imo_multicast_ifp;
                                /* ia ends up NULL if no address is on ifp. */
481                                 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link)
482                                         if (ia->ia_ifp == ifp)
483                                                 break;
484                                 if (ia == NULL)
485                                         return (EADDRNOTAVAIL);
486                         }
487                 }
488                 /*
489                  * Don't do pcblookup call here; return interface in plocal_sin
490                  * and exit to caller, that will do the lookup.
491                  */
492                 *plocal_sin = &ia->ia_addr;
493
494         }
495         return (0);
496 }
497
498 /*
499  * Outer subroutine:
500  * Connect from a socket to a specified address.
501  * Both address and port must be specified in argument sin.
502  * If don't have a local address for this socket yet,
503  * then pick one.
504  */
505 int
506 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct thread *td)
507 {
508         struct sockaddr_in *if_sin;
509         struct sockaddr_in *sin = (struct sockaddr_in *)nam;
510         struct sockaddr_in sa;
511         struct ucred *cr = td->td_proc ? td->td_proc->p_ucred : NULL;
512         int error;
513
514         if (cr && cr->cr_prison != NULL && in_nullhost(inp->inp_laddr)) {
515                 bzero(&sa, sizeof sa);
516                 sa.sin_addr.s_addr = htonl(cr->cr_prison->pr_ip);
517                 sa.sin_len = sizeof sa;
518                 sa.sin_family = AF_INET;
519                 error = in_pcbbind(inp, (struct sockaddr *)&sa, td);
520                 if (error)
521                         return (error);
522         }
523
524         /* Call inner routine to assign local interface address. */
525         if ((error = in_pcbladdr(inp, nam, &if_sin)) != 0)
526                 return (error);
527
528         if (in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port,
529             inp->inp_laddr.s_addr ? inp->inp_laddr : if_sin->sin_addr,
530             inp->inp_lport, FALSE, NULL) != NULL) {
531                 return (EADDRINUSE);
532         }
533         if (inp->inp_laddr.s_addr == INADDR_ANY) {
534                 if (inp->inp_lport == 0) {
535                         error = in_pcbbind(inp, (struct sockaddr *)NULL, td);
536                         if (error)
537                                 return (error);
538                 }
539                 inp->inp_laddr = if_sin->sin_addr;
540         }
541         inp->inp_faddr = sin->sin_addr;
542         inp->inp_fport = sin->sin_port;
543         in_pcbrehash(inp, INP_CONNECTED);
544         return (0);
545 }
546
547 void
548 in_pcbdisconnect(inp)
549         struct inpcb *inp;
550 {
551
552         inp->inp_faddr.s_addr = INADDR_ANY;
553         inp->inp_fport = 0;
554         in_pcbremconnhash(inp);
555         if (inp->inp_socket->so_state & SS_NOFDREF)
556                 in_pcbdetach(inp);
557 }
558
559 void
560 in_pcbdetach(inp)
561         struct inpcb *inp;
562 {
563         struct socket *so = inp->inp_socket;
564         struct inpcbinfo *ipi = inp->inp_pcbinfo;
565
566 #ifdef IPSEC
567         ipsec4_delete_pcbpolicy(inp);
568 #endif /*IPSEC*/
569         inp->inp_gencnt = ++ipi->ipi_gencnt;
570         in_pcbremlists(inp);
571         so->so_pcb = 0;
572         sofree(so);
573         if (inp->inp_options)
574                 (void)m_free(inp->inp_options);
575         if (inp->inp_route.ro_rt)
576                 rtfree(inp->inp_route.ro_rt);
577         ip_freemoptions(inp->inp_moptions);
578         inp->inp_vflag = 0;
579         zfree(ipi->ipi_zone, inp);
580 }
581
582 /*
583  * The calling convention of in_setsockaddr() and in_setpeeraddr() was
584  * modified to match the pru_sockaddr() and pru_peeraddr() entry points
585  * in struct pr_usrreqs, so that protocols can just reference them directly
586  * without the need for a wrapper function.  The socket must have a valid
587  * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
588  * except through a kernel programming error, so it is acceptable to panic
589  * (or in this case trap) if the PCB is invalid.  (Actually, we don't trap
590  * because there actually /is/ a programming error somewhere... XXX)
591  */
592 int
593 in_setsockaddr(so, nam)
594         struct socket *so;
595         struct sockaddr **nam;
596 {
597         int s;
598         struct inpcb *inp;
599         struct sockaddr_in *sin;
600
601         /*
602          * Do the malloc first in case it blocks.
603          */
604         MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
605                 M_WAITOK | M_ZERO);
606         sin->sin_family = AF_INET;
607         sin->sin_len = sizeof *sin;
608
609         s = splnet();
610         inp = sotoinpcb(so);
611         if (!inp) {
612                 splx(s);
613                 free(sin, M_SONAME);
614                 return (ECONNRESET);
615         }
616         sin->sin_port = inp->inp_lport;
617         sin->sin_addr = inp->inp_laddr;
618         splx(s);
619
620         *nam = (struct sockaddr *)sin;
621         return (0);
622 }
623
624 int
625 in_setpeeraddr(so, nam)
626         struct socket *so;
627         struct sockaddr **nam;
628 {
629         int s;
630         struct inpcb *inp;
631         struct sockaddr_in *sin;
632
633         /*
634          * Do the malloc first in case it blocks.
635          */
636         MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
637                 M_WAITOK | M_ZERO);
638         sin->sin_family = AF_INET;
639         sin->sin_len = sizeof *sin;
640
641         s = splnet();
642         inp = sotoinpcb(so);
643         if (!inp) {
644                 splx(s);
645                 free(sin, M_SONAME);
646                 return (ECONNRESET);
647         }
648         sin->sin_port = inp->inp_fport;
649         sin->sin_addr = inp->inp_faddr;
650         splx(s);
651
652         *nam = (struct sockaddr *)sin;
653         return (0);
654 }
655
656 void
657 in_pcbnotifyall(head, faddr, errno, notify)
658         struct inpcbhead *head;
659         struct in_addr faddr;
660         void (*notify) (struct inpcb *, int);
661 {
662         struct inpcb *inp, *ninp;
663         int s;
664
665         s = splnet();
666         for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
667                 ninp = LIST_NEXT(inp, inp_list);
668 #ifdef INET6
669                 if (!(inp->inp_vflag & INP_IPV4))
670                         continue;
671 #endif
672                 if (inp->inp_faddr.s_addr != faddr.s_addr ||
673                     inp->inp_socket == NULL)
674                         continue;
675                 (*notify)(inp, errno);
676         }
677         splx(s);
678 }
679
680 void
681 in_pcbpurgeif0(head, ifp)
682         struct inpcb *head;
683         struct ifnet *ifp;
684 {
685         struct inpcb *inp;
686         struct ip_moptions *imo;
687         int i, gap;
688
689         for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) {
690                 imo = inp->inp_moptions;
691                 if ((inp->inp_vflag & INP_IPV4) && imo != NULL) {
692                         /*
693                          * Unselect the outgoing interface if it is being
694                          * detached.
695                          */
696                         if (imo->imo_multicast_ifp == ifp)
697                                 imo->imo_multicast_ifp = NULL;
698
699                         /*
700                          * Drop multicast group membership if we joined
701                          * through the interface being detached.
702                          */
703                         for (i = 0, gap = 0; i < imo->imo_num_memberships;
704                             i++) {
705                                 if (imo->imo_membership[i]->inm_ifp == ifp) {
706                                         in_delmulti(imo->imo_membership[i]);
707                                         gap++;
708                                 } else if (gap != 0)
709                                         imo->imo_membership[i - gap] =
710                                             imo->imo_membership[i];
711                         }
712                         imo->imo_num_memberships -= gap;
713                 }
714         }
715 }
716
717 /*
718  * Check for alternatives when higher level complains
719  * about service problems.  For now, invalidate cached
720  * routing information.  If the route was created dynamically
721  * (by a redirect), time to try a default gateway again.
722  */
723 void
724 in_losing(inp)
725         struct inpcb *inp;
726 {
727         struct rtentry *rt;
728         struct rt_addrinfo info;
729
730         if ((rt = inp->inp_route.ro_rt)) {
731                 bzero((caddr_t)&info, sizeof info);
732                 info.rti_flags = rt->rt_flags;
733                 info.rti_info[RTAX_DST] = rt_key(rt);
734                 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
735                 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
736                 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
737                 if (rt->rt_flags & RTF_DYNAMIC)
738                         (void) rtrequest1(RTM_DELETE, &info, NULL);
739                 inp->inp_route.ro_rt = NULL;
740                 rtfree(rt);
741                 /*
742                  * A new route can be allocated
743                  * the next time output is attempted.
744                  */
745         }
746 }
747
748 /*
749  * After a routing change, flush old routing
750  * and allocate a (hopefully) better one.
751  */
752 void
753 in_rtchange(inp, errno)
754         struct inpcb *inp;
755         int errno;
756 {
757         if (inp->inp_route.ro_rt) {
758                 rtfree(inp->inp_route.ro_rt);
759                 inp->inp_route.ro_rt = 0;
760                 /*
761                  * A new route can be allocated the next time
762                  * output is attempted.
763                  */
764         }
765 }
766
767 /*
768  * Lookup a PCB based on the local address and port.
769  */
770 struct inpcb *
771 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
772         struct inpcbinfo *pcbinfo;
773         struct in_addr laddr;
774         u_int lport_arg;
775         int wild_okay;
776 {
777         struct inpcb *inp;
778         int matchwild = 3, wildcard;
779         u_short lport = lport_arg;
780
781         struct inpcbporthead *porthash;
782         struct inpcbport *phd;
783         struct inpcb *match = NULL;
784
785         /*
786          * Best fit PCB lookup.
787          *
788          * First see if this local port is in use by looking on the
789          * port hash list.
790          */
791         porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
792             pcbinfo->porthashmask)];
793         LIST_FOREACH(phd, porthash, phd_hash) {
794                 if (phd->phd_port == lport)
795                         break;
796         }
797         if (phd != NULL) {
798                 /*
799                  * Port is in use by one or more PCBs. Look for best
800                  * fit.
801                  */
802                 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
803                         wildcard = 0;
804 #ifdef INET6
805                         if ((inp->inp_vflag & INP_IPV4) == 0)
806                                 continue;
807 #endif
808                         if (inp->inp_faddr.s_addr != INADDR_ANY)
809                                 wildcard++;
810                         if (inp->inp_laddr.s_addr != INADDR_ANY) {
811                                 if (laddr.s_addr == INADDR_ANY)
812                                         wildcard++;
813                                 else if (inp->inp_laddr.s_addr != laddr.s_addr)
814                                         continue;
815                         } else {
816                                 if (laddr.s_addr != INADDR_ANY)
817                                         wildcard++;
818                         }
819                         if (wildcard && !wild_okay)
820                                 continue;
821                         if (wildcard < matchwild) {
822                                 match = inp;
823                                 matchwild = wildcard;
824                                 if (matchwild == 0) {
825                                         break;
826                                 }
827                         }
828                 }
829         }
830         return (match);
831 }
832
833 /*
834  * Lookup PCB in hash list.
835  */
836 struct inpcb *
837 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp)
838         struct inpcbinfo *pcbinfo;
839         struct in_addr faddr, laddr;
840         u_int fport_arg, lport_arg;
841         boolean_t wildcard;
842         struct ifnet *ifp;
843 {
844         struct inpcbhead *head;
845         struct inpcb *inp;
846         u_short fport = fport_arg, lport = lport_arg;
847
848         /*
849          * First look for an exact match.
850          */
851         head = &pcbinfo->hashbase[INP_PCBCONNHASH(faddr.s_addr, fport,
852             laddr.s_addr, lport, pcbinfo->hashmask)];
853         LIST_FOREACH(inp, head, inp_hash) {
854 #ifdef INET6
855                 if (!(inp->inp_vflag & INP_IPV4))
856                         continue;
857 #endif
858                 if (in_hosteq(inp->inp_faddr, faddr) &&
859                     in_hosteq(inp->inp_laddr, laddr) &&
860                     inp->inp_fport == fport && inp->inp_lport == lport) {
861                         /* found */
862                         return (inp);
863                 }
864         }
865
866         if (wildcard) {
867                 struct inpcb *local_wild = NULL;
868 #ifdef INET6
869                 struct inpcb *local_wild_mapped = NULL;
870 #endif
871
872                 head = &pcbinfo->bindhashbase[INP_PCBBINDHASH(lport,
873                     pcbinfo->bindhashmask)];
874                 LIST_FOREACH(inp, head, inp_hash) {
875 #ifdef INET6
876                         if (!(inp->inp_vflag & INP_IPV4))
877                                 continue;
878 #endif
879                         if (inp->inp_lport == lport) {
880                                 if (ifp && ifp->if_type == IFT_FAITH &&
881                                     !(inp->inp_flags & INP_FAITH))
882                                         continue;
883                                 if (inp->inp_laddr.s_addr == laddr.s_addr)
884                                         return (inp);
885                                 if (inp->inp_laddr.s_addr == INADDR_ANY) {
886 #ifdef INET6
887                                         if (INP_CHECK_SOCKAF(inp->inp_socket,
888                                                              AF_INET6))
889                                                 local_wild_mapped = inp;
890                                         else
891 #endif
892                                                 local_wild = inp;
893                                 }
894                         }
895                 }
896 #ifdef INET6
897                 if (local_wild == NULL)
898                         return (local_wild_mapped);
899 #endif
900                 return (local_wild);
901         }
902
903         /*
904          * Not found.
905          */
906         return (NULL);
907 }
908
909 /*
910  * Insert PCB into connection hash table.
911  */
912 void
913 in_pcbinsconnhash(struct inpcb *inp)
914 {
915         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
916         struct inpcbhead *bucket;
917         u_int32_t hashkey_faddr, hashkey_laddr;
918
919 #ifdef INET6
920         if (inp->inp_vflag & INP_IPV6) {
921                 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX JH */;
922                 hashkey_laddr = inp->in6p_laddr.s6_addr32[3] /* XXX JH */;
923         } else {
924 #endif
925                 hashkey_faddr = inp->inp_faddr.s_addr;
926                 hashkey_laddr = inp->inp_laddr.s_addr;
927 #ifdef INET6
928         }
929 #endif
930
931         inp->inp_flags |= INP_CONNECTED;
932
933         /*
934          * Insert into the connection hash table.
935          */
936         bucket = &pcbinfo->hashbase[INP_PCBCONNHASH(hashkey_faddr,
937             inp->inp_fport, hashkey_laddr,
938             inp->inp_lport, pcbinfo->hashmask)];
939         LIST_INSERT_HEAD(bucket, inp, inp_hash);
940 }
941
942 /*
943  * Remove PCB from connection hash table.
944  */
945 void
946 in_pcbremconnhash(struct inpcb *inp)
947 {
948         KASSERT(inp->inp_flags & INP_CONNECTED, ("inp not connected"));
949         LIST_REMOVE(inp, inp_hash);
950         inp->inp_flags &= ~INP_CONNECTED;
951 }
952
953 /*
954  * Insert PCB into port hash table.
955  */
956 int
957 in_pcbinsporthash(struct inpcb *inp)
958 {
959         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
960         struct inpcbporthead *pcbporthash;
961         struct inpcbport *phd;
962
963         /*
964          * Insert into the port hash table.
965          */
966         pcbporthash = &pcbinfo->porthashbase[
967             INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)];
968
969         /* Go through port list and look for a head for this lport. */
970         LIST_FOREACH(phd, pcbporthash, phd_hash)
971                 if (phd->phd_port == inp->inp_lport)
972                         break;
973
974         /* If none exists, malloc one and tack it on. */
975         if (phd == NULL) {
976                 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport),
977                     M_PCB, M_NOWAIT);
978                 if (phd == NULL)
979                         return (ENOBUFS); /* XXX */
980                 phd->phd_port = inp->inp_lport;
981                 LIST_INIT(&phd->phd_pcblist);
982                 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
983         }
984
985         inp->inp_phd = phd;
986         LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
987
988         return (0);
989 }
990
991 /*
992  * Insert PCB into bind hash table.
993  */
994 void
995 in_pcbinsbindhash(struct inpcb *inp)
996 {
997         struct inpcbhead *bucket;
998
999         bucket = &inp->inp_pcbinfo->bindhashbase[INP_PCBBINDHASH(inp->inp_lport,
1000             inp->inp_pcbinfo->porthashmask)];
1001
1002         inp->inp_flags |= INP_BOUND;
1003         LIST_INSERT_HEAD(bucket, inp, inp_hash);
1004 }
1005
1006 /*
1007  * Remove PCB from bind hash table.
1008  */
1009 void
1010 in_pcbrembindhash(struct inpcb *inp)
1011 {
1012         KASSERT(inp->inp_flags & INP_BOUND, ("inp not bound"));
1013         LIST_REMOVE(inp, inp_hash);
1014         inp->inp_flags &= ~INP_BOUND;
1015 }
1016
1017 static void
1018 in_pcbremhash(struct inpcb *inp)
1019 {
1020         if (inp->inp_flags & (INP_BOUND | INP_CONNECTED)) {
1021                 LIST_REMOVE(inp, inp_hash);
1022                 inp->inp_flags &= ~(INP_BOUND | INP_CONNECTED);
1023         }
1024 }
1025
1026 void
1027 in_pcbrehash(struct inpcb *inp, int state)
1028 {
1029         in_pcbremhash(inp);
1030         if (state == INP_BOUND)
1031                 in_pcbinsbindhash(inp);
1032         else
1033                 in_pcbinsconnhash(inp);
1034 }
1035
1036 /*
1037  * Remove PCB from various lists.
1038  */
1039 void
1040 in_pcbremlists(inp)
1041         struct inpcb *inp;
1042 {
1043         if (inp->inp_lport) {
1044                 struct inpcbport *phd = inp->inp_phd;
1045
1046                 LIST_REMOVE(inp, inp_portlist);
1047                 if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
1048                         LIST_REMOVE(phd, phd_hash);
1049                         free(phd, M_PCB);
1050                 }
1051         }
1052         in_pcbremhash(inp);
1053         LIST_REMOVE(inp, inp_list);
1054         inp->inp_pcbinfo->ipi_count--;
1055 }
1056
1057 int
1058 prison_xinpcb(struct thread *td, struct inpcb *inp)
1059 {
1060         struct ucred *cr;
1061
1062         if (td->td_proc == NULL)
1063                 return (0);
1064         cr = td->td_proc->p_ucred;
1065         if (cr->cr_prison == NULL)
1066                 return (0);
1067         if (ntohl(inp->inp_laddr.s_addr) == cr->cr_prison->pr_ip)
1068                 return (0);
1069         return (1);
1070 }