Correct BSD License clause numbering from 1-2-4 to 1-2-3.
[dragonfly.git] / sys / netinet / in_pcb.c
1 /*
2  * Copyright (c) 2004 Jeffrey M. Hsu.  All rights reserved.
3  * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
4  *
5  * This code is derived from software contributed to The DragonFly Project
6  * by Jeffrey M. Hsu.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of The DragonFly Project nor the names of its
17  *    contributors may be used to endorse or promote products derived
18  *    from this software without specific, prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
24  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
26  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33
34 /*
35  * Copyright (c) 1982, 1986, 1991, 1993, 1995
36  *      The Regents of the University of California.  All rights reserved.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. Neither the name of the University nor the names of its contributors
47  *    may be used to endorse or promote products derived from this software
48  *    without specific prior written permission.
49  *
50  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  *
62  *      @(#)in_pcb.c    8.4 (Berkeley) 5/24/95
63  * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.27 2004/01/02 04:06:42 ambrisko Exp $
64  */
65
66 #include "opt_ipsec.h"
67 #include "opt_inet6.h"
68
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/malloc.h>
72 #include <sys/mbuf.h>
73 #include <sys/domain.h>
74 #include <sys/protosw.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/proc.h>
78 #include <sys/priv.h>
79 #include <sys/jail.h>
80 #include <sys/kernel.h>
81 #include <sys/sysctl.h>
82
83 #include <sys/thread2.h>
84 #include <sys/socketvar2.h>
85 #include <sys/msgport2.h>
86
87 #include <machine/limits.h>
88
89 #include <net/if.h>
90 #include <net/if_types.h>
91 #include <net/route.h>
92
93 #include <netinet/in.h>
94 #include <netinet/in_pcb.h>
95 #include <netinet/in_var.h>
96 #include <netinet/ip_var.h>
97 #ifdef INET6
98 #include <netinet/ip6.h>
99 #include <netinet6/ip6_var.h>
100 #endif /* INET6 */
101
102 #ifdef IPSEC
103 #include <netinet6/ipsec.h>
104 #include <netproto/key/key.h>
105 #include <netproto/ipsec/esp_var.h>
106 #endif
107
108 #ifdef FAST_IPSEC
109 #if defined(IPSEC) || defined(IPSEC_ESP)
110 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!"
111 #endif
112
113 #include <netproto/ipsec/ipsec.h>
114 #include <netproto/ipsec/key.h>
115 #define IPSEC
116 #endif /* FAST_IPSEC */
117
118 #define INP_LOCALGROUP_SIZMIN   8
119 #define INP_LOCALGROUP_SIZMAX   256
120
121 struct in_addr zeroin_addr;
122
/*
 * These configure the range of local port addresses assigned to
 * "unspecified" outgoing connections/packets/whatever.
 *
 * There are three independent ranges, each described by a "first" and a
 * "last" port.  The automatic port search starts from "first" and walks
 * towards "last"; when first > last (as with the low range's defaults)
 * the search counts downwards.
 *
 *   low   - used when the pcb has INP_LOWPORT set (reserved ports)
 *   (std) - used when neither INP_LOWPORT nor INP_HIGHPORT is set
 *   high  - used when the pcb has INP_HIGHPORT set
 *
 * All six values are exported under the portrange sysctl node and are
 * clamped by sysctl_net_ipport_check() after every update.
 */
int ipport_lowfirstauto = IPPORT_RESERVED - 1;	/* 1023 */
int ipport_lowlastauto = IPPORT_RESERVEDSTART;	/* 600 */

int ipport_firstauto = IPPORT_RESERVED;		/* 1024 */
int ipport_lastauto = IPPORT_USERRESERVED;	/* 5000 */

int ipport_hifirstauto = IPPORT_HIFIRSTAUTO;	/* 49152 */
int ipport_hilastauto = IPPORT_HILASTAUTO;	/* 65535 */
135
/*
 * Clamp the lvalue `var` into the inclusive range [min, max].
 *
 * The body is wrapped in do { } while (0) so that the expansion is exactly
 * one statement.  A bare if/else-if chain cannot be used as the body of an
 * unbraced if that has its own else: the caller's trailing semicolon ends
 * the outer statement and the following else becomes a syntax error.
 *
 * The arguments are evaluated more than once and must therefore be free
 * of side effects.
 */
#define RANGECHK(var, min, max) \
	do { \
		if ((var) < (min)) { (var) = (min); } \
		else if ((var) > (max)) { (var) = (max); } \
	} while (0)
139
140 int udpencap_enable = 1;        /* enabled by default */
141 int udpencap_port = 4500;       /* triggers decapsulation */
142
143 static int
144 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
145 {
146         int error;
147
148         error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
149         if (!error) {
150                 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
151                 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
152
153                 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
154                 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
155
156                 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
157                 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
158         }
159         return (error);
160 }
161
162 #undef RANGECHK
163
/*
 * Sysctl node "portrange" under _net_inet_ip, plus one read/write integer
 * entry per automatic port range bound.  Every entry points at its backing
 * global and uses sysctl_net_ipport_check() as the handler, so any update
 * is followed by a range check of all six values.
 */
SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");

SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
	   &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
	   &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
	   &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
	   &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
	   &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
	   &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
178
179 /*
180  * in_pcb.c: manage the Protocol Control Blocks.
181  *
182  * NOTE: It is assumed that most of these functions will be called from
183  * a critical section.  XXX - There are, unfortunately, a few exceptions
184  * to this rule that should be fixed.
185  *
186  * NOTE: The caller should initialize the cpu field to the cpu running the
187  * protocol stack associated with this inpcbinfo.
188  */
189
190 void
191 in_pcbinfo_init(struct inpcbinfo *pcbinfo)
192 {
193         LIST_INIT(&pcbinfo->pcblisthead);
194         pcbinfo->cpu = -1;
195         pcbinfo->portsave = kmalloc(sizeof(*pcbinfo->portsave), M_PCB,
196                                     M_WAITOK | M_ZERO);
197 }
198
199 struct baddynamicports baddynamicports;
200
201 /*
202  * Check if the specified port is invalid for dynamic allocation.
203  */
204 int
205 in_baddynamic(u_int16_t port, u_int16_t proto)
206 {
207         switch (proto) {
208         case IPPROTO_TCP:
209                 return (DP_ISSET(baddynamicports.tcp, port));
210         case IPPROTO_UDP:
211 #ifdef IPSEC
212                 /* Cannot preset this as it is a sysctl */
213                 if (port == udpencap_port)
214                         return (1);
215 #endif
216                 return (DP_ISSET(baddynamicports.udp, port));
217         default:
218                 return (0);
219         }
220 }
221
222
223 /*
224  * Allocate a PCB and associate it with the socket.
225  */
226 int
227 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
228 {
229         struct inpcb *inp;
230 #ifdef IPSEC
231         int error;
232 #endif
233
234         inp = kmalloc(pcbinfo->ipi_size, M_PCB, M_WAITOK|M_ZERO);
235         inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
236         inp->inp_pcbinfo = inp->inp_cpcbinfo = pcbinfo;
237         inp->inp_socket = so;
238 #ifdef IPSEC
239         error = ipsec_init_policy(so, &inp->inp_sp);
240         if (error != 0) {
241                 kfree(inp, M_PCB);
242                 return (error);
243         }
244 #endif
245 #ifdef INET6
246         if (INP_SOCKAF(so) == AF_INET6 && ip6_v6only)
247                 inp->inp_flags |= IN6P_IPV6_V6ONLY;
248         if (ip6_auto_flowlabel)
249                 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
250 #endif
251         soreference(so);
252         so->so_pcb = inp;
253         LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list);
254         pcbinfo->ipi_count++;
255         return (0);
256 }
257
258 /*
259  * Unlink a pcb with the intention of moving it to another cpu with a
260  * different pcbinfo.  While unlinked nothing should attempt to dereference
261  * inp_pcbinfo, NULL it out so we assert if it does.
262  */
263 void
264 in_pcbunlink(struct inpcb *inp, struct inpcbinfo *pcbinfo)
265 {
266         KKASSERT(inp->inp_pcbinfo == pcbinfo);
267
268         LIST_REMOVE(inp, inp_list);
269         pcbinfo->ipi_count--;
270         inp->inp_pcbinfo = NULL;
271 }
272
273 /*
274  * Relink a pcb into a new pcbinfo.
275  */
276 void
277 in_pcblink(struct inpcb *inp, struct inpcbinfo *pcbinfo)
278 {
279         KKASSERT(inp->inp_pcbinfo == NULL);
280         inp->inp_pcbinfo = pcbinfo;
281         LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list);
282         pcbinfo->ipi_count++;
283 }
284
285 int
286 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct thread *td)
287 {
288         struct socket *so = inp->inp_socket;
289         unsigned short *lastport;
290         struct sockaddr_in *sin;
291         struct sockaddr_in jsin;
292         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
293         struct ucred *cred = NULL;
294         u_short lport = 0;
295         int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
296         int error;
297
298         if (TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) /* XXX broken! */
299                 return (EADDRNOTAVAIL);
300         if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
301                 return (EINVAL);        /* already bound */
302
303         if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
304                 wild = 1;    /* neither SO_REUSEADDR nor SO_REUSEPORT is set */
305         if (td->td_proc)
306                 cred = td->td_proc->p_ucred;
307
308         /*
309          * This has to be atomic.  If the porthash is shared across multiple
310          * protocol threads (aka tcp) then the token will be non-NULL.
311          */
312         if (pcbinfo->porttoken)
313                 lwkt_gettoken(pcbinfo->porttoken);
314
315         if (nam != NULL) {
316                 sin = (struct sockaddr_in *)nam;
317                 if (nam->sa_len != sizeof *sin) {
318                         error = EINVAL;
319                         goto done;
320                 }
321 #ifdef notdef
322                 /*
323                  * We should check the family, but old programs
324                  * incorrectly fail to initialize it.
325                  */
326                 if (sin->sin_family != AF_INET) {
327                         error = EAFNOSUPPORT;
328                         goto done;
329                 }
330 #endif
331                 if (!prison_replace_wildcards(td, nam)) {
332                         error = EINVAL;
333                         goto done;
334                 }
335                 lport = sin->sin_port;
336                 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
337                         /*
338                          * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
339                          * allow complete duplication of binding if
340                          * SO_REUSEPORT is set, or if SO_REUSEADDR is set
341                          * and a multicast address is bound on both
342                          * new and duplicated sockets.
343                          */
344                         if (so->so_options & SO_REUSEADDR)
345                                 reuseport = SO_REUSEADDR | SO_REUSEPORT;
346                 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
347                         sin->sin_port = 0;              /* yech... */
348                         bzero(&sin->sin_zero, sizeof sin->sin_zero);
349                         if (ifa_ifwithaddr((struct sockaddr *)sin) == NULL) {
350                                 error = EADDRNOTAVAIL;
351                                 goto done;
352                         }
353                 }
354                 if (lport != 0) {
355                         struct inpcb *t;
356
357                         /* GROSS */
358                         if (ntohs(lport) < IPPORT_RESERVED &&
359                             cred &&
360                             priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0)) {
361                                 error = EACCES;
362                                 goto done;
363                         }
364                         if (so->so_cred->cr_uid != 0 &&
365                             !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
366                                 t = in_pcblookup_local(pcbinfo,
367                                                        sin->sin_addr,
368                                                        lport,
369                                                        INPLOOKUP_WILDCARD,
370                                                        cred);
371                                 if (t &&
372                                     (!in_nullhost(sin->sin_addr) ||
373                                      !in_nullhost(t->inp_laddr) ||
374                                      (t->inp_socket->so_options &
375                                          SO_REUSEPORT) == 0) &&
376                                     (so->so_cred->cr_uid !=
377                                      t->inp_socket->so_cred->cr_uid)) {
378 #ifdef INET6
379                                         if (!in_nullhost(sin->sin_addr) ||
380                                             !in_nullhost(t->inp_laddr) ||
381                                             INP_SOCKAF(so) ==
382                                             INP_SOCKAF(t->inp_socket))
383 #endif
384                                         {
385                                                 error = EADDRINUSE;
386                                                 goto done;
387                                         }
388                                 }
389                         }
390                         if (cred && !prison_replace_wildcards(td, nam)) {
391                                 error = EADDRNOTAVAIL;
392                                 goto done;
393                         }
394                         t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport,
395                                                wild, cred);
396                         if (t && !(reuseport & t->inp_socket->so_options)) {
397 #ifdef INET6
398                                 if (!in_nullhost(sin->sin_addr) ||
399                                     !in_nullhost(t->inp_laddr) ||
400                                     INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket))
401 #endif
402                                 {
403                                         error = EADDRINUSE;
404                                         goto done;
405                                 }
406                         }
407                 }
408                 inp->inp_laddr = sin->sin_addr;
409         }
410         if (lport == 0) {
411                 ushort first, last;
412                 int count;
413
414                 jsin.sin_family = AF_INET;
415                 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr;
416                 if (!prison_replace_wildcards(td, (struct sockaddr *)&jsin)) {
417                         inp->inp_laddr.s_addr = INADDR_ANY;
418                         error = EINVAL;
419                         goto done;
420                 }
421                 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr;
422
423                 inp->inp_flags |= INP_ANONPORT;
424
425                 if (inp->inp_flags & INP_HIGHPORT) {
426                         first = ipport_hifirstauto;     /* sysctl */
427                         last  = ipport_hilastauto;
428                         lastport = &pcbinfo->lasthi;
429                 } else if (inp->inp_flags & INP_LOWPORT) {
430                         if (cred &&
431                             (error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0))) {
432                                 inp->inp_laddr.s_addr = INADDR_ANY;
433                                 goto done;
434                         }
435                         first = ipport_lowfirstauto;    /* 1023 */
436                         last  = ipport_lowlastauto;     /* 600 */
437                         lastport = &pcbinfo->lastlow;
438                 } else {
439                         first = ipport_firstauto;       /* sysctl */
440                         last  = ipport_lastauto;
441                         lastport = &pcbinfo->lastport;
442                 }
443                 /*
444                  * Simple check to ensure all ports are not used up causing
445                  * a deadlock here.
446                  *
447                  * We split the two cases (up and down) so that the direction
448                  * is not being tested on each round of the loop.
449                  */
450                 if (first > last) {
451                         /*
452                          * counting down
453                          */
454                         count = first - last;
455
456                         do {
457                                 if (count-- < 0) {      /* completely used? */
458                                         inp->inp_laddr.s_addr = INADDR_ANY;
459                                         error = EADDRNOTAVAIL;
460                                         goto done;
461                                 }
462                                 --*lastport;
463                                 if (*lastport > first || *lastport < last)
464                                         *lastport = first;
465                                 lport = htons(*lastport);
466                         } while (in_pcblookup_local(pcbinfo, inp->inp_laddr,
467                                                     lport, wild, cred));
468                 } else {
469                         /*
470                          * counting up
471                          */
472                         count = last - first;
473
474                         do {
475                                 if (count-- < 0) {      /* completely used? */
476                                         inp->inp_laddr.s_addr = INADDR_ANY;
477                                         error = EADDRNOTAVAIL;
478                                         goto done;
479                                 }
480                                 ++*lastport;
481                                 if (*lastport < first || *lastport > last)
482                                         *lastport = first;
483                                 lport = htons(*lastport);
484                         } while (in_pcblookup_local(pcbinfo, inp->inp_laddr,
485                                                     lport, wild, cred));
486                 }
487         }
488         inp->inp_lport = lport;
489
490         jsin.sin_family = AF_INET;
491         jsin.sin_addr.s_addr = inp->inp_laddr.s_addr;
492         if (!prison_replace_wildcards(td, (struct sockaddr*)&jsin)) {
493                 inp->inp_laddr.s_addr = INADDR_ANY;
494                 inp->inp_lport = 0;
495                 error = EINVAL;
496                 goto done;
497         }
498         inp->inp_laddr.s_addr = jsin.sin_addr.s_addr;
499
500         if (in_pcbinsporthash(inp) != 0) {
501                 inp->inp_laddr.s_addr = INADDR_ANY;
502                 inp->inp_lport = 0;
503                 error = EAGAIN;
504                 goto done;
505         }
506         error = 0;
507 done:
508         if (pcbinfo->porttoken)
509                 lwkt_reltoken(pcbinfo->porttoken);
510         return error;
511 }
512
513 static struct inpcb *
514 in_pcblookup_addrport(struct inpcbinfo *pcbinfo, struct in_addr laddr,
515     u_short lport, struct in_addr faddr, u_short fport, struct ucred *cred)
516 {
517         struct inpcb *inp;
518         struct inpcbporthead *porthash;
519         struct inpcbport *phd;
520         struct inpcb *match = NULL;
521
522         /*
523          * If the porthashbase is shared across several cpus we need
524          * to lock.
525          */
526         if (pcbinfo->porttoken)
527                 lwkt_gettoken(pcbinfo->porttoken);
528
529         /*
530          * Best fit PCB lookup.
531          *
532          * First see if this local port is in use by looking on the
533          * port hash list.
534          */
535         porthash = &pcbinfo->porthashbase[
536                         INP_PCBPORTHASH(lport, pcbinfo->porthashmask)];
537         LIST_FOREACH(phd, porthash, phd_hash) {
538                 if (phd->phd_port == lport)
539                         break;
540         }
541         if (phd != NULL) {
542                 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
543 #ifdef INET6
544                         if ((inp->inp_vflag & INP_IPV4) == 0)
545                                 continue;
546 #endif
547                         if (inp->inp_laddr.s_addr != INADDR_ANY &&
548                             inp->inp_laddr.s_addr != laddr.s_addr)
549                                 continue;
550
551                         if (inp->inp_faddr.s_addr != INADDR_ANY &&
552                             inp->inp_faddr.s_addr != faddr.s_addr)
553                                 continue;
554
555                         if (inp->inp_fport != 0 && inp->inp_fport != fport)
556                                 continue;
557
558                         if (cred == NULL ||
559                             cred->cr_prison ==
560                             inp->inp_socket->so_cred->cr_prison) {
561                                 match = inp;
562                                 break;
563                         }
564                 }
565         }
566         if (pcbinfo->porttoken)
567                 lwkt_reltoken(pcbinfo->porttoken);
568         return (match);
569 }
570
571 int
572 in_pcbconn_bind(struct inpcb *inp, const struct sockaddr *nam,
573     struct thread *td)
574 {
575         struct proc *p = td->td_proc;
576         unsigned short *lastport;
577         const struct sockaddr_in *sin = (const struct sockaddr_in *)nam;
578         struct sockaddr_in jsin;
579         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
580         struct ucred *cred = NULL;
581         u_short lport = 0;
582         ushort first, last;
583         int count, error, dup = 0;
584
585         if (TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) /* XXX broken! */
586                 return (EADDRNOTAVAIL);
587
588         KKASSERT(inp->inp_laddr.s_addr != INADDR_ANY);
589         if (inp->inp_lport != 0)
590                 return (EINVAL);        /* already bound */
591
592         KKASSERT(p);
593         cred = p->p_ucred;
594
595         /*
596          * This has to be atomic.  If the porthash is shared across multiple
597          * protocol threads (aka tcp) then the token will be non-NULL.
598          */
599         if (pcbinfo->porttoken)
600                 lwkt_gettoken(pcbinfo->porttoken);
601
602         jsin.sin_family = AF_INET;
603         jsin.sin_addr.s_addr = inp->inp_laddr.s_addr;
604         if (!prison_replace_wildcards(td, (struct sockaddr *)&jsin)) {
605                 inp->inp_laddr.s_addr = INADDR_ANY;
606                 error = EINVAL;
607                 goto done;
608         }
609         inp->inp_laddr.s_addr = jsin.sin_addr.s_addr;
610
611         inp->inp_flags |= INP_ANONPORT;
612
613         if (inp->inp_flags & INP_HIGHPORT) {
614                 first = ipport_hifirstauto;     /* sysctl */
615                 last  = ipport_hilastauto;
616                 lastport = &pcbinfo->lasthi;
617         } else if (inp->inp_flags & INP_LOWPORT) {
618                 if (cred &&
619                     (error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0))) {
620                         inp->inp_laddr.s_addr = INADDR_ANY;
621                         goto done;
622                 }
623                 first = ipport_lowfirstauto;    /* 1023 */
624                 last  = ipport_lowlastauto;     /* 600 */
625                 lastport = &pcbinfo->lastlow;
626         } else {
627                 first = ipport_firstauto;       /* sysctl */
628                 last  = ipport_lastauto;
629                 lastport = &pcbinfo->lastport;
630         }
631
632 again:
633         /*
634          * Simple check to ensure all ports are not used up causing
635          * a deadlock here.
636          *
637          * We split the two cases (up and down) so that the direction
638          * is not being tested on each round of the loop.
639          */
640         if (first > last) {
641                 /*
642                  * counting down
643                  */
644                 count = first - last;
645
646                 do {
647                         if (count-- < 0) {      /* completely used? */
648                                 inp->inp_laddr.s_addr = INADDR_ANY;
649                                 error = EADDRNOTAVAIL;
650                                 goto done;
651                         }
652                         --*lastport;
653                         if (*lastport > first || *lastport < last)
654                                 *lastport = first;
655                         lport = htons(*lastport);
656                 } while (in_pcblookup_addrport(pcbinfo, inp->inp_laddr, lport,
657                                 sin->sin_addr, sin->sin_port, cred));
658         } else {
659                 /*
660                  * counting up
661                  */
662                 count = last - first;
663
664                 do {
665                         if (count-- < 0) {      /* completely used? */
666                                 inp->inp_laddr.s_addr = INADDR_ANY;
667                                 error = EADDRNOTAVAIL;
668                                 goto done;
669                         }
670                         ++*lastport;
671                         if (*lastport < first || *lastport > last)
672                                 *lastport = first;
673                         lport = htons(*lastport);
674                 } while (in_pcblookup_addrport(pcbinfo, inp->inp_laddr, lport,
675                                 sin->sin_addr, sin->sin_port, cred));
676         }
677
678         /* This could happen on loopback interface */
679         if (sin->sin_port == lport &&
680             sin->sin_addr.s_addr == inp->inp_laddr.s_addr) {
681                 if (dup) {
682                         /*
683                          * Duplicate again; give up
684                          */
685                         inp->inp_laddr.s_addr = INADDR_ANY;
686                         error = EADDRNOTAVAIL;
687                         goto done;
688                 }
689                 dup = 1;
690                 goto again;
691         }
692         inp->inp_lport = lport;
693
694         jsin.sin_family = AF_INET;
695         jsin.sin_addr.s_addr = inp->inp_laddr.s_addr;
696         if (!prison_replace_wildcards(td, (struct sockaddr*)&jsin)) {
697                 inp->inp_laddr.s_addr = INADDR_ANY;
698                 inp->inp_lport = 0;
699                 error = EINVAL;
700                 goto done;
701         }
702         inp->inp_laddr.s_addr = jsin.sin_addr.s_addr;
703
704         if (in_pcbinsporthash(inp) != 0) {
705                 inp->inp_laddr.s_addr = INADDR_ANY;
706                 inp->inp_lport = 0;
707                 error = EAGAIN;
708                 goto done;
709         }
710         error = 0;
711 done:
712         if (pcbinfo->porttoken)
713                 lwkt_reltoken(pcbinfo->porttoken);
714         return error;
715 }
716
717 /*
718  *   Transform old in_pcbconnect() into an inner subroutine for new
719  *   in_pcbconnect(): Do some validity-checking on the remote
720  *   address (in mbuf 'nam') and then determine local host address
721  *   (i.e., which interface) to use to access that remote host.
722  *
723  *   This preserves definition of in_pcbconnect(), while supporting a
724  *   slightly different version for T/TCP.  (This is more than
725  *   a bit of a kludge, but cleaning up the internal interfaces would
726  *   have forced minor changes in every protocol).
727  */
728 int
729 in_pcbladdr_find(struct inpcb *inp, struct sockaddr *nam,
730     struct sockaddr_in **plocal_sin, struct thread *td, int find)
731 {
732         struct in_ifaddr *ia;
733         struct ucred *cred = NULL;
734         struct sockaddr_in *sin = (struct sockaddr_in *)nam;
735         struct sockaddr *jsin;
736         int jailed = 0, alloc_route = 0;
737
738         if (nam->sa_len != sizeof *sin)
739                 return (EINVAL);
740         if (sin->sin_family != AF_INET)
741                 return (EAFNOSUPPORT);
742         if (sin->sin_port == 0)
743                 return (EADDRNOTAVAIL);
744         if (td && td->td_proc && td->td_proc->p_ucred)
745                 cred = td->td_proc->p_ucred;
746         if (cred && cred->cr_prison)
747                 jailed = 1;
748         if (!TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) {
749                 ia = TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia;
750                 /*
751                  * If the destination address is INADDR_ANY,
752                  * use the primary local address.
753                  * If the supplied address is INADDR_BROADCAST,
754                  * and the primary interface supports broadcast,
755                  * choose the broadcast address for that interface.
756                  */
757                 if (sin->sin_addr.s_addr == INADDR_ANY)
758                         sin->sin_addr = IA_SIN(ia)->sin_addr;
759                 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST &&
760                     (ia->ia_ifp->if_flags & IFF_BROADCAST))
761                         sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr;
762         }
763         if (find) {
764                 struct route *ro;
765
766                 ia = NULL;
767                 /*
768                  * If route is known or can be allocated now,
769                  * our src addr is taken from the i/f, else punt.
770                  * Note that we should check the address family of the cached
771                  * destination, in case of sharing the cache with IPv6.
772                  */
773                 ro = &inp->inp_route;
774                 if (ro->ro_rt &&
775                     (!(ro->ro_rt->rt_flags & RTF_UP) ||
776                      ro->ro_dst.sa_family != AF_INET ||
777                      satosin(&ro->ro_dst)->sin_addr.s_addr !=
778                                       sin->sin_addr.s_addr ||
779                      inp->inp_socket->so_options & SO_DONTROUTE)) {
780                         RTFREE(ro->ro_rt);
781                         ro->ro_rt = NULL;
782                 }
783                 if (!(inp->inp_socket->so_options & SO_DONTROUTE) && /*XXX*/
784                     (ro->ro_rt == NULL ||
785                     ro->ro_rt->rt_ifp == NULL)) {
786                         /* No route yet, so try to acquire one */
787                         bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
788                         ro->ro_dst.sa_family = AF_INET;
789                         ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
790                         ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
791                                 sin->sin_addr;
792                         rtalloc(ro);
793                         alloc_route = 1;
794                 }
795                 /*
796                  * If we found a route, use the address
797                  * corresponding to the outgoing interface
798                  * unless it is the loopback (in case a route
799                  * to our address on another net goes to loopback).
800                  */
801                 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
802                         if (jailed) {
803                                 if (jailed_ip(cred->cr_prison, 
804                                     ro->ro_rt->rt_ifa->ifa_addr)) {
805                                         ia = ifatoia(ro->ro_rt->rt_ifa);
806                                 }
807                         } else {
808                                 ia = ifatoia(ro->ro_rt->rt_ifa);
809                         }
810                 }
811                 if (ia == NULL) {
812                         u_short fport = sin->sin_port;
813
814                         sin->sin_port = 0;
815                         ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin)));
816                         if (ia && jailed && !jailed_ip(cred->cr_prison,
817                             sintosa(&ia->ia_addr)))
818                                 ia = NULL;
819                         if (ia == NULL)
820                                 ia = ifatoia(ifa_ifwithnet(sintosa(sin)));
821                         if (ia && jailed && !jailed_ip(cred->cr_prison,
822                             sintosa(&ia->ia_addr)))
823                                 ia = NULL;
824                         sin->sin_port = fport;
825                         if (ia == NULL &&
826                             !TAILQ_EMPTY(&in_ifaddrheads[mycpuid]))
827                                 ia = TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia;
828                         if (ia && jailed && !jailed_ip(cred->cr_prison,
829                             sintosa(&ia->ia_addr)))
830                                 ia = NULL;
831
832                         if (!jailed && ia == NULL)
833                                 goto fail;
834                 }
835                 /*
836                  * If the destination address is multicast and an outgoing
837                  * interface has been set as a multicast option, use the
838                  * address of that interface as our source address.
839                  */
840                 if (!jailed && IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
841                     inp->inp_moptions != NULL) {
842                         struct ip_moptions *imo;
843                         struct ifnet *ifp;
844
845                         imo = inp->inp_moptions;
846                         if (imo->imo_multicast_ifp != NULL) {
847                                 struct in_ifaddr_container *iac;
848
849                                 ifp = imo->imo_multicast_ifp;
850                                 ia = NULL;
851                                 TAILQ_FOREACH(iac,
852                                 &in_ifaddrheads[mycpuid], ia_link) {
853                                         if (iac->ia->ia_ifp == ifp) {
854                                                 ia = iac->ia;
855                                                 break;
856                                         }
857                                 }
858                                 if (ia == NULL)
859                                         goto fail;
860                         }
861                 }
862                 /*
863                  * Don't do pcblookup call here; return interface in plocal_sin
864                  * and exit to caller, that will do the lookup.
865                  */
866                 if (ia == NULL && jailed) {
867                         if ((jsin = prison_get_nonlocal(cred->cr_prison, AF_INET, NULL)) != NULL ||
868                             (jsin = prison_get_local(cred->cr_prison, AF_INET, NULL)) != NULL) {
869                                 *plocal_sin = satosin(jsin);
870                         } else {
871                                 /* IPv6 only Jail */
872                                 goto fail;
873                         }
874                 } else {
875                         *plocal_sin = &ia->ia_addr;
876                 }
877         }
878         return (0);
879 fail:
880         if (alloc_route) {
881                 struct route *ro = &inp->inp_route;
882
883                 if (ro->ro_rt != NULL)
884                         RTFREE(ro->ro_rt);
885                 bzero(ro, sizeof(*ro));
886         }
887         return (EADDRNOTAVAIL);
888 }
889
890 int
891 in_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
892     struct sockaddr_in **plocal_sin, struct thread *td)
893 {
894         return in_pcbladdr_find(inp, nam, plocal_sin, td,
895             (inp->inp_laddr.s_addr == INADDR_ANY));
896 }
897
898 /*
899  * Outer subroutine:
900  * Connect from a socket to a specified address.
901  * Both address and port must be specified in argument sin.
902  * If don't have a local address for this socket yet,
903  * then pick one.
904  */
905 int
906 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct thread *td)
907 {
908         struct sockaddr_in *if_sin;
909         struct sockaddr_in *sin = (struct sockaddr_in *)nam;
910         int error;
911
912         /* Call inner routine to assign local interface address. */
913         if ((error = in_pcbladdr(inp, nam, &if_sin, td)) != 0)
914                 return (error);
915
916         if (in_pcblookup_hash(inp->inp_cpcbinfo, sin->sin_addr, sin->sin_port,
917                               inp->inp_laddr.s_addr ?
918                                 inp->inp_laddr : if_sin->sin_addr,
919                               inp->inp_lport, FALSE, NULL) != NULL) {
920                 return (EADDRINUSE);
921         }
922         if (inp->inp_laddr.s_addr == INADDR_ANY) {
923                 if (inp->inp_lport == 0) {
924                         error = in_pcbbind(inp, NULL, td);
925                         if (error)
926                                 return (error);
927                 }
928                 inp->inp_laddr = if_sin->sin_addr;
929         }
930         inp->inp_faddr = sin->sin_addr;
931         inp->inp_fport = sin->sin_port;
932         in_pcbinsconnhash(inp);
933         return (0);
934 }
935
936 void
937 in_pcbdisconnect(struct inpcb *inp)
938 {
939
940         inp->inp_faddr.s_addr = INADDR_ANY;
941         inp->inp_fport = 0;
942         in_pcbremconnhash(inp);
943         if (inp->inp_socket->so_state & SS_NOFDREF)
944                 in_pcbdetach(inp);
945 }
946
947 void
948 in_pcbdetach(struct inpcb *inp)
949 {
950         struct socket *so = inp->inp_socket;
951         struct inpcbinfo *ipi = inp->inp_pcbinfo;
952
953 #ifdef IPSEC
954         ipsec4_delete_pcbpolicy(inp);
955 #endif /*IPSEC*/
956         inp->inp_gencnt = ++ipi->ipi_gencnt;
957         KKASSERT((so->so_state & SS_ASSERTINPROG) == 0);
958         in_pcbremlists(inp);
959         so->so_pcb = NULL;
960         sofree(so);                     /* remove pcb ref */
961         if (inp->inp_options)
962                 m_free(inp->inp_options);
963         if (inp->inp_route.ro_rt)
964                 rtfree(inp->inp_route.ro_rt);
965         ip_freemoptions(inp->inp_moptions);
966         inp->inp_vflag = 0;
967         kfree(inp, M_PCB);
968 }
969
970 /*
971  * The calling convention of in_setsockaddr() and in_setpeeraddr() was
972  * modified to match the pru_sockaddr() and pru_peeraddr() entry points
973  * in struct pr_usrreqs, so that protocols can just reference then directly
974  * without the need for a wrapper function.  The socket must have a valid
975  * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
976  * except through a kernel programming error, so it is acceptable to panic
977  * (or in this case trap) if the PCB is invalid.  (Actually, we don't trap
978  * because there actually /is/ a programming error somewhere... XXX)
979  */
980 int
981 in_setsockaddr(struct socket *so, struct sockaddr **nam)
982 {
983         struct inpcb *inp;
984         struct sockaddr_in *sin;
985
986         /*
987          * Do the malloc first in case it blocks.
988          */
989         sin = kmalloc(sizeof *sin, M_SONAME, M_WAITOK | M_ZERO);
990         sin->sin_family = AF_INET;
991         sin->sin_len = sizeof *sin;
992
993         crit_enter();
994         inp = so->so_pcb;
995         if (!inp) {
996                 crit_exit();
997                 kfree(sin, M_SONAME);
998                 return (ECONNRESET);
999         }
1000         sin->sin_port = inp->inp_lport;
1001         sin->sin_addr = inp->inp_laddr;
1002         crit_exit();
1003
1004         *nam = (struct sockaddr *)sin;
1005         return (0);
1006 }
1007
1008 void
1009 in_setsockaddr_dispatch(netmsg_t msg)
1010 {
1011         int error;
1012
1013         error = in_setsockaddr(msg->base.nm_so, msg->peeraddr.nm_nam);
1014         lwkt_replymsg(&msg->lmsg, error);
1015 }
1016
1017 int
1018 in_setpeeraddr(struct socket *so, struct sockaddr **nam)
1019 {
1020         struct inpcb *inp;
1021         struct sockaddr_in *sin;
1022
1023         /*
1024          * Do the malloc first in case it blocks.
1025          */
1026         sin = kmalloc(sizeof *sin, M_SONAME, M_WAITOK | M_ZERO);
1027         sin->sin_family = AF_INET;
1028         sin->sin_len = sizeof *sin;
1029
1030         crit_enter();
1031         inp = so->so_pcb;
1032         if (!inp) {
1033                 crit_exit();
1034                 kfree(sin, M_SONAME);
1035                 return (ECONNRESET);
1036         }
1037         sin->sin_port = inp->inp_fport;
1038         sin->sin_addr = inp->inp_faddr;
1039         crit_exit();
1040
1041         *nam = (struct sockaddr *)sin;
1042         return (0);
1043 }
1044
1045 void
1046 in_setpeeraddr_dispatch(netmsg_t msg)
1047 {
1048         int error;
1049
1050         error = in_setpeeraddr(msg->base.nm_so, msg->peeraddr.nm_nam);
1051         lwkt_replymsg(&msg->lmsg, error);
1052 }
1053
1054 void
1055 in_pcbnotifyall(struct inpcbhead *head, struct in_addr faddr, int err,
1056                 void (*notify)(struct inpcb *, int))
1057 {
1058         struct inpcb *inp, *ninp;
1059
1060         /*
1061          * note: if INP_PLACEMARKER is set we must ignore the rest of
1062          * the structure and skip it.
1063          */
1064         crit_enter();
1065         LIST_FOREACH_MUTABLE(inp, head, inp_list, ninp) {
1066                 if (inp->inp_flags & INP_PLACEMARKER)
1067                         continue;
1068 #ifdef INET6
1069                 if (!(inp->inp_vflag & INP_IPV4))
1070                         continue;
1071 #endif
1072                 if (inp->inp_faddr.s_addr != faddr.s_addr ||
1073                     inp->inp_socket == NULL)
1074                         continue;
1075                 (*notify)(inp, err);            /* can remove inp from list! */
1076         }
1077         crit_exit();
1078 }
1079
1080 void
1081 in_pcbpurgeif0(struct inpcb *head, struct ifnet *ifp)
1082 {
1083         struct inpcb *inp;
1084         struct ip_moptions *imo;
1085         int i, gap;
1086
1087         for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) {
1088                 if (inp->inp_flags & INP_PLACEMARKER)
1089                         continue;
1090                 imo = inp->inp_moptions;
1091                 if ((inp->inp_vflag & INP_IPV4) && imo != NULL) {
1092                         /*
1093                          * Unselect the outgoing interface if it is being
1094                          * detached.
1095                          */
1096                         if (imo->imo_multicast_ifp == ifp)
1097                                 imo->imo_multicast_ifp = NULL;
1098
1099                         /*
1100                          * Drop multicast group membership if we joined
1101                          * through the interface being detached.
1102                          */
1103                         for (i = 0, gap = 0; i < imo->imo_num_memberships;
1104                             i++) {
1105                                 if (imo->imo_membership[i]->inm_ifp == ifp) {
1106                                         in_delmulti(imo->imo_membership[i]);
1107                                         gap++;
1108                                 } else if (gap != 0)
1109                                         imo->imo_membership[i - gap] =
1110                                             imo->imo_membership[i];
1111                         }
1112                         imo->imo_num_memberships -= gap;
1113                 }
1114         }
1115 }
1116
1117 /*
1118  * Check for alternatives when higher level complains
1119  * about service problems.  For now, invalidate cached
1120  * routing information.  If the route was created dynamically
1121  * (by a redirect), time to try a default gateway again.
1122  */
1123 void
1124 in_losing(struct inpcb *inp)
1125 {
1126         struct rtentry *rt;
1127         struct rt_addrinfo rtinfo;
1128
1129         if ((rt = inp->inp_route.ro_rt)) {
1130                 bzero(&rtinfo, sizeof(struct rt_addrinfo));
1131                 rtinfo.rti_info[RTAX_DST] = rt_key(rt);
1132                 rtinfo.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1133                 rtinfo.rti_info[RTAX_NETMASK] = rt_mask(rt);
1134                 rtinfo.rti_flags = rt->rt_flags;
1135                 rt_missmsg(RTM_LOSING, &rtinfo, rt->rt_flags, 0);
1136                 if (rt->rt_flags & RTF_DYNAMIC)
1137                         rtrequest1_global(RTM_DELETE, &rtinfo, NULL, NULL);
1138                 inp->inp_route.ro_rt = NULL;
1139                 rtfree(rt);
1140                 /*
1141                  * A new route can be allocated
1142                  * the next time output is attempted.
1143                  */
1144         }
1145 }
1146
1147 /*
1148  * After a routing change, flush old routing
1149  * and allocate a (hopefully) better one.
1150  */
1151 void
1152 in_rtchange(struct inpcb *inp, int err)
1153 {
1154         if (inp->inp_route.ro_rt) {
1155                 rtfree(inp->inp_route.ro_rt);
1156                 inp->inp_route.ro_rt = NULL;
1157                 /*
1158                  * A new route can be allocated the next time
1159                  * output is attempted.
1160                  */
1161         }
1162 }
1163
1164 /*
1165  * Lookup a PCB based on the local address and port.
1166  */
1167 struct inpcb *
1168 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
1169                    u_int lport_arg, int wild_okay, struct ucred *cred)
1170 {
1171         struct inpcb *inp;
1172         int matchwild = 3, wildcard;
1173         u_short lport = lport_arg;
1174         struct inpcbporthead *porthash;
1175         struct inpcbport *phd;
1176         struct inpcb *match = NULL;
1177
1178         /*
1179          * If the porthashbase is shared across several cpus we need
1180          * to lock.
1181          */
1182         if (pcbinfo->porttoken)
1183                 lwkt_gettoken(pcbinfo->porttoken);
1184
1185         /*
1186          * Best fit PCB lookup.
1187          *
1188          * First see if this local port is in use by looking on the
1189          * port hash list.
1190          */
1191         porthash = &pcbinfo->porthashbase[
1192                         INP_PCBPORTHASH(lport, pcbinfo->porthashmask)];
1193         LIST_FOREACH(phd, porthash, phd_hash) {
1194                 if (phd->phd_port == lport)
1195                         break;
1196         }
1197         if (phd != NULL) {
1198                 /*
1199                  * Port is in use by one or more PCBs. Look for best
1200                  * fit.
1201                  */
1202                 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
1203                         wildcard = 0;
1204 #ifdef INET6
1205                         if ((inp->inp_vflag & INP_IPV4) == 0)
1206                                 continue;
1207 #endif
1208                         if (inp->inp_faddr.s_addr != INADDR_ANY)
1209                                 wildcard++;
1210                         if (inp->inp_laddr.s_addr != INADDR_ANY) {
1211                                 if (laddr.s_addr == INADDR_ANY)
1212                                         wildcard++;
1213                                 else if (inp->inp_laddr.s_addr != laddr.s_addr)
1214                                         continue;
1215                         } else {
1216                                 if (laddr.s_addr != INADDR_ANY)
1217                                         wildcard++;
1218                         }
1219                         if (wildcard && !wild_okay)
1220                                 continue;
1221                         if (wildcard < matchwild &&
1222                             (cred == NULL ||
1223                              cred->cr_prison == 
1224                                         inp->inp_socket->so_cred->cr_prison)) {
1225                                 match = inp;
1226                                 matchwild = wildcard;
1227                                 if (matchwild == 0) {
1228                                         break;
1229                                 }
1230                         }
1231                 }
1232         }
1233         if (pcbinfo->porttoken)
1234                 lwkt_reltoken(pcbinfo->porttoken);
1235         return (match);
1236 }
1237
1238 static struct inpcb *
1239 inp_localgroup_lookup(const struct inpcbinfo *pcbinfo,
1240     struct in_addr laddr, uint16_t lport, uint32_t pkt_hash)
1241 {
1242         struct inpcb *local_wild = NULL;
1243         const struct inp_localgrphead *hdr;
1244         const struct inp_localgroup *grp;
1245
1246         hdr = &pcbinfo->localgrphashbase[
1247             INP_PCBLOCALGRPHASH(lport, pcbinfo->localgrphashmask)];
1248         pkt_hash >>= ncpus2_shift;
1249
1250         /*
1251          * Order of socket selection:
1252          * 1. non-wild.
1253          * 2. wild.
1254          *
1255          * NOTE:
1256          * - Local group does not contain jailed sockets
1257          * - Local group does not contain IPv4 mapped INET6 wild sockets
1258          */
1259         LIST_FOREACH(grp, hdr, il_list) {
1260 #ifdef INET6
1261                 if (!(grp->il_vflag & INP_IPV4))
1262                         continue;
1263 #endif
1264                 if (grp->il_lport == lport) {
1265                         int idx;
1266
1267                         idx = pkt_hash / grp->il_factor;
1268                         KASSERT(idx < grp->il_inpcnt && idx >= 0,
1269                             ("invalid hash %04x, cnt %d or fact %d",
1270                              pkt_hash, grp->il_inpcnt, grp->il_factor));
1271
1272                         if (grp->il_laddr.s_addr == laddr.s_addr)
1273                                 return grp->il_inp[idx];
1274                         else if (grp->il_laddr.s_addr == INADDR_ANY)
1275                                 local_wild = grp->il_inp[idx];
1276                 }
1277         }
1278         if (local_wild != NULL)
1279                 return local_wild;
1280         return NULL;
1281 }
1282
1283 /*
1284  * Lookup PCB in hash list.
1285  */
1286 struct inpcb *
1287 in_pcblookup_pkthash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1288     u_int fport_arg, struct in_addr laddr, u_int lport_arg,
1289     boolean_t wildcard, struct ifnet *ifp, const struct mbuf *m)
1290 {
1291         struct inpcbhead *head;
1292         struct inpcb *inp, *jinp=NULL;
1293         u_short fport = fport_arg, lport = lport_arg;
1294
1295         /*
1296          * First look for an exact match.
1297          */
1298         head = &pcbinfo->hashbase[INP_PCBCONNHASH(faddr.s_addr, fport,
1299             laddr.s_addr, lport, pcbinfo->hashmask)];
1300         LIST_FOREACH(inp, head, inp_hash) {
1301 #ifdef INET6
1302                 if (!(inp->inp_vflag & INP_IPV4))
1303                         continue;
1304 #endif
1305                 if (in_hosteq(inp->inp_faddr, faddr) &&
1306                     in_hosteq(inp->inp_laddr, laddr) &&
1307                     inp->inp_fport == fport && inp->inp_lport == lport) {
1308                         /* found */
1309                         if (inp->inp_socket == NULL ||
1310                             inp->inp_socket->so_cred->cr_prison == NULL) {
1311                                 return (inp);
1312                         } else {
1313                                 if  (jinp == NULL)
1314                                         jinp = inp;
1315                         }
1316                 }
1317         }
1318         if (jinp != NULL)
1319                 return (jinp);
1320         if (wildcard) {
1321                 struct inpcb *local_wild = NULL;
1322                 struct inpcb *jinp_wild = NULL;
1323 #ifdef INET6
1324                 struct inpcb *local_wild_mapped = NULL;
1325 #endif
1326                 struct inpcontainer *ic;
1327                 struct inpcontainerhead *chead;
1328                 struct sockaddr_in jsin;
1329                 struct ucred *cred;
1330
1331                 /*
1332                  * Check local group first
1333                  */
1334                 if (pcbinfo->localgrphashbase != NULL &&
1335                     m != NULL && (m->m_flags & M_HASH) &&
1336                     !(ifp && ifp->if_type == IFT_FAITH)) {
1337                         inp = inp_localgroup_lookup(pcbinfo,
1338                             laddr, lport, m->m_pkthdr.hash);
1339                         if (inp != NULL)
1340                                 return inp;
1341                 }
1342
1343                 /*
1344                  * Order of socket selection:
1345                  * 1. non-jailed, non-wild.
1346                  * 2. non-jailed, wild.
1347                  * 3. jailed, non-wild.
1348                  * 4. jailed, wild.
1349                  */
1350                 jsin.sin_family = AF_INET;
1351                 chead = &pcbinfo->wildcardhashbase[
1352                     INP_PCBWILDCARDHASH(lport, pcbinfo->wildcardhashmask)];
1353                 LIST_FOREACH(ic, chead, ic_list) {
1354                         inp = ic->ic_inp;
1355                         jsin.sin_addr.s_addr = laddr.s_addr;
1356 #ifdef INET6
1357                         if (!(inp->inp_vflag & INP_IPV4))
1358                                 continue;
1359 #endif
1360                         if (inp->inp_socket != NULL)
1361                                 cred = inp->inp_socket->so_cred;
1362                         else
1363                                 cred = NULL;
1364                         if (cred != NULL && jailed(cred)) {
1365                                 if (jinp != NULL)
1366                                         continue;
1367                                 else
1368                                         if (!jailed_ip(cred->cr_prison,
1369                                             (struct sockaddr *)&jsin))
1370                                                 continue;
1371                         }
1372                         if (inp->inp_lport == lport) {
1373                                 if (ifp && ifp->if_type == IFT_FAITH &&
1374                                     !(inp->inp_flags & INP_FAITH))
1375                                         continue;
1376                                 if (inp->inp_laddr.s_addr == laddr.s_addr) {
1377                                         if (cred != NULL && jailed(cred))
1378                                                 jinp = inp;
1379                                         else
1380                                                 return (inp);
1381                                 }
1382                                 if (inp->inp_laddr.s_addr == INADDR_ANY) {
1383 #ifdef INET6
1384                                         if (INP_CHECK_SOCKAF(inp->inp_socket,
1385                                                              AF_INET6))
1386                                                 local_wild_mapped = inp;
1387                                         else
1388 #endif
1389                                                 if (cred != NULL &&
1390                                                     jailed(cred))
1391                                                         jinp_wild = inp;
1392                                                 else
1393                                                         local_wild = inp;
1394                                 }
1395                         }
1396                 }
1397                 if (local_wild != NULL)
1398                         return (local_wild);
1399 #ifdef INET6
1400                 if (local_wild_mapped != NULL)
1401                         return (local_wild_mapped);
1402 #endif
1403                 if (jinp != NULL)
1404                         return (jinp);
1405                 return (jinp_wild);
1406         }
1407
1408         /*
1409          * Not found.
1410          */
1411         return (NULL);
1412 }
1413
1414 struct inpcb *
1415 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1416     u_int fport_arg, struct in_addr laddr, u_int lport_arg,
1417     boolean_t wildcard, struct ifnet *ifp)
1418 {
1419         return in_pcblookup_pkthash(pcbinfo, faddr, fport_arg,
1420             laddr, lport_arg, wildcard, ifp, NULL);
1421 }
1422
1423 /*
1424  * Insert PCB into connection hash table.
1425  */
1426 void
1427 in_pcbinsconnhash(struct inpcb *inp)
1428 {
1429         struct inpcbinfo *pcbinfo = inp->inp_cpcbinfo;
1430         struct inpcbhead *bucket;
1431         u_int32_t hashkey_faddr, hashkey_laddr;
1432
1433 #ifdef INET6
1434         if (inp->inp_vflag & INP_IPV6) {
1435                 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX JH */;
1436                 hashkey_laddr = inp->in6p_laddr.s6_addr32[3] /* XXX JH */;
1437         } else {
1438 #endif
1439                 hashkey_faddr = inp->inp_faddr.s_addr;
1440                 hashkey_laddr = inp->inp_laddr.s_addr;
1441 #ifdef INET6
1442         }
1443 #endif
1444
1445         KASSERT(!(inp->inp_flags & INP_WILDCARD),
1446                 ("already on wildcardhash"));
1447         KASSERT(!(inp->inp_flags & INP_CONNECTED),
1448                 ("already on connhash"));
1449         inp->inp_flags |= INP_CONNECTED;
1450
1451         /*
1452          * Insert into the connection hash table.
1453          */
1454         bucket = &pcbinfo->hashbase[INP_PCBCONNHASH(hashkey_faddr,
1455             inp->inp_fport, hashkey_laddr, inp->inp_lport, pcbinfo->hashmask)];
1456         LIST_INSERT_HEAD(bucket, inp, inp_hash);
1457 }
1458
1459 /*
1460  * Remove PCB from connection hash table.
1461  */
1462 void
1463 in_pcbremconnhash(struct inpcb *inp)
1464 {
1465         KASSERT(inp->inp_flags & INP_CONNECTED, ("inp not connected"));
1466         LIST_REMOVE(inp, inp_hash);
1467         inp->inp_flags &= ~INP_CONNECTED;
1468 }
1469
1470 /*
1471  * Insert PCB into port hash table.
1472  */
1473 int
1474 in_pcbinsporthash(struct inpcb *inp)
1475 {
1476         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1477         struct inpcbporthead *pcbporthash;
1478         struct inpcbport *phd;
1479
1480         /*
1481          * If the porthashbase is shared across several cpus we need
1482          * to lock.
1483          */
1484         if (pcbinfo->porttoken)
1485                 lwkt_gettoken(pcbinfo->porttoken);
1486
1487         /*
1488          * Insert into the port hash table.
1489          */
1490         pcbporthash = &pcbinfo->porthashbase[
1491             INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)];
1492
1493         /* Go through port list and look for a head for this lport. */
1494         LIST_FOREACH(phd, pcbporthash, phd_hash) {
1495                 if (phd->phd_port == inp->inp_lport)
1496                         break;
1497         }
1498
1499         /* If none exists, malloc one and tack it on. */
1500         if (phd == NULL) {
1501                 KKASSERT(pcbinfo->portsave != NULL);
1502                 phd = pcbinfo->portsave;
1503                 pcbinfo->portsave = NULL;
1504                 phd->phd_port = inp->inp_lport;
1505                 LIST_INIT(&phd->phd_pcblist);
1506                 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
1507         }
1508
1509         inp->inp_phd = phd;
1510         LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
1511
1512         if (pcbinfo->porttoken)
1513                 lwkt_reltoken(pcbinfo->porttoken);
1514         if (pcbinfo->portsave == NULL) {
1515                 pcbinfo->portsave = kmalloc(sizeof(*pcbinfo->portsave),
1516                                             M_PCB, M_INTWAIT | M_ZERO);
1517         }
1518         return (0);
1519 }
1520
1521 static struct inp_localgroup *
1522 inp_localgroup_alloc(struct inp_localgrphead *hdr, u_char vflag,
1523     uint16_t port, const union in_dependaddr *addr, int size)
1524 {
1525         struct inp_localgroup *grp;
1526
1527         grp = kmalloc(__offsetof(struct inp_localgroup, il_inp[size]),
1528             M_TEMP, M_INTWAIT | M_ZERO);
1529         grp->il_vflag = vflag;
1530         grp->il_lport = port;
1531         grp->il_dependladdr = *addr;
1532         grp->il_inpsiz = size;
1533
1534         LIST_INSERT_HEAD(hdr, grp, il_list);
1535
1536         return grp;
1537 }
1538
1539 static void
1540 inp_localgroup_free(struct inp_localgroup *grp)
1541 {
1542         LIST_REMOVE(grp, il_list);
1543         kfree(grp, M_TEMP);
1544 }
1545
1546 static struct inp_localgroup *
1547 inp_localgroup_resize(struct inp_localgrphead *hdr,
1548     struct inp_localgroup *old_grp, int size)
1549 {
1550         struct inp_localgroup *grp;
1551         int i;
1552
1553         grp = inp_localgroup_alloc(hdr, old_grp->il_vflag,
1554             old_grp->il_lport, &old_grp->il_dependladdr, size);
1555
1556         KASSERT(old_grp->il_inpcnt < grp->il_inpsiz,
1557             ("invalid new local group size %d and old local group count %d",
1558              grp->il_inpsiz, old_grp->il_inpcnt));
1559         for (i = 0; i < old_grp->il_inpcnt; ++i)
1560                 grp->il_inp[i] = old_grp->il_inp[i];
1561         grp->il_inpcnt = old_grp->il_inpcnt;
1562         grp->il_factor = old_grp->il_factor;
1563
1564         inp_localgroup_free(old_grp);
1565
1566         return grp;
1567 }
1568
1569 static void
1570 inp_localgroup_factor(struct inp_localgroup *grp)
1571 {
1572         grp->il_factor =
1573             ((uint32_t)(0xffff >> ncpus2_shift) / grp->il_inpcnt) + 1;
1574         KASSERT(grp->il_factor != 0, ("invalid local group factor, "
1575             "ncpus2_shift %d, inpcnt %d", ncpus2_shift, grp->il_inpcnt));
1576 }
1577
1578 static void
1579 in_pcbinslocalgrphash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo)
1580 {
1581         struct inp_localgrphead *hdr;
1582         struct inp_localgroup *grp;
1583         struct ucred *cred;
1584
1585         if (pcbinfo->localgrphashbase == NULL)
1586                 return;
1587
1588         /*
1589          * XXX don't allow jailed socket to join local group
1590          */
1591         if (inp->inp_socket != NULL)
1592                 cred = inp->inp_socket->so_cred;
1593         else
1594                 cred = NULL;
1595         if (cred != NULL && jailed(cred))
1596                 return;
1597
1598 #ifdef INET6
1599         /*
1600          * XXX don't allow IPv4 mapped INET6 wild socket
1601          */
1602         if ((inp->inp_vflag & INP_IPV4) &&
1603             inp->inp_laddr.s_addr == INADDR_ANY &&
1604             INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6))
1605                 return;
1606 #endif
1607
1608         hdr = &pcbinfo->localgrphashbase[
1609             INP_PCBLOCALGRPHASH(inp->inp_lport, pcbinfo->localgrphashmask)];
1610
1611         LIST_FOREACH(grp, hdr, il_list) {
1612                 if (grp->il_vflag == inp->inp_vflag &&
1613                     grp->il_lport == inp->inp_lport &&
1614                     memcmp(&grp->il_dependladdr,
1615                         &inp->inp_inc.inc_ie.ie_dependladdr,
1616                         sizeof(grp->il_dependladdr)) == 0) {
1617                         break;
1618                 }
1619         }
1620         if (grp == NULL) {
1621                 /* Create new local group */
1622                 grp = inp_localgroup_alloc(hdr, inp->inp_vflag,
1623                     inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr,
1624                     INP_LOCALGROUP_SIZMIN);
1625         } else if (grp->il_inpcnt == grp->il_inpsiz) {
1626                 if (grp->il_inpsiz >= INP_LOCALGROUP_SIZMAX) {
1627                         static int limit_logged = 0;
1628
1629                         if (!limit_logged) {
1630                                 limit_logged = 1;
1631                                 kprintf("local group port %d, "
1632                                     "limit reached\n", ntohs(grp->il_lport));
1633                         }
1634                         return;
1635                 }
1636
1637                 /* Expand this local group */
1638                 grp = inp_localgroup_resize(hdr, grp, grp->il_inpsiz * 2);
1639         }
1640
1641         KASSERT(grp->il_inpcnt < grp->il_inpsiz,
1642             ("invalid local group size %d and count %d",
1643              grp->il_inpsiz, grp->il_inpcnt));
1644         grp->il_inp[grp->il_inpcnt] = inp;
1645         grp->il_inpcnt++;
1646         inp_localgroup_factor(grp);
1647 }
1648
1649 void
1650 in_pcbinswildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo)
1651 {
1652         struct inpcontainer *ic;
1653         struct inpcontainerhead *bucket;
1654
1655         in_pcbinslocalgrphash_oncpu(inp, pcbinfo);
1656
1657         bucket = &pcbinfo->wildcardhashbase[
1658             INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)];
1659
1660         ic = kmalloc(sizeof(struct inpcontainer), M_TEMP, M_INTWAIT);
1661         ic->ic_inp = inp;
1662         LIST_INSERT_HEAD(bucket, ic, ic_list);
1663 }
1664
1665 /*
1666  * Insert PCB into wildcard hash table.
1667  */
1668 void
1669 in_pcbinswildcardhash(struct inpcb *inp)
1670 {
1671         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1672
1673         KASSERT(!(inp->inp_flags & INP_CONNECTED),
1674                 ("already on connhash"));
1675         KASSERT(!(inp->inp_flags & INP_WILDCARD),
1676                 ("already on wildcardhash"));
1677         inp->inp_flags |= INP_WILDCARD;
1678
1679         in_pcbinswildcardhash_oncpu(inp, pcbinfo);
1680 }
1681
1682 static void
1683 in_pcbremlocalgrphash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo)
1684 {
1685         struct inp_localgrphead *hdr;
1686         struct inp_localgroup *grp;
1687
1688         if (pcbinfo->localgrphashbase == NULL)
1689                 return;
1690
1691         hdr = &pcbinfo->localgrphashbase[
1692             INP_PCBLOCALGRPHASH(inp->inp_lport, pcbinfo->localgrphashmask)];
1693
1694         LIST_FOREACH(grp, hdr, il_list) {
1695                 int i;
1696
1697                 for (i = 0; i < grp->il_inpcnt; ++i) {
1698                         if (grp->il_inp[i] != inp)
1699                                 continue;
1700
1701                         if (grp->il_inpcnt == 1) {
1702                                 /* Free this local group */
1703                                 inp_localgroup_free(grp);
1704                         } else {
1705                                 /* Pull up inpcbs */
1706                                 for (; i + 1 < grp->il_inpcnt; ++i)
1707                                         grp->il_inp[i] = grp->il_inp[i + 1];
1708                                 grp->il_inpcnt--;
1709                                 inp_localgroup_factor(grp);
1710
1711                                 if (grp->il_inpsiz > INP_LOCALGROUP_SIZMIN &&
1712                                     grp->il_inpcnt <= (grp->il_inpsiz / 4)) {
1713                                         /* Shrink this local group */
1714                                         grp = inp_localgroup_resize(hdr, grp,
1715                                             grp->il_inpsiz / 2);
1716                                 }
1717                         }
1718                         return;
1719                 }
1720         }
1721 }
1722
1723 void
1724 in_pcbremwildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo)
1725 {
1726         struct inpcontainer *ic;
1727         struct inpcontainerhead *head;
1728
1729         in_pcbremlocalgrphash_oncpu(inp, pcbinfo);
1730
1731         /* find bucket */
1732         head = &pcbinfo->wildcardhashbase[
1733             INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)];
1734
1735         LIST_FOREACH(ic, head, ic_list) {
1736                 if (ic->ic_inp == inp)
1737                         goto found;
1738         }
1739         return;                 /* not found! */
1740
1741 found:
1742         LIST_REMOVE(ic, ic_list);       /* remove container from bucket chain */
1743         kfree(ic, M_TEMP);              /* deallocate container */
1744 }
1745
1746 /*
1747  * Remove PCB from wildcard hash table.
1748  */
1749 void
1750 in_pcbremwildcardhash(struct inpcb *inp)
1751 {
1752         struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1753
1754         KASSERT(inp->inp_flags & INP_WILDCARD, ("inp not wildcard"));
1755         in_pcbremwildcardhash_oncpu(inp, pcbinfo);
1756         inp->inp_flags &= ~INP_WILDCARD;
1757 }
1758
1759 /*
1760  * Remove PCB from various lists.
1761  */
1762 void
1763 in_pcbremlists(struct inpcb *inp)
1764 {
1765         struct inpcbinfo *pcbinfo;
1766
1767         if (inp->inp_lport) {
1768                 struct inpcbport *phd;
1769
1770                 pcbinfo = inp->inp_pcbinfo;
1771                 if (pcbinfo->porttoken)
1772                         lwkt_gettoken(pcbinfo->porttoken);
1773
1774                 phd = inp->inp_phd;
1775                 LIST_REMOVE(inp, inp_portlist);
1776                 if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
1777                         LIST_REMOVE(phd, phd_hash);
1778                         kfree(phd, M_PCB);
1779                 }
1780                 if (pcbinfo->porttoken)
1781                         lwkt_reltoken(pcbinfo->porttoken);
1782         }
1783         if (inp->inp_flags & INP_WILDCARD) {
1784                 in_pcbremwildcardhash(inp);
1785         } else if (inp->inp_flags & INP_CONNECTED) {
1786                 in_pcbremconnhash(inp);
1787         }
1788         LIST_REMOVE(inp, inp_list);
1789         inp->inp_pcbinfo->ipi_count--;
1790 }
1791
1792 int
1793 prison_xinpcb(struct thread *td, struct inpcb *inp)
1794 {
1795         struct ucred *cr;
1796
1797         if (td->td_proc == NULL)
1798                 return (0);
1799         cr = td->td_proc->p_ucred;
1800         if (cr->cr_prison == NULL)
1801                 return (0);
1802         if (inp->inp_socket && inp->inp_socket->so_cred &&
1803             inp->inp_socket->so_cred->cr_prison &&
1804             cr->cr_prison == inp->inp_socket->so_cred->cr_prison)
1805                 return (0);
1806         return (1);
1807 }
1808
1809 int
1810 in_pcblist_global(SYSCTL_HANDLER_ARGS)
1811 {
1812         struct inpcbinfo *pcbinfo = arg1;
1813         struct inpcb *inp, *marker;
1814         struct xinpcb xi;
1815         int error, i, n;
1816
1817         /*
1818          * The process of preparing the TCB list is too time-consuming and
1819          * resource-intensive to repeat twice on every request.
1820          */
1821         if (req->oldptr == NULL) {
1822                 n = pcbinfo->ipi_count;
1823                 req->oldidx = (n + n/8 + 10) * sizeof(struct xinpcb);
1824                 return 0;
1825         }
1826
1827         if (req->newptr != NULL)
1828                 return EPERM;
1829
1830         /*
1831          * OK, now we're committed to doing something.  Re-fetch ipi_count
1832          * after obtaining the generation count.
1833          */
1834         n = pcbinfo->ipi_count;
1835
1836         marker = kmalloc(sizeof(struct inpcb), M_TEMP, M_WAITOK|M_ZERO);
1837         marker->inp_flags |= INP_PLACEMARKER;
1838         LIST_INSERT_HEAD(&pcbinfo->pcblisthead, marker, inp_list);
1839
1840         i = 0;
1841         error = 0;
1842
1843         while ((inp = LIST_NEXT(marker, inp_list)) != NULL && i < n) {
1844                 LIST_REMOVE(marker, inp_list);
1845                 LIST_INSERT_AFTER(inp, marker, inp_list);
1846
1847                 if (inp->inp_flags & INP_PLACEMARKER)
1848                         continue;
1849                 if (prison_xinpcb(req->td, inp))
1850                         continue;
1851                 bzero(&xi, sizeof xi);
1852                 xi.xi_len = sizeof xi;
1853                 bcopy(inp, &xi.xi_inp, sizeof *inp);
1854                 if (inp->inp_socket)
1855                         sotoxsocket(inp->inp_socket, &xi.xi_socket);
1856                 if ((error = SYSCTL_OUT(req, &xi, sizeof xi)) != 0)
1857                         break;
1858                 ++i;
1859         }
1860         LIST_REMOVE(marker, inp_list);
1861         if (error == 0 && i < n) {
1862                 bzero(&xi, sizeof xi);
1863                 xi.xi_len = sizeof xi;
1864                 while (i < n) {
1865                         error = SYSCTL_OUT(req, &xi, sizeof xi);
1866                         ++i;
1867                 }
1868         }
1869         kfree(marker, M_TEMP);
1870         return(error);
1871 }
1872
1873 int
1874 in_pcblist_global_nomarker(SYSCTL_HANDLER_ARGS, struct xinpcb **xi0, int *nxi0)
1875 {
1876         struct inpcbinfo *pcbinfo = arg1;
1877         struct inpcb *inp;
1878         struct xinpcb *xi;
1879         int nxi;
1880
1881         *nxi0 = 0;
1882         *xi0 = NULL;
1883
1884         /*
1885          * The process of preparing the PCB list is too time-consuming and
1886          * resource-intensive to repeat twice on every request.
1887          */
1888         if (req->oldptr == NULL) {
1889                 int n = pcbinfo->ipi_count;
1890
1891                 req->oldidx = (n + n/8 + 10) * sizeof(struct xinpcb);
1892                 return 0;
1893         }
1894
1895         if (req->newptr != NULL)
1896                 return EPERM;
1897
1898         if (pcbinfo->ipi_count == 0)
1899                 return 0;
1900
1901         nxi = 0;
1902         xi = kmalloc(pcbinfo->ipi_count * sizeof(*xi), M_TEMP,
1903                      M_WAITOK | M_ZERO | M_NULLOK);
1904         if (xi == NULL)
1905                 return ENOMEM;
1906
1907         LIST_FOREACH(inp, &pcbinfo->pcblisthead, inp_list) {
1908                 struct xinpcb *xi_ptr = &xi[nxi];
1909
1910                 if (prison_xinpcb(req->td, inp))
1911                         continue;
1912
1913                 xi_ptr->xi_len = sizeof(*xi_ptr);
1914                 bcopy(inp, &xi_ptr->xi_inp, sizeof(*inp));
1915                 if (inp->inp_socket)
1916                         sotoxsocket(inp->inp_socket, &xi_ptr->xi_socket);
1917                 ++nxi;
1918         }
1919
1920         if (nxi == 0) {
1921                 kfree(xi, M_TEMP);
1922                 return 0;
1923         }
1924
1925         *nxi0 = nxi;
1926         *xi0 = xi;
1927
1928         return 0;
1929 }
1930
1931 void
1932 in_savefaddr(struct socket *so, const struct sockaddr *faddr)
1933 {
1934         struct sockaddr_in *sin;
1935
1936         KASSERT(faddr->sa_family == AF_INET,
1937             ("not AF_INET faddr %d", faddr->sa_family));
1938
1939         sin = kmalloc(sizeof(*sin), M_SONAME, M_WAITOK | M_ZERO);
1940         sin->sin_family = AF_INET;
1941         sin->sin_len = sizeof(*sin);
1942         sin->sin_port = ((const struct sockaddr_in *)faddr)->sin_port;
1943         sin->sin_addr = ((const struct sockaddr_in *)faddr)->sin_addr;
1944
1945         so->so_faddr = (struct sockaddr *)sin;
1946 }