uipc: Fix lockless unp_conn accessing and uipc_detach() race.
[dragonfly.git] / sys / kern / uipc_usrreq.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
dc71b7ab 13 * 3. Neither the name of the University nor the names of its contributors
984263bc
MD
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
30 * $FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.54.2.10 2003/03/04 17:28:09 nectar Exp $
31 */
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/kernel.h>
36#include <sys/domain.h>
37#include <sys/fcntl.h>
38#include <sys/malloc.h> /* XXX must be before <sys/file.h> */
dadab5e9 39#include <sys/proc.h>
984263bc
MD
40#include <sys/file.h>
41#include <sys/filedesc.h>
42#include <sys/mbuf.h>
fad57d0e 43#include <sys/nlookup.h>
984263bc
MD
44#include <sys/protosw.h>
45#include <sys/socket.h>
46#include <sys/socketvar.h>
47#include <sys/resourcevar.h>
48#include <sys/stat.h>
fad57d0e 49#include <sys/mount.h>
984263bc
MD
50#include <sys/sysctl.h>
51#include <sys/un.h>
52#include <sys/unpcb.h>
53#include <sys/vnode.h>
6cef7136 54
dadab5e9 55#include <sys/file2.h>
2dd63755 56#include <sys/spinlock2.h>
6cef7136 57#include <sys/socketvar2.h>
002c1265 58#include <sys/msgport2.h>
984263bc 59
/*
 * File-private aliases for the protocol-private unpcb flag bits.
 *
 * UNP_DETACHED   - set by uipc_abort()/uipc_detach() before the pcb is
 *		    dropped; tested through UNP_ISATTACHED().
 * UNP_CONNECTING - set by unp_connect() while a connect is in progress.
 * UNP_DROPPED    - NOTE(review): not used in the part of the file seen
 *		    here; presumably set by unp_drop() - confirm.
 */
#define UNP_DETACHED		UNP_PRIVATE1
#define UNP_CONNECTING		UNP_PRIVATE2
#define UNP_DROPPED		UNP_PRIVATE3

/*
 * True when (unp) is non-NULL and has not been flagged UNP_DETACHED.
 */
#define UNP_ISATTACHED(unp)	\
	((unp) != NULL && !((unp)->unp_flags & UNP_DETACHED))

/*
 * Assert that the pool token associated with (unp) is held.  Expands to
 * nothing unless the kernel is built with INVARIANTS.
 */
#ifdef INVARIANTS
#define UNP_ASSERT_TOKEN_HELD(unp) \
	ASSERT_LWKT_TOKEN_HELD(lwkt_token_pool_lookup((unp)))
#else	/* !INVARIANTS */
#define UNP_ASSERT_TOKEN_HELD(unp)
#endif	/* INVARIANTS */
73
82b1d9f2
MD
/*
 * Node of a singly linked list of file pointers.  The list head is
 * unp_defdiscard_base.
 */
struct unp_defdiscard {
	struct unp_defdiscard	*next;	/* next node, or NULL */
	struct file		*fp;	/* file carried by this node */
};
typedef struct unp_defdiscard *unp_defdiscard_t;
78
386d5278 79static MALLOC_DEFINE(M_UNPCB, "unpcb", "unpcb struct");
984263bc
MD
80static unp_gen_t unp_gencnt;
81static u_int unp_count;
82
83static struct unp_head unp_shead, unp_dhead;
84
a3c18566 85static struct lwkt_token unp_token = LWKT_TOKEN_INITIALIZER(unp_token);
82b1d9f2
MD
86static int unp_defdiscard_nest;
87static unp_defdiscard_t unp_defdiscard_base;
6cef7136 88
984263bc
MD
89/*
90 * Unix communications domain.
91 *
92 * TODO:
3a6117bb 93 * RDM
984263bc
MD
94 * rethink name space problems
95 * need a proper out-of-band
96 * lock pushdown
97 */
98static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
2ad080fe 99static ino_t unp_ino = 1; /* prototype for fake inode numbers */
ba87a4ab 100static struct spinlock unp_ino_spin = SPINLOCK_INITIALIZER(&unp_ino_spin, "unp_ino_spin");
984263bc 101
/*
 * Forward declarations of file-local helpers.
 */

/* pcb life cycle */
static int	unp_attach(struct socket *so, struct pru_attach_info *ai);
static void	unp_detach(struct unpcb *unp);
static void	unp_drop(struct unpcb *unp, int error);

/* naming and connection management */
static int	unp_bind(struct unpcb *unp, struct sockaddr *nam,
		    struct thread *td);
static int	unp_connect(struct socket *so, struct sockaddr *nam,
		    struct thread *td);
static int	unp_connect_pair(struct unpcb *unp, struct unpcb *unp2);
static int	unp_find_lockref(struct sockaddr *nam, struct thread *td,
		    short type, struct unpcb **unp_ret);
static void	unp_disconnect(struct unpcb *, int);
static void	unp_shutdown(struct unpcb *);
static int	unp_listen(struct unpcb *, struct thread *);

/* descriptor passing and garbage collection */
static void	unp_gc(void);
static int	unp_gc_clearmarks(struct file *, void *);
static int	unp_gc_checkmarks(struct file *, void *);
static int	unp_gc_checkrefs(struct file *, void *);
static int	unp_revoke_gc_check(struct file *, void *);
static void	unp_scan(struct mbuf *, void (*)(struct file *, void *),
		    void *data);
static void	unp_mark(struct file *, void *data);
static void	unp_discard(struct file *, void *);
static int	unp_internalize(struct mbuf *, struct thread *);
static void	unp_fp_externalize(struct lwp *lp, struct file *fp, int fd);
984263bc 125
76d4bfa3 126/*
524d0e3c
MD
127 * SMP Considerations:
128 *
129 * Since unp_token will be automatically released upon execution of
130 * blocking code, we need to reference unp_conn before any possible
131 * blocking code to prevent it from being ripped behind our back.
132 *
133 * Any adjustment to unp->unp_conn requires both the global unp_token
134 * AND the per-unp token (lwkt_token_pool_lookup(unp)) to be held.
135 *
136 * Any access to so_pcb to obtain unp requires the pool token for
137 * unp to be held.
76d4bfa3
SZ
138 */
139
140/* NOTE: unp_token MUST be held */
141static __inline void
142unp_reference(struct unpcb *unp)
143{
144 atomic_add_int(&unp->unp_refcnt, 1);
145}
146
147/* NOTE: unp_token MUST be held */
148static __inline void
149unp_free(struct unpcb *unp)
150{
151 KKASSERT(unp->unp_refcnt > 0);
152 if (atomic_fetchadd_int(&unp->unp_refcnt, -1) == 1)
153 unp_detach(unp);
154}
155
a10b308d
SZ
156static __inline struct unpcb *
157unp_getsocktoken(struct socket *so)
158{
159 struct unpcb *unp;
160
161 /*
162 * The unp pointer is invalid until we verify that it is
163 * good by re-checking so_pcb AFTER obtaining the token.
164 */
165 while ((unp = so->so_pcb) != NULL) {
166 lwkt_getpooltoken(unp);
167 if (unp == so->so_pcb)
168 break;
169 lwkt_relpooltoken(unp);
170 }
171 return unp;
172}
173
4b7795a3 174static __inline void
a10b308d
SZ
175unp_reltoken(struct unpcb *unp)
176{
177 if (unp != NULL)
178 lwkt_relpooltoken(unp);
179}
180
4b7795a3
SZ
181static __inline void
182unp_setflags(struct unpcb *unp, int flags)
183{
184 atomic_set_int(&unp->unp_flags, flags);
185}
186
187static __inline void
188unp_clrflags(struct unpcb *unp, int flags)
189{
190 atomic_clear_int(&unp->unp_flags, flags);
191}
192
6cef7136
MD
193/*
194 * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort()
195 * will sofree() it when we return.
196 */
002c1265
MD
197static void
198uipc_abort(netmsg_t msg)
984263bc 199{
6cef7136
MD
200 struct unpcb *unp;
201 int error;
984263bc 202
6cef7136 203 lwkt_gettoken(&unp_token);
5df2d9a0
SZ
204 unp = unp_getsocktoken(msg->base.nm_so);
205
e63aadf9 206 if (UNP_ISATTACHED(unp)) {
5df2d9a0 207 unp_setflags(unp, UNP_DETACHED);
6cef7136 208 unp_drop(unp, ECONNABORTED);
76d4bfa3 209 unp_free(unp);
6cef7136
MD
210 error = 0;
211 } else {
212 error = EINVAL;
213 }
5df2d9a0
SZ
214
215 unp_reltoken(unp);
6cef7136
MD
216 lwkt_reltoken(&unp_token);
217
002c1265 218 lwkt_replymsg(&msg->lmsg, error);
984263bc
MD
219}
220
002c1265
MD
221static void
222uipc_accept(netmsg_t msg)
984263bc 223{
6cef7136 224 struct unpcb *unp;
002c1265 225 int error;
984263bc 226
6cef7136 227 lwkt_gettoken(&unp_token);
002c1265 228 unp = msg->base.nm_so->so_pcb;
e63aadf9 229 if (!UNP_ISATTACHED(unp)) {
002c1265 230 error = EINVAL;
984263bc 231 } else {
76d4bfa3
SZ
232 struct unpcb *unp2 = unp->unp_conn;
233
002c1265
MD
234 /*
235 * Pass back name of connected socket,
236 * if it was bound and we are still connected
237 * (our peer may have closed already!).
238 */
76d4bfa3
SZ
239 if (unp2 && unp2->unp_addr) {
240 unp_reference(unp2);
002c1265 241 *msg->accept.nm_nam = dup_sockaddr(
76d4bfa3
SZ
242 (struct sockaddr *)unp2->unp_addr);
243 unp_free(unp2);
002c1265 244 } else {
51295aee 245 *msg->accept.nm_nam = dup_sockaddr(&sun_noname);
002c1265
MD
246 }
247 error = 0;
984263bc 248 }
6cef7136 249 lwkt_reltoken(&unp_token);
002c1265 250 lwkt_replymsg(&msg->lmsg, error);
984263bc
MD
251}
252
002c1265
MD
253static void
254uipc_attach(netmsg_t msg)
984263bc 255{
6cef7136
MD
256 struct unpcb *unp;
257 int error;
258
259 lwkt_gettoken(&unp_token);
002c1265 260 unp = msg->base.nm_so->so_pcb;
e63aadf9
SZ
261 KASSERT(unp == NULL, ("double unp attach"));
262 error = unp_attach(msg->base.nm_so, msg->attach.nm_ai);
6cef7136 263 lwkt_reltoken(&unp_token);
002c1265 264 lwkt_replymsg(&msg->lmsg, error);
984263bc
MD
265}
266
002c1265
MD
267static void
268uipc_bind(netmsg_t msg)
984263bc 269{
6cef7136
MD
270 struct unpcb *unp;
271 int error;
984263bc 272
6cef7136 273 lwkt_gettoken(&unp_token);
002c1265 274 unp = msg->base.nm_so->so_pcb;
e63aadf9 275 if (UNP_ISATTACHED(unp))
002c1265 276 error = unp_bind(unp, msg->bind.nm_nam, msg->bind.nm_td);
6cef7136
MD
277 else
278 error = EINVAL;
279 lwkt_reltoken(&unp_token);
002c1265 280 lwkt_replymsg(&msg->lmsg, error);
984263bc
MD
281}
282
002c1265
MD
283static void
284uipc_connect(netmsg_t msg)
984263bc 285{
6cef7136 286 int error;
984263bc 287
c2bfe86d
SZ
288 error = unp_connect(msg->base.nm_so, msg->connect.nm_nam,
289 msg->connect.nm_td);
002c1265 290 lwkt_replymsg(&msg->lmsg, error);
984263bc
MD
291}
292
002c1265
MD
293static void
294uipc_connect2(netmsg_t msg)
984263bc 295{
6cef7136 296 int error;
984263bc 297
c2bfe86d 298 error = unp_connect2(msg->connect2.nm_so1, msg->connect2.nm_so2);
002c1265 299 lwkt_replymsg(&msg->lmsg, error);
984263bc
MD
300}
301
302/* control is EOPNOTSUPP */
303
002c1265
MD
304static void
305uipc_detach(netmsg_t msg)
984263bc 306{
6cef7136
MD
307 struct unpcb *unp;
308 int error;
984263bc 309
6cef7136 310 lwkt_gettoken(&unp_token);
5df2d9a0
SZ
311 unp = unp_getsocktoken(msg->base.nm_so);
312
e63aadf9 313 if (UNP_ISATTACHED(unp)) {
5df2d9a0 314 unp_setflags(unp, UNP_DETACHED);
16e0b14d 315 unp_drop(unp, 0);
76d4bfa3 316 unp_free(unp);
6cef7136
MD
317 error = 0;
318 } else {
319 error = EINVAL;
320 }
5df2d9a0
SZ
321
322 unp_reltoken(unp);
6cef7136 323 lwkt_reltoken(&unp_token);
5df2d9a0 324
002c1265 325 lwkt_replymsg(&msg->lmsg, error);
984263bc
MD
326}
327
002c1265
MD
328static void
329uipc_disconnect(netmsg_t msg)
984263bc 330{
6cef7136
MD
331 struct unpcb *unp;
332 int error;
984263bc 333
6cef7136 334 lwkt_gettoken(&unp_token);
16e0b14d
SZ
335 unp = unp_getsocktoken(msg->base.nm_so);
336
e63aadf9 337 if (UNP_ISATTACHED(unp)) {
16e0b14d 338 unp_disconnect(unp, 0);
6cef7136
MD
339 error = 0;
340 } else {
341 error = EINVAL;
342 }
16e0b14d
SZ
343
344 unp_reltoken(unp);
6cef7136 345 lwkt_reltoken(&unp_token);
16e0b14d 346
002c1265 347 lwkt_replymsg(&msg->lmsg, error);
984263bc
MD
348}
349
002c1265
MD
350static void
351uipc_listen(netmsg_t msg)
984263bc 352{
6cef7136
MD
353 struct unpcb *unp;
354 int error;
984263bc 355
6cef7136 356 lwkt_gettoken(&unp_token);
002c1265 357 unp = msg->base.nm_so->so_pcb;
e63aadf9 358 if (!UNP_ISATTACHED(unp) || unp->unp_vnode == NULL)
6cef7136
MD
359 error = EINVAL;
360 else
002c1265 361 error = unp_listen(unp, msg->listen.nm_td);
6cef7136 362 lwkt_reltoken(&unp_token);
002c1265 363 lwkt_replymsg(&msg->lmsg, error);
984263bc
MD
364}
365
002c1265
MD
366static void
367uipc_peeraddr(netmsg_t msg)
984263bc 368{
6cef7136
MD
369 struct unpcb *unp;
370 int error;
984263bc 371
6cef7136 372 lwkt_gettoken(&unp_token);
002c1265 373 unp = msg->base.nm_so->so_pcb;
e63aadf9 374 if (!UNP_ISATTACHED(unp)) {
6cef7136
MD
375 error = EINVAL;
376 } else if (unp->unp_conn && unp->unp_conn->unp_addr) {
76d4bfa3
SZ
377 struct unpcb *unp2 = unp->unp_conn;
378
379 unp_reference(unp2);
002c1265 380 *msg->peeraddr.nm_nam = dup_sockaddr(
76d4bfa3
SZ
381 (struct sockaddr *)unp2->unp_addr);
382 unp_free(unp2);
6cef7136
MD
383 error = 0;
384 } else {
984263bc
MD
385 /*
386 * XXX: It seems that this test always fails even when
387 * connection is established. So, this else clause is
388 * added as workaround to return PF_LOCAL sockaddr.
389 */
51295aee 390 *msg->peeraddr.nm_nam = dup_sockaddr(&sun_noname);
6cef7136 391 error = 0;
984263bc 392 }
6cef7136 393 lwkt_reltoken(&unp_token);
002c1265 394 lwkt_replymsg(&msg->lmsg, error);
984263bc
MD
395}
396
002c1265
MD
397static void
398uipc_rcvd(netmsg_t msg)
984263bc 399{
76d4bfa3 400 struct unpcb *unp, *unp2;
002c1265 401 struct socket *so;
984263bc 402 struct socket *so2;
002c1265 403 int error;
984263bc 404
524d0e3c
MD
405 /*
406 * so_pcb is only modified with both the global and the unp
a10b308d 407 * pool token held.
524d0e3c 408 */
002c1265 409 so = msg->base.nm_so;
a10b308d
SZ
410 unp = unp_getsocktoken(so);
411
e63aadf9 412 if (!UNP_ISATTACHED(unp)) {
002c1265
MD
413 error = EINVAL;
414 goto done;
6cef7136
MD
415 }
416
984263bc
MD
417 switch (so->so_type) {
418 case SOCK_DGRAM:
419 panic("uipc_rcvd DGRAM?");
420 /*NOTREACHED*/
984263bc 421 case SOCK_STREAM:
91be174d 422 case SOCK_SEQPACKET:
9dc5418a 423 if (unp->unp_conn == NULL)
984263bc 424 break;
524d0e3c 425 unp2 = unp->unp_conn; /* protected by pool token */
76d4bfa3 426
984263bc 427 /*
3a6117bb
MD
428 * Because we are transfering mbufs directly to the
429 * peer socket we have to use SSB_STOP on the sender
430 * to prevent it from building up infinite mbufs.
524d0e3c
MD
431 *
432 * As in several places in this module w ehave to ref unp2
433 * to ensure that it does not get ripped out from under us
434 * if we block on the so2 token or in sowwakeup().
984263bc 435 */
76d4bfa3 436 so2 = unp2->unp_socket;
524d0e3c
MD
437 unp_reference(unp2);
438 lwkt_gettoken(&so2->so_rcv.ssb_token);
3a6117bb
MD
439 if (so->so_rcv.ssb_cc < so2->so_snd.ssb_hiwat &&
440 so->so_rcv.ssb_mbcnt < so2->so_snd.ssb_mbmax
441 ) {
14343ad3 442 atomic_clear_int(&so2->so_snd.ssb_flags, SSB_STOP);
76d4bfa3 443
3a6117bb
MD
444 sowwakeup(so2);
445 }
524d0e3c
MD
446 lwkt_reltoken(&so2->so_rcv.ssb_token);
447 unp_free(unp2);
984263bc 448 break;
984263bc
MD
449 default:
450 panic("uipc_rcvd unknown socktype");
6cef7136 451 /*NOTREACHED*/
984263bc 452 }
002c1265
MD
453 error = 0;
454done:
a10b308d 455 unp_reltoken(unp);
002c1265 456 lwkt_replymsg(&msg->lmsg, error);
984263bc
MD
457}
458
459/* pru_rcvoob is EOPNOTSUPP */
460
002c1265
MD
461static void
462uipc_send(netmsg_t msg)
984263bc 463{
76d4bfa3 464 struct unpcb *unp, *unp2;
002c1265 465 struct socket *so;
984263bc 466 struct socket *so2;
002c1265
MD
467 struct mbuf *control;
468 struct mbuf *m;
6cef7136 469 int error = 0;
984263bc 470
002c1265
MD
471 so = msg->base.nm_so;
472 control = msg->send.nm_control;
473 m = msg->send.nm_m;
002c1265 474
524d0e3c
MD
475 /*
476 * so_pcb is only modified with both the global and the unp
a10b308d 477 * pool token held.
524d0e3c
MD
478 */
479 so = msg->base.nm_so;
a10b308d
SZ
480 unp = unp_getsocktoken(so);
481
e63aadf9 482 if (!UNP_ISATTACHED(unp)) {
984263bc 483 error = EINVAL;
a10b308d 484 goto release;
984263bc 485 }
524d0e3c 486
002c1265 487 if (msg->send.nm_flags & PRUS_OOB) {
984263bc
MD
488 error = EOPNOTSUPP;
489 goto release;
490 }
491
b0da0c88
MD
492 wakeup_start_delayed();
493
002c1265 494 if (control && (error = unp_internalize(control, msg->send.nm_td)))
984263bc
MD
495 goto release;
496
497 switch (so->so_type) {
498 case SOCK_DGRAM:
499 {
500 struct sockaddr *from;
501
002c1265 502 if (msg->send.nm_addr) {
984263bc
MD
503 if (unp->unp_conn) {
504 error = EISCONN;
505 break;
506 }
3d3093e5
SZ
507 error = unp_find_lockref(msg->send.nm_addr,
508 msg->send.nm_td, so->so_type, &unp2);
984263bc
MD
509 if (error)
510 break;
3d3093e5
SZ
511 /*
512 * NOTE:
513 * unp2 is locked and referenced.
514 *
515 * We could unlock unp2 now, since it was checked
516 * and referenced.
517 */
518 unp_reltoken(unp2);
984263bc 519 } else {
9dc5418a 520 if (unp->unp_conn == NULL) {
984263bc
MD
521 error = ENOTCONN;
522 break;
523 }
3d3093e5
SZ
524 /* XXX racy. */
525 unp2 = unp->unp_conn;
526 unp_reference(unp2);
984263bc 527 }
3d3093e5 528 /* NOTE: unp2 is referenced. */
76d4bfa3 529 so2 = unp2->unp_socket;
3d3093e5 530
984263bc
MD
531 if (unp->unp_addr)
532 from = (struct sockaddr *)unp->unp_addr;
533 else
534 from = &sun_noname;
74d8470d
MD
535
536 lwkt_gettoken(&so2->so_rcv.ssb_token);
6d49aa6f 537 if (ssb_appendaddr(&so2->so_rcv, from, m, control)) {
984263bc 538 sorwakeup(so2);
9dc5418a
JH
539 m = NULL;
540 control = NULL;
6d49aa6f 541 } else {
984263bc 542 error = ENOBUFS;
6d49aa6f 543 }
74d8470d 544 lwkt_reltoken(&so2->so_rcv.ssb_token);
76d4bfa3
SZ
545
546 unp_free(unp2);
984263bc
MD
547 break;
548 }
549
550 case SOCK_STREAM:
91be174d 551 case SOCK_SEQPACKET:
984263bc
MD
552 /* Connect if not connected yet. */
553 /*
554 * Note: A better implementation would complain
555 * if not equal to the peer's address.
556 */
9dc5418a 557 if (!(so->so_state & SS_ISCONNECTED)) {
002c1265
MD
558 if (msg->send.nm_addr) {
559 error = unp_connect(so,
560 msg->send.nm_addr,
561 msg->send.nm_td);
984263bc
MD
562 if (error)
563 break; /* XXX */
564 } else {
565 error = ENOTCONN;
566 break;
567 }
568 }
569
570 if (so->so_state & SS_CANTSENDMORE) {
571 error = EPIPE;
572 break;
573 }
9dc5418a 574 if (unp->unp_conn == NULL)
984263bc 575 panic("uipc_send connected but no connection?");
76d4bfa3
SZ
576 unp2 = unp->unp_conn;
577 so2 = unp2->unp_socket;
578
579 unp_reference(unp2);
580
984263bc
MD
581 /*
582 * Send to paired receive port, and then reduce
583 * send buffer hiwater marks to maintain backpressure.
584 * Wake up readers.
585 */
74d8470d 586 lwkt_gettoken(&so2->so_rcv.ssb_token);
984263bc 587 if (control) {
ee38db90 588 if (ssb_appendcontrol(&so2->so_rcv, m, control)) {
9dc5418a 589 control = NULL;
ee38db90
MD
590 m = NULL;
591 }
91be174d
MD
592 } else if (so->so_type == SOCK_SEQPACKET) {
593 sbappendrecord(&so2->so_rcv.sb, m);
594 m = NULL;
6d49aa6f
MD
595 } else {
596 sbappend(&so2->so_rcv.sb, m);
ee38db90 597 m = NULL;
6d49aa6f 598 }
3a6117bb
MD
599
600 /*
601 * Because we are transfering mbufs directly to the
602 * peer socket we have to use SSB_STOP on the sender
603 * to prevent it from building up infinite mbufs.
604 */
605 if (so2->so_rcv.ssb_cc >= so->so_snd.ssb_hiwat ||
606 so2->so_rcv.ssb_mbcnt >= so->so_snd.ssb_mbmax
607 ) {
14343ad3 608 atomic_set_int(&so->so_snd.ssb_flags, SSB_STOP);
3a6117bb 609 }
74d8470d 610 lwkt_reltoken(&so2->so_rcv.ssb_token);
984263bc 611 sorwakeup(so2);
76d4bfa3
SZ
612
613 unp_free(unp2);
984263bc
MD
614 break;
615
616 default:
617 panic("uipc_send unknown socktype");
618 }
619
620 /*
9dc5418a 621 * SEND_EOF is equivalent to a SEND followed by a SHUTDOWN.
984263bc 622 */
002c1265 623 if (msg->send.nm_flags & PRUS_EOF) {
984263bc
MD
624 socantsendmore(so);
625 unp_shutdown(unp);
626 }
627
628 if (control && error != 0)
629 unp_dispose(control);
984263bc 630release:
a10b308d 631 unp_reltoken(unp);
b0da0c88 632 wakeup_end_delayed();
6cef7136 633
984263bc
MD
634 if (control)
635 m_freem(control);
636 if (m)
637 m_freem(m);
002c1265 638 lwkt_replymsg(&msg->lmsg, error);
984263bc
MD
639}
640
2ad080fe
MD
641/*
642 * MPSAFE
643 */
002c1265
MD
644static void
645uipc_sense(netmsg_t msg)
984263bc 646{
6cef7136 647 struct unpcb *unp;
002c1265
MD
648 struct socket *so;
649 struct stat *sb;
650 int error;
984263bc 651
002c1265
MD
652 so = msg->base.nm_so;
653 sb = msg->sense.nm_stat;
524d0e3c
MD
654
655 /*
656 * so_pcb is only modified with both the global and the unp
a10b308d 657 * pool token held.
524d0e3c 658 */
a10b308d
SZ
659 unp = unp_getsocktoken(so);
660
e63aadf9 661 if (!UNP_ISATTACHED(unp)) {
002c1265
MD
662 error = EINVAL;
663 goto done;
6cef7136 664 }
524d0e3c 665
6d49aa6f 666 sb->st_blksize = so->so_snd.ssb_hiwat;
984263bc 667 sb->st_dev = NOUDEV;
2ad080fe 668 if (unp->unp_ino == 0) { /* make up a non-zero inode number */
287a8577 669 spin_lock(&unp_ino_spin);
2ad080fe 670 unp->unp_ino = unp_ino++;
287a8577 671 spin_unlock(&unp_ino_spin);
2ad080fe 672 }
984263bc 673 sb->st_ino = unp->unp_ino;
002c1265
MD
674 error = 0;
675done:
a10b308d 676 unp_reltoken(unp);
002c1265 677 lwkt_replymsg(&msg->lmsg, error);
984263bc
MD
678}
679
002c1265
MD
680static void
681uipc_shutdown(netmsg_t msg)
984263bc 682{
002c1265 683 struct socket *so;
6cef7136
MD
684 struct unpcb *unp;
685 int error;
984263bc 686
524d0e3c
MD
687 /*
688 * so_pcb is only modified with both the global and the unp
a10b308d 689 * pool token held.
524d0e3c 690 */
002c1265 691 so = msg->base.nm_so;
a10b308d
SZ
692 unp = unp_getsocktoken(so);
693
e63aadf9 694 if (UNP_ISATTACHED(unp)) {
6cef7136
MD
695 socantsendmore(so);
696 unp_shutdown(unp);
697 error = 0;
698 } else {
699 error = EINVAL;
700 }
a10b308d
SZ
701
702 unp_reltoken(unp);
002c1265 703 lwkt_replymsg(&msg->lmsg, error);
984263bc
MD
704}
705
002c1265
MD
706static void
707uipc_sockaddr(netmsg_t msg)
984263bc 708{
524d0e3c 709 struct socket *so;
6cef7136
MD
710 struct unpcb *unp;
711 int error;
984263bc 712
524d0e3c
MD
713 /*
714 * so_pcb is only modified with both the global and the unp
a10b308d 715 * pool token held.
524d0e3c
MD
716 */
717 so = msg->base.nm_so;
a10b308d
SZ
718 unp = unp_getsocktoken(so);
719
e63aadf9 720 if (UNP_ISATTACHED(unp)) {
002c1265
MD
721 if (unp->unp_addr) {
722 *msg->sockaddr.nm_nam =
723 dup_sockaddr((struct sockaddr *)unp->unp_addr);
724 }
6cef7136
MD
725 error = 0;
726 } else {
727 error = EINVAL;
728 }
a10b308d
SZ
729
730 unp_reltoken(unp);
002c1265 731 lwkt_replymsg(&msg->lmsg, error);
984263bc
MD
732}
733
734struct pr_usrreqs uipc_usrreqs = {
fa5e758c
MD
735 .pru_abort = uipc_abort,
736 .pru_accept = uipc_accept,
737 .pru_attach = uipc_attach,
738 .pru_bind = uipc_bind,
739 .pru_connect = uipc_connect,
740 .pru_connect2 = uipc_connect2,
002c1265 741 .pru_control = pr_generic_notsupp,
fa5e758c
MD
742 .pru_detach = uipc_detach,
743 .pru_disconnect = uipc_disconnect,
744 .pru_listen = uipc_listen,
745 .pru_peeraddr = uipc_peeraddr,
746 .pru_rcvd = uipc_rcvd,
002c1265 747 .pru_rcvoob = pr_generic_notsupp,
fa5e758c
MD
748 .pru_send = uipc_send,
749 .pru_sense = uipc_sense,
750 .pru_shutdown = uipc_shutdown,
751 .pru_sockaddr = uipc_sockaddr,
752 .pru_sosend = sosend,
8b5c39bb 753 .pru_soreceive = soreceive
984263bc
MD
754};
755
002c1265
MD
756void
757uipc_ctloutput(netmsg_t msg)
984263bc 758{
002c1265
MD
759 struct socket *so;
760 struct sockopt *sopt;
6cef7136 761 struct unpcb *unp;
95c20811 762 int error = 0;
984263bc 763
6cef7136 764 lwkt_gettoken(&unp_token);
002c1265
MD
765 so = msg->base.nm_so;
766 sopt = msg->ctloutput.nm_sopt;
6cef7136
MD
767 unp = so->so_pcb;
768
984263bc
MD
769 switch (sopt->sopt_dir) {
770 case SOPT_GET:
771 switch (sopt->sopt_name) {
772 case LOCAL_PEERCRED:
773 if (unp->unp_flags & UNP_HAVEPC)
95c20811
AE
774 soopt_from_kbuf(sopt, &unp->unp_peercred,
775 sizeof(unp->unp_peercred));
984263bc
MD
776 else {
777 if (so->so_type == SOCK_STREAM)
778 error = ENOTCONN;
91be174d
MD
779 else if (so->so_type == SOCK_SEQPACKET)
780 error = ENOTCONN;
984263bc
MD
781 else
782 error = EINVAL;
783 }
784 break;
785 default:
786 error = EOPNOTSUPP;
787 break;
788 }
789 break;
790 case SOPT_SET:
791 default:
792 error = EOPNOTSUPP;
793 break;
794 }
6cef7136 795 lwkt_reltoken(&unp_token);
002c1265 796 lwkt_replymsg(&msg->lmsg, error);
984263bc
MD
797}
798
799/*
800 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
801 * for stream sockets, although the total for sender and receiver is
802 * actually only PIPSIZ.
0560a274 803 *
984263bc
MD
804 * Datagram sockets really use the sendspace as the maximum datagram size,
805 * and don't really want to reserve the sendspace. Their recvspace should
806 * be large enough for at least one max-size datagram plus address.
0560a274
MD
807 *
808 * We want the local send/recv space to be significantly larger than lo0's
809 * mtu of 16384.
984263bc
MD
810 */
811#ifndef PIPSIZ
0560a274 812#define PIPSIZ 57344
984263bc
MD
813#endif
814static u_long unpst_sendspace = PIPSIZ;
815static u_long unpst_recvspace = PIPSIZ;
816static u_long unpdg_sendspace = 2*1024; /* really max datagram size */
817static u_long unpdg_recvspace = 4*1024;
818
819static int unp_rights; /* file descriptors in flight */
ba87a4ab 820static struct spinlock unp_spin = SPINLOCK_INITIALIZER(&unp_spin, "unp_spin");
984263bc 821
91be174d 822SYSCTL_DECL(_net_local_seqpacket);
984263bc
MD
823SYSCTL_DECL(_net_local_stream);
824SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
c9458756 825 &unpst_sendspace, 0, "Size of stream socket send buffer");
984263bc 826SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
c9458756 827 &unpst_recvspace, 0, "Size of stream socket receive buffer");
9dc5418a 828
984263bc
MD
829SYSCTL_DECL(_net_local_dgram);
830SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
c9458756 831 &unpdg_sendspace, 0, "Max datagram socket size");
984263bc 832SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
c9458756 833 &unpdg_recvspace, 0, "Size of datagram socket receive buffer");
9dc5418a 834
984263bc 835SYSCTL_DECL(_net_local);
c9458756
SG
836SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
837 "File descriptors in flight");
984263bc
MD
838
839static int
e4700d00 840unp_attach(struct socket *so, struct pru_attach_info *ai)
984263bc 841{
1fd87d54 842 struct unpcb *unp;
984263bc
MD
843 int error;
844
6cef7136 845 lwkt_gettoken(&unp_token);
002c1265 846
6d49aa6f 847 if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) {
984263bc 848 switch (so->so_type) {
984263bc 849 case SOCK_STREAM:
91be174d 850 case SOCK_SEQPACKET:
e4700d00
JH
851 error = soreserve(so, unpst_sendspace, unpst_recvspace,
852 ai->sb_rlimit);
984263bc
MD
853 break;
854
855 case SOCK_DGRAM:
e4700d00
JH
856 error = soreserve(so, unpdg_sendspace, unpdg_recvspace,
857 ai->sb_rlimit);
984263bc
MD
858 break;
859
860 default:
861 panic("unp_attach");
862 }
863 if (error)
6cef7136 864 goto failed;
984263bc 865 }
a5e93826
MD
866
867 /*
868 * In order to support sendfile we have to set either SSB_STOPSUPP
869 * or SSB_PREALLOC. Unix domain sockets use the SSB_STOP flow
870 * control mechanism.
871 */
872 if (so->so_type == SOCK_STREAM) {
873 atomic_set_int(&so->so_rcv.ssb_flags, SSB_STOPSUPP);
874 atomic_set_int(&so->so_snd.ssb_flags, SSB_STOPSUPP);
875 }
876
c444f2f5 877 unp = kmalloc(sizeof(*unp), M_UNPCB, M_WAITOK | M_ZERO | M_NULLOK);
6cef7136
MD
878 if (unp == NULL) {
879 error = ENOBUFS;
880 goto failed;
881 }
76d4bfa3 882 unp->unp_refcnt = 1;
984263bc
MD
883 unp->unp_gencnt = ++unp_gencnt;
884 unp_count++;
885 LIST_INIT(&unp->unp_refs);
886 unp->unp_socket = so;
e4700d00 887 unp->unp_rvnode = ai->fd_rdir; /* jail cruft XXX JH */
984263bc
MD
888 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
889 : &unp_shead, unp, unp_link);
890 so->so_pcb = (caddr_t)unp;
6cef7136 891 soreference(so);
6cef7136
MD
892 error = 0;
893failed:
894 lwkt_reltoken(&unp_token);
895 return error;
984263bc
MD
896}
897
898static void
9dc5418a 899unp_detach(struct unpcb *unp)
984263bc 900{
6cef7136
MD
901 struct socket *so;
902
903 lwkt_gettoken(&unp_token);
524d0e3c 904 lwkt_getpooltoken(unp);
6cef7136 905
524d0e3c 906 LIST_REMOVE(unp, unp_link); /* both tokens required */
984263bc
MD
907 unp->unp_gencnt = ++unp_gencnt;
908 --unp_count;
909 if (unp->unp_vnode) {
9dc5418a 910 unp->unp_vnode->v_socket = NULL;
984263bc 911 vrele(unp->unp_vnode);
9dc5418a 912 unp->unp_vnode = NULL;
984263bc 913 }
984263bc 914 soisdisconnected(unp->unp_socket);
6cef7136 915 so = unp->unp_socket;
524d0e3c
MD
916 soreference(so); /* for delayed sorflush */
917 KKASSERT(so->so_pcb == unp);
918 so->so_pcb = NULL; /* both tokens required */
6cef7136
MD
919 unp->unp_socket = NULL;
920 sofree(so); /* remove pcb ref */
921
984263bc
MD
922 if (unp_rights) {
923 /*
924 * Normally the receive buffer is flushed later,
925 * in sofree, but if our receive buffer holds references
926 * to descriptors that are now garbage, we will dispose
927 * of those descriptor references after the garbage collector
928 * gets them (resulting in a "panic: closef: count < 0").
929 */
6cef7136 930 sorflush(so);
984263bc
MD
931 unp_gc();
932 }
6cef7136 933 sofree(so);
524d0e3c 934 lwkt_relpooltoken(unp);
6cef7136
MD
935 lwkt_reltoken(&unp_token);
936
16e0b14d
SZ
937 KASSERT(unp->unp_conn == NULL, ("unp is still connected"));
938 KASSERT(LIST_EMPTY(&unp->unp_refs), ("unp still has references"));
939
984263bc 940 if (unp->unp_addr)
efda3bd0 941 kfree(unp->unp_addr, M_SONAME);
386d5278 942 kfree(unp, M_UNPCB);
984263bc
MD
943}
944
945static int
dadab5e9 946unp_bind(struct unpcb *unp, struct sockaddr *nam, struct thread *td)
984263bc 947{
dadab5e9 948 struct proc *p = td->td_proc;
984263bc 949 struct sockaddr_un *soun = (struct sockaddr_un *)nam;
dadab5e9 950 struct vnode *vp;
984263bc
MD
951 struct vattr vattr;
952 int error, namelen;
fad57d0e 953 struct nlookupdata nd;
984263bc
MD
954 char buf[SOCK_MAXADDRLEN];
955
6cef7136
MD
956 lwkt_gettoken(&unp_token);
957 if (unp->unp_vnode != NULL) {
958 error = EINVAL;
959 goto failed;
960 }
984263bc 961 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
6cef7136
MD
962 if (namelen <= 0) {
963 error = EINVAL;
964 goto failed;
965 }
984263bc
MD
966 strncpy(buf, soun->sun_path, namelen);
967 buf[namelen] = 0; /* null-terminate the string */
5312fa43
MD
968 error = nlookup_init(&nd, buf, UIO_SYSSPACE,
969 NLC_LOCKVP | NLC_CREATE | NLC_REFDVP);
fad57d0e
MD
970 if (error == 0)
971 error = nlookup(&nd);
28623bf9 972 if (error == 0 && nd.nl_nch.ncp->nc_vp != NULL)
fad57d0e 973 error = EADDRINUSE;
984263bc 974 if (error)
fad57d0e
MD
975 goto done;
976
984263bc
MD
977 VATTR_NULL(&vattr);
978 vattr.va_type = VSOCK;
979 vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask);
5312fa43 980 error = VOP_NCREATE(&nd.nl_nch, nd.nl_dvp, &vp, nd.nl_cred, &vattr);
fad57d0e 981 if (error == 0) {
524d0e3c
MD
982 if (unp->unp_vnode == NULL) {
983 vp->v_socket = unp->unp_socket;
984 unp->unp_vnode = vp;
985 unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam);
986 vn_unlock(vp);
987 } else {
988 vput(vp); /* late race */
989 error = EINVAL;
990 }
fad57d0e
MD
991 }
992done:
993 nlookup_done(&nd);
6cef7136
MD
994failed:
995 lwkt_reltoken(&unp_token);
fad57d0e 996 return (error);
984263bc
MD
997}
998
/*
 * Connect socket 'so' to the unix domain socket bound at address 'nam'.
 *
 * UNP_CONNECTING is set on the caller's unp for the duration of the
 * attempt and cleared again on every exit path.
 *
 * Returns 0 on success or an errno: EINVAL if 'so' is no longer attached,
 * EISCONN if it is already connected or connecting, ECONNREFUSED if the
 * target is not accepting connections, or whatever unp_find_lockref() /
 * unp_connect_pair() report.
 */
static int
unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct unpcb *unp, *unp2, *unp3;
	struct socket *so2, *so3;
	int error;
	int flags = 0;

	lwkt_gettoken(&unp_token);

	unp = unp_getsocktoken(so);
	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto failed;
	}
	if ((unp->unp_flags & UNP_CONNECTING) || unp->unp_conn != NULL) {
		error = EISCONN;
		goto failed;
	}

	/* From here on the 'failed' label has a flag to clear. */
	flags = UNP_CONNECTING;
	unp_setflags(unp, flags);

	error = unp_find_lockref(nam, td, so->so_type, &unp2);
	if (error)
		goto failed;
	/*
	 * NOTE:
	 * unp2 is locked and referenced.
	 */

	if ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
		/* Connectionless protocol: pair directly with the target. */
		error = unp_connect_pair(unp, unp2);
		goto done;
	}

	/*
	 * Connection oriented protocol: the target must be listening, and
	 * we connect to a freshly spawned socket (so3) instead of the
	 * listener itself.
	 */
	so2 = unp2->unp_socket;
	if ((so2->so_options & SO_ACCEPTCONN) == 0) {
		error = ECONNREFUSED;
		goto done;
	}
	so3 = sonewconn_faddr(so2, 0, NULL, TRUE /* keep ref */);
	if (so3 == NULL) {
		error = ECONNREFUSED;
		goto done;
	}
	/* so3 has a socket reference. */

	unp3 = unp_getsocktoken(so3);
	if (!UNP_ISATTACHED(unp3)) {
		unp_reltoken(unp3);
		/*
		 * Already aborted; we only need to drop the
		 * socket reference held by sonewconn_faddr().
		 */
		sofree(so3);
		error = ECONNREFUSED;
		goto done;
	}
	unp_reference(unp3);
	/*
	 * NOTE:
	 * unp3 is locked and referenced.
	 */

	/*
	 * Release so3 socket reference held by sonewconn_faddr().
	 * Since we have referenced unp3, neither unp3 nor so3 will
	 * be destroyed here.
	 */
	sofree(so3);

	/* The new socket inherits the listener's bound address, if any. */
	if (unp2->unp_addr != NULL) {
		unp3->unp_addr = (struct sockaddr_un *)
		    dup_sockaddr((struct sockaddr *)unp2->unp_addr);
	}

	/*
	 * unp_peercred management:
	 *
	 * The connecter's (client's) credentials are copied
	 * from its process structure at the time of connect()
	 * (which is now).
	 */
	cru2x(td->td_proc->p_ucred, &unp3->unp_peercred);
	unp_setflags(unp3, UNP_HAVEPC);

	/*
	 * The receiver's (server's) credentials are copied
	 * from the unp_peercred member of socket on which the
	 * former called listen(); unp_listen() cached that
	 * process's credentials at that time so we can use
	 * them now.
	 */
	KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
	    ("unp_connect: listener without cached peercred"));
	memcpy(&unp->unp_peercred, &unp2->unp_peercred,
	    sizeof(unp->unp_peercred));
	unp_setflags(unp, UNP_HAVEPC);

	error = unp_connect_pair(unp, unp3);
	if (error) {
		/* XXX we need a better name */
		soabort_oncpu(so3);
	}

	/* Done with unp3 */
	unp_free(unp3);
	unp_reltoken(unp3);
done:
	unp_free(unp2);
	unp_reltoken(unp2);
failed:
	if (flags)
		unp_clrflags(unp, flags);
	unp_reltoken(unp);

	lwkt_reltoken(&unp_token);
	return (error);
}
1116
524d0e3c
MD
/*
 * Connect two unix domain sockets together.
 *
 * Both sockets must be of the same type (EPROTOTYPE otherwise).  'so' must
 * still be attached (EINVAL) and not yet connected (EISCONN); 'so2' must
 * still be attached (ECONNREFUSED) and, for stream and seqpacket sockets,
 * must not be connected either (EISCONN).
 *
 * NOTE: Semantics for any change to unp_conn requires that the per-unp
 *	 pool token also be held.
 */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp, *unp2;
	int conn_oriented;
	int error;

	lwkt_gettoken(&unp_token);
	if (so2->so_type != so->so_type) {
		lwkt_reltoken(&unp_token);
		return (EPROTOTYPE);
	}
	conn_oriented = (so->so_type == SOCK_STREAM ||
	    so->so_type == SOCK_SEQPACKET);

	unp = unp_getsocktoken(so);
	unp2 = unp_getsocktoken(so2);

	if (!UNP_ISATTACHED(unp))
		error = EINVAL;
	else if (!UNP_ISATTACHED(unp2))
		error = ECONNREFUSED;
	else if (unp->unp_conn != NULL)
		error = EISCONN;
	else if (conn_oriented && unp2->unp_conn != NULL)
		error = EISCONN;
	else
		error = unp_connect_pair(unp, unp2);

	unp_reltoken(unp2);
	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);
	return (error);
}
1163
524d0e3c
MD
1164/*
1165 * Disconnect a unix domain socket pair.
1166 *
1167 * NOTE: Semantics for any change to unp_conn requires that the per-unp
1168 * pool token also be held.
1169 */
984263bc 1170static void
16e0b14d 1171unp_disconnect(struct unpcb *unp, int error)
984263bc 1172{
16e0b14d 1173 struct socket *so = unp->unp_socket;
6cef7136 1174 struct unpcb *unp2;
984263bc 1175
16e0b14d
SZ
1176 ASSERT_LWKT_TOKEN_HELD(&unp_token);
1177 UNP_ASSERT_TOKEN_HELD(unp);
1178
1179 if (error)
1180 so->so_error = error;
6cef7136 1181
524d0e3c
MD
1182 while ((unp2 = unp->unp_conn) != NULL) {
1183 lwkt_getpooltoken(unp2);
1184 if (unp2 == unp->unp_conn)
1185 break;
1186 lwkt_relpooltoken(unp2);
6cef7136 1187 }
524d0e3c 1188 if (unp2 == NULL)
16e0b14d
SZ
1189 return;
1190 /* unp2 is locked. */
1191
1192 KASSERT((unp2->unp_flags & UNP_DROPPED) == 0, ("unp2 was dropped"));
984263bc 1193
9dc5418a
JH
1194 unp->unp_conn = NULL;
1195
16e0b14d 1196 switch (so->so_type) {
984263bc
MD
1197 case SOCK_DGRAM:
1198 LIST_REMOVE(unp, unp_reflink);
16e0b14d 1199 soclrstate(so, SS_ISCONNECTED);
984263bc 1200 break;
76d4bfa3 1201
984263bc 1202 case SOCK_STREAM:
91be174d 1203 case SOCK_SEQPACKET:
16e0b14d
SZ
1204 /*
1205 * Keep a reference before clearing the unp_conn
1206 * to avoid racing uipc_detach()/uipc_abort() in
1207 * other thread.
1208 */
76d4bfa3 1209 unp_reference(unp2);
16e0b14d 1210 KASSERT(unp2->unp_conn == unp, ("unp_conn mismatch"));
9dc5418a 1211 unp2->unp_conn = NULL;
76d4bfa3 1212
16e0b14d 1213 soisdisconnected(so);
984263bc 1214 soisdisconnected(unp2->unp_socket);
76d4bfa3
SZ
1215
1216 unp_free(unp2);
984263bc
MD
1217 break;
1218 }
16e0b14d 1219
524d0e3c 1220 lwkt_relpooltoken(unp2);
984263bc
MD
1221}
1222
#ifdef notdef
/*
 * Release 'unp' via unp_free() while holding unp_token.
 * Compiled out (notdef).
 */
void
unp_abort(struct unpcb *unp)
{
	lwkt_gettoken(&unp_token);
	unp_free(unp);
	lwkt_reltoken(&unp_token);
}
#endif
1232
1233static int
dadab5e9 1234prison_unpcb(struct thread *td, struct unpcb *unp)
984263bc 1235{
dadab5e9
MD
1236 struct proc *p;
1237
1238 if (td == NULL)
1239 return (0);
1240 if ((p = td->td_proc) == NULL)
1241 return (0);
41c20dac 1242 if (!p->p_ucred->cr_prison)
984263bc
MD
1243 return (0);
1244 if (p->p_fd->fd_rdir == unp->unp_rvnode)
1245 return (0);
1246 return (1);
1247}
1248
1249static int
1250unp_pcblist(SYSCTL_HANDLER_ARGS)
1251{
1252 int error, i, n;
1253 struct unpcb *unp, **unp_list;
1254 unp_gen_t gencnt;
984263bc
MD
1255 struct unp_head *head;
1256
1257 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
1258
41c20dac
MD
1259 KKASSERT(curproc != NULL);
1260
984263bc
MD
1261 /*
1262 * The process of preparing the PCB list is too time-consuming and
1263 * resource-intensive to repeat twice on every request.
1264 */
9dc5418a 1265 if (req->oldptr == NULL) {
984263bc 1266 n = unp_count;
8d7c364e 1267 req->oldidx = (n + n/8) * sizeof(struct xunpcb);
984263bc
MD
1268 return 0;
1269 }
1270
9dc5418a 1271 if (req->newptr != NULL)
984263bc
MD
1272 return EPERM;
1273
6cef7136
MD
1274 lwkt_gettoken(&unp_token);
1275
984263bc
MD
1276 /*
1277 * OK, now we're committed to doing something.
1278 */
1279 gencnt = unp_gencnt;
1280 n = unp_count;
1281
efda3bd0 1282 unp_list = kmalloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
984263bc
MD
1283
1284 for (unp = LIST_FIRST(head), i = 0; unp && i < n;
1285 unp = LIST_NEXT(unp, unp_link)) {
dadab5e9 1286 if (unp->unp_gencnt <= gencnt && !prison_unpcb(req->td, unp))
984263bc
MD
1287 unp_list[i++] = unp;
1288 }
1289 n = i; /* in case we lost some during malloc */
1290
1291 error = 0;
1292 for (i = 0; i < n; i++) {
1293 unp = unp_list[i];
1294 if (unp->unp_gencnt <= gencnt) {
1295 struct xunpcb xu;
1296 xu.xu_len = sizeof xu;
1297 xu.xu_unpp = unp;
1298 /*
1299 * XXX - need more locking here to protect against
1300 * connect/disconnect races for SMP.
1301 */
1302 if (unp->unp_addr)
1303 bcopy(unp->unp_addr, &xu.xu_addr,
1304 unp->unp_addr->sun_len);
1305 if (unp->unp_conn && unp->unp_conn->unp_addr)
1306 bcopy(unp->unp_conn->unp_addr,
1307 &xu.xu_caddr,
1308 unp->unp_conn->unp_addr->sun_len);
1309 bcopy(unp, &xu.xu_unp, sizeof *unp);
1310 sotoxsocket(unp->unp_socket, &xu.xu_socket);
1311 error = SYSCTL_OUT(req, &xu, sizeof xu);
1312 }
1313 }
6cef7136 1314 lwkt_reltoken(&unp_token);
efda3bd0 1315 kfree(unp_list, M_TEMP);
6cef7136 1316
984263bc
MD
1317 return error;
1318}
1319
1320SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
1321 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
1322 "List of active local datagram sockets");
1323SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
1324 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
1325 "List of active local stream sockets");
91be174d
MD
1326SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist, CTLFLAG_RD,
1327 (caddr_t)(long)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb",
1328 "List of active local seqpacket stream sockets");
984263bc
MD
1329
1330static void
9dc5418a 1331unp_shutdown(struct unpcb *unp)
984263bc
MD
1332{
1333 struct socket *so;
1334
91be174d
MD
1335 if ((unp->unp_socket->so_type == SOCK_STREAM ||
1336 unp->unp_socket->so_type == SOCK_SEQPACKET) &&
1337 unp->unp_conn != NULL && (so = unp->unp_conn->unp_socket)) {
984263bc 1338 socantrcvmore(so);
91be174d 1339 }
984263bc
MD
1340}
1341
984263bc
MD
#ifdef notdef
/*
 * Placeholder drain routine: acquires and immediately releases unp_token.
 * Compiled out (notdef).
 */
void
unp_drain(void)
{
	lwkt_gettoken(&unp_token);
	lwkt_reltoken(&unp_token);
}
#endif
1350
1351int
dadab5e9 1352unp_externalize(struct mbuf *rights)
984263bc 1353{
f3a2d8c4
MD
1354 struct thread *td = curthread;
1355 struct proc *p = td->td_proc; /* XXX */
1356 struct lwp *lp = td->td_lwp;
dadab5e9
MD
1357 struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1358 int *fdp;
f3a2d8c4 1359 int i;
dadab5e9
MD
1360 struct file **rp;
1361 struct file *fp;
984263bc
MD
1362 int newfds = (cm->cmsg_len - (CMSG_DATA(cm) - (u_char *)cm))
1363 / sizeof (struct file *);
1364 int f;
1365
6cef7136
MD
1366 lwkt_gettoken(&unp_token);
1367
984263bc
MD
1368 /*
1369 * if the new FD's will not fit, then we free them all
1370 */
1371 if (!fdavail(p, newfds)) {
1372 rp = (struct file **)CMSG_DATA(cm);
1373 for (i = 0; i < newfds; i++) {
1374 fp = *rp;
1375 /*
1376 * zero the pointer before calling unp_discard,
1377 * since it may end up in unp_gc()..
1378 */
4090d6ff 1379 *rp++ = NULL;
2dd63755 1380 unp_discard(fp, NULL);
984263bc 1381 }
6cef7136 1382 lwkt_reltoken(&unp_token);
984263bc
MD
1383 return (EMSGSIZE);
1384 }
ea8f324c 1385
984263bc
MD
1386 /*
1387 * now change each pointer to an fd in the global table to
1388 * an integer that is the index to the local fd table entry
1389 * that we set up to point to the global one we are transferring.
1390 * If sizeof (struct file *) is bigger than or equal to sizeof int,
1391 * then do it in forward order. In that case, an integer will
1392 * always come in the same place or before its corresponding
1393 * struct file pointer.
1394 * If sizeof (struct file *) is smaller than sizeof int, then
1395 * do it in reverse order.
1396 */
1397 if (sizeof (struct file *) >= sizeof (int)) {
72ed5f2f 1398 fdp = (int *)CMSG_DATA(cm);
984263bc
MD
1399 rp = (struct file **)CMSG_DATA(cm);
1400 for (i = 0; i < newfds; i++) {
1401 if (fdalloc(p, 0, &f))
1402 panic("unp_externalize");
1403 fp = *rp++;
f3a2d8c4 1404 unp_fp_externalize(lp, fp, f);
984263bc
MD
1405 *fdp++ = f;
1406 }
1407 } else {
72ed5f2f 1408 fdp = (int *)CMSG_DATA(cm) + newfds - 1;
984263bc
MD
1409 rp = (struct file **)CMSG_DATA(cm) + newfds - 1;
1410 for (i = 0; i < newfds; i++) {
1411 if (fdalloc(p, 0, &f))
1412 panic("unp_externalize");
1413 fp = *rp--;
f3a2d8c4 1414 unp_fp_externalize(lp, fp, f);
984263bc
MD
1415 *fdp-- = f;
1416 }
1417 }
1418
1419 /*
1420 * Adjust length, in case sizeof(struct file *) and sizeof(int)
1421 * differs.
1422 */
1423 cm->cmsg_len = CMSG_LEN(newfds * sizeof(int));
1424 rights->m_len = cm->cmsg_len;
6cef7136
MD
1425
1426 lwkt_reltoken(&unp_token);
984263bc
MD
1427 return (0);
1428}
1429
ea8f324c 1430static void
f3a2d8c4 1431unp_fp_externalize(struct lwp *lp, struct file *fp, int fd)
ea8f324c
MD
1432{
1433 struct file *fx;
1434 int error;
1435
6cef7136
MD
1436 lwkt_gettoken(&unp_token);
1437
f3a2d8c4 1438 if (lp) {
ea8f324c
MD
1439 KKASSERT(fd >= 0);
1440 if (fp->f_flag & FREVOKED) {
1441 kprintf("Warning: revoked fp exiting unix socket\n");
1442 fx = NULL;
f3a2d8c4 1443 error = falloc(lp, &fx, NULL);
ea8f324c 1444 if (error == 0)
f3a2d8c4 1445 fsetfd(lp->lwp_proc->p_fd, fx, fd);
ea8f324c 1446 else
f3a2d8c4 1447 fsetfd(lp->lwp_proc->p_fd, NULL, fd);
ea8f324c
MD
1448 fdrop(fx);
1449 } else {
f3a2d8c4 1450 fsetfd(lp->lwp_proc->p_fd, fp, fd);
ea8f324c
MD
1451 }
1452 }
287a8577 1453 spin_lock(&unp_spin);
ea8f324c
MD
1454 fp->f_msgcount--;
1455 unp_rights--;
287a8577 1456 spin_unlock(&unp_spin);
ea8f324c 1457 fdrop(fp);
6cef7136
MD
1458
1459 lwkt_reltoken(&unp_token);
ea8f324c
MD
1460}
1461
1462
984263bc
MD
1463void
1464unp_init(void)
1465{
984263bc
MD
1466 LIST_INIT(&unp_dhead);
1467 LIST_INIT(&unp_shead);
ba87a4ab 1468 spin_init(&unp_spin, "unpinit");
984263bc
MD
1469}
1470
984263bc 1471static int
dadab5e9 1472unp_internalize(struct mbuf *control, struct thread *td)
984263bc 1473{
dadab5e9
MD
1474 struct proc *p = td->td_proc;
1475 struct filedesc *fdescp;
1476 struct cmsghdr *cm = mtod(control, struct cmsghdr *);
1477 struct file **rp;
1478 struct file *fp;
1479 int i, fd, *fdp;
1480 struct cmsgcred *cmcred;
984263bc
MD
1481 int oldfds;
1482 u_int newlen;
6cef7136 1483 int error;
984263bc 1484
dadab5e9 1485 KKASSERT(p);
6cef7136
MD
1486 lwkt_gettoken(&unp_token);
1487
dadab5e9 1488 fdescp = p->p_fd;
984263bc 1489 if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) ||
72ed5f2f
MD
1490 cm->cmsg_level != SOL_SOCKET ||
1491 CMSG_ALIGN(cm->cmsg_len) != control->m_len) {
6cef7136
MD
1492 error = EINVAL;
1493 goto done;
72ed5f2f 1494 }
984263bc
MD
1495
1496 /*
1497 * Fill in credential information.
1498 */
1499 if (cm->cmsg_type == SCM_CREDS) {
72ed5f2f 1500 cmcred = (struct cmsgcred *)CMSG_DATA(cm);
984263bc 1501 cmcred->cmcred_pid = p->p_pid;
41c20dac
MD
1502 cmcred->cmcred_uid = p->p_ucred->cr_ruid;
1503 cmcred->cmcred_gid = p->p_ucred->cr_rgid;
984263bc
MD
1504 cmcred->cmcred_euid = p->p_ucred->cr_uid;
1505 cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups,
1506 CMGROUP_MAX);
1507 for (i = 0; i < cmcred->cmcred_ngroups; i++)
1508 cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i];
6cef7136
MD
1509 error = 0;
1510 goto done;
984263bc
MD
1511 }
1512
72ed5f2f
MD
1513 /*
1514 * cmsghdr may not be aligned, do not allow calculation(s) to
1515 * go negative.
1516 */
6cef7136
MD
1517 if (cm->cmsg_len < CMSG_LEN(0)) {
1518 error = EINVAL;
1519 goto done;
1520 }
72ed5f2f
MD
1521
1522 oldfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof (int);
1523
984263bc
MD
1524 /*
1525 * check that all the FDs passed in refer to legal OPEN files
1526 * If not, reject the entire operation.
1527 */
72ed5f2f 1528 fdp = (int *)CMSG_DATA(cm);
984263bc
MD
1529 for (i = 0; i < oldfds; i++) {
1530 fd = *fdp++;
1531 if ((unsigned)fd >= fdescp->fd_nfiles ||
6cef7136
MD
1532 fdescp->fd_files[fd].fp == NULL) {
1533 error = EBADF;
1534 goto done;
1535 }
1536 if (fdescp->fd_files[fd].fp->f_type == DTYPE_KQUEUE) {
1537 error = EOPNOTSUPP;
1538 goto done;
1539 }
984263bc
MD
1540 }
1541 /*
1542 * Now replace the integer FDs with pointers to
1543 * the associated global file table entry..
1544 * Allocate a bigger buffer as necessary. But if an cluster is not
1545 * enough, return E2BIG.
1546 */
1547 newlen = CMSG_LEN(oldfds * sizeof(struct file *));
6cef7136
MD
1548 if (newlen > MCLBYTES) {
1549 error = E2BIG;
1550 goto done;
1551 }
984263bc 1552 if (newlen - control->m_len > M_TRAILINGSPACE(control)) {
6cef7136
MD
1553 if (control->m_flags & M_EXT) {
1554 error = E2BIG;
1555 goto done;
1556 }
b5523eac 1557 MCLGET(control, M_WAITOK);
6cef7136
MD
1558 if (!(control->m_flags & M_EXT)) {
1559 error = ENOBUFS;
1560 goto done;
1561 }
984263bc
MD
1562
1563 /* copy the data to the cluster */
1564 memcpy(mtod(control, char *), cm, cm->cmsg_len);
1565 cm = mtod(control, struct cmsghdr *);
1566 }
1567
1568 /*
1569 * Adjust length, in case sizeof(struct file *) and sizeof(int)
1570 * differs.
1571 */
72ed5f2f
MD
1572 cm->cmsg_len = newlen;
1573 control->m_len = CMSG_ALIGN(newlen);
984263bc
MD
1574
1575 /*
1576 * Transform the file descriptors into struct file pointers.
1577 * If sizeof (struct file *) is bigger than or equal to sizeof int,
1578 * then do it in reverse order so that the int won't get until
1579 * we're done.
1580 * If sizeof (struct file *) is smaller than sizeof int, then
1581 * do it in forward order.
1582 */
1583 if (sizeof (struct file *) >= sizeof (int)) {
72ed5f2f 1584 fdp = (int *)CMSG_DATA(cm) + oldfds - 1;
984263bc
MD
1585 rp = (struct file **)CMSG_DATA(cm) + oldfds - 1;
1586 for (i = 0; i < oldfds; i++) {
0679adc4 1587 fp = fdescp->fd_files[*fdp--].fp;
984263bc 1588 *rp-- = fp;
2dd63755 1589 fhold(fp);
287a8577 1590 spin_lock(&unp_spin);
984263bc
MD
1591 fp->f_msgcount++;
1592 unp_rights++;
287a8577 1593 spin_unlock(&unp_spin);
984263bc
MD
1594 }
1595 } else {
72ed5f2f 1596 fdp = (int *)CMSG_DATA(cm);
984263bc
MD
1597 rp = (struct file **)CMSG_DATA(cm);
1598 for (i = 0; i < oldfds; i++) {
0679adc4 1599 fp = fdescp->fd_files[*fdp++].fp;
984263bc 1600 *rp++ = fp;
2dd63755 1601 fhold(fp);
287a8577 1602 spin_lock(&unp_spin);
984263bc
MD
1603 fp->f_msgcount++;
1604 unp_rights++;
287a8577 1605 spin_unlock(&unp_spin);
984263bc
MD
1606 }
1607 }
6cef7136
MD
1608 error = 0;
1609done:
1610 lwkt_reltoken(&unp_token);
1611 return error;
984263bc
MD
1612}
1613
2dd63755
MD
/*
 * Garbage collect in-transit file descriptors that get lost due to
 * loops (i.e. when a socket is sent to another process over itself,
 * and more complex situations).
 *
 * NOT MPSAFE - TODO socket flush code and maybe closef.  Rest is MPSAFE.
 */

/*
 * State shared between unp_gc() and its allfiles_scan_exclusive()
 * callbacks.
 */
struct unp_gc_info {
	struct file **extra_ref;	/* files collected by unp_gc_checkrefs() */
	struct file *locked_fp;		/* file being scanned by unp_gc_checkmarks() */
	int defer;			/* work left over for another mark pass */
	int index;			/* number of extra_ref[] slots in use */
	int maxindex;			/* capacity of extra_ref[] */
};
984263bc
MD
1629
1630static void
c972a82f 1631unp_gc(void)
984263bc 1632{
2dd63755 1633 struct unp_gc_info info;
9dc5418a 1634 static boolean_t unp_gcing;
2dd63755
MD
1635 struct file **fpp;
1636 int i;
9dc5418a 1637
6aaa730a
MD
1638 /*
1639 * Only one gc can be in-progress at any given moment
1640 */
287a8577 1641 spin_lock(&unp_spin);
2dd63755 1642 if (unp_gcing) {
287a8577 1643 spin_unlock(&unp_spin);
984263bc 1644 return;
2dd63755 1645 }
9dc5418a 1646 unp_gcing = TRUE;
287a8577 1647 spin_unlock(&unp_spin);
2dd63755 1648
6cef7136
MD
1649 lwkt_gettoken(&unp_token);
1650
984263bc 1651 /*
2aa0ab56
MD
1652 * Before going through all this, set all FDs to be NOT defered
1653 * and NOT externally accessible (not marked). During the scan
1654 * a fd can be marked externally accessible but we may or may not
1655 * be able to immediately process it (controlled by FDEFER).
1656 *
1657 * If we loop sleep a bit. The complexity of the topology can cause
1658 * multiple loops. Also failure to acquire the socket's so_rcv
1659 * token can cause us to loop.
984263bc 1660 */
2dd63755 1661 allfiles_scan_exclusive(unp_gc_clearmarks, NULL);
984263bc 1662 do {
6aaa730a 1663 info.defer = 0;
2dd63755 1664 allfiles_scan_exclusive(unp_gc_checkmarks, &info);
2aa0ab56
MD
1665 if (info.defer)
1666 tsleep(&info, 0, "gcagain", 1);
2dd63755
MD
1667 } while (info.defer);
1668
984263bc
MD
1669 /*
1670 * We grab an extra reference to each of the file table entries
1671 * that are not otherwise accessible and then free the rights
1672 * that are stored in messages on them.
1673 *
1674 * The bug in the orginal code is a little tricky, so I'll describe
1675 * what's wrong with it here.
1676 *
1677 * It is incorrect to simply unp_discard each entry for f_msgcount
1678 * times -- consider the case of sockets A and B that contain
1679 * references to each other. On a last close of some other socket,
1680 * we trigger a gc since the number of outstanding rights (unp_rights)
1681 * is non-zero. If during the sweep phase the gc code un_discards,
1682 * we end up doing a (full) closef on the descriptor. A closef on A
1683 * results in the following chain. Closef calls soo_close, which
1684 * calls soclose. Soclose calls first (through the switch
1685 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
1686 * returns because the previous instance had set unp_gcing, and
1687 * we return all the way back to soclose, which marks the socket
1688 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
1689 * to free up the rights that are queued in messages on the socket A,
1690 * i.e., the reference on B. The sorflush calls via the dom_dispose
1691 * switch unp_dispose, which unp_scans with unp_discard. This second
1692 * instance of unp_discard just calls closef on B.
1693 *
1694 * Well, a similar chain occurs on B, resulting in a sorflush on B,
1695 * which results in another closef on A. Unfortunately, A is already
1696 * being closed, and the descriptor has already been marked with
1697 * SS_NOFDREF, and soclose panics at this point.
1698 *
1699 * Here, we first take an extra reference to each inaccessible
1700 * descriptor. Then, we call sorflush ourself, since we know
1701 * it is a Unix domain socket anyhow. After we destroy all the
1702 * rights carried in messages, we do a last closef to get rid
1703 * of our extra reference. This is the last close, and the
1704 * unp_detach etc will shut down the socket.
1705 *
1706 * 91/09/19, bsy@cs.cmu.edu
1707 */
efda3bd0 1708 info.extra_ref = kmalloc(256 * sizeof(struct file *), M_FILE, M_WAITOK);
2dd63755
MD
1709 info.maxindex = 256;
1710
1711 do {
1712 /*
1713 * Look for matches
984263bc 1714 */
2dd63755
MD
1715 info.index = 0;
1716 allfiles_scan_exclusive(unp_gc_checkrefs, &info);
1717
984263bc 1718 /*
2dd63755 1719 * For each FD on our hit list, do the following two things
984263bc 1720 */
2dd63755
MD
1721 for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp) {
1722 struct file *tfp = *fpp;
1723 if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL)
1724 sorflush((struct socket *)(tfp->f_data));
984263bc 1725 }
2dd63755
MD
1726 for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp)
1727 closef(*fpp, NULL);
1728 } while (info.index == info.maxindex);
6cef7136
MD
1729
1730 lwkt_reltoken(&unp_token);
1731
efda3bd0 1732 kfree((caddr_t)info.extra_ref, M_FILE);
2dd63755
MD
1733 unp_gcing = FALSE;
1734}
1735
1736/*
1737 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
1738 */
1739static int
1740unp_gc_checkrefs(struct file *fp, void *data)
1741{
1742 struct unp_gc_info *info = data;
1743
1744 if (fp->f_count == 0)
1745 return(0);
1746 if (info->index == info->maxindex)
1747 return(-1);
1748
984263bc 1749 /*
2dd63755
MD
1750 * If all refs are from msgs, and it's not marked accessible
1751 * then it must be referenced from some unreachable cycle
1752 * of (shut-down) FDs, so include it in our
1753 * list of FDs to remove
984263bc 1754 */
2dd63755
MD
1755 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
1756 info->extra_ref[info->index++] = fp;
1757 fhold(fp);
984263bc 1758 }
2dd63755
MD
1759 return(0);
1760}
1761
1762/*
1763 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
1764 */
1765static int
1766unp_gc_clearmarks(struct file *fp, void *data __unused)
1767{
b8477cda 1768 atomic_clear_int(&fp->f_flag, FMARK | FDEFER);
2dd63755
MD
1769 return(0);
1770}
1771
1772/*
1773 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
1774 */
1775static int
1776unp_gc_checkmarks(struct file *fp, void *data)
1777{
1778 struct unp_gc_info *info = data;
1779 struct socket *so;
1780
1781 /*
2aa0ab56
MD
1782 * If the file is not open, skip it. Make sure it isn't marked
1783 * defered or we could loop forever, in case we somehow race
1784 * something.
2dd63755 1785 */
2aa0ab56 1786 if (fp->f_count == 0) {
6aaa730a 1787 if (fp->f_flag & FDEFER)
2aa0ab56 1788 atomic_clear_int(&fp->f_flag, FDEFER);
2dd63755 1789 return(0);
2aa0ab56 1790 }
2dd63755
MD
1791 /*
1792 * If we already marked it as 'defer' in a
1793 * previous pass, then try process it this time
1794 * and un-mark it
1795 */
1796 if (fp->f_flag & FDEFER) {
b8477cda 1797 atomic_clear_int(&fp->f_flag, FDEFER);
2dd63755
MD
1798 } else {
1799 /*
1800 * if it's not defered, then check if it's
1801 * already marked.. if so skip it
1802 */
1803 if (fp->f_flag & FMARK)
1804 return(0);
1805 /*
1806 * If all references are from messages
1807 * in transit, then skip it. it's not
1808 * externally accessible.
1809 */
1810 if (fp->f_count == fp->f_msgcount)
1811 return(0);
1812 /*
1813 * If it got this far then it must be
1814 * externally accessible.
1815 */
b8477cda 1816 atomic_set_int(&fp->f_flag, FMARK);
2dd63755 1817 }
ea8f324c 1818
2dd63755
MD
1819 /*
1820 * either it was defered, or it is externally
1821 * accessible and not already marked so.
1822 * Now check if it is possibly one of OUR sockets.
1823 */
1824 if (fp->f_type != DTYPE_SOCKET ||
2aa0ab56 1825 (so = (struct socket *)fp->f_data) == NULL) {
2dd63755 1826 return(0);
2aa0ab56 1827 }
2dd63755 1828 if (so->so_proto->pr_domain != &localdomain ||
2aa0ab56 1829 !(so->so_proto->pr_flags & PR_RIGHTS)) {
2dd63755 1830 return(0);
2dd63755 1831 }
2aa0ab56 1832
2dd63755 1833 /*
2aa0ab56
MD
1834 * So, Ok, it's one of our sockets and it IS externally accessible
1835 * (or was defered). Now we look to see if we hold any file
1836 * descriptors in its message buffers. Follow those links and mark
1837 * them as accessible too.
1838 *
1839 * We are holding multiple spinlocks here, if we cannot get the
1840 * token non-blocking defer until the next loop.
2dd63755
MD
1841 */
1842 info->locked_fp = fp;
2aa0ab56
MD
1843 if (lwkt_trytoken(&so->so_rcv.ssb_token)) {
1844 unp_scan(so->so_rcv.ssb_mb, unp_mark, info);
1845 lwkt_reltoken(&so->so_rcv.ssb_token);
1846 } else {
1847 atomic_set_int(&fp->f_flag, FDEFER);
1848 ++info->defer;
1849 }
2dd63755 1850 return (0);
984263bc
MD
1851}
1852
ea8f324c
MD
/*
 * Scan all unix domain sockets and replace any revoked file pointers
 * found with the dummy file pointer fx.  We don't worry about races
 * against file pointers being read out as those are handled in the
 * externalize code.
 */

/* Maximum number of revoked files collected per scan pass. */
#define REVOKE_GC_MAXFILES	32

/* State shared between unp_revoke_gc() and unp_revoke_gc_check(). */
struct unp_revoke_gc_info {
	struct file	*fx;				/* replacement file */
	struct file	*fary[REVOKE_GC_MAXFILES];	/* revoked files found */
	int		fcount;				/* entries used in fary[] */
};
1867
1868void
1869unp_revoke_gc(struct file *fx)
1870{
1871 struct unp_revoke_gc_info info;
1872 int i;
1873
6cef7136 1874 lwkt_gettoken(&unp_token);
ea8f324c
MD
1875 info.fx = fx;
1876 do {
1877 info.fcount = 0;
1878 allfiles_scan_exclusive(unp_revoke_gc_check, &info);
1879 for (i = 0; i < info.fcount; ++i)
1880 unp_fp_externalize(NULL, info.fary[i], -1);
1881 } while (info.fcount == REVOKE_GC_MAXFILES);
6cef7136 1882 lwkt_reltoken(&unp_token);
ea8f324c
MD
1883}
1884
1885/*
1886 * Check for and replace revoked descriptors.
1887 *
1888 * WARNING: This routine is not allowed to block.
1889 */
1890static int
1891unp_revoke_gc_check(struct file *fps, void *vinfo)
1892{
1893 struct unp_revoke_gc_info *info = vinfo;
1894 struct file *fp;
1895 struct socket *so;
1896 struct mbuf *m0;
1897 struct mbuf *m;
1898 struct file **rp;
1899 struct cmsghdr *cm;
1900 int i;
1901 int qfds;
1902
1903 /*
1904 * Is this a unix domain socket with rights-passing abilities?
1905 */
1906 if (fps->f_type != DTYPE_SOCKET)
1907 return (0);
1908 if ((so = (struct socket *)fps->f_data) == NULL)
1909 return(0);
1910 if (so->so_proto->pr_domain != &localdomain)
1911 return(0);
1912 if ((so->so_proto->pr_flags & PR_RIGHTS) == 0)
1913 return(0);
1914
1915 /*
1916 * Scan the mbufs for control messages and replace any revoked
1917 * descriptors we find.
1918 */
74d8470d 1919 lwkt_gettoken(&so->so_rcv.ssb_token);
ea8f324c
MD
1920 m0 = so->so_rcv.ssb_mb;
1921 while (m0) {
1922 for (m = m0; m; m = m->m_next) {
1923 if (m->m_type != MT_CONTROL)
1924 continue;
1925 if (m->m_len < sizeof(*cm))
1926 continue;
1927 cm = mtod(m, struct cmsghdr *);
1928 if (cm->cmsg_level != SOL_SOCKET ||
1929 cm->cmsg_type != SCM_RIGHTS) {
1930 continue;
1931 }
72ed5f2f 1932 qfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof(void *);
ea8f324c
MD
1933 rp = (struct file **)CMSG_DATA(cm);
1934 for (i = 0; i < qfds; i++) {
1935 fp = rp[i];
1936 if (fp->f_flag & FREVOKED) {
1937 kprintf("Warning: Removing revoked fp from unix domain socket queue\n");
1938 fhold(info->fx);
1939 info->fx->f_msgcount++;
1940 unp_rights++;
1941 rp[i] = info->fx;
1942 info->fary[info->fcount++] = fp;
1943 }
1944 if (info->fcount == REVOKE_GC_MAXFILES)
1945 break;
1946 }
1947 if (info->fcount == REVOKE_GC_MAXFILES)
1948 break;
1949 }
1950 m0 = m0->m_nextpkt;
1951 if (info->fcount == REVOKE_GC_MAXFILES)
1952 break;
1953 }
74d8470d 1954 lwkt_reltoken(&so->so_rcv.ssb_token);
ea8f324c
MD
1955
1956 /*
1957 * Stop the scan if we filled up our array.
1958 */
1959 if (info->fcount == REVOKE_GC_MAXFILES)
1960 return(-1);
1961 return(0);
1962}
1963
82b1d9f2
MD
1964/*
1965 * Dispose of the fp's stored in a mbuf.
1966 *
1967 * The dds loop can cause additional fps to be entered onto the
1968 * list while it is running, flattening out the operation and avoiding
1969 * a deep kernel stack recursion.
1970 */
984263bc 1971void
dadab5e9 1972unp_dispose(struct mbuf *m)
984263bc 1973{
82b1d9f2
MD
1974 unp_defdiscard_t dds;
1975
6cef7136 1976 lwkt_gettoken(&unp_token);
82b1d9f2
MD
1977 ++unp_defdiscard_nest;
1978 if (m) {
2dd63755 1979 unp_scan(m, unp_discard, NULL);
82b1d9f2
MD
1980 }
1981 if (unp_defdiscard_nest == 1) {
1982 while ((dds = unp_defdiscard_base) != NULL) {
1983 unp_defdiscard_base = dds->next;
1984 closef(dds->fp, NULL);
1985 kfree(dds, M_UNPCB);
1986 }
1987 }
1988 --unp_defdiscard_nest;
6cef7136 1989 lwkt_reltoken(&unp_token);
984263bc
MD
1990}
1991
1992static int
dadab5e9 1993unp_listen(struct unpcb *unp, struct thread *td)
984263bc 1994{
dadab5e9 1995 struct proc *p = td->td_proc;
984263bc 1996
dadab5e9 1997 KKASSERT(p);
6cef7136 1998 lwkt_gettoken(&unp_token);
984263bc 1999 cru2x(p->p_ucred, &unp->unp_peercred);
4b7795a3 2000 unp_setflags(unp, UNP_HAVEPCCACHED);
6cef7136 2001 lwkt_reltoken(&unp_token);
984263bc
MD
2002 return (0);
2003}
2004
2005static void
2dd63755 2006unp_scan(struct mbuf *m0, void (*op)(struct file *, void *), void *data)
984263bc 2007{
dadab5e9
MD
2008 struct mbuf *m;
2009 struct file **rp;
2010 struct cmsghdr *cm;
2011 int i;
984263bc
MD
2012 int qfds;
2013
2014 while (m0) {
2dd63755 2015 for (m = m0; m; m = m->m_next) {
984263bc
MD
2016 if (m->m_type == MT_CONTROL &&
2017 m->m_len >= sizeof(*cm)) {
2018 cm = mtod(m, struct cmsghdr *);
2019 if (cm->cmsg_level != SOL_SOCKET ||
2020 cm->cmsg_type != SCM_RIGHTS)
2021 continue;
72ed5f2f
MD
2022 qfds = (cm->cmsg_len - CMSG_LEN(0)) /
2023 sizeof(void *);
984263bc
MD
2024 rp = (struct file **)CMSG_DATA(cm);
2025 for (i = 0; i < qfds; i++)
2dd63755 2026 (*op)(*rp++, data);
984263bc
MD
2027 break; /* XXX, but saves time */
2028 }
2dd63755 2029 }
9a275e74 2030 m0 = m0->m_nextpkt;
984263bc
MD
2031 }
2032}
2033
6aaa730a
MD
2034/*
2035 * Mark visibility. info->defer is recalculated on every pass.
2036 */
984263bc 2037static void
2dd63755 2038unp_mark(struct file *fp, void *data)
984263bc 2039{
2dd63755 2040 struct unp_gc_info *info = data;
984263bc 2041
5d3c4ae7
MD
2042 if ((fp->f_flag & FMARK) == 0) {
2043 ++info->defer;
b8477cda 2044 atomic_set_int(&fp->f_flag, FMARK | FDEFER);
6aaa730a
MD
2045 } else if (fp->f_flag & FDEFER) {
2046 ++info->defer;
5d3c4ae7 2047 }
984263bc
MD
2048}
2049
82b1d9f2
MD
2050/*
2051 * Discard a fp previously held in a unix domain socket mbuf. To
2052 * avoid blowing out the kernel stack due to contrived chain-reactions
2053 * we may have to defer the operation to a higher procedural level.
2054 *
2055 * Caller holds unp_token
2056 */
984263bc 2057static void
2dd63755 2058unp_discard(struct file *fp, void *data __unused)
984263bc 2059{
82b1d9f2
MD
2060 unp_defdiscard_t dds;
2061
287a8577 2062 spin_lock(&unp_spin);
984263bc
MD
2063 fp->f_msgcount--;
2064 unp_rights--;
287a8577 2065 spin_unlock(&unp_spin);
82b1d9f2
MD
2066
2067 if (unp_defdiscard_nest) {
2068 dds = kmalloc(sizeof(*dds), M_UNPCB, M_WAITOK|M_ZERO);
2069 dds->fp = fp;
2070 dds->next = unp_defdiscard_base;
2071 unp_defdiscard_base = dds;
2072 } else {
2073 closef(fp, NULL);
2074 }
984263bc 2075}
2dd63755 2076
c2bfe86d
SZ
2077static int
2078unp_find_lockref(struct sockaddr *nam, struct thread *td, short type,
2079 struct unpcb **unp_ret)
2080{
2081 struct proc *p = td->td_proc;
2082 struct sockaddr_un *soun = (struct sockaddr_un *)nam;
2083 struct vnode *vp = NULL;
2084 struct socket *so;
2085 struct unpcb *unp;
2086 int error, len;
2087 struct nlookupdata nd;
2088 char buf[SOCK_MAXADDRLEN];
2089
2090 *unp_ret = NULL;
2091
2092 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
2093 if (len <= 0) {
2094 error = EINVAL;
2095 goto failed;
2096 }
2097 strncpy(buf, soun->sun_path, len);
2098 buf[len] = 0;
2099
2100 error = nlookup_init(&nd, buf, UIO_SYSSPACE, NLC_FOLLOW);
2101 if (error == 0)
2102 error = nlookup(&nd);
2103 if (error == 0)
2104 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
2105 nlookup_done(&nd);
2106 if (error) {
2107 vp = NULL;
2108 goto failed;
2109 }
2110
2111 if (vp->v_type != VSOCK) {
2112 error = ENOTSOCK;
2113 goto failed;
2114 }
2115 error = VOP_EACCESS(vp, VWRITE, p->p_ucred);
2116 if (error)
2117 goto failed;
2118 so = vp->v_socket;
2119 if (so == NULL) {
2120 error = ECONNREFUSED;
2121 goto failed;
2122 }
2123 if (so->so_type != type) {
2124 error = EPROTOTYPE;
2125 goto failed;
2126 }
2127
2128 /* Lock this unp. */
2129 unp = unp_getsocktoken(so);
2130 if (!UNP_ISATTACHED(unp)) {
2131 unp_reltoken(unp);
2132 error = ECONNREFUSED;
2133 goto failed;
2134 }
2135 /* And keep this unp referenced. */
2136 unp_reference(unp);
2137
2138 /* Done! */
2139 *unp_ret = unp;
2140 error = 0;
2141failed:
2142 if (vp != NULL)
2143 vput(vp);
2144 return error;
2145}
2146
/*
 * Connect unp to unp2.
 *
 * unp_token and the tokens of both unpcbs must be held, and both sockets
 * must be of the same type (asserted).
 *
 * Returns EINVAL if unp is no longer attached, ECONNREFUSED if unp2 is
 * no longer attached, and 0 once the connection has been set up:
 *  - SOCK_DGRAM: unp->unp_conn points at unp2, unp is linked onto
 *    unp2->unp_refs, and only unp's socket is marked connected.
 *  - SOCK_STREAM / SOCK_SEQPACKET: the two unpcbs point at each other
 *    and both sockets are marked connected.
 * Any other socket type panics.
 */
static int
unp_connect_pair(struct unpcb *unp, struct unpcb *unp2)
{
	struct socket *self_so = unp->unp_socket;
	struct socket *peer_so = unp2->unp_socket;

	ASSERT_LWKT_TOKEN_HELD(&unp_token);
	UNP_ASSERT_TOKEN_HELD(unp);
	UNP_ASSERT_TOKEN_HELD(unp2);

	KASSERT(self_so->so_type == peer_so->so_type,
	    ("socket type mismatch, so %d, so2 %d",
	     self_so->so_type, peer_so->so_type));

	if (!UNP_ISATTACHED(unp))
		return EINVAL;
	if (!UNP_ISATTACHED(unp2))
		return ECONNREFUSED;

	KASSERT(unp->unp_conn == NULL, ("unp is already connected"));
	unp->unp_conn = unp2;

	if (self_so->so_type == SOCK_DGRAM) {
		/* One-way association: remember unp on the peer's ref list. */
		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
		soisconnected(self_so);
	} else if (self_so->so_type == SOCK_STREAM ||
	    self_so->so_type == SOCK_SEQPACKET) {
		/* Two-way association: both ends become connected. */
		KASSERT(unp2->unp_conn == NULL, ("unp2 is already connected"));
		unp2->unp_conn = unp;
		soisconnected(self_so);
		soisconnected(peer_so);
	} else {
		panic("unp_connect_pair: unknown socket type %d",
		    self_so->so_type);
	}
	return 0;
}
16e0b14d
SZ
2187
2188static void
2189unp_drop(struct unpcb *unp, int error)
2190{
2191 struct unpcb *unp2;
2192
2193 ASSERT_LWKT_TOKEN_HELD(&unp_token);
2194 UNP_ASSERT_TOKEN_HELD(unp);
2195 KASSERT(unp->unp_flags & UNP_DETACHED, ("unp is not detached"));
2196
2197 unp_disconnect(unp, error);
2198
2199 while ((unp2 = LIST_FIRST(&unp->unp_refs)) != NULL) {
2200 lwkt_getpooltoken(unp2);
2201 unp_disconnect(unp2, ECONNRESET);
2202 lwkt_relpooltoken(unp2);
2203 }
2204 unp_setflags(unp, UNP_DROPPED);
2205}