accept: Implement fast soaccept predication
[dragonfly.git] / sys / kern / uipc_syscalls.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
37 * $FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.65.2.17 2003/04/04 17:11:16 tegge Exp $
38 */
39
984263bc 40#include "opt_ktrace.h"
78812139 41#include "opt_sctp.h"
984263bc
MD
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/sysproto.h>
47#include <sys/malloc.h>
48#include <sys/filedesc.h>
49#include <sys/event.h>
50#include <sys/proc.h>
51#include <sys/fcntl.h>
52#include <sys/file.h>
53#include <sys/filio.h>
5969a6f1 54#include <sys/kern_syscall.h>
984263bc
MD
55#include <sys/mbuf.h>
56#include <sys/protosw.h>
4860553a 57#include <sys/sfbuf.h>
984263bc
MD
58#include <sys/socket.h>
59#include <sys/socketvar.h>
6b6e0885 60#include <sys/socketops.h>
984263bc
MD
61#include <sys/uio.h>
62#include <sys/vnode.h>
63#include <sys/lock.h>
64#include <sys/mount.h>
65#ifdef KTRACE
66#include <sys/ktrace.h>
67#endif
68#include <vm/vm.h>
69#include <vm/vm_object.h>
70#include <vm/vm_page.h>
71#include <vm/vm_pageout.h>
72#include <vm/vm_kern.h>
73#include <vm/vm_extern.h>
dadab5e9 74#include <sys/file2.h>
770d4c4d 75#include <sys/signalvar.h>
df8d1020 76#include <sys/serialize.h>
984263bc 77
b44419cb
MD
78#include <sys/thread2.h>
79#include <sys/msgport2.h>
d6cb521d 80#include <sys/socketvar2.h>
4599cf19 81#include <net/netmsg2.h>
b44419cb 82
78812139
EN
83#ifdef SCTP
84#include <netinet/sctp_peeloff.h>
85#endif /* SCTP */
86
5e4b3994
SZ
87extern int use_soaccept_pred_fast;
88
984263bc
MD
89/*
90 * System call interface to the socket abstraction.
91 */
984263bc
MD
92
93extern struct fileops socketops;
94
41c20dac
MD
95/*
96 * socket_args(int domain, int type, int protocol)
97 */
984263bc 98int
75a872f8 99kern_socket(int domain, int type, int protocol, int *res)
984263bc 100{
dadab5e9 101 struct thread *td = curthread;
f3a2d8c4 102 struct filedesc *fdp = td->td_proc->p_fd;
984263bc
MD
103 struct socket *so;
104 struct file *fp;
105 int fd, error;
106
f3a2d8c4 107 KKASSERT(td->td_lwp);
dadab5e9 108
f3a2d8c4 109 error = falloc(td->td_lwp, &fp, &fd);
984263bc
MD
110 if (error)
111 return (error);
75a872f8 112 error = socreate(domain, &so, type, protocol, td);
984263bc 113 if (error) {
f3a2d8c4 114 fsetfd(fdp, NULL, fd);
984263bc 115 } else {
984263bc 116 fp->f_type = DTYPE_SOCKET;
fbb4eeab
JH
117 fp->f_flag = FREAD | FWRITE;
118 fp->f_ops = &socketops;
119 fp->f_data = so;
75a872f8 120 *res = fd;
f3a2d8c4 121 fsetfd(fdp, fp, fd);
984263bc 122 }
9f87144f 123 fdrop(fp);
984263bc
MD
124 return (error);
125}
126
3919ced0
MD
127/*
128 * MPALMOSTSAFE
129 */
02844a31 130int
753fd850 131sys_socket(struct socket_args *uap)
75a872f8
DRJ
132{
133 int error;
134
135 error = kern_socket(uap->domain, uap->type, uap->protocol,
e54488bb 136 &uap->sysmsg_iresult);
75a872f8
DRJ
137
138 return (error);
139}
6b6e0885 140
75a872f8 141int
5969a6f1 142kern_bind(int s, struct sockaddr *sa)
984263bc 143{
dadab5e9
MD
144 struct thread *td = curthread;
145 struct proc *p = td->td_proc;
984263bc 146 struct file *fp;
984263bc
MD
147 int error;
148
dadab5e9 149 KKASSERT(p);
d83b97b9 150 error = holdsock(p->p_fd, s, &fp);
984263bc
MD
151 if (error)
152 return (error);
d83b97b9 153 error = sobind((struct socket *)fp->f_data, sa, td);
9f87144f 154 fdrop(fp);
d83b97b9
MD
155 return (error);
156}
157
158/*
159 * bind_args(int s, caddr_t name, int namelen)
3919ced0
MD
160 *
161 * MPALMOSTSAFE
d83b97b9
MD
162 */
163int
753fd850 164sys_bind(struct bind_args *uap)
d83b97b9
MD
165{
166 struct sockaddr *sa;
167 int error;
168
984263bc 169 error = getsockaddr(&sa, uap->name, uap->namelen);
d83b97b9 170 if (error)
984263bc 171 return (error);
5969a6f1 172 error = kern_bind(uap->s, sa);
984263bc 173 FREE(sa, M_SONAME);
d83b97b9 174
984263bc
MD
175 return (error);
176}
177
984263bc 178int
5969a6f1 179kern_listen(int s, int backlog)
984263bc 180{
dadab5e9
MD
181 struct thread *td = curthread;
182 struct proc *p = td->td_proc;
984263bc
MD
183 struct file *fp;
184 int error;
185
dadab5e9 186 KKASSERT(p);
5969a6f1 187 error = holdsock(p->p_fd, s, &fp);
984263bc
MD
188 if (error)
189 return (error);
5969a6f1 190 error = solisten((struct socket *)fp->f_data, backlog, td);
9f87144f 191 fdrop(fp);
984263bc
MD
192 return(error);
193}
194
41c20dac 195/*
5969a6f1 196 * listen_args(int s, int backlog)
3919ced0
MD
197 *
198 * MPALMOSTSAFE
5969a6f1
DRJ
199 */
200int
753fd850 201sys_listen(struct listen_args *uap)
5969a6f1
DRJ
202{
203 int error;
204
205 error = kern_listen(uap->s, uap->backlog);
206 return (error);
207}
208
209/*
f172717f 210 * Returns the accepted socket as well.
5217bcbc
MD
211 *
212 * NOTE! The sockets sitting on so_comp/so_incomp might have 0 refs, the
213 * pool token is absolutely required to avoid a sofree() race,
214 * as well as to avoid tailq handling races.
f172717f
JH
215 */
216static boolean_t
002c1265 217soaccept_predicate(struct netmsg_so_notify *msg)
f172717f 218{
002c1265 219 struct socket *head = msg->base.nm_so;
c19fdb0e 220 struct socket *so;
f172717f
JH
221
222 if (head->so_error != 0) {
002c1265 223 msg->base.lmsg.ms_error = head->so_error;
f172717f
JH
224 return (TRUE);
225 }
5217bcbc 226 lwkt_getpooltoken(head);
f172717f
JH
227 if (!TAILQ_EMPTY(&head->so_comp)) {
228 /* Abuse nm_so field as copy in/copy out parameter. XXX JH */
c19fdb0e
MD
229 so = TAILQ_FIRST(&head->so_comp);
230 TAILQ_REMOVE(&head->so_comp, so, so_list);
f172717f 231 head->so_qlen--;
c19fdb0e
MD
232 soclrstate(so, SS_COMP);
233 so->so_head = NULL;
234 soreference(so);
f172717f 235
5217bcbc 236 lwkt_relpooltoken(head);
c19fdb0e
MD
237
238 msg->base.lmsg.ms_error = 0;
239 msg->base.nm_so = so;
f172717f
JH
240 return (TRUE);
241 }
5217bcbc 242 lwkt_relpooltoken(head);
f172717f 243 if (head->so_state & SS_CANTRCVMORE) {
002c1265 244 msg->base.lmsg.ms_error = ECONNABORTED;
f172717f
JH
245 return (TRUE);
246 }
9ba76b73 247 if (msg->nm_fflags & FNONBLOCK) {
002c1265 248 msg->base.lmsg.ms_error = EWOULDBLOCK;
f172717f
JH
249 return (TRUE);
250 }
251
252 return (FALSE);
253}
254
255/*
5969a6f1
DRJ
256 * The second argument to kern_accept() is a handle to a struct sockaddr.
257 * This allows kern_accept() to return a pointer to an allocated struct
d83b97b9
MD
258 * sockaddr which must be freed later with FREE(). The caller must
259 * initialize *name to NULL.
41c20dac 260 */
02844a31 261int
358e1f78 262kern_accept(int s, int fflags, struct sockaddr **name, int *namelen, int *res)
984263bc 263{
dadab5e9 264 struct thread *td = curthread;
f3a2d8c4 265 struct filedesc *fdp = td->td_proc->p_fd;
984263bc
MD
266 struct file *lfp = NULL;
267 struct file *nfp = NULL;
268 struct sockaddr *sa;
984263bc 269 struct socket *head, *so;
f172717f 270 struct netmsg_so_notify msg;
984263bc
MD
271 int fd;
272 u_int fflag; /* type must match fp->f_flag */
f172717f 273 int error, tmp;
984263bc 274
259b8ea0 275 *res = -1;
d83b97b9
MD
276 if (name && namelen && *namelen < 0)
277 return (EINVAL);
278
f3a2d8c4 279 error = holdsock(td->td_proc->p_fd, s, &lfp);
984263bc
MD
280 if (error)
281 return (error);
f172717f 282
f3a2d8c4 283 error = falloc(td->td_lwp, &nfp, &fd);
f172717f 284 if (error) { /* Probably ran out of file descriptors. */
9f87144f 285 fdrop(lfp);
f172717f
JH
286 return (error);
287 }
984263bc
MD
288 head = (struct socket *)lfp->f_data;
289 if ((head->so_options & SO_ACCEPTCONN) == 0) {
984263bc
MD
290 error = EINVAL;
291 goto done;
292 }
f172717f 293
358e1f78
MD
294 if (fflags & O_FBLOCKING)
295 fflags |= lfp->f_flag & ~FNONBLOCK;
296 else if (fflags & O_FNONBLOCKING)
297 fflags |= lfp->f_flag | FNONBLOCK;
298 else
299 fflags = lfp->f_flag;
300
5e4b3994
SZ
301 if (use_soaccept_pred_fast) {
302 boolean_t pred;
303
304 /* Initialize necessary parts for soaccept_predicate() */
305 netmsg_init(&msg.base, head, &netisr_apanic_rport, 0, NULL);
306 msg.nm_fflags = fflags;
307
308 lwkt_getpooltoken(head);
309 pred = soaccept_predicate(&msg);
310 lwkt_relpooltoken(head);
311
312 if (pred) {
313 error = msg.base.lmsg.ms_error;
314 if (error)
315 goto done;
316 else
317 goto accepted;
318 }
319 }
320
f172717f 321 /* optimize for uniprocessor case later XXX JH */
002c1265 322 netmsg_init_abortable(&msg.base, head, &curthread->td_msgport,
48e7b118 323 0, netmsg_so_notify, netmsg_so_notify_doabort);
f172717f 324 msg.nm_predicate = soaccept_predicate;
358e1f78 325 msg.nm_fflags = fflags;
f172717f 326 msg.nm_etype = NM_REVENT;
002c1265 327 error = lwkt_domsg(head->so_port, &msg.base.lmsg, PCATCH);
f172717f 328 if (error)
984263bc 329 goto done;
984263bc 330
5e4b3994 331accepted:
984263bc 332 /*
f172717f 333 * At this point we have the connection that's ready to be accepted.
c19fdb0e
MD
334 *
335 * NOTE! soaccept_predicate() ref'd so for us, and soaccept() expects
336 * to eat the ref and turn it into a descriptor.
984263bc 337 */
002c1265 338 so = msg.base.nm_so;
984263bc
MD
339
340 fflag = lfp->f_flag;
984263bc
MD
341
342 /* connection has been removed from the listen queue */
5b22f1a7 343 KNOTE(&head->so_rcv.ssb_kq.ki_note, 0);
984263bc 344
984263bc 345 if (head->so_sigio != NULL)
b5c4d81f 346 fsetown(fgetown(&head->so_sigio), &so->so_sigio);
984263bc 347
fbb4eeab 348 nfp->f_type = DTYPE_SOCKET;
984263bc
MD
349 nfp->f_flag = fflag;
350 nfp->f_ops = &socketops;
fbb4eeab 351 nfp->f_data = so;
984263bc
MD
352 /* Sync socket nonblocking/async state with file flags */
353 tmp = fflag & FNONBLOCK;
9910d07b 354 fo_ioctl(nfp, FIONBIO, (caddr_t)&tmp, td->td_ucred, NULL);
984263bc 355 tmp = fflag & FASYNC;
9910d07b 356 fo_ioctl(nfp, FIOASYNC, (caddr_t)&tmp, td->td_ucred, NULL);
d83b97b9
MD
357
358 sa = NULL;
984263bc 359 error = soaccept(so, &sa);
d83b97b9
MD
360
361 /*
362 * Set the returned name and namelen as applicable. Set the returned
363 * namelen to 0 for older code which might ignore the return value
364 * from accept.
365 */
366 if (error == 0) {
367 if (sa && name && namelen) {
368 if (*namelen > sa->sa_len)
369 *namelen = sa->sa_len;
370 *name = sa;
371 } else {
372 if (sa)
373 FREE(sa, M_SONAME);
984263bc 374 }
984263bc 375 }
984263bc 376
f172717f 377done:
984263bc 378 /*
259b8ea0
MD
379 * If an error occured clear the reserved descriptor, else associate
380 * nfp with it.
381 *
382 * Note that *res is normally ignored if an error is returned but
383 * a syscall message will still have access to the result code.
984263bc
MD
384 */
385 if (error) {
f3a2d8c4 386 fsetfd(fdp, NULL, fd);
259b8ea0
MD
387 } else {
388 *res = fd;
f3a2d8c4 389 fsetfd(fdp, nfp, fd);
984263bc 390 }
259b8ea0 391 fdrop(nfp);
9f87144f 392 fdrop(lfp);
984263bc
MD
393 return (error);
394}
395
d83b97b9 396/*
358e1f78 397 * accept(int s, caddr_t name, int *anamelen)
3919ced0
MD
398 *
399 * MPALMOSTSAFE
d83b97b9 400 */
984263bc 401int
753fd850 402sys_accept(struct accept_args *uap)
984263bc 403{
d83b97b9
MD
404 struct sockaddr *sa = NULL;
405 int sa_len;
406 int error;
407
408 if (uap->name) {
409 error = copyin(uap->anamelen, &sa_len, sizeof(sa_len));
410 if (error)
411 return (error);
412
e54488bb
MD
413 error = kern_accept(uap->s, 0, &sa, &sa_len,
414 &uap->sysmsg_iresult);
358e1f78
MD
415
416 if (error == 0)
417 error = copyout(sa, uap->name, sa_len);
418 if (error == 0) {
419 error = copyout(&sa_len, uap->anamelen,
420 sizeof(*uap->anamelen));
421 }
422 if (sa)
423 FREE(sa, M_SONAME);
424 } else {
e54488bb
MD
425 error = kern_accept(uap->s, 0, NULL, 0,
426 &uap->sysmsg_iresult);
358e1f78
MD
427 }
428 return (error);
429}
430
431/*
b09fd398 432 * extaccept(int s, int fflags, caddr_t name, int *anamelen)
3919ced0
MD
433 *
434 * MPALMOSTSAFE
358e1f78
MD
435 */
436int
b09fd398 437sys_extaccept(struct extaccept_args *uap)
358e1f78
MD
438{
439 struct sockaddr *sa = NULL;
440 int sa_len;
441 int error;
442 int fflags = uap->flags & O_FMASK;
443
444 if (uap->name) {
445 error = copyin(uap->anamelen, &sa_len, sizeof(sa_len));
446 if (error)
447 return (error);
448
e54488bb
MD
449 error = kern_accept(uap->s, fflags, &sa, &sa_len,
450 &uap->sysmsg_iresult);
d83b97b9
MD
451
452 if (error == 0)
453 error = copyout(sa, uap->name, sa_len);
454 if (error == 0) {
455 error = copyout(&sa_len, uap->anamelen,
456 sizeof(*uap->anamelen));
457 }
458 if (sa)
459 FREE(sa, M_SONAME);
460 } else {
e54488bb
MD
461 error = kern_accept(uap->s, fflags, NULL, 0,
462 &uap->sysmsg_iresult);
d83b97b9
MD
463 }
464 return (error);
984263bc
MD
465}
466
358e1f78 467
b44419cb
MD
468/*
469 * Returns TRUE if predicate satisfied.
470 */
471static boolean_t
002c1265 472soconnected_predicate(struct netmsg_so_notify *msg)
b44419cb 473{
002c1265 474 struct socket *so = msg->base.nm_so;
b44419cb
MD
475
476 /* check predicate */
477 if (!(so->so_state & SS_ISCONNECTING) || so->so_error != 0) {
002c1265 478 msg->base.lmsg.ms_error = so->so_error;
b44419cb
MD
479 return (TRUE);
480 }
481
482 return (FALSE);
483}
484
02844a31 485int
358e1f78 486kern_connect(int s, int fflags, struct sockaddr *sa)
984263bc 487{
dadab5e9
MD
488 struct thread *td = curthread;
489 struct proc *p = td->td_proc;
984263bc 490 struct file *fp;
dadab5e9 491 struct socket *so;
8765eadc 492 int error, interrupted = 0;
984263bc 493
d83b97b9 494 error = holdsock(p->p_fd, s, &fp);
984263bc
MD
495 if (error)
496 return (error);
497 so = (struct socket *)fp->f_data;
358e1f78
MD
498
499 if (fflags & O_FBLOCKING)
500 /* fflags &= ~FNONBLOCK; */;
501 else if (fflags & O_FNONBLOCKING)
502 fflags |= FNONBLOCK;
503 else
504 fflags = fp->f_flag;
505
8765eadc 506 if (so->so_state & SS_ISCONNECTING) {
984263bc
MD
507 error = EALREADY;
508 goto done;
509 }
dadab5e9 510 error = soconnect(so, sa, td);
984263bc
MD
511 if (error)
512 goto bad;
358e1f78 513 if ((fflags & FNONBLOCK) && (so->so_state & SS_ISCONNECTING)) {
984263bc
MD
514 error = EINPROGRESS;
515 goto done;
516 }
b44419cb
MD
517 if ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
518 struct netmsg_so_notify msg;
b44419cb 519
002c1265 520 netmsg_init_abortable(&msg.base, so,
4599cf19 521 &curthread->td_msgport,
a22c590e 522 0,
4599cf19
MD
523 netmsg_so_notify,
524 netmsg_so_notify_doabort);
b44419cb 525 msg.nm_predicate = soconnected_predicate;
b44419cb 526 msg.nm_etype = NM_REVENT;
002c1265 527 error = lwkt_domsg(so->so_port, &msg.base.lmsg, PCATCH);
8765eadc
SZ
528 if (error == EINTR || error == ERESTART)
529 interrupted = 1;
984263bc
MD
530 }
531 if (error == 0) {
532 error = so->so_error;
533 so->so_error = 0;
534 }
984263bc 535bad:
8765eadc 536 if (!interrupted)
6cef7136 537 soclrstate(so, SS_ISCONNECTING);
984263bc
MD
538 if (error == ERESTART)
539 error = EINTR;
540done:
9f87144f 541 fdrop(fp);
984263bc
MD
542 return (error);
543}
544
41c20dac 545/*
d83b97b9 546 * connect_args(int s, caddr_t name, int namelen)
3919ced0
MD
547 *
548 * MPALMOSTSAFE
d83b97b9
MD
549 */
550int
753fd850 551sys_connect(struct connect_args *uap)
d83b97b9
MD
552{
553 struct sockaddr *sa;
554 int error;
555
556 error = getsockaddr(&sa, uap->name, uap->namelen);
557 if (error)
558 return (error);
358e1f78
MD
559 error = kern_connect(uap->s, 0, sa);
560 FREE(sa, M_SONAME);
561
562 return (error);
563}
564
565/*
566 * connect_args(int s, int fflags, caddr_t name, int namelen)
3919ced0
MD
567 *
568 * MPALMOSTSAFE
358e1f78
MD
569 */
570int
b09fd398 571sys_extconnect(struct extconnect_args *uap)
358e1f78
MD
572{
573 struct sockaddr *sa;
574 int error;
575 int fflags = uap->flags & O_FMASK;
576
577 error = getsockaddr(&sa, uap->name, uap->namelen);
578 if (error)
579 return (error);
580 error = kern_connect(uap->s, fflags, sa);
d83b97b9
MD
581 FREE(sa, M_SONAME);
582
583 return (error);
584}
585
984263bc 586int
5969a6f1 587kern_socketpair(int domain, int type, int protocol, int *sv)
984263bc 588{
dadab5e9 589 struct thread *td = curthread;
f3a2d8c4 590 struct filedesc *fdp;
984263bc
MD
591 struct file *fp1, *fp2;
592 struct socket *so1, *so2;
259b8ea0 593 int fd1, fd2, error;
984263bc 594
f3a2d8c4 595 fdp = td->td_proc->p_fd;
5969a6f1 596 error = socreate(domain, &so1, type, protocol, td);
984263bc
MD
597 if (error)
598 return (error);
5969a6f1 599 error = socreate(domain, &so2, type, protocol, td);
984263bc
MD
600 if (error)
601 goto free1;
f3a2d8c4 602 error = falloc(td->td_lwp, &fp1, &fd1);
984263bc
MD
603 if (error)
604 goto free2;
259b8ea0 605 sv[0] = fd1;
fbb4eeab 606 fp1->f_data = so1;
f3a2d8c4 607 error = falloc(td->td_lwp, &fp2, &fd2);
984263bc
MD
608 if (error)
609 goto free3;
fbb4eeab 610 fp2->f_data = so2;
259b8ea0 611 sv[1] = fd2;
984263bc
MD
612 error = soconnect2(so1, so2);
613 if (error)
614 goto free4;
5969a6f1 615 if (type == SOCK_DGRAM) {
984263bc
MD
616 /*
617 * Datagram socket connection is asymmetric.
618 */
619 error = soconnect2(so2, so1);
620 if (error)
621 goto free4;
622 }
fbb4eeab 623 fp1->f_type = fp2->f_type = DTYPE_SOCKET;
984263bc
MD
624 fp1->f_flag = fp2->f_flag = FREAD|FWRITE;
625 fp1->f_ops = fp2->f_ops = &socketops;
f3a2d8c4
MD
626 fsetfd(fdp, fp1, fd1);
627 fsetfd(fdp, fp2, fd2);
9f87144f
MD
628 fdrop(fp1);
629 fdrop(fp2);
984263bc
MD
630 return (error);
631free4:
f3a2d8c4 632 fsetfd(fdp, NULL, fd2);
9f87144f 633 fdrop(fp2);
984263bc 634free3:
f3a2d8c4 635 fsetfd(fdp, NULL, fd1);
9f87144f 636 fdrop(fp1);
984263bc 637free2:
9ba76b73 638 (void)soclose(so2, 0);
984263bc 639free1:
9ba76b73 640 (void)soclose(so1, 0);
984263bc
MD
641 return (error);
642}
643
5969a6f1
DRJ
644/*
645 * socketpair(int domain, int type, int protocol, int *rsv)
646 */
647int
753fd850 648sys_socketpair(struct socketpair_args *uap)
5969a6f1
DRJ
649{
650 int error, sockv[2];
651
652 error = kern_socketpair(uap->domain, uap->type, uap->protocol, sockv);
653
8b8ad336 654 if (error == 0) {
5969a6f1 655 error = copyout(sockv, uap->rsv, sizeof(sockv));
8b8ad336
VS
656
657 if (error != 0) {
658 kern_close(sockv[0]);
659 kern_close(sockv[1]);
660 }
661 }
662
5969a6f1
DRJ
663 return (error);
664}
665
35fbb1d9 666int
3e1837ce 667kern_sendmsg(int s, struct sockaddr *sa, struct uio *auio,
e54488bb 668 struct mbuf *control, int flags, size_t *res)
984263bc 669{
dadab5e9 670 struct thread *td = curthread;
7278a846 671 struct lwp *lp = td->td_lwp;
dadab5e9 672 struct proc *p = td->td_proc;
984263bc 673 struct file *fp;
e54488bb
MD
674 size_t len;
675 int error;
984263bc
MD
676 struct socket *so;
677#ifdef KTRACE
678 struct iovec *ktriov = NULL;
679 struct uio ktruio;
680#endif
681
682 error = holdsock(p->p_fd, s, &fp);
683 if (error)
684 return (error);
984263bc 685#ifdef KTRACE
dadab5e9 686 if (KTRPOINT(td, KTR_GENIO)) {
3e1837ce 687 int iovlen = auio->uio_iovcnt * sizeof (struct iovec);
984263bc
MD
688
689 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
3e1837ce
DRJ
690 bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen);
691 ktruio = *auio;
984263bc
MD
692 }
693#endif
3e1837ce 694 len = auio->uio_resid;
984263bc 695 so = (struct socket *)fp->f_data;
9ba76b73
MD
696 if ((flags & (MSG_FNONBLOCKING|MSG_FBLOCKING)) == 0) {
697 if (fp->f_flag & FNONBLOCK)
698 flags |= MSG_FNONBLOCKING;
699 }
6b6e0885 700 error = so_pru_sosend(so, sa, auio, NULL, control, flags, td);
984263bc 701 if (error) {
3e1837ce 702 if (auio->uio_resid != len && (error == ERESTART ||
984263bc
MD
703 error == EINTR || error == EWOULDBLOCK))
704 error = 0;
e2c70b77 705 if (error == EPIPE && !(flags & MSG_NOSIGNAL))
7278a846 706 lwpsignal(p, lp, SIGPIPE);
984263bc 707 }
984263bc
MD
708#ifdef KTRACE
709 if (ktriov != NULL) {
710 if (error == 0) {
711 ktruio.uio_iov = ktriov;
3e1837ce 712 ktruio.uio_resid = len - auio->uio_resid;
9fb04d14 713 ktrgenio(lp, s, UIO_WRITE, &ktruio, error);
984263bc
MD
714 }
715 FREE(ktriov, M_TEMP);
716 }
717#endif
35fbb1d9 718 if (error == 0)
3e1837ce 719 *res = len - auio->uio_resid;
9f87144f 720 fdrop(fp);
984263bc
MD
721 return (error);
722}
723
41c20dac
MD
724/*
725 * sendto_args(int s, caddr_t buf, size_t len, int flags, caddr_t to, int tolen)
3919ced0
MD
726 *
727 * MPALMOSTSAFE
41c20dac 728 */
984263bc 729int
753fd850 730sys_sendto(struct sendto_args *uap)
984263bc 731{
3e1837ce
DRJ
732 struct thread *td = curthread;
733 struct uio auio;
984263bc 734 struct iovec aiov;
35fbb1d9
DRJ
735 struct sockaddr *sa = NULL;
736 int error;
984263bc 737
35fbb1d9
DRJ
738 if (uap->to) {
739 error = getsockaddr(&sa, uap->to, uap->tolen);
740 if (error)
741 return (error);
35fbb1d9 742 }
984263bc
MD
743 aiov.iov_base = uap->buf;
744 aiov.iov_len = uap->len;
3e1837ce
DRJ
745 auio.uio_iov = &aiov;
746 auio.uio_iovcnt = 1;
747 auio.uio_offset = 0;
748 auio.uio_resid = uap->len;
749 auio.uio_segflg = UIO_USERSPACE;
750 auio.uio_rw = UIO_WRITE;
751 auio.uio_td = td;
984263bc 752
3e1837ce 753 error = kern_sendmsg(uap->s, sa, &auio, NULL, uap->flags,
e54488bb 754 &uap->sysmsg_szresult);
984263bc 755
35fbb1d9
DRJ
756 if (sa)
757 FREE(sa, M_SONAME);
758 return (error);
984263bc
MD
759}
760
41c20dac 761/*
35fbb1d9 762 * sendmsg_args(int s, caddr_t msg, int flags)
3919ced0
MD
763 *
764 * MPALMOSTSAFE
41c20dac 765 */
984263bc 766int
753fd850 767sys_sendmsg(struct sendmsg_args *uap)
984263bc 768{
3e1837ce 769 struct thread *td = curthread;
984263bc 770 struct msghdr msg;
3e1837ce 771 struct uio auio;
75a872f8 772 struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
35fbb1d9
DRJ
773 struct sockaddr *sa = NULL;
774 struct mbuf *control = NULL;
75a872f8 775 int error;
984263bc 776
35fbb1d9 777 error = copyin(uap->msg, (caddr_t)&msg, sizeof(msg));
984263bc
MD
778 if (error)
779 return (error);
35fbb1d9
DRJ
780
781 /*
782 * Conditionally copyin msg.msg_name.
783 */
784 if (msg.msg_name) {
785 error = getsockaddr(&sa, msg.msg_name, msg.msg_namelen);
786 if (error)
787 return (error);
35fbb1d9
DRJ
788 }
789
790 /*
3e1837ce 791 * Populate auio.
35fbb1d9 792 */
75a872f8 793 error = iovec_copyin(msg.msg_iov, &iov, aiov, msg.msg_iovlen,
ef5c76d7 794 &auio.uio_resid);
984263bc 795 if (error)
8130f673 796 goto cleanup2;
3e1837ce
DRJ
797 auio.uio_iov = iov;
798 auio.uio_iovcnt = msg.msg_iovlen;
799 auio.uio_offset = 0;
3e1837ce
DRJ
800 auio.uio_segflg = UIO_USERSPACE;
801 auio.uio_rw = UIO_WRITE;
802 auio.uio_td = td;
35fbb1d9
DRJ
803
804 /*
805 * Conditionally copyin msg.msg_control.
806 */
807 if (msg.msg_control) {
3e1837ce
DRJ
808 if (msg.msg_controllen < sizeof(struct cmsghdr) ||
809 msg.msg_controllen > MLEN) {
35fbb1d9
DRJ
810 error = EINVAL;
811 goto cleanup;
812 }
74f1caca 813 control = m_get(MB_WAIT, MT_CONTROL);
3e1837ce
DRJ
814 if (control == NULL) {
815 error = ENOBUFS;
35fbb1d9 816 goto cleanup;
3e1837ce
DRJ
817 }
818 control->m_len = msg.msg_controllen;
819 error = copyin(msg.msg_control, mtod(control, caddr_t),
3919ced0 820 msg.msg_controllen);
3e1837ce
DRJ
821 if (error) {
822 m_free(control);
823 goto cleanup;
824 }
35fbb1d9
DRJ
825 }
826
3e1837ce 827 error = kern_sendmsg(uap->s, sa, &auio, control, uap->flags,
e54488bb 828 &uap->sysmsg_szresult);
35fbb1d9
DRJ
829
830cleanup:
8130f673
MD
831 iovec_free(&iov, aiov);
832cleanup2:
35fbb1d9
DRJ
833 if (sa)
834 FREE(sa, M_SONAME);
984263bc
MD
835 return (error);
836}
984263bc 837
41c20dac 838/*
3e1837ce
DRJ
839 * kern_recvmsg() takes a handle to sa and control. If the handle is non-
840 * null, it returns a dynamically allocated struct sockaddr and an mbuf.
841 * Don't forget to FREE() and m_free() these if they are returned.
41c20dac 842 */
984263bc 843int
3e1837ce 844kern_recvmsg(int s, struct sockaddr **sa, struct uio *auio,
e54488bb 845 struct mbuf **control, int *flags, size_t *res)
984263bc 846{
dadab5e9
MD
847 struct thread *td = curthread;
848 struct proc *p = td->td_proc;
984263bc 849 struct file *fp;
e54488bb
MD
850 size_t len;
851 int error;
9ba76b73 852 int lflags;
984263bc 853 struct socket *so;
984263bc
MD
854#ifdef KTRACE
855 struct iovec *ktriov = NULL;
856 struct uio ktruio;
857#endif
858
859 error = holdsock(p->p_fd, s, &fp);
860 if (error)
861 return (error);
984263bc 862#ifdef KTRACE
dadab5e9 863 if (KTRPOINT(td, KTR_GENIO)) {
3e1837ce 864 int iovlen = auio->uio_iovcnt * sizeof (struct iovec);
984263bc
MD
865
866 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
3e1837ce
DRJ
867 bcopy(auio->uio_iov, ktriov, iovlen);
868 ktruio = *auio;
984263bc
MD
869 }
870#endif
3e1837ce 871 len = auio->uio_resid;
984263bc 872 so = (struct socket *)fp->f_data;
9ba76b73
MD
873
874 if (flags == NULL || (*flags & (MSG_FNONBLOCKING|MSG_FBLOCKING)) == 0) {
875 if (fp->f_flag & FNONBLOCK) {
876 if (flags) {
877 *flags |= MSG_FNONBLOCKING;
878 } else {
879 lflags = MSG_FNONBLOCKING;
880 flags = &lflags;
881 }
882 }
883 }
884
6b6e0885 885 error = so_pru_soreceive(so, sa, auio, NULL, control, flags);
984263bc 886 if (error) {
3e1837ce 887 if (auio->uio_resid != len && (error == ERESTART ||
984263bc
MD
888 error == EINTR || error == EWOULDBLOCK))
889 error = 0;
890 }
891#ifdef KTRACE
892 if (ktriov != NULL) {
893 if (error == 0) {
894 ktruio.uio_iov = ktriov;
3e1837ce 895 ktruio.uio_resid = len - auio->uio_resid;
9fb04d14 896 ktrgenio(td->td_lwp, s, UIO_READ, &ktruio, error);
984263bc
MD
897 }
898 FREE(ktriov, M_TEMP);
899 }
900#endif
35fbb1d9 901 if (error == 0)
3e1837ce 902 *res = len - auio->uio_resid;
9f87144f 903 fdrop(fp);
984263bc
MD
904 return (error);
905}
906
41c20dac
MD
907/*
908 * recvfrom_args(int s, caddr_t buf, size_t len, int flags,
909 * caddr_t from, int *fromlenaddr)
3919ced0
MD
910 *
911 * MPALMOSTSAFE
41c20dac 912 */
984263bc 913int
753fd850 914sys_recvfrom(struct recvfrom_args *uap)
984263bc 915{
3e1837ce
DRJ
916 struct thread *td = curthread;
917 struct uio auio;
984263bc 918 struct iovec aiov;
3e1837ce 919 struct sockaddr *sa = NULL;
35fbb1d9 920 int error, fromlen;
984263bc 921
3e1837ce 922 if (uap->from && uap->fromlenaddr) {
35fbb1d9 923 error = copyin(uap->fromlenaddr, &fromlen, sizeof(fromlen));
984263bc
MD
924 if (error)
925 return (error);
3e1837ce
DRJ
926 if (fromlen < 0)
927 return (EINVAL);
35fbb1d9
DRJ
928 } else {
929 fromlen = 0;
930 }
984263bc
MD
931 aiov.iov_base = uap->buf;
932 aiov.iov_len = uap->len;
3e1837ce
DRJ
933 auio.uio_iov = &aiov;
934 auio.uio_iovcnt = 1;
935 auio.uio_offset = 0;
936 auio.uio_resid = uap->len;
937 auio.uio_segflg = UIO_USERSPACE;
938 auio.uio_rw = UIO_READ;
939 auio.uio_td = td;
984263bc 940
3e1837ce 941 error = kern_recvmsg(uap->s, uap->from ? &sa : NULL, &auio, NULL,
e54488bb 942 &uap->flags, &uap->sysmsg_szresult);
984263bc 943
3e1837ce 944 if (error == 0 && uap->from) {
c3996757
MD
945 /* note: sa may still be NULL */
946 if (sa) {
947 fromlen = MIN(fromlen, sa->sa_len);
948 error = copyout(sa, uap->from, fromlen);
949 } else {
950 fromlen = 0;
951 }
952 if (error == 0) {
35fbb1d9 953 error = copyout(&fromlen, uap->fromlenaddr,
c3996757
MD
954 sizeof(fromlen));
955 }
35fbb1d9 956 }
3e1837ce
DRJ
957 if (sa)
958 FREE(sa, M_SONAME);
984263bc 959
984263bc
MD
960 return (error);
961}
984263bc 962
41c20dac
MD
963/*
964 * recvmsg_args(int s, struct msghdr *msg, int flags)
3919ced0
MD
965 *
966 * MPALMOSTSAFE
41c20dac 967 */
984263bc 968int
753fd850 969sys_recvmsg(struct recvmsg_args *uap)
984263bc 970{
3e1837ce 971 struct thread *td = curthread;
984263bc 972 struct msghdr msg;
3e1837ce 973 struct uio auio;
75a872f8
DRJ
974 struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
975 struct mbuf *m, *control = NULL;
3e1837ce 976 struct sockaddr *sa = NULL;
35fbb1d9 977 caddr_t ctlbuf;
3e1837ce 978 socklen_t *ufromlenp, *ucontrollenp;
75a872f8 979 int error, fromlen, controllen, len, flags, *uflagsp;
984263bc 980
35fbb1d9
DRJ
981 /*
982 * This copyin handles everything except the iovec.
983 */
984 error = copyin(uap->msg, &msg, sizeof(msg));
984263bc
MD
985 if (error)
986 return (error);
35fbb1d9 987
3e1837ce
DRJ
988 if (msg.msg_name && msg.msg_namelen < 0)
989 return (EINVAL);
990 if (msg.msg_control && msg.msg_controllen < 0)
991 return (EINVAL);
992
993 ufromlenp = (socklen_t *)((caddr_t)uap->msg + offsetof(struct msghdr,
3919ced0 994 msg_namelen));
35fbb1d9 995 ucontrollenp = (socklen_t *)((caddr_t)uap->msg + offsetof(struct msghdr,
3919ced0 996 msg_controllen));
3e1837ce 997 uflagsp = (int *)((caddr_t)uap->msg + offsetof(struct msghdr,
3919ced0 998 msg_flags));
35fbb1d9
DRJ
999
1000 /*
3e1837ce 1001 * Populate auio.
35fbb1d9 1002 */
75a872f8 1003 error = iovec_copyin(msg.msg_iov, &iov, aiov, msg.msg_iovlen,
ef5c76d7 1004 &auio.uio_resid);
984263bc 1005 if (error)
75a872f8 1006 return (error);
3e1837ce
DRJ
1007 auio.uio_iov = iov;
1008 auio.uio_iovcnt = msg.msg_iovlen;
1009 auio.uio_offset = 0;
3e1837ce
DRJ
1010 auio.uio_segflg = UIO_USERSPACE;
1011 auio.uio_rw = UIO_READ;
1012 auio.uio_td = td;
35fbb1d9 1013
b7ccd728 1014 flags = uap->flags;
35fbb1d9 1015
e54488bb
MD
1016 error = kern_recvmsg(uap->s,
1017 (msg.msg_name ? &sa : NULL), &auio,
1018 (msg.msg_control ? &control : NULL), &flags,
1019 &uap->sysmsg_szresult);
35fbb1d9
DRJ
1020
1021 /*
3e1837ce 1022 * Conditionally copyout the name and populate the namelen field.
35fbb1d9 1023 */
3e1837ce 1024 if (error == 0 && msg.msg_name) {
b9cd15b9
YT
1025 /* note: sa may still be NULL */
1026 if (sa != NULL) {
1027 fromlen = MIN(msg.msg_namelen, sa->sa_len);
1028 error = copyout(sa, msg.msg_name, fromlen);
b4354d10 1029 } else {
b9cd15b9 1030 fromlen = 0;
b4354d10 1031 }
35fbb1d9 1032 if (error == 0)
3e1837ce
DRJ
1033 error = copyout(&fromlen, ufromlenp,
1034 sizeof(*ufromlenp));
984263bc 1035 }
35fbb1d9
DRJ
1036
1037 /*
1038 * Copyout msg.msg_control and msg.msg_controllen.
1039 */
3e1837ce 1040 if (error == 0 && msg.msg_control) {
35fbb1d9 1041 len = msg.msg_controllen;
3e1837ce
DRJ
1042 m = control;
1043 ctlbuf = (caddr_t)msg.msg_control;
35fbb1d9
DRJ
1044
1045 while(m && len > 0) {
1046 unsigned int tocopy;
1047
1048 if (len >= m->m_len) {
1049 tocopy = m->m_len;
1050 } else {
1051 msg.msg_flags |= MSG_CTRUNC;
1052 tocopy = len;
1053 }
1054
1055 error = copyout(mtod(m, caddr_t), ctlbuf, tocopy);
1056 if (error)
1057 goto cleanup;
1058
1059 ctlbuf += tocopy;
1060 len -= tocopy;
1061 m = m->m_next;
1062 }
3e1837ce
DRJ
1063 controllen = ctlbuf - (caddr_t)msg.msg_control;
1064 error = copyout(&controllen, ucontrollenp,
35fbb1d9
DRJ
1065 sizeof(*ucontrollenp));
1066 }
1067
3e1837ce
DRJ
1068 if (error == 0)
1069 error = copyout(&flags, uflagsp, sizeof(*uflagsp));
1070
35fbb1d9 1071cleanup:
3e1837ce
DRJ
1072 if (sa)
1073 FREE(sa, M_SONAME);
75a872f8 1074 iovec_free(&iov, aiov);
3e1837ce
DRJ
1075 if (control)
1076 m_freem(control);
984263bc
MD
1077 return (error);
1078}
1079
41c20dac 1080/*
201305ad
DRJ
1081 * If sopt->sopt_td == NULL, then sopt->sopt_val is treated as an
1082 * in kernel pointer instead of a userland pointer. This allows us
1083 * to manipulate socket options in the emulation code.
41c20dac 1084 */
984263bc 1085int
201305ad 1086kern_setsockopt(int s, struct sockopt *sopt)
984263bc 1087{
dadab5e9
MD
1088 struct thread *td = curthread;
1089 struct proc *p = td->td_proc;
984263bc 1090 struct file *fp;
984263bc
MD
1091 int error;
1092
b4354d10 1093 if (sopt->sopt_val == NULL && sopt->sopt_valsize != 0)
984263bc 1094 return (EFAULT);
b8237e23
AH
1095 if (sopt->sopt_val != NULL && sopt->sopt_valsize == 0)
1096 return (EINVAL);
201305ad 1097 if (sopt->sopt_valsize < 0)
984263bc
MD
1098 return (EINVAL);
1099
201305ad 1100 error = holdsock(p->p_fd, s, &fp);
984263bc
MD
1101 if (error)
1102 return (error);
1103
201305ad 1104 error = sosetopt((struct socket *)fp->f_data, sopt);
9f87144f 1105 fdrop(fp);
201305ad
DRJ
1106 return (error);
1107}
1108
1109/*
1110 * setsockopt_args(int s, int level, int name, caddr_t val, int valsize)
3919ced0
MD
1111 *
1112 * MPALMOSTSAFE
201305ad
DRJ
1113 */
1114int
753fd850 1115sys_setsockopt(struct setsockopt_args *uap)
201305ad
DRJ
1116{
1117 struct thread *td = curthread;
1118 struct sockopt sopt;
1119 int error;
1120
984263bc
MD
1121 sopt.sopt_level = uap->level;
1122 sopt.sopt_name = uap->name;
984263bc 1123 sopt.sopt_valsize = uap->valsize;
dadab5e9 1124 sopt.sopt_td = td;
aca22a94 1125 sopt.sopt_val = NULL;
201305ad 1126
b4354d10
MD
1127 if (sopt.sopt_valsize < 0 || sopt.sopt_valsize > SOMAXOPT_SIZE)
1128 return (EINVAL);
792239df
AE
1129 if (uap->val) {
1130 sopt.sopt_val = kmalloc(sopt.sopt_valsize, M_TEMP, M_WAITOK);
1131 error = copyin(uap->val, sopt.sopt_val, sopt.sopt_valsize);
1132 if (error)
1133 goto out;
792239df 1134 }
aca22a94 1135
201305ad 1136 error = kern_setsockopt(uap->s, &sopt);
de0003fe 1137out:
792239df
AE
1138 if (uap->val)
1139 kfree(sopt.sopt_val, M_TEMP);
984263bc
MD
1140 return(error);
1141}
1142
41c20dac 1143/*
201305ad
DRJ
1144 * If sopt->sopt_td == NULL, then sopt->sopt_val is treated as an
1145 * in kernel pointer instead of a userland pointer. This allows us
1146 * to manipulate socket options in the emulation code.
41c20dac 1147 */
984263bc 1148int
201305ad 1149kern_getsockopt(int s, struct sockopt *sopt)
984263bc 1150{
dadab5e9
MD
1151 struct thread *td = curthread;
1152 struct proc *p = td->td_proc;
201305ad
DRJ
1153 struct file *fp;
1154 int error;
984263bc 1155
b4354d10 1156 if (sopt->sopt_val == NULL && sopt->sopt_valsize != 0)
201305ad 1157 return (EFAULT);
b8237e23
AH
1158 if (sopt->sopt_val != NULL && sopt->sopt_valsize == 0)
1159 return (EINVAL);
b4354d10 1160 if (sopt->sopt_valsize < 0 || sopt->sopt_valsize > SOMAXOPT_SIZE)
201305ad
DRJ
1161 return (EINVAL);
1162
1163 error = holdsock(p->p_fd, s, &fp);
984263bc
MD
1164 if (error)
1165 return (error);
201305ad
DRJ
1166
1167 error = sogetopt((struct socket *)fp->f_data, sopt);
9f87144f 1168 fdrop(fp);
201305ad
DRJ
1169 return (error);
1170}
1171
1172/*
3919ced0
MD
1173 * getsockopt_args(int s, int level, int name, caddr_t val, int *avalsize)
1174 *
1175 * MPALMOSTSAFE
201305ad
DRJ
1176 */
1177int
753fd850 1178sys_getsockopt(struct getsockopt_args *uap)
201305ad
DRJ
1179{
1180 struct thread *td = curthread;
1181 struct sockopt sopt;
1182 int error, valsize;
1183
984263bc 1184 if (uap->val) {
201305ad
DRJ
1185 error = copyin(uap->avalsize, &valsize, sizeof(valsize));
1186 if (error)
984263bc 1187 return (error);
984263bc
MD
1188 } else {
1189 valsize = 0;
1190 }
1191
984263bc
MD
1192 sopt.sopt_level = uap->level;
1193 sopt.sopt_name = uap->name;
201305ad 1194 sopt.sopt_valsize = valsize;
dadab5e9 1195 sopt.sopt_td = td;
aca22a94 1196 sopt.sopt_val = NULL;
984263bc 1197
aca22a94 1198 if (sopt.sopt_valsize < 0 || sopt.sopt_valsize > SOMAXOPT_SIZE)
b4354d10 1199 return (EINVAL);
792239df
AE
1200 if (uap->val) {
1201 sopt.sopt_val = kmalloc(sopt.sopt_valsize, M_TEMP, M_WAITOK);
1202 error = copyin(uap->val, sopt.sopt_val, sopt.sopt_valsize);
1203 if (error)
1204 goto out;
792239df 1205 }
aca22a94 1206
201305ad 1207 error = kern_getsockopt(uap->s, &sopt);
de0003fe
AE
1208 if (error)
1209 goto out;
1210 valsize = sopt.sopt_valsize;
1211 error = copyout(&valsize, uap->avalsize, sizeof(valsize));
1212 if (error)
1213 goto out;
792239df
AE
1214 if (uap->val)
1215 error = copyout(sopt.sopt_val, uap->val, sopt.sopt_valsize);
de0003fe 1216out:
792239df
AE
1217 if (uap->val)
1218 kfree(sopt.sopt_val, M_TEMP);
984263bc
MD
1219 return (error);
1220}
1221
1222/*
5969a6f1
DRJ
1223 * The second argument to kern_getsockname() is a handle to a struct sockaddr.
1224 * This allows kern_getsockname() to return a pointer to an allocated struct
1225 * sockaddr which must be freed later with FREE(). The caller must
1226 * initialize *name to NULL.
984263bc 1227 */
5969a6f1
DRJ
1228int
1229kern_getsockname(int s, struct sockaddr **name, int *namelen)
984263bc 1230{
dadab5e9
MD
1231 struct thread *td = curthread;
1232 struct proc *p = td->td_proc;
984263bc 1233 struct file *fp;
dadab5e9 1234 struct socket *so;
5969a6f1
DRJ
1235 struct sockaddr *sa = NULL;
1236 int error;
984263bc 1237
5969a6f1 1238 error = holdsock(p->p_fd, s, &fp);
984263bc
MD
1239 if (error)
1240 return (error);
5969a6f1 1241 if (*namelen < 0) {
9f87144f 1242 fdrop(fp);
984263bc
MD
1243 return (EINVAL);
1244 }
1245 so = (struct socket *)fp->f_data;
6b6e0885 1246 error = so_pru_sockaddr(so, &sa);
5969a6f1 1247 if (error == 0) {
b4354d10 1248 if (sa == NULL) {
5969a6f1
DRJ
1249 *namelen = 0;
1250 } else {
1251 *namelen = MIN(*namelen, sa->sa_len);
1252 *name = sa;
1253 }
984263bc
MD
1254 }
1255
9f87144f 1256 fdrop(fp);
984263bc
MD
1257 return (error);
1258}
1259
5969a6f1
DRJ
1260/*
1261 * getsockname_args(int fdes, caddr_t asa, int *alen)
1262 *
1263 * Get socket name.
3919ced0
MD
1264 *
1265 * MPALMOSTSAFE
5969a6f1 1266 */
984263bc 1267int
753fd850 1268sys_getsockname(struct getsockname_args *uap)
984263bc 1269{
5969a6f1
DRJ
1270 struct sockaddr *sa = NULL;
1271 int error, sa_len;
1272
1273 error = copyin(uap->alen, &sa_len, sizeof(sa_len));
1274 if (error)
1275 return (error);
1276
1277 error = kern_getsockname(uap->fdes, &sa, &sa_len);
984263bc 1278
5969a6f1
DRJ
1279 if (error == 0)
1280 error = copyout(sa, uap->asa, sa_len);
1281 if (error == 0)
1282 error = copyout(&sa_len, uap->alen, sizeof(*uap->alen));
1283 if (sa)
1284 FREE(sa, M_SONAME);
1285 return (error);
984263bc
MD
1286}
1287
984263bc 1288/*
5969a6f1
DRJ
1289 * The second argument to kern_getpeername() is a handle to a struct sockaddr.
1290 * This allows kern_getpeername() to return a pointer to an allocated struct
1291 * sockaddr which must be freed later with FREE(). The caller must
1292 * initialize *name to NULL.
984263bc 1293 */
5969a6f1
DRJ
1294int
1295kern_getpeername(int s, struct sockaddr **name, int *namelen)
984263bc 1296{
dadab5e9
MD
1297 struct thread *td = curthread;
1298 struct proc *p = td->td_proc;
984263bc 1299 struct file *fp;
dadab5e9 1300 struct socket *so;
5969a6f1
DRJ
1301 struct sockaddr *sa = NULL;
1302 int error;
984263bc 1303
5969a6f1 1304 error = holdsock(p->p_fd, s, &fp);
984263bc
MD
1305 if (error)
1306 return (error);
5969a6f1 1307 if (*namelen < 0) {
9f87144f 1308 fdrop(fp);
5969a6f1
DRJ
1309 return (EINVAL);
1310 }
984263bc
MD
1311 so = (struct socket *)fp->f_data;
1312 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
9f87144f 1313 fdrop(fp);
984263bc
MD
1314 return (ENOTCONN);
1315 }
6b6e0885 1316 error = so_pru_peeraddr(so, &sa);
5969a6f1 1317 if (error == 0) {
b4354d10 1318 if (sa == NULL) {
5969a6f1
DRJ
1319 *namelen = 0;
1320 } else {
1321 *namelen = MIN(*namelen, sa->sa_len);
1322 *name = sa;
1323 }
984263bc 1324 }
5969a6f1 1325
9f87144f 1326 fdrop(fp);
984263bc
MD
1327 return (error);
1328}
1329
5969a6f1
DRJ
1330/*
1331 * getpeername_args(int fdes, caddr_t asa, int *alen)
1332 *
1333 * Get name of peer for connected socket.
3919ced0
MD
1334 *
1335 * MPALMOSTSAFE
5969a6f1 1336 */
984263bc 1337int
753fd850 1338sys_getpeername(struct getpeername_args *uap)
984263bc 1339{
5969a6f1
DRJ
1340 struct sockaddr *sa = NULL;
1341 int error, sa_len;
1342
1343 error = copyin(uap->alen, &sa_len, sizeof(sa_len));
1344 if (error)
1345 return (error);
1346
1347 error = kern_getpeername(uap->fdes, &sa, &sa_len);
1348
1349 if (error == 0)
1350 error = copyout(sa, uap->asa, sa_len);
1351 if (error == 0)
1352 error = copyout(&sa_len, uap->alen, sizeof(*uap->alen));
1353 if (sa)
1354 FREE(sa, M_SONAME);
1355 return (error);
984263bc
MD
1356}
1357
984263bc 1358int
02844a31 1359getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len)
984263bc
MD
1360{
1361 struct sockaddr *sa;
1362 int error;
1363
02844a31 1364 *namp = NULL;
984263bc
MD
1365 if (len > SOCK_MAXADDRLEN)
1366 return ENAMETOOLONG;
02844a31
MD
1367 if (len < offsetof(struct sockaddr, sa_data[0]))
1368 return EDOM;
984263bc
MD
1369 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1370 error = copyin(uaddr, sa, len);
1371 if (error) {
1372 FREE(sa, M_SONAME);
1373 } else {
75a872f8
DRJ
1374#if BYTE_ORDER != BIG_ENDIAN
1375 /*
1376 * The bind(), connect(), and sendto() syscalls were not
1377 * versioned for COMPAT_43. Thus, this check must stay.
1378 */
984263bc
MD
1379 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1380 sa->sa_family = sa->sa_len;
1381#endif
1382 sa->sa_len = len;
1383 *namp = sa;
1384 }
1385 return error;
1386}
1387
1388/*
b4caac98
MD
1389 * Detach a mapped page and release resources back to the system.
1390 * We must release our wiring and if the object is ripped out
1391 * from under the vm_page we become responsible for freeing the
b5c4d81f 1392 * page.
b4caac98 1393 *
b5c4d81f 1394 * MPSAFE
b4caac98
MD
1395 */
1396static void
013a4c0e 1397sf_buf_mfree(void *arg)
b4caac98 1398{
5c5185ae 1399 struct sf_buf *sf = arg;
b4caac98 1400 vm_page_t m;
e66bab2b 1401
5c5185ae 1402 m = sf_buf_page(sf);
58c2553a
MD
1403 if (sf_buf_free(sf)) {
1404 /* sf invalid now */
b12defdc 1405 vm_page_busy_wait(m, FALSE, "sockpgf");
321e057f 1406 vm_page_unwire(m, 0);
b12defdc 1407 vm_page_wakeup(m);
321e057f
SZ
1408 if (m->wire_count == 0 && m->object == NULL)
1409 vm_page_try_to_free(m);
b4caac98
MD
1410 }
1411}
1412
1413/*
984263bc
MD
1414 * sendfile(2).
1415 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1416 * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1417 *
1418 * Send a file specified by 'fd' and starting at 'offset' to a socket
1419 * specified by 's'. Send only 'nbytes' of the file or until EOF if
1420 * nbytes == 0. Optionally add a header and/or trailer to the socket
1421 * output. If specified, write the total number of bytes sent into *sbytes.
75a872f8
DRJ
1422 *
1423 * In FreeBSD kern/uipc_syscalls.c,v 1.103, a bug was fixed that caused
1424 * the headers to count against the remaining bytes to be sent from
1425 * the file descriptor. We may wish to implement a compatibility syscall
1426 * in the future.
3919ced0
MD
1427 *
1428 * MPALMOSTSAFE
984263bc
MD
1429 */
1430int
753fd850 1431sys_sendfile(struct sendfile_args *uap)
984263bc 1432{
dadab5e9
MD
1433 struct thread *td = curthread;
1434 struct proc *p = td->td_proc;
984263bc 1435 struct file *fp;
75a872f8 1436 struct vnode *vp = NULL;
984263bc 1437 struct sf_hdtr hdtr;
75a872f8
DRJ
1438 struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
1439 struct uio auio;
30eeba44 1440 struct mbuf *mheader = NULL;
e54488bb
MD
1441 size_t hbytes = 0;
1442 size_t tbytes;
1443 off_t hdtr_size = 0;
1444 off_t sbytes;
1445 int error;
984263bc 1446
dadab5e9 1447 KKASSERT(p);
dadab5e9 1448
984263bc
MD
1449 /*
1450 * Do argument checking. Must be a regular file in, stream
1451 * type and connected socket out, positive offset.
1452 */
fa541be6 1453 fp = holdfp(p->p_fd, uap->fd, FREAD);
984263bc 1454 if (fp == NULL) {
f0846490 1455 return (EBADF);
984263bc
MD
1456 }
1457 if (fp->f_type != DTYPE_VNODE) {
9f87144f 1458 fdrop(fp);
f0846490 1459 return (EINVAL);
984263bc
MD
1460 }
1461 vp = (struct vnode *)fp->f_data;
1462 vref(vp);
9f87144f 1463 fdrop(fp);
75a872f8
DRJ
1464
1465 /*
1466 * If specified, get the pointer to the sf_hdtr struct for
1467 * any headers/trailers.
1468 */
1469 if (uap->hdtr) {
1470 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1471 if (error)
1472 goto done;
1473 /*
1474 * Send any headers.
1475 */
1476 if (hdtr.headers) {
1477 error = iovec_copyin(hdtr.headers, &iov, aiov,
ef5c76d7 1478 hdtr.hdr_cnt, &hbytes);
75a872f8
DRJ
1479 if (error)
1480 goto done;
1481 auio.uio_iov = iov;
1482 auio.uio_iovcnt = hdtr.hdr_cnt;
1483 auio.uio_offset = 0;
1484 auio.uio_segflg = UIO_USERSPACE;
1485 auio.uio_rw = UIO_WRITE;
1486 auio.uio_td = td;
30eeba44 1487 auio.uio_resid = hbytes;
75a872f8 1488
e12241e1 1489 mheader = m_uiomove(&auio);
75a872f8
DRJ
1490
1491 iovec_free(&iov, aiov);
30eeba44 1492 if (mheader == NULL)
75a872f8 1493 goto done;
75a872f8
DRJ
1494 }
1495 }
1496
30eeba44 1497 error = kern_sendfile(vp, uap->s, uap->offset, uap->nbytes, mheader,
3919ced0 1498 &sbytes, uap->flags);
75a872f8
DRJ
1499 if (error)
1500 goto done;
1501
1502 /*
1503 * Send trailers. Wimp out and use writev(2).
1504 */
1505 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
1506 error = iovec_copyin(hdtr.trailers, &iov, aiov,
ef5c76d7 1507 hdtr.trl_cnt, &auio.uio_resid);
75a872f8
DRJ
1508 if (error)
1509 goto done;
1510 auio.uio_iov = iov;
1511 auio.uio_iovcnt = hdtr.trl_cnt;
1512 auio.uio_offset = 0;
1513 auio.uio_segflg = UIO_USERSPACE;
1514 auio.uio_rw = UIO_WRITE;
1515 auio.uio_td = td;
1516
30eeba44 1517 error = kern_sendmsg(uap->s, NULL, &auio, NULL, 0, &tbytes);
75a872f8
DRJ
1518
1519 iovec_free(&iov, aiov);
1520 if (error)
1521 goto done;
30eeba44 1522 hdtr_size += tbytes; /* trailer bytes successfully sent */
75a872f8
DRJ
1523 }
1524
1525done:
3919ced0
MD
1526 if (vp)
1527 vrele(vp);
75a872f8
DRJ
1528 if (uap->sbytes != NULL) {
1529 sbytes += hdtr_size;
1530 copyout(&sbytes, uap->sbytes, sizeof(off_t));
1531 }
75a872f8
DRJ
1532 return (error);
1533}
1534
1535int
06ecca5a 1536kern_sendfile(struct vnode *vp, int sfd, off_t offset, size_t nbytes,
b5c4d81f 1537 struct mbuf *mheader, off_t *sbytes, int flags)
75a872f8
DRJ
1538{
1539 struct thread *td = curthread;
1540 struct proc *p = td->td_proc;
1541 struct vm_object *obj;
1542 struct socket *so;
285332f0 1543 struct file *fp;
75a872f8
DRJ
1544 struct mbuf *m;
1545 struct sf_buf *sf;
1546 struct vm_page *pg;
1547 off_t off, xfsize;
39b3370f 1548 off_t hbytes = 0;
75a872f8
DRJ
1549 int error = 0;
1550
7540ab49
MD
1551 if (vp->v_type != VREG) {
1552 error = EINVAL;
1553 goto done0;
1554 }
1555 if ((obj = vp->v_object) == NULL) {
984263bc 1556 error = EINVAL;
285332f0 1557 goto done0;
984263bc 1558 }
06ecca5a 1559 error = holdsock(p->p_fd, sfd, &fp);
984263bc 1560 if (error)
285332f0 1561 goto done0;
984263bc
MD
1562 so = (struct socket *)fp->f_data;
1563 if (so->so_type != SOCK_STREAM) {
1564 error = EINVAL;
1565 goto done;
1566 }
1567 if ((so->so_state & SS_ISCONNECTED) == 0) {
1568 error = ENOTCONN;
1569 goto done;
1570 }
75a872f8 1571 if (offset < 0) {
984263bc
MD
1572 error = EINVAL;
1573 goto done;
1574 }
1575
75a872f8 1576 *sbytes = 0;
984263bc
MD
1577 /*
1578 * Protect against multiple writers to the socket.
1579 */
6d49aa6f 1580 ssb_lock(&so->so_snd, M_WAITOK);
984263bc
MD
1581
1582 /*
1583 * Loop through the pages in the file, starting with the requested
1584 * offset. Get a file page (do I/O if necessary), map the file page
1585 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1586 * it on the socket.
1587 */
39b3370f 1588 for (off = offset; ; off += xfsize, *sbytes += xfsize + hbytes) {
984263bc
MD
1589 vm_pindex_t pindex;
1590 vm_offset_t pgoff;
1591
1592 pindex = OFF_TO_IDX(off);
1593retry_lookup:
1594 /*
1595 * Calculate the amount to transfer. Not to exceed a page,
1596 * the EOF, or the passed in nbytes.
1597 */
57f7b636 1598 xfsize = vp->v_filesize - off;
984263bc
MD
1599 if (xfsize > PAGE_SIZE)
1600 xfsize = PAGE_SIZE;
1601 pgoff = (vm_offset_t)(off & PAGE_MASK);
1602 if (PAGE_SIZE - pgoff < xfsize)
1603 xfsize = PAGE_SIZE - pgoff;
75a872f8
DRJ
1604 if (nbytes && xfsize > (nbytes - *sbytes))
1605 xfsize = nbytes - *sbytes;
984263bc
MD
1606 if (xfsize <= 0)
1607 break;
1608 /*
1609 * Optimize the non-blocking case by looking at the socket space
1610 * before going to the extra work of constituting the sf_buf.
1611 */
6d49aa6f 1612 if ((fp->f_flag & FNONBLOCK) && ssb_space(&so->so_snd) <= 0) {
984263bc
MD
1613 if (so->so_state & SS_CANTSENDMORE)
1614 error = EPIPE;
1615 else
1616 error = EAGAIN;
6d49aa6f 1617 ssb_unlock(&so->so_snd);
984263bc
MD
1618 goto done;
1619 }
1620 /*
1621 * Attempt to look up the page.
1622 *
06ecca5a 1623 * Allocate if not found, wait and loop if busy, then
5fd012e0
MD
1624 * wire the page. critical section protection is
1625 * required to maintain the object association (an
1626 * interrupt can free the page) through to the
1627 * vm_page_wire() call.
984263bc 1628 */
b12defdc
MD
1629 vm_object_hold(obj);
1630 pg = vm_page_lookup_busy_try(obj, pindex, TRUE, &error);
1631 if (error) {
1632 vm_page_sleep_busy(pg, TRUE, "sfpbsy");
1633 vm_object_drop(obj);
1634 goto retry_lookup;
1635 }
984263bc 1636 if (pg == NULL) {
d2d8515b
MD
1637 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL |
1638 VM_ALLOC_NULL_OK);
984263bc 1639 if (pg == NULL) {
4ecf7cc9 1640 vm_wait(0);
b12defdc 1641 vm_object_drop(obj);
984263bc
MD
1642 goto retry_lookup;
1643 }
984263bc 1644 }
b12defdc
MD
1645 vm_page_wire(pg);
1646 vm_object_drop(obj);
984263bc
MD
1647
1648 /*
1649 * If page is not valid for what we need, initiate I/O
1650 */
1651
1652 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1653 struct uio auio;
1654 struct iovec aiov;
1655 int bsize;
1656
1657 /*
1658 * Ensure that our page is still around when the I/O
1659 * completes.
1660 */
1661 vm_page_io_start(pg);
b12defdc 1662 vm_page_wakeup(pg);
984263bc
MD
1663
1664 /*
1665 * Get the page from backing store.
1666 */
1667 bsize = vp->v_mount->mnt_stat.f_iosize;
1668 auio.uio_iov = &aiov;
1669 auio.uio_iovcnt = 1;
1670 aiov.iov_base = 0;
1671 aiov.iov_len = MAXBSIZE;
1672 auio.uio_resid = MAXBSIZE;
1673 auio.uio_offset = trunc_page(off);
1674 auio.uio_segflg = UIO_NOCOPY;
1675 auio.uio_rw = UIO_READ;
dadab5e9 1676 auio.uio_td = td;
ab6f251b 1677 vn_lock(vp, LK_SHARED | LK_RETRY);
dadab5e9
MD
1678 error = VOP_READ(vp, &auio,
1679 IO_VMIO | ((MAXBSIZE / bsize) << 16),
9910d07b 1680 td->td_ucred);
a11aaa81 1681 vn_unlock(vp);
984263bc 1682 vm_page_flag_clear(pg, PG_ZERO);
b12defdc 1683 vm_page_busy_wait(pg, FALSE, "sockpg");
984263bc
MD
1684 vm_page_io_finish(pg);
1685 if (error) {
1686 vm_page_unwire(pg, 0);
b12defdc 1687 vm_page_wakeup(pg);
f2555cdd 1688 vm_page_try_to_free(pg);
6d49aa6f 1689 ssb_unlock(&so->so_snd);
984263bc
MD
1690 goto done;
1691 }
1692 }
1693
1694
1695 /*
1696 * Get a sendfile buf. We usually wait as long as necessary,
1697 * but this wait can be interrupted.
1698 */
5c5185ae 1699 if ((sf = sf_buf_alloc(pg)) == NULL) {
984263bc 1700 vm_page_unwire(pg, 0);
b12defdc 1701 vm_page_wakeup(pg);
f2555cdd 1702 vm_page_try_to_free(pg);
6d49aa6f 1703 ssb_unlock(&so->so_snd);
984263bc
MD
1704 error = EINTR;
1705 goto done;
1706 }
b12defdc 1707 vm_page_wakeup(pg);
984263bc 1708
984263bc
MD
1709 /*
1710 * Get an mbuf header and set it up as having external storage.
1711 */
74f1caca 1712 MGETHDR(m, MB_WAIT, MT_DATA);
984263bc
MD
1713 if (m == NULL) {
1714 error = ENOBUFS;
b4caac98 1715 sf_buf_free(sf);
6d49aa6f 1716 ssb_unlock(&so->so_snd);
984263bc
MD
1717 goto done;
1718 }
e66bab2b 1719
b542cd49 1720 m->m_ext.ext_free = sf_buf_mfree;
5c5185ae
SG
1721 m->m_ext.ext_ref = sf_buf_ref;
1722 m->m_ext.ext_arg = sf;
1723 m->m_ext.ext_buf = (void *)sf_buf_kva(sf);
984263bc 1724 m->m_ext.ext_size = PAGE_SIZE;
5c5185ae 1725 m->m_data = (char *)sf_buf_kva(sf) + pgoff;
013a4c0e 1726 m->m_flags |= M_EXT;
984263bc 1727 m->m_pkthdr.len = m->m_len = xfsize;
b542cd49 1728 KKASSERT((m->m_flags & (M_EXT_CLUSTER)) == 0);
30eeba44 1729
39b3370f
JH
1730 if (mheader != NULL) {
1731 hbytes = mheader->m_pkthdr.len;
30eeba44
JH
1732 mheader->m_pkthdr.len += m->m_pkthdr.len;
1733 m_cat(mheader, m);
1734 m = mheader;
1735 mheader = NULL;
39b3370f
JH
1736 } else
1737 hbytes = 0;
30eeba44 1738
984263bc
MD
1739 /*
1740 * Add the buffer to the socket buffer chain.
1741 */
5fd012e0 1742 crit_enter();
984263bc
MD
1743retry_space:
1744 /*
1745 * Make sure that the socket is still able to take more data.
1746 * CANTSENDMORE being true usually means that the connection
1747 * was closed. so_error is true when an error was sensed after
1748 * a previous send.
1749 * The state is checked after the page mapping and buffer
1750 * allocation above since those operations may block and make
1751 * any socket checks stale. From this point forward, nothing
1752 * blocks before the pru_send (or more accurately, any blocking
1753 * results in a loop back to here to re-check).
1754 */
1755 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1756 if (so->so_state & SS_CANTSENDMORE) {
1757 error = EPIPE;
1758 } else {
1759 error = so->so_error;
1760 so->so_error = 0;
1761 }
1762 m_freem(m);
6d49aa6f 1763 ssb_unlock(&so->so_snd);
5fd012e0 1764 crit_exit();
984263bc
MD
1765 goto done;
1766 }
1767 /*
1768 * Wait for socket space to become available. We do this just
1769 * after checking the connection state above in order to avoid
6d49aa6f 1770 * a race condition with ssb_wait().
984263bc 1771 */
6d49aa6f 1772 if (ssb_space(&so->so_snd) < so->so_snd.ssb_lowat) {
9ba76b73 1773 if (fp->f_flag & FNONBLOCK) {
984263bc 1774 m_freem(m);
6d49aa6f 1775 ssb_unlock(&so->so_snd);
5fd012e0 1776 crit_exit();
984263bc
MD
1777 error = EAGAIN;
1778 goto done;
1779 }
6d49aa6f 1780 error = ssb_wait(&so->so_snd);
984263bc 1781 /*
6d49aa6f 1782 * An error from ssb_wait usually indicates that we've
984263bc
MD
1783 * been interrupted by a signal. If we've sent anything
1784 * then return bytes sent, otherwise return the error.
1785 */
1786 if (error) {
1787 m_freem(m);
6d49aa6f 1788 ssb_unlock(&so->so_snd);
5fd012e0 1789 crit_exit();
984263bc
MD
1790 goto done;
1791 }
1792 goto retry_space;
1793 }
084009e2 1794 error = so_pru_senda(so, 0, m, NULL, NULL, td);
5fd012e0 1795 crit_exit();
984263bc 1796 if (error) {
6d49aa6f 1797 ssb_unlock(&so->so_snd);
984263bc
MD
1798 goto done;
1799 }
1800 }
d785f69d 1801 if (mheader != NULL) {
39b3370f 1802 *sbytes += mheader->m_pkthdr.len;
084009e2 1803 error = so_pru_senda(so, 0, mheader, NULL, NULL, td);
d785f69d
JH
1804 mheader = NULL;
1805 }
6d49aa6f 1806 ssb_unlock(&so->so_snd);
984263bc 1807
984263bc 1808done:
9f87144f 1809 fdrop(fp);
285332f0 1810done0:
30eeba44
JH
1811 if (mheader != NULL)
1812 m_freem(mheader);
984263bc
MD
1813 return (error);
1814}
78812139 1815
3919ced0
MD
1816/*
1817 * MPALMOSTSAFE
1818 */
78812139 1819int
753fd850 1820sys_sctp_peeloff(struct sctp_peeloff_args *uap)
78812139
EN
1821{
1822#ifdef SCTP
1823 struct thread *td = curthread;
f3a2d8c4 1824 struct filedesc *fdp = td->td_proc->p_fd;
78812139
EN
1825 struct file *lfp = NULL;
1826 struct file *nfp = NULL;
1827 int error;
1828 struct socket *head, *so;
1829 caddr_t assoc_id;
1830 int fd;
1831 short fflag; /* type must match fp->f_flag */
1832
1833 assoc_id = uap->name;
ef2b8c7d 1834 error = holdsock(td->td_proc->p_fd, uap->sd, &lfp);
3919ced0 1835 if (error)
78812139 1836 return (error);
3919ced0 1837
78812139
EN
1838 crit_enter();
1839 head = (struct socket *)lfp->f_data;
1840 error = sctp_can_peel_off(head, assoc_id);
1841 if (error) {
1842 crit_exit();
1843 goto done;
1844 }
1845 /*
1846 * At this point we know we do have a assoc to pull
1847 * we proceed to get the fd setup. This may block
1848 * but that is ok.
1849 */
1850
1851 fflag = lfp->f_flag;
f3a2d8c4 1852 error = falloc(td->td_lwp, &nfp, &fd);
78812139
EN
1853 if (error) {
1854 /*
1855 * Probably ran out of file descriptors. Put the
1856 * unaccepted connection back onto the queue and
1857 * do another wakeup so some other process might
1858 * have a chance at it.
1859 */
1860 crit_exit();
1861 goto done;
1862 }
e54488bb 1863 uap->sysmsg_iresult = fd;
78812139
EN
1864
1865 so = sctp_get_peeloff(head, assoc_id, &error);
1866 if (so == NULL) {
1867 /*
1868 * Either someone else peeled it off OR
1869 * we can't get a socket.
1870 */
1871 goto noconnection;
1872 }
6cef7136
MD
1873 soreference(so); /* reference needed */
1874 soclrstate(so, SS_NOFDREF | SS_COMP); /* when clearing NOFDREF */
78812139
EN
1875 so->so_head = NULL;
1876 if (head->so_sigio != NULL)
b5c4d81f 1877 fsetown(fgetown(&head->so_sigio), &so->so_sigio);
78812139 1878
fbb4eeab 1879 nfp->f_type = DTYPE_SOCKET;
78812139
EN
1880 nfp->f_flag = fflag;
1881 nfp->f_ops = &socketops;
fbb4eeab 1882 nfp->f_data = so;
78812139
EN
1883
1884noconnection:
1885 /*
259b8ea0
MD
1886 * Assign the file pointer to the reserved descriptor, or clear
1887 * the reserved descriptor if an error occured.
78812139 1888 */
fa541be6 1889 if (error)
f3a2d8c4 1890 fsetfd(fdp, NULL, fd);
259b8ea0 1891 else
f3a2d8c4 1892 fsetfd(fdp, nfp, fd);
78812139
EN
1893 crit_exit();
1894 /*
1895 * Release explicitly held references before returning.
1896 */
1897done:
1898 if (nfp != NULL)
9f87144f
MD
1899 fdrop(nfp);
1900 fdrop(lfp);
78812139
EN
1901 return (error);
1902#else /* SCTP */
1903 return(EOPNOTSUPP);
1904#endif /* SCTP */
1905}