kernel - Introduce lightweight buffers
[dragonfly.git] / sys / kern / uipc_syscalls.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
37 * $FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.65.2.17 2003/04/04 17:11:16 tegge Exp $
aca22a94 38 * $DragonFly: src/sys/kern/uipc_syscalls.c,v 1.92 2008/11/26 13:10:56 sephe Exp $
984263bc
MD
39 */
40
984263bc 41#include "opt_ktrace.h"
78812139 42#include "opt_sctp.h"
984263bc
MD
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/kernel.h>
47#include <sys/sysproto.h>
48#include <sys/malloc.h>
49#include <sys/filedesc.h>
50#include <sys/event.h>
51#include <sys/proc.h>
52#include <sys/fcntl.h>
53#include <sys/file.h>
54#include <sys/filio.h>
5969a6f1 55#include <sys/kern_syscall.h>
984263bc
MD
56#include <sys/mbuf.h>
57#include <sys/protosw.h>
4860553a 58#include <sys/sfbuf.h>
984263bc
MD
59#include <sys/socket.h>
60#include <sys/socketvar.h>
6b6e0885 61#include <sys/socketops.h>
984263bc
MD
62#include <sys/uio.h>
63#include <sys/vnode.h>
64#include <sys/lock.h>
65#include <sys/mount.h>
66#ifdef KTRACE
67#include <sys/ktrace.h>
68#endif
69#include <vm/vm.h>
70#include <vm/vm_object.h>
71#include <vm/vm_page.h>
72#include <vm/vm_pageout.h>
73#include <vm/vm_kern.h>
74#include <vm/vm_extern.h>
dadab5e9 75#include <sys/file2.h>
770d4c4d 76#include <sys/signalvar.h>
df8d1020 77#include <sys/serialize.h>
984263bc 78
b44419cb
MD
79#include <sys/thread2.h>
80#include <sys/msgport2.h>
d6cb521d 81#include <sys/socketvar2.h>
684a93c4 82#include <sys/mplock2.h>
4599cf19 83#include <net/netmsg2.h>
b44419cb 84
78812139
EN
85#ifdef SCTP
86#include <netinet/sctp_peeloff.h>
87#endif /* SCTP */
88
984263bc
MD
89/*
90 * System call interface to the socket abstraction.
91 */
984263bc
MD
92
93extern struct fileops socketops;
94
41c20dac
MD
95/*
96 * socket_args(int domain, int type, int protocol)
97 */
984263bc 98int
75a872f8 99kern_socket(int domain, int type, int protocol, int *res)
984263bc 100{
dadab5e9 101 struct thread *td = curthread;
f3a2d8c4 102 struct filedesc *fdp = td->td_proc->p_fd;
984263bc
MD
103 struct socket *so;
104 struct file *fp;
105 int fd, error;
106
f3a2d8c4 107 KKASSERT(td->td_lwp);
dadab5e9 108
f3a2d8c4 109 error = falloc(td->td_lwp, &fp, &fd);
984263bc
MD
110 if (error)
111 return (error);
75a872f8 112 error = socreate(domain, &so, type, protocol, td);
984263bc 113 if (error) {
f3a2d8c4 114 fsetfd(fdp, NULL, fd);
984263bc 115 } else {
984263bc 116 fp->f_type = DTYPE_SOCKET;
fbb4eeab
JH
117 fp->f_flag = FREAD | FWRITE;
118 fp->f_ops = &socketops;
119 fp->f_data = so;
75a872f8 120 *res = fd;
f3a2d8c4 121 fsetfd(fdp, fp, fd);
984263bc 122 }
9f87144f 123 fdrop(fp);
984263bc
MD
124 return (error);
125}
126
3919ced0
MD
127/*
128 * MPALMOSTSAFE
129 */
02844a31 130int
753fd850 131sys_socket(struct socket_args *uap)
75a872f8
DRJ
132{
133 int error;
134
3919ced0 135 get_mplock();
75a872f8 136 error = kern_socket(uap->domain, uap->type, uap->protocol,
e54488bb 137 &uap->sysmsg_iresult);
3919ced0 138 rel_mplock();
75a872f8
DRJ
139
140 return (error);
141}
6b6e0885 142
75a872f8 143int
5969a6f1 144kern_bind(int s, struct sockaddr *sa)
984263bc 145{
dadab5e9
MD
146 struct thread *td = curthread;
147 struct proc *p = td->td_proc;
984263bc 148 struct file *fp;
984263bc
MD
149 int error;
150
dadab5e9 151 KKASSERT(p);
d83b97b9 152 error = holdsock(p->p_fd, s, &fp);
984263bc
MD
153 if (error)
154 return (error);
d83b97b9 155 error = sobind((struct socket *)fp->f_data, sa, td);
9f87144f 156 fdrop(fp);
d83b97b9
MD
157 return (error);
158}
159
160/*
161 * bind_args(int s, caddr_t name, int namelen)
3919ced0
MD
162 *
163 * MPALMOSTSAFE
d83b97b9
MD
164 */
165int
753fd850 166sys_bind(struct bind_args *uap)
d83b97b9
MD
167{
168 struct sockaddr *sa;
169 int error;
170
984263bc 171 error = getsockaddr(&sa, uap->name, uap->namelen);
d83b97b9 172 if (error)
984263bc 173 return (error);
3919ced0 174 get_mplock();
5969a6f1 175 error = kern_bind(uap->s, sa);
3919ced0 176 rel_mplock();
984263bc 177 FREE(sa, M_SONAME);
d83b97b9 178
984263bc
MD
179 return (error);
180}
181
984263bc 182int
5969a6f1 183kern_listen(int s, int backlog)
984263bc 184{
dadab5e9
MD
185 struct thread *td = curthread;
186 struct proc *p = td->td_proc;
984263bc
MD
187 struct file *fp;
188 int error;
189
dadab5e9 190 KKASSERT(p);
5969a6f1 191 error = holdsock(p->p_fd, s, &fp);
984263bc
MD
192 if (error)
193 return (error);
5969a6f1 194 error = solisten((struct socket *)fp->f_data, backlog, td);
9f87144f 195 fdrop(fp);
984263bc
MD
196 return(error);
197}
198
41c20dac 199/*
5969a6f1 200 * listen_args(int s, int backlog)
3919ced0
MD
201 *
202 * MPALMOSTSAFE
5969a6f1
DRJ
203 */
204int
753fd850 205sys_listen(struct listen_args *uap)
5969a6f1
DRJ
206{
207 int error;
208
3919ced0 209 get_mplock();
5969a6f1 210 error = kern_listen(uap->s, uap->backlog);
3919ced0 211 rel_mplock();
5969a6f1
DRJ
212 return (error);
213}
214
215/*
f172717f
JH
216 * Returns the accepted socket as well.
217 */
218static boolean_t
219soaccept_predicate(struct netmsg *msg0)
220{
221 struct netmsg_so_notify *msg = (struct netmsg_so_notify *)msg0;
222 struct socket *head = msg->nm_so;
223
224 if (head->so_error != 0) {
4599cf19 225 msg->nm_netmsg.nm_lmsg.ms_error = head->so_error;
f172717f
JH
226 return (TRUE);
227 }
228 if (!TAILQ_EMPTY(&head->so_comp)) {
229 /* Abuse nm_so field as copy in/copy out parameter. XXX JH */
230 msg->nm_so = TAILQ_FIRST(&head->so_comp);
231 TAILQ_REMOVE(&head->so_comp, msg->nm_so, so_list);
232 head->so_qlen--;
233
4599cf19 234 msg->nm_netmsg.nm_lmsg.ms_error = 0;
f172717f
JH
235 return (TRUE);
236 }
237 if (head->so_state & SS_CANTRCVMORE) {
4599cf19 238 msg->nm_netmsg.nm_lmsg.ms_error = ECONNABORTED;
f172717f
JH
239 return (TRUE);
240 }
9ba76b73 241 if (msg->nm_fflags & FNONBLOCK) {
4599cf19 242 msg->nm_netmsg.nm_lmsg.ms_error = EWOULDBLOCK;
f172717f
JH
243 return (TRUE);
244 }
245
246 return (FALSE);
247}
248
249/*
5969a6f1
DRJ
250 * The second argument to kern_accept() is a handle to a struct sockaddr.
251 * This allows kern_accept() to return a pointer to an allocated struct
d83b97b9
MD
252 * sockaddr which must be freed later with FREE(). The caller must
253 * initialize *name to NULL.
41c20dac 254 */
02844a31 255int
358e1f78 256kern_accept(int s, int fflags, struct sockaddr **name, int *namelen, int *res)
984263bc 257{
dadab5e9 258 struct thread *td = curthread;
f3a2d8c4 259 struct filedesc *fdp = td->td_proc->p_fd;
984263bc
MD
260 struct file *lfp = NULL;
261 struct file *nfp = NULL;
262 struct sockaddr *sa;
984263bc 263 struct socket *head, *so;
f172717f 264 struct netmsg_so_notify msg;
984263bc
MD
265 int fd;
266 u_int fflag; /* type must match fp->f_flag */
f172717f 267 int error, tmp;
984263bc 268
259b8ea0 269 *res = -1;
d83b97b9
MD
270 if (name && namelen && *namelen < 0)
271 return (EINVAL);
272
f3a2d8c4 273 error = holdsock(td->td_proc->p_fd, s, &lfp);
984263bc
MD
274 if (error)
275 return (error);
f172717f 276
f3a2d8c4 277 error = falloc(td->td_lwp, &nfp, &fd);
f172717f 278 if (error) { /* Probably ran out of file descriptors. */
9f87144f 279 fdrop(lfp);
f172717f
JH
280 return (error);
281 }
984263bc
MD
282 head = (struct socket *)lfp->f_data;
283 if ((head->so_options & SO_ACCEPTCONN) == 0) {
984263bc
MD
284 error = EINVAL;
285 goto done;
286 }
f172717f 287
358e1f78
MD
288 if (fflags & O_FBLOCKING)
289 fflags |= lfp->f_flag & ~FNONBLOCK;
290 else if (fflags & O_FNONBLOCKING)
291 fflags |= lfp->f_flag | FNONBLOCK;
292 else
293 fflags = lfp->f_flag;
294
f172717f 295 /* optimize for uniprocessor case later XXX JH */
48e7b118
MD
296 netmsg_init_abortable(&msg.nm_netmsg, head, &curthread->td_msgport,
297 0, netmsg_so_notify, netmsg_so_notify_doabort);
f172717f 298 msg.nm_predicate = soaccept_predicate;
358e1f78 299 msg.nm_fflags = fflags;
f172717f
JH
300 msg.nm_so = head;
301 msg.nm_etype = NM_REVENT;
48e7b118 302 error = lwkt_domsg(head->so_port, &msg.nm_netmsg.nm_lmsg, PCATCH);
f172717f 303 if (error)
984263bc 304 goto done;
984263bc
MD
305
306 /*
f172717f 307 * At this point we have the connection that's ready to be accepted.
984263bc 308 */
f172717f 309 so = msg.nm_so;
984263bc
MD
310
311 fflag = lfp->f_flag;
984263bc
MD
312
313 /* connection has been removed from the listen queue */
6d49aa6f 314 KNOTE(&head->so_rcv.ssb_sel.si_note, 0);
984263bc
MD
315
316 so->so_state &= ~SS_COMP;
317 so->so_head = NULL;
318 if (head->so_sigio != NULL)
319 fsetown(fgetown(head->so_sigio), &so->so_sigio);
320
fbb4eeab 321 nfp->f_type = DTYPE_SOCKET;
984263bc
MD
322 nfp->f_flag = fflag;
323 nfp->f_ops = &socketops;
fbb4eeab 324 nfp->f_data = so;
984263bc
MD
325 /* Sync socket nonblocking/async state with file flags */
326 tmp = fflag & FNONBLOCK;
9910d07b 327 fo_ioctl(nfp, FIONBIO, (caddr_t)&tmp, td->td_ucred, NULL);
984263bc 328 tmp = fflag & FASYNC;
9910d07b 329 fo_ioctl(nfp, FIOASYNC, (caddr_t)&tmp, td->td_ucred, NULL);
d83b97b9
MD
330
331 sa = NULL;
984263bc 332 error = soaccept(so, &sa);
d83b97b9
MD
333
334 /*
335 * Set the returned name and namelen as applicable. Set the returned
336 * namelen to 0 for older code which might ignore the return value
337 * from accept.
338 */
339 if (error == 0) {
340 if (sa && name && namelen) {
341 if (*namelen > sa->sa_len)
342 *namelen = sa->sa_len;
343 *name = sa;
344 } else {
345 if (sa)
346 FREE(sa, M_SONAME);
984263bc 347 }
984263bc 348 }
984263bc 349
f172717f 350done:
984263bc 351 /*
259b8ea0
MD
352 * If an error occured clear the reserved descriptor, else associate
353 * nfp with it.
354 *
355 * Note that *res is normally ignored if an error is returned but
356 * a syscall message will still have access to the result code.
984263bc
MD
357 */
358 if (error) {
f3a2d8c4 359 fsetfd(fdp, NULL, fd);
259b8ea0
MD
360 } else {
361 *res = fd;
f3a2d8c4 362 fsetfd(fdp, nfp, fd);
984263bc 363 }
259b8ea0 364 fdrop(nfp);
9f87144f 365 fdrop(lfp);
984263bc
MD
366 return (error);
367}
368
d83b97b9 369/*
358e1f78 370 * accept(int s, caddr_t name, int *anamelen)
3919ced0
MD
371 *
372 * MPALMOSTSAFE
d83b97b9 373 */
984263bc 374int
753fd850 375sys_accept(struct accept_args *uap)
984263bc 376{
d83b97b9
MD
377 struct sockaddr *sa = NULL;
378 int sa_len;
379 int error;
380
381 if (uap->name) {
382 error = copyin(uap->anamelen, &sa_len, sizeof(sa_len));
383 if (error)
384 return (error);
385
3919ced0 386 get_mplock();
e54488bb
MD
387 error = kern_accept(uap->s, 0, &sa, &sa_len,
388 &uap->sysmsg_iresult);
3919ced0 389 rel_mplock();
358e1f78
MD
390
391 if (error == 0)
392 error = copyout(sa, uap->name, sa_len);
393 if (error == 0) {
394 error = copyout(&sa_len, uap->anamelen,
395 sizeof(*uap->anamelen));
396 }
397 if (sa)
398 FREE(sa, M_SONAME);
399 } else {
3919ced0 400 get_mplock();
e54488bb
MD
401 error = kern_accept(uap->s, 0, NULL, 0,
402 &uap->sysmsg_iresult);
3919ced0 403 rel_mplock();
358e1f78
MD
404 }
405 return (error);
406}
407
408/*
b09fd398 409 * extaccept(int s, int fflags, caddr_t name, int *anamelen)
3919ced0
MD
410 *
411 * MPALMOSTSAFE
358e1f78
MD
412 */
413int
b09fd398 414sys_extaccept(struct extaccept_args *uap)
358e1f78
MD
415{
416 struct sockaddr *sa = NULL;
417 int sa_len;
418 int error;
419 int fflags = uap->flags & O_FMASK;
420
421 if (uap->name) {
422 error = copyin(uap->anamelen, &sa_len, sizeof(sa_len));
423 if (error)
424 return (error);
425
3919ced0 426 get_mplock();
e54488bb
MD
427 error = kern_accept(uap->s, fflags, &sa, &sa_len,
428 &uap->sysmsg_iresult);
3919ced0 429 rel_mplock();
d83b97b9
MD
430
431 if (error == 0)
432 error = copyout(sa, uap->name, sa_len);
433 if (error == 0) {
434 error = copyout(&sa_len, uap->anamelen,
435 sizeof(*uap->anamelen));
436 }
437 if (sa)
438 FREE(sa, M_SONAME);
439 } else {
3919ced0 440 get_mplock();
e54488bb
MD
441 error = kern_accept(uap->s, fflags, NULL, 0,
442 &uap->sysmsg_iresult);
3919ced0 443 rel_mplock();
d83b97b9
MD
444 }
445 return (error);
984263bc
MD
446}
447
358e1f78 448
b44419cb
MD
449/*
450 * Returns TRUE if predicate satisfied.
451 */
452static boolean_t
453soconnected_predicate(struct netmsg *msg0)
454{
455 struct netmsg_so_notify *msg = (struct netmsg_so_notify *)msg0;
456 struct socket *so = msg->nm_so;
457
458 /* check predicate */
459 if (!(so->so_state & SS_ISCONNECTING) || so->so_error != 0) {
4599cf19 460 msg->nm_netmsg.nm_lmsg.ms_error = so->so_error;
b44419cb
MD
461 return (TRUE);
462 }
463
464 return (FALSE);
465}
466
02844a31 467int
358e1f78 468kern_connect(int s, int fflags, struct sockaddr *sa)
984263bc 469{
dadab5e9
MD
470 struct thread *td = curthread;
471 struct proc *p = td->td_proc;
984263bc 472 struct file *fp;
dadab5e9 473 struct socket *so;
8765eadc 474 int error, interrupted = 0;
984263bc 475
d83b97b9 476 error = holdsock(p->p_fd, s, &fp);
984263bc
MD
477 if (error)
478 return (error);
479 so = (struct socket *)fp->f_data;
358e1f78
MD
480
481 if (fflags & O_FBLOCKING)
482 /* fflags &= ~FNONBLOCK; */;
483 else if (fflags & O_FNONBLOCKING)
484 fflags |= FNONBLOCK;
485 else
486 fflags = fp->f_flag;
487
8765eadc 488 if (so->so_state & SS_ISCONNECTING) {
984263bc
MD
489 error = EALREADY;
490 goto done;
491 }
dadab5e9 492 error = soconnect(so, sa, td);
984263bc
MD
493 if (error)
494 goto bad;
358e1f78 495 if ((fflags & FNONBLOCK) && (so->so_state & SS_ISCONNECTING)) {
984263bc
MD
496 error = EINPROGRESS;
497 goto done;
498 }
b44419cb
MD
499 if ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
500 struct netmsg_so_notify msg;
b44419cb 501
48e7b118 502 netmsg_init_abortable(&msg.nm_netmsg, so,
4599cf19 503 &curthread->td_msgport,
a22c590e 504 0,
4599cf19
MD
505 netmsg_so_notify,
506 netmsg_so_notify_doabort);
b44419cb
MD
507 msg.nm_predicate = soconnected_predicate;
508 msg.nm_so = so;
509 msg.nm_etype = NM_REVENT;
48e7b118 510 error = lwkt_domsg(so->so_port, &msg.nm_netmsg.nm_lmsg, PCATCH);
8765eadc
SZ
511 if (error == EINTR || error == ERESTART)
512 interrupted = 1;
984263bc
MD
513 }
514 if (error == 0) {
515 error = so->so_error;
516 so->so_error = 0;
517 }
984263bc 518bad:
8765eadc
SZ
519 if (!interrupted)
520 so->so_state &= ~SS_ISCONNECTING;
984263bc
MD
521 if (error == ERESTART)
522 error = EINTR;
523done:
9f87144f 524 fdrop(fp);
984263bc
MD
525 return (error);
526}
527
41c20dac 528/*
d83b97b9 529 * connect_args(int s, caddr_t name, int namelen)
3919ced0
MD
530 *
531 * MPALMOSTSAFE
d83b97b9
MD
532 */
533int
753fd850 534sys_connect(struct connect_args *uap)
d83b97b9
MD
535{
536 struct sockaddr *sa;
537 int error;
538
539 error = getsockaddr(&sa, uap->name, uap->namelen);
540 if (error)
541 return (error);
3919ced0 542 get_mplock();
358e1f78 543 error = kern_connect(uap->s, 0, sa);
3919ced0 544 rel_mplock();
358e1f78
MD
545 FREE(sa, M_SONAME);
546
547 return (error);
548}
549
550/*
551 * connect_args(int s, int fflags, caddr_t name, int namelen)
3919ced0
MD
552 *
553 * MPALMOSTSAFE
358e1f78
MD
554 */
555int
b09fd398 556sys_extconnect(struct extconnect_args *uap)
358e1f78
MD
557{
558 struct sockaddr *sa;
559 int error;
560 int fflags = uap->flags & O_FMASK;
561
562 error = getsockaddr(&sa, uap->name, uap->namelen);
563 if (error)
564 return (error);
3919ced0 565 get_mplock();
358e1f78 566 error = kern_connect(uap->s, fflags, sa);
3919ced0 567 rel_mplock();
d83b97b9
MD
568 FREE(sa, M_SONAME);
569
570 return (error);
571}
572
984263bc 573int
5969a6f1 574kern_socketpair(int domain, int type, int protocol, int *sv)
984263bc 575{
dadab5e9 576 struct thread *td = curthread;
f3a2d8c4 577 struct filedesc *fdp;
984263bc
MD
578 struct file *fp1, *fp2;
579 struct socket *so1, *so2;
259b8ea0 580 int fd1, fd2, error;
984263bc 581
f3a2d8c4 582 fdp = td->td_proc->p_fd;
5969a6f1 583 error = socreate(domain, &so1, type, protocol, td);
984263bc
MD
584 if (error)
585 return (error);
5969a6f1 586 error = socreate(domain, &so2, type, protocol, td);
984263bc
MD
587 if (error)
588 goto free1;
f3a2d8c4 589 error = falloc(td->td_lwp, &fp1, &fd1);
984263bc
MD
590 if (error)
591 goto free2;
259b8ea0 592 sv[0] = fd1;
fbb4eeab 593 fp1->f_data = so1;
f3a2d8c4 594 error = falloc(td->td_lwp, &fp2, &fd2);
984263bc
MD
595 if (error)
596 goto free3;
fbb4eeab 597 fp2->f_data = so2;
259b8ea0 598 sv[1] = fd2;
984263bc
MD
599 error = soconnect2(so1, so2);
600 if (error)
601 goto free4;
5969a6f1 602 if (type == SOCK_DGRAM) {
984263bc
MD
603 /*
604 * Datagram socket connection is asymmetric.
605 */
606 error = soconnect2(so2, so1);
607 if (error)
608 goto free4;
609 }
fbb4eeab 610 fp1->f_type = fp2->f_type = DTYPE_SOCKET;
984263bc
MD
611 fp1->f_flag = fp2->f_flag = FREAD|FWRITE;
612 fp1->f_ops = fp2->f_ops = &socketops;
f3a2d8c4
MD
613 fsetfd(fdp, fp1, fd1);
614 fsetfd(fdp, fp2, fd2);
9f87144f
MD
615 fdrop(fp1);
616 fdrop(fp2);
984263bc
MD
617 return (error);
618free4:
f3a2d8c4 619 fsetfd(fdp, NULL, fd2);
9f87144f 620 fdrop(fp2);
984263bc 621free3:
f3a2d8c4 622 fsetfd(fdp, NULL, fd1);
9f87144f 623 fdrop(fp1);
984263bc 624free2:
9ba76b73 625 (void)soclose(so2, 0);
984263bc 626free1:
9ba76b73 627 (void)soclose(so1, 0);
984263bc
MD
628 return (error);
629}
630
5969a6f1
DRJ
631/*
632 * socketpair(int domain, int type, int protocol, int *rsv)
3919ced0
MD
633 *
634 * MPALMOSTSAFE
5969a6f1
DRJ
635 */
636int
753fd850 637sys_socketpair(struct socketpair_args *uap)
5969a6f1
DRJ
638{
639 int error, sockv[2];
640
3919ced0 641 get_mplock();
5969a6f1 642 error = kern_socketpair(uap->domain, uap->type, uap->protocol, sockv);
3919ced0 643 rel_mplock();
5969a6f1
DRJ
644
645 if (error == 0)
646 error = copyout(sockv, uap->rsv, sizeof(sockv));
647 return (error);
648}
649
35fbb1d9 650int
3e1837ce 651kern_sendmsg(int s, struct sockaddr *sa, struct uio *auio,
e54488bb 652 struct mbuf *control, int flags, size_t *res)
984263bc 653{
dadab5e9 654 struct thread *td = curthread;
7278a846 655 struct lwp *lp = td->td_lwp;
dadab5e9 656 struct proc *p = td->td_proc;
984263bc 657 struct file *fp;
e54488bb
MD
658 size_t len;
659 int error;
984263bc
MD
660 struct socket *so;
661#ifdef KTRACE
662 struct iovec *ktriov = NULL;
663 struct uio ktruio;
664#endif
665
666 error = holdsock(p->p_fd, s, &fp);
667 if (error)
668 return (error);
984263bc 669#ifdef KTRACE
dadab5e9 670 if (KTRPOINT(td, KTR_GENIO)) {
3e1837ce 671 int iovlen = auio->uio_iovcnt * sizeof (struct iovec);
984263bc
MD
672
673 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
3e1837ce
DRJ
674 bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen);
675 ktruio = *auio;
984263bc
MD
676 }
677#endif
3e1837ce 678 len = auio->uio_resid;
984263bc 679 so = (struct socket *)fp->f_data;
9ba76b73
MD
680 if ((flags & (MSG_FNONBLOCKING|MSG_FBLOCKING)) == 0) {
681 if (fp->f_flag & FNONBLOCK)
682 flags |= MSG_FNONBLOCKING;
683 }
6b6e0885 684 error = so_pru_sosend(so, sa, auio, NULL, control, flags, td);
984263bc 685 if (error) {
3e1837ce 686 if (auio->uio_resid != len && (error == ERESTART ||
984263bc
MD
687 error == EINTR || error == EWOULDBLOCK))
688 error = 0;
689 if (error == EPIPE)
7278a846 690 lwpsignal(p, lp, SIGPIPE);
984263bc 691 }
984263bc
MD
692#ifdef KTRACE
693 if (ktriov != NULL) {
694 if (error == 0) {
695 ktruio.uio_iov = ktriov;
3e1837ce 696 ktruio.uio_resid = len - auio->uio_resid;
9fb04d14 697 ktrgenio(lp, s, UIO_WRITE, &ktruio, error);
984263bc
MD
698 }
699 FREE(ktriov, M_TEMP);
700 }
701#endif
35fbb1d9 702 if (error == 0)
3e1837ce 703 *res = len - auio->uio_resid;
9f87144f 704 fdrop(fp);
984263bc
MD
705 return (error);
706}
707
41c20dac
MD
708/*
709 * sendto_args(int s, caddr_t buf, size_t len, int flags, caddr_t to, int tolen)
3919ced0
MD
710 *
711 * MPALMOSTSAFE
41c20dac 712 */
984263bc 713int
753fd850 714sys_sendto(struct sendto_args *uap)
984263bc 715{
3e1837ce
DRJ
716 struct thread *td = curthread;
717 struct uio auio;
984263bc 718 struct iovec aiov;
35fbb1d9
DRJ
719 struct sockaddr *sa = NULL;
720 int error;
984263bc 721
35fbb1d9
DRJ
722 if (uap->to) {
723 error = getsockaddr(&sa, uap->to, uap->tolen);
724 if (error)
725 return (error);
35fbb1d9 726 }
984263bc
MD
727 aiov.iov_base = uap->buf;
728 aiov.iov_len = uap->len;
3e1837ce
DRJ
729 auio.uio_iov = &aiov;
730 auio.uio_iovcnt = 1;
731 auio.uio_offset = 0;
732 auio.uio_resid = uap->len;
733 auio.uio_segflg = UIO_USERSPACE;
734 auio.uio_rw = UIO_WRITE;
735 auio.uio_td = td;
984263bc 736
3919ced0 737 get_mplock();
3e1837ce 738 error = kern_sendmsg(uap->s, sa, &auio, NULL, uap->flags,
e54488bb 739 &uap->sysmsg_szresult);
3919ced0 740 rel_mplock();
984263bc 741
35fbb1d9
DRJ
742 if (sa)
743 FREE(sa, M_SONAME);
744 return (error);
984263bc
MD
745}
746
41c20dac 747/*
35fbb1d9 748 * sendmsg_args(int s, caddr_t msg, int flags)
3919ced0
MD
749 *
750 * MPALMOSTSAFE
41c20dac 751 */
984263bc 752int
753fd850 753sys_sendmsg(struct sendmsg_args *uap)
984263bc 754{
3e1837ce 755 struct thread *td = curthread;
984263bc 756 struct msghdr msg;
3e1837ce 757 struct uio auio;
75a872f8 758 struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
35fbb1d9
DRJ
759 struct sockaddr *sa = NULL;
760 struct mbuf *control = NULL;
75a872f8 761 int error;
984263bc 762
35fbb1d9 763 error = copyin(uap->msg, (caddr_t)&msg, sizeof(msg));
984263bc
MD
764 if (error)
765 return (error);
35fbb1d9
DRJ
766
767 /*
768 * Conditionally copyin msg.msg_name.
769 */
770 if (msg.msg_name) {
771 error = getsockaddr(&sa, msg.msg_name, msg.msg_namelen);
772 if (error)
773 return (error);
35fbb1d9
DRJ
774 }
775
776 /*
3e1837ce 777 * Populate auio.
35fbb1d9 778 */
75a872f8 779 error = iovec_copyin(msg.msg_iov, &iov, aiov, msg.msg_iovlen,
ef5c76d7 780 &auio.uio_resid);
984263bc 781 if (error)
8130f673 782 goto cleanup2;
3e1837ce
DRJ
783 auio.uio_iov = iov;
784 auio.uio_iovcnt = msg.msg_iovlen;
785 auio.uio_offset = 0;
3e1837ce
DRJ
786 auio.uio_segflg = UIO_USERSPACE;
787 auio.uio_rw = UIO_WRITE;
788 auio.uio_td = td;
35fbb1d9
DRJ
789
790 /*
791 * Conditionally copyin msg.msg_control.
792 */
793 if (msg.msg_control) {
3e1837ce
DRJ
794 if (msg.msg_controllen < sizeof(struct cmsghdr) ||
795 msg.msg_controllen > MLEN) {
35fbb1d9
DRJ
796 error = EINVAL;
797 goto cleanup;
798 }
74f1caca 799 control = m_get(MB_WAIT, MT_CONTROL);
3e1837ce
DRJ
800 if (control == NULL) {
801 error = ENOBUFS;
35fbb1d9 802 goto cleanup;
3e1837ce
DRJ
803 }
804 control->m_len = msg.msg_controllen;
805 error = copyin(msg.msg_control, mtod(control, caddr_t),
3919ced0 806 msg.msg_controllen);
3e1837ce
DRJ
807 if (error) {
808 m_free(control);
809 goto cleanup;
810 }
35fbb1d9
DRJ
811 }
812
3919ced0 813 get_mplock();
3e1837ce 814 error = kern_sendmsg(uap->s, sa, &auio, control, uap->flags,
e54488bb 815 &uap->sysmsg_szresult);
3919ced0 816 rel_mplock();
35fbb1d9
DRJ
817
818cleanup:
8130f673
MD
819 iovec_free(&iov, aiov);
820cleanup2:
35fbb1d9
DRJ
821 if (sa)
822 FREE(sa, M_SONAME);
984263bc
MD
823 return (error);
824}
984263bc 825
41c20dac 826/*
3e1837ce
DRJ
827 * kern_recvmsg() takes a handle to sa and control. If the handle is non-
828 * null, it returns a dynamically allocated struct sockaddr and an mbuf.
829 * Don't forget to FREE() and m_free() these if they are returned.
41c20dac 830 */
984263bc 831int
3e1837ce 832kern_recvmsg(int s, struct sockaddr **sa, struct uio *auio,
e54488bb 833 struct mbuf **control, int *flags, size_t *res)
984263bc 834{
dadab5e9
MD
835 struct thread *td = curthread;
836 struct proc *p = td->td_proc;
984263bc 837 struct file *fp;
e54488bb
MD
838 size_t len;
839 int error;
9ba76b73 840 int lflags;
984263bc 841 struct socket *so;
984263bc
MD
842#ifdef KTRACE
843 struct iovec *ktriov = NULL;
844 struct uio ktruio;
845#endif
846
847 error = holdsock(p->p_fd, s, &fp);
848 if (error)
849 return (error);
984263bc 850#ifdef KTRACE
dadab5e9 851 if (KTRPOINT(td, KTR_GENIO)) {
3e1837ce 852 int iovlen = auio->uio_iovcnt * sizeof (struct iovec);
984263bc
MD
853
854 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
3e1837ce
DRJ
855 bcopy(auio->uio_iov, ktriov, iovlen);
856 ktruio = *auio;
984263bc
MD
857 }
858#endif
3e1837ce 859 len = auio->uio_resid;
984263bc 860 so = (struct socket *)fp->f_data;
9ba76b73
MD
861
862 if (flags == NULL || (*flags & (MSG_FNONBLOCKING|MSG_FBLOCKING)) == 0) {
863 if (fp->f_flag & FNONBLOCK) {
864 if (flags) {
865 *flags |= MSG_FNONBLOCKING;
866 } else {
867 lflags = MSG_FNONBLOCKING;
868 flags = &lflags;
869 }
870 }
871 }
872
6b6e0885 873 error = so_pru_soreceive(so, sa, auio, NULL, control, flags);
984263bc 874 if (error) {
3e1837ce 875 if (auio->uio_resid != len && (error == ERESTART ||
984263bc
MD
876 error == EINTR || error == EWOULDBLOCK))
877 error = 0;
878 }
879#ifdef KTRACE
880 if (ktriov != NULL) {
881 if (error == 0) {
882 ktruio.uio_iov = ktriov;
3e1837ce 883 ktruio.uio_resid = len - auio->uio_resid;
9fb04d14 884 ktrgenio(td->td_lwp, s, UIO_READ, &ktruio, error);
984263bc
MD
885 }
886 FREE(ktriov, M_TEMP);
887 }
888#endif
35fbb1d9 889 if (error == 0)
3e1837ce 890 *res = len - auio->uio_resid;
9f87144f 891 fdrop(fp);
984263bc
MD
892 return (error);
893}
894
41c20dac
MD
895/*
896 * recvfrom_args(int s, caddr_t buf, size_t len, int flags,
897 * caddr_t from, int *fromlenaddr)
3919ced0
MD
898 *
899 * MPALMOSTSAFE
41c20dac 900 */
984263bc 901int
753fd850 902sys_recvfrom(struct recvfrom_args *uap)
984263bc 903{
3e1837ce
DRJ
904 struct thread *td = curthread;
905 struct uio auio;
984263bc 906 struct iovec aiov;
3e1837ce 907 struct sockaddr *sa = NULL;
35fbb1d9 908 int error, fromlen;
984263bc 909
3e1837ce 910 if (uap->from && uap->fromlenaddr) {
35fbb1d9 911 error = copyin(uap->fromlenaddr, &fromlen, sizeof(fromlen));
984263bc
MD
912 if (error)
913 return (error);
3e1837ce
DRJ
914 if (fromlen < 0)
915 return (EINVAL);
35fbb1d9
DRJ
916 } else {
917 fromlen = 0;
918 }
984263bc
MD
919 aiov.iov_base = uap->buf;
920 aiov.iov_len = uap->len;
3e1837ce
DRJ
921 auio.uio_iov = &aiov;
922 auio.uio_iovcnt = 1;
923 auio.uio_offset = 0;
924 auio.uio_resid = uap->len;
925 auio.uio_segflg = UIO_USERSPACE;
926 auio.uio_rw = UIO_READ;
927 auio.uio_td = td;
984263bc 928
3919ced0 929 get_mplock();
3e1837ce 930 error = kern_recvmsg(uap->s, uap->from ? &sa : NULL, &auio, NULL,
e54488bb 931 &uap->flags, &uap->sysmsg_szresult);
3919ced0 932 rel_mplock();
984263bc 933
3e1837ce 934 if (error == 0 && uap->from) {
c3996757
MD
935 /* note: sa may still be NULL */
936 if (sa) {
937 fromlen = MIN(fromlen, sa->sa_len);
938 error = copyout(sa, uap->from, fromlen);
939 } else {
940 fromlen = 0;
941 }
942 if (error == 0) {
35fbb1d9 943 error = copyout(&fromlen, uap->fromlenaddr,
c3996757
MD
944 sizeof(fromlen));
945 }
35fbb1d9 946 }
3e1837ce
DRJ
947 if (sa)
948 FREE(sa, M_SONAME);
984263bc 949
984263bc
MD
950 return (error);
951}
984263bc 952
41c20dac
MD
953/*
954 * recvmsg_args(int s, struct msghdr *msg, int flags)
3919ced0
MD
955 *
956 * MPALMOSTSAFE
41c20dac 957 */
984263bc 958int
753fd850 959sys_recvmsg(struct recvmsg_args *uap)
984263bc 960{
3e1837ce 961 struct thread *td = curthread;
984263bc 962 struct msghdr msg;
3e1837ce 963 struct uio auio;
75a872f8
DRJ
964 struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
965 struct mbuf *m, *control = NULL;
3e1837ce 966 struct sockaddr *sa = NULL;
35fbb1d9 967 caddr_t ctlbuf;
3e1837ce 968 socklen_t *ufromlenp, *ucontrollenp;
75a872f8 969 int error, fromlen, controllen, len, flags, *uflagsp;
984263bc 970
35fbb1d9
DRJ
971 /*
972 * This copyin handles everything except the iovec.
973 */
974 error = copyin(uap->msg, &msg, sizeof(msg));
984263bc
MD
975 if (error)
976 return (error);
35fbb1d9 977
3e1837ce
DRJ
978 if (msg.msg_name && msg.msg_namelen < 0)
979 return (EINVAL);
980 if (msg.msg_control && msg.msg_controllen < 0)
981 return (EINVAL);
982
983 ufromlenp = (socklen_t *)((caddr_t)uap->msg + offsetof(struct msghdr,
3919ced0 984 msg_namelen));
35fbb1d9 985 ucontrollenp = (socklen_t *)((caddr_t)uap->msg + offsetof(struct msghdr,
3919ced0 986 msg_controllen));
3e1837ce 987 uflagsp = (int *)((caddr_t)uap->msg + offsetof(struct msghdr,
3919ced0 988 msg_flags));
35fbb1d9
DRJ
989
990 /*
3e1837ce 991 * Populate auio.
35fbb1d9 992 */
75a872f8 993 error = iovec_copyin(msg.msg_iov, &iov, aiov, msg.msg_iovlen,
ef5c76d7 994 &auio.uio_resid);
984263bc 995 if (error)
75a872f8 996 return (error);
3e1837ce
DRJ
997 auio.uio_iov = iov;
998 auio.uio_iovcnt = msg.msg_iovlen;
999 auio.uio_offset = 0;
3e1837ce
DRJ
1000 auio.uio_segflg = UIO_USERSPACE;
1001 auio.uio_rw = UIO_READ;
1002 auio.uio_td = td;
35fbb1d9 1003
b7ccd728 1004 flags = uap->flags;
35fbb1d9 1005
3919ced0 1006 get_mplock();
e54488bb
MD
1007 error = kern_recvmsg(uap->s,
1008 (msg.msg_name ? &sa : NULL), &auio,
1009 (msg.msg_control ? &control : NULL), &flags,
1010 &uap->sysmsg_szresult);
3919ced0 1011 rel_mplock();
35fbb1d9
DRJ
1012
1013 /*
3e1837ce 1014 * Conditionally copyout the name and populate the namelen field.
35fbb1d9 1015 */
3e1837ce 1016 if (error == 0 && msg.msg_name) {
b9cd15b9
YT
1017 /* note: sa may still be NULL */
1018 if (sa != NULL) {
1019 fromlen = MIN(msg.msg_namelen, sa->sa_len);
1020 error = copyout(sa, msg.msg_name, fromlen);
b4354d10 1021 } else {
b9cd15b9 1022 fromlen = 0;
b4354d10 1023 }
35fbb1d9 1024 if (error == 0)
3e1837ce
DRJ
1025 error = copyout(&fromlen, ufromlenp,
1026 sizeof(*ufromlenp));
984263bc 1027 }
35fbb1d9
DRJ
1028
1029 /*
1030 * Copyout msg.msg_control and msg.msg_controllen.
1031 */
3e1837ce 1032 if (error == 0 && msg.msg_control) {
35fbb1d9 1033 len = msg.msg_controllen;
3e1837ce
DRJ
1034 m = control;
1035 ctlbuf = (caddr_t)msg.msg_control;
35fbb1d9
DRJ
1036
1037 while(m && len > 0) {
1038 unsigned int tocopy;
1039
1040 if (len >= m->m_len) {
1041 tocopy = m->m_len;
1042 } else {
1043 msg.msg_flags |= MSG_CTRUNC;
1044 tocopy = len;
1045 }
1046
1047 error = copyout(mtod(m, caddr_t), ctlbuf, tocopy);
1048 if (error)
1049 goto cleanup;
1050
1051 ctlbuf += tocopy;
1052 len -= tocopy;
1053 m = m->m_next;
1054 }
3e1837ce
DRJ
1055 controllen = ctlbuf - (caddr_t)msg.msg_control;
1056 error = copyout(&controllen, ucontrollenp,
35fbb1d9
DRJ
1057 sizeof(*ucontrollenp));
1058 }
1059
3e1837ce
DRJ
1060 if (error == 0)
1061 error = copyout(&flags, uflagsp, sizeof(*uflagsp));
1062
35fbb1d9 1063cleanup:
3e1837ce
DRJ
1064 if (sa)
1065 FREE(sa, M_SONAME);
75a872f8 1066 iovec_free(&iov, aiov);
3e1837ce
DRJ
1067 if (control)
1068 m_freem(control);
984263bc
MD
1069 return (error);
1070}
1071
41c20dac 1072/*
201305ad
DRJ
1073 * If sopt->sopt_td == NULL, then sopt->sopt_val is treated as an
1074 * in kernel pointer instead of a userland pointer. This allows us
1075 * to manipulate socket options in the emulation code.
41c20dac 1076 */
984263bc 1077int
201305ad 1078kern_setsockopt(int s, struct sockopt *sopt)
984263bc 1079{
dadab5e9
MD
1080 struct thread *td = curthread;
1081 struct proc *p = td->td_proc;
984263bc 1082 struct file *fp;
984263bc
MD
1083 int error;
1084
b4354d10 1085 if (sopt->sopt_val == NULL && sopt->sopt_valsize != 0)
984263bc 1086 return (EFAULT);
b8237e23
AH
1087 if (sopt->sopt_val != NULL && sopt->sopt_valsize == 0)
1088 return (EINVAL);
201305ad 1089 if (sopt->sopt_valsize < 0)
984263bc
MD
1090 return (EINVAL);
1091
201305ad 1092 error = holdsock(p->p_fd, s, &fp);
984263bc
MD
1093 if (error)
1094 return (error);
1095
201305ad 1096 error = sosetopt((struct socket *)fp->f_data, sopt);
9f87144f 1097 fdrop(fp);
201305ad
DRJ
1098 return (error);
1099}
1100
1101/*
1102 * setsockopt_args(int s, int level, int name, caddr_t val, int valsize)
3919ced0
MD
1103 *
1104 * MPALMOSTSAFE
201305ad
DRJ
1105 */
1106int
753fd850 1107sys_setsockopt(struct setsockopt_args *uap)
201305ad
DRJ
1108{
1109 struct thread *td = curthread;
1110 struct sockopt sopt;
1111 int error;
1112
984263bc
MD
1113 sopt.sopt_level = uap->level;
1114 sopt.sopt_name = uap->name;
984263bc 1115 sopt.sopt_valsize = uap->valsize;
dadab5e9 1116 sopt.sopt_td = td;
aca22a94 1117 sopt.sopt_val = NULL;
201305ad 1118
b4354d10
MD
1119 if (sopt.sopt_valsize < 0 || sopt.sopt_valsize > SOMAXOPT_SIZE)
1120 return (EINVAL);
792239df
AE
1121 if (uap->val) {
1122 sopt.sopt_val = kmalloc(sopt.sopt_valsize, M_TEMP, M_WAITOK);
1123 error = copyin(uap->val, sopt.sopt_val, sopt.sopt_valsize);
1124 if (error)
1125 goto out;
792239df 1126 }
aca22a94 1127
3919ced0 1128 get_mplock();
201305ad 1129 error = kern_setsockopt(uap->s, &sopt);
3919ced0 1130 rel_mplock();
de0003fe 1131out:
792239df
AE
1132 if (uap->val)
1133 kfree(sopt.sopt_val, M_TEMP);
984263bc
MD
1134 return(error);
1135}
1136
41c20dac 1137/*
201305ad
DRJ
1138 * If sopt->sopt_td == NULL, then sopt->sopt_val is treated as an
1139 * in kernel pointer instead of a userland pointer. This allows us
1140 * to manipulate socket options in the emulation code.
41c20dac 1141 */
984263bc 1142int
201305ad 1143kern_getsockopt(int s, struct sockopt *sopt)
984263bc 1144{
dadab5e9
MD
1145 struct thread *td = curthread;
1146 struct proc *p = td->td_proc;
201305ad
DRJ
1147 struct file *fp;
1148 int error;
984263bc 1149
b4354d10 1150 if (sopt->sopt_val == NULL && sopt->sopt_valsize != 0)
201305ad 1151 return (EFAULT);
b8237e23
AH
1152 if (sopt->sopt_val != NULL && sopt->sopt_valsize == 0)
1153 return (EINVAL);
b4354d10 1154 if (sopt->sopt_valsize < 0 || sopt->sopt_valsize > SOMAXOPT_SIZE)
201305ad
DRJ
1155 return (EINVAL);
1156
1157 error = holdsock(p->p_fd, s, &fp);
984263bc
MD
1158 if (error)
1159 return (error);
201305ad
DRJ
1160
1161 error = sogetopt((struct socket *)fp->f_data, sopt);
9f87144f 1162 fdrop(fp);
201305ad
DRJ
1163 return (error);
1164}
1165
1166/*
3919ced0
MD
1167 * getsockopt_args(int s, int level, int name, caddr_t val, int *avalsize)
1168 *
1169 * MPALMOSTSAFE
201305ad
DRJ
1170 */
1171int
753fd850 1172sys_getsockopt(struct getsockopt_args *uap)
201305ad
DRJ
1173{
1174 struct thread *td = curthread;
1175 struct sockopt sopt;
1176 int error, valsize;
1177
984263bc 1178 if (uap->val) {
201305ad
DRJ
1179 error = copyin(uap->avalsize, &valsize, sizeof(valsize));
1180 if (error)
984263bc 1181 return (error);
984263bc
MD
1182 } else {
1183 valsize = 0;
1184 }
1185
984263bc
MD
1186 sopt.sopt_level = uap->level;
1187 sopt.sopt_name = uap->name;
201305ad 1188 sopt.sopt_valsize = valsize;
dadab5e9 1189 sopt.sopt_td = td;
aca22a94 1190 sopt.sopt_val = NULL;
984263bc 1191
aca22a94 1192 if (sopt.sopt_valsize < 0 || sopt.sopt_valsize > SOMAXOPT_SIZE)
b4354d10 1193 return (EINVAL);
792239df
AE
1194 if (uap->val) {
1195 sopt.sopt_val = kmalloc(sopt.sopt_valsize, M_TEMP, M_WAITOK);
1196 error = copyin(uap->val, sopt.sopt_val, sopt.sopt_valsize);
1197 if (error)
1198 goto out;
792239df 1199 }
aca22a94 1200
3919ced0 1201 get_mplock();
201305ad 1202 error = kern_getsockopt(uap->s, &sopt);
3919ced0 1203 rel_mplock();
de0003fe
AE
1204 if (error)
1205 goto out;
1206 valsize = sopt.sopt_valsize;
1207 error = copyout(&valsize, uap->avalsize, sizeof(valsize));
1208 if (error)
1209 goto out;
792239df
AE
1210 if (uap->val)
1211 error = copyout(sopt.sopt_val, uap->val, sopt.sopt_valsize);
de0003fe 1212out:
792239df
AE
1213 if (uap->val)
1214 kfree(sopt.sopt_val, M_TEMP);
984263bc
MD
1215 return (error);
1216}
1217
1218/*
5969a6f1
DRJ
1219 * The second argument to kern_getsockname() is a handle to a struct sockaddr.
1220 * This allows kern_getsockname() to return a pointer to an allocated struct
1221 * sockaddr which must be freed later with FREE(). The caller must
1222 * initialize *name to NULL.
984263bc 1223 */
5969a6f1
DRJ
1224int
1225kern_getsockname(int s, struct sockaddr **name, int *namelen)
984263bc 1226{
dadab5e9
MD
1227 struct thread *td = curthread;
1228 struct proc *p = td->td_proc;
984263bc 1229 struct file *fp;
dadab5e9 1230 struct socket *so;
5969a6f1
DRJ
1231 struct sockaddr *sa = NULL;
1232 int error;
984263bc 1233
5969a6f1 1234 error = holdsock(p->p_fd, s, &fp);
984263bc
MD
1235 if (error)
1236 return (error);
5969a6f1 1237 if (*namelen < 0) {
9f87144f 1238 fdrop(fp);
984263bc
MD
1239 return (EINVAL);
1240 }
1241 so = (struct socket *)fp->f_data;
6b6e0885 1242 error = so_pru_sockaddr(so, &sa);
5969a6f1 1243 if (error == 0) {
b4354d10 1244 if (sa == NULL) {
5969a6f1
DRJ
1245 *namelen = 0;
1246 } else {
1247 *namelen = MIN(*namelen, sa->sa_len);
1248 *name = sa;
1249 }
984263bc
MD
1250 }
1251
9f87144f 1252 fdrop(fp);
984263bc
MD
1253 return (error);
1254}
1255
5969a6f1
DRJ
1256/*
1257 * getsockname_args(int fdes, caddr_t asa, int *alen)
1258 *
1259 * Get socket name.
3919ced0
MD
1260 *
1261 * MPALMOSTSAFE
5969a6f1 1262 */
984263bc 1263int
753fd850 1264sys_getsockname(struct getsockname_args *uap)
984263bc 1265{
5969a6f1
DRJ
1266 struct sockaddr *sa = NULL;
1267 int error, sa_len;
1268
1269 error = copyin(uap->alen, &sa_len, sizeof(sa_len));
1270 if (error)
1271 return (error);
1272
3919ced0 1273 get_mplock();
5969a6f1 1274 error = kern_getsockname(uap->fdes, &sa, &sa_len);
3919ced0 1275 rel_mplock();
984263bc 1276
5969a6f1
DRJ
1277 if (error == 0)
1278 error = copyout(sa, uap->asa, sa_len);
1279 if (error == 0)
1280 error = copyout(&sa_len, uap->alen, sizeof(*uap->alen));
1281 if (sa)
1282 FREE(sa, M_SONAME);
1283 return (error);
984263bc
MD
1284}
1285
984263bc 1286/*
5969a6f1
DRJ
1287 * The second argument to kern_getpeername() is a handle to a struct sockaddr.
1288 * This allows kern_getpeername() to return a pointer to an allocated struct
1289 * sockaddr which must be freed later with FREE(). The caller must
1290 * initialize *name to NULL.
984263bc 1291 */
5969a6f1
DRJ
1292int
1293kern_getpeername(int s, struct sockaddr **name, int *namelen)
984263bc 1294{
dadab5e9
MD
1295 struct thread *td = curthread;
1296 struct proc *p = td->td_proc;
984263bc 1297 struct file *fp;
dadab5e9 1298 struct socket *so;
5969a6f1
DRJ
1299 struct sockaddr *sa = NULL;
1300 int error;
984263bc 1301
5969a6f1 1302 error = holdsock(p->p_fd, s, &fp);
984263bc
MD
1303 if (error)
1304 return (error);
5969a6f1 1305 if (*namelen < 0) {
9f87144f 1306 fdrop(fp);
5969a6f1
DRJ
1307 return (EINVAL);
1308 }
984263bc
MD
1309 so = (struct socket *)fp->f_data;
1310 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
9f87144f 1311 fdrop(fp);
984263bc
MD
1312 return (ENOTCONN);
1313 }
6b6e0885 1314 error = so_pru_peeraddr(so, &sa);
5969a6f1 1315 if (error == 0) {
b4354d10 1316 if (sa == NULL) {
5969a6f1
DRJ
1317 *namelen = 0;
1318 } else {
1319 *namelen = MIN(*namelen, sa->sa_len);
1320 *name = sa;
1321 }
984263bc 1322 }
5969a6f1 1323
9f87144f 1324 fdrop(fp);
984263bc
MD
1325 return (error);
1326}
1327
5969a6f1
DRJ
1328/*
1329 * getpeername_args(int fdes, caddr_t asa, int *alen)
1330 *
1331 * Get name of peer for connected socket.
3919ced0
MD
1332 *
1333 * MPALMOSTSAFE
5969a6f1 1334 */
984263bc 1335int
753fd850 1336sys_getpeername(struct getpeername_args *uap)
984263bc 1337{
5969a6f1
DRJ
1338 struct sockaddr *sa = NULL;
1339 int error, sa_len;
1340
1341 error = copyin(uap->alen, &sa_len, sizeof(sa_len));
1342 if (error)
1343 return (error);
1344
3919ced0 1345 get_mplock();
5969a6f1 1346 error = kern_getpeername(uap->fdes, &sa, &sa_len);
3919ced0 1347 rel_mplock();
5969a6f1
DRJ
1348
1349 if (error == 0)
1350 error = copyout(sa, uap->asa, sa_len);
1351 if (error == 0)
1352 error = copyout(&sa_len, uap->alen, sizeof(*uap->alen));
1353 if (sa)
1354 FREE(sa, M_SONAME);
1355 return (error);
984263bc
MD
1356}
1357
984263bc 1358int
02844a31 1359getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len)
984263bc
MD
1360{
1361 struct sockaddr *sa;
1362 int error;
1363
02844a31 1364 *namp = NULL;
984263bc
MD
1365 if (len > SOCK_MAXADDRLEN)
1366 return ENAMETOOLONG;
02844a31
MD
1367 if (len < offsetof(struct sockaddr, sa_data[0]))
1368 return EDOM;
984263bc
MD
1369 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1370 error = copyin(uaddr, sa, len);
1371 if (error) {
1372 FREE(sa, M_SONAME);
1373 } else {
75a872f8
DRJ
1374#if BYTE_ORDER != BIG_ENDIAN
1375 /*
1376 * The bind(), connect(), and sendto() syscalls were not
1377 * versioned for COMPAT_43. Thus, this check must stay.
1378 */
984263bc
MD
1379 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1380 sa->sa_family = sa->sa_len;
1381#endif
1382 sa->sa_len = len;
1383 *namp = sa;
1384 }
1385 return error;
1386}
1387
1388/*
b4caac98
MD
1389 * Detach a mapped page and release resources back to the system.
1390 * We must release our wiring and if the object is ripped out
1391 * from under the vm_page we become responsible for freeing the
df8d1020 1392 * page. These routines must be MPSAFE.
b4caac98
MD
1393 *
1394 * XXX HACK XXX TEMPORARY UNTIL WE IMPLEMENT EXT MBUF REFERENCE COUNTING
df8d1020
MD
1395 *
1396 * XXX vm_page_*() routines are not MPSAFE yet, the MP lock is required.
b4caac98
MD
1397 */
1398static void
013a4c0e 1399sf_buf_mfree(void *arg)
b4caac98 1400{
5c5185ae 1401 struct sf_buf *sf = arg;
b4caac98 1402 vm_page_t m;
e66bab2b 1403
5c5185ae
SG
1404 /*
1405 * XXX vm_page_*() and SFBUF routines not MPSAFE yet.
1406 */
1407 get_mplock();
1408 crit_enter();
1409 m = sf_buf_page(sf);
1410 if (sf_buf_free(sf) == 0) {
321e057f
SZ
1411 vm_page_unwire(m, 0);
1412 if (m->wire_count == 0 && m->object == NULL)
1413 vm_page_try_to_free(m);
b4caac98 1414 }
5c5185ae
SG
1415 crit_exit();
1416 rel_mplock();
b4caac98
MD
1417}
1418
1419/*
984263bc
MD
1420 * sendfile(2).
1421 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1422 * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1423 *
1424 * Send a file specified by 'fd' and starting at 'offset' to a socket
1425 * specified by 's'. Send only 'nbytes' of the file or until EOF if
1426 * nbytes == 0. Optionally add a header and/or trailer to the socket
1427 * output. If specified, write the total number of bytes sent into *sbytes.
75a872f8
DRJ
1428 *
1429 * In FreeBSD kern/uipc_syscalls.c,v 1.103, a bug was fixed that caused
1430 * the headers to count against the remaining bytes to be sent from
1431 * the file descriptor. We may wish to implement a compatibility syscall
1432 * in the future.
3919ced0
MD
1433 *
1434 * MPALMOSTSAFE
984263bc
MD
1435 */
1436int
753fd850 1437sys_sendfile(struct sendfile_args *uap)
984263bc 1438{
dadab5e9
MD
1439 struct thread *td = curthread;
1440 struct proc *p = td->td_proc;
984263bc 1441 struct file *fp;
75a872f8 1442 struct vnode *vp = NULL;
984263bc 1443 struct sf_hdtr hdtr;
75a872f8
DRJ
1444 struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
1445 struct uio auio;
30eeba44 1446 struct mbuf *mheader = NULL;
e54488bb
MD
1447 size_t hbytes = 0;
1448 size_t tbytes;
1449 off_t hdtr_size = 0;
1450 off_t sbytes;
1451 int error;
984263bc 1452
dadab5e9 1453 KKASSERT(p);
dadab5e9 1454
984263bc
MD
1455 /*
1456 * Do argument checking. Must be a regular file in, stream
1457 * type and connected socket out, positive offset.
1458 */
fa541be6 1459 fp = holdfp(p->p_fd, uap->fd, FREAD);
984263bc 1460 if (fp == NULL) {
f0846490 1461 return (EBADF);
984263bc
MD
1462 }
1463 if (fp->f_type != DTYPE_VNODE) {
9f87144f 1464 fdrop(fp);
f0846490 1465 return (EINVAL);
984263bc 1466 }
3919ced0 1467 get_mplock();
984263bc
MD
1468 vp = (struct vnode *)fp->f_data;
1469 vref(vp);
9f87144f 1470 fdrop(fp);
75a872f8
DRJ
1471
1472 /*
1473 * If specified, get the pointer to the sf_hdtr struct for
1474 * any headers/trailers.
1475 */
1476 if (uap->hdtr) {
1477 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1478 if (error)
1479 goto done;
1480 /*
1481 * Send any headers.
1482 */
1483 if (hdtr.headers) {
1484 error = iovec_copyin(hdtr.headers, &iov, aiov,
ef5c76d7 1485 hdtr.hdr_cnt, &hbytes);
75a872f8
DRJ
1486 if (error)
1487 goto done;
1488 auio.uio_iov = iov;
1489 auio.uio_iovcnt = hdtr.hdr_cnt;
1490 auio.uio_offset = 0;
1491 auio.uio_segflg = UIO_USERSPACE;
1492 auio.uio_rw = UIO_WRITE;
1493 auio.uio_td = td;
30eeba44 1494 auio.uio_resid = hbytes;
75a872f8 1495
e12241e1 1496 mheader = m_uiomove(&auio);
75a872f8
DRJ
1497
1498 iovec_free(&iov, aiov);
30eeba44 1499 if (mheader == NULL)
75a872f8 1500 goto done;
75a872f8
DRJ
1501 }
1502 }
1503
30eeba44 1504 error = kern_sendfile(vp, uap->s, uap->offset, uap->nbytes, mheader,
3919ced0 1505 &sbytes, uap->flags);
75a872f8
DRJ
1506 if (error)
1507 goto done;
1508
1509 /*
1510 * Send trailers. Wimp out and use writev(2).
1511 */
1512 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
1513 error = iovec_copyin(hdtr.trailers, &iov, aiov,
ef5c76d7 1514 hdtr.trl_cnt, &auio.uio_resid);
75a872f8
DRJ
1515 if (error)
1516 goto done;
1517 auio.uio_iov = iov;
1518 auio.uio_iovcnt = hdtr.trl_cnt;
1519 auio.uio_offset = 0;
1520 auio.uio_segflg = UIO_USERSPACE;
1521 auio.uio_rw = UIO_WRITE;
1522 auio.uio_td = td;
1523
30eeba44 1524 error = kern_sendmsg(uap->s, NULL, &auio, NULL, 0, &tbytes);
75a872f8
DRJ
1525
1526 iovec_free(&iov, aiov);
1527 if (error)
1528 goto done;
30eeba44 1529 hdtr_size += tbytes; /* trailer bytes successfully sent */
75a872f8
DRJ
1530 }
1531
1532done:
3919ced0
MD
1533 if (vp)
1534 vrele(vp);
1535 rel_mplock();
75a872f8
DRJ
1536 if (uap->sbytes != NULL) {
1537 sbytes += hdtr_size;
1538 copyout(&sbytes, uap->sbytes, sizeof(off_t));
1539 }
75a872f8
DRJ
1540 return (error);
1541}
1542
1543int
06ecca5a 1544kern_sendfile(struct vnode *vp, int sfd, off_t offset, size_t nbytes,
30eeba44 1545 struct mbuf *mheader, off_t *sbytes, int flags)
75a872f8
DRJ
1546{
1547 struct thread *td = curthread;
1548 struct proc *p = td->td_proc;
1549 struct vm_object *obj;
1550 struct socket *so;
285332f0 1551 struct file *fp;
75a872f8
DRJ
1552 struct mbuf *m;
1553 struct sf_buf *sf;
1554 struct vm_page *pg;
1555 off_t off, xfsize;
39b3370f 1556 off_t hbytes = 0;
75a872f8
DRJ
1557 int error = 0;
1558
7540ab49
MD
1559 if (vp->v_type != VREG) {
1560 error = EINVAL;
1561 goto done0;
1562 }
1563 if ((obj = vp->v_object) == NULL) {
984263bc 1564 error = EINVAL;
285332f0 1565 goto done0;
984263bc 1566 }
06ecca5a 1567 error = holdsock(p->p_fd, sfd, &fp);
984263bc 1568 if (error)
285332f0 1569 goto done0;
984263bc
MD
1570 so = (struct socket *)fp->f_data;
1571 if (so->so_type != SOCK_STREAM) {
1572 error = EINVAL;
1573 goto done;
1574 }
1575 if ((so->so_state & SS_ISCONNECTED) == 0) {
1576 error = ENOTCONN;
1577 goto done;
1578 }
75a872f8 1579 if (offset < 0) {
984263bc
MD
1580 error = EINVAL;
1581 goto done;
1582 }
1583
75a872f8 1584 *sbytes = 0;
984263bc
MD
1585 /*
1586 * Protect against multiple writers to the socket.
1587 */
6d49aa6f 1588 ssb_lock(&so->so_snd, M_WAITOK);
984263bc
MD
1589
1590 /*
1591 * Loop through the pages in the file, starting with the requested
1592 * offset. Get a file page (do I/O if necessary), map the file page
1593 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1594 * it on the socket.
1595 */
39b3370f 1596 for (off = offset; ; off += xfsize, *sbytes += xfsize + hbytes) {
984263bc
MD
1597 vm_pindex_t pindex;
1598 vm_offset_t pgoff;
1599
1600 pindex = OFF_TO_IDX(off);
1601retry_lookup:
1602 /*
1603 * Calculate the amount to transfer. Not to exceed a page,
1604 * the EOF, or the passed in nbytes.
1605 */
57f7b636 1606 xfsize = vp->v_filesize - off;
984263bc
MD
1607 if (xfsize > PAGE_SIZE)
1608 xfsize = PAGE_SIZE;
1609 pgoff = (vm_offset_t)(off & PAGE_MASK);
1610 if (PAGE_SIZE - pgoff < xfsize)
1611 xfsize = PAGE_SIZE - pgoff;
75a872f8
DRJ
1612 if (nbytes && xfsize > (nbytes - *sbytes))
1613 xfsize = nbytes - *sbytes;
984263bc
MD
1614 if (xfsize <= 0)
1615 break;
1616 /*
1617 * Optimize the non-blocking case by looking at the socket space
1618 * before going to the extra work of constituting the sf_buf.
1619 */
6d49aa6f 1620 if ((fp->f_flag & FNONBLOCK) && ssb_space(&so->so_snd) <= 0) {
984263bc
MD
1621 if (so->so_state & SS_CANTSENDMORE)
1622 error = EPIPE;
1623 else
1624 error = EAGAIN;
6d49aa6f 1625 ssb_unlock(&so->so_snd);
984263bc
MD
1626 goto done;
1627 }
1628 /*
1629 * Attempt to look up the page.
1630 *
06ecca5a 1631 * Allocate if not found, wait and loop if busy, then
5fd012e0
MD
1632 * wire the page. critical section protection is
1633 * required to maintain the object association (an
1634 * interrupt can free the page) through to the
1635 * vm_page_wire() call.
984263bc 1636 */
5fd012e0 1637 crit_enter();
984263bc 1638 pg = vm_page_lookup(obj, pindex);
984263bc
MD
1639 if (pg == NULL) {
1640 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
1641 if (pg == NULL) {
4ecf7cc9 1642 vm_wait(0);
5fd012e0 1643 crit_exit();
984263bc
MD
1644 goto retry_lookup;
1645 }
1646 vm_page_wakeup(pg);
1647 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) {
5fd012e0 1648 crit_exit();
984263bc
MD
1649 goto retry_lookup;
1650 }
984263bc 1651 vm_page_wire(pg);
5fd012e0 1652 crit_exit();
984263bc
MD
1653
1654 /*
1655 * If page is not valid for what we need, initiate I/O
1656 */
1657
1658 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1659 struct uio auio;
1660 struct iovec aiov;
1661 int bsize;
1662
1663 /*
1664 * Ensure that our page is still around when the I/O
1665 * completes.
1666 */
1667 vm_page_io_start(pg);
1668
1669 /*
1670 * Get the page from backing store.
1671 */
1672 bsize = vp->v_mount->mnt_stat.f_iosize;
1673 auio.uio_iov = &aiov;
1674 auio.uio_iovcnt = 1;
1675 aiov.iov_base = 0;
1676 aiov.iov_len = MAXBSIZE;
1677 auio.uio_resid = MAXBSIZE;
1678 auio.uio_offset = trunc_page(off);
1679 auio.uio_segflg = UIO_NOCOPY;
1680 auio.uio_rw = UIO_READ;
dadab5e9 1681 auio.uio_td = td;
ab6f251b 1682 vn_lock(vp, LK_SHARED | LK_RETRY);
dadab5e9
MD
1683 error = VOP_READ(vp, &auio,
1684 IO_VMIO | ((MAXBSIZE / bsize) << 16),
9910d07b 1685 td->td_ucred);
a11aaa81 1686 vn_unlock(vp);
984263bc
MD
1687 vm_page_flag_clear(pg, PG_ZERO);
1688 vm_page_io_finish(pg);
1689 if (error) {
f2555cdd 1690 crit_enter();
984263bc 1691 vm_page_unwire(pg, 0);
f2555cdd
MD
1692 vm_page_try_to_free(pg);
1693 crit_exit();
6d49aa6f 1694 ssb_unlock(&so->so_snd);
984263bc
MD
1695 goto done;
1696 }
1697 }
1698
1699
1700 /*
1701 * Get a sendfile buf. We usually wait as long as necessary,
1702 * but this wait can be interrupted.
1703 */
5c5185ae 1704 if ((sf = sf_buf_alloc(pg)) == NULL) {
5fd012e0 1705 crit_enter();
984263bc 1706 vm_page_unwire(pg, 0);
f2555cdd 1707 vm_page_try_to_free(pg);
5fd012e0 1708 crit_exit();
6d49aa6f 1709 ssb_unlock(&so->so_snd);
984263bc
MD
1710 error = EINTR;
1711 goto done;
1712 }
1713
984263bc
MD
1714 /*
1715 * Get an mbuf header and set it up as having external storage.
1716 */
74f1caca 1717 MGETHDR(m, MB_WAIT, MT_DATA);
984263bc
MD
1718 if (m == NULL) {
1719 error = ENOBUFS;
b4caac98 1720 sf_buf_free(sf);
6d49aa6f 1721 ssb_unlock(&so->so_snd);
984263bc
MD
1722 goto done;
1723 }
e66bab2b 1724
b542cd49 1725 m->m_ext.ext_free = sf_buf_mfree;
5c5185ae
SG
1726 m->m_ext.ext_ref = sf_buf_ref;
1727 m->m_ext.ext_arg = sf;
1728 m->m_ext.ext_buf = (void *)sf_buf_kva(sf);
984263bc 1729 m->m_ext.ext_size = PAGE_SIZE;
5c5185ae 1730 m->m_data = (char *)sf_buf_kva(sf) + pgoff;
013a4c0e 1731 m->m_flags |= M_EXT;
984263bc 1732 m->m_pkthdr.len = m->m_len = xfsize;
b542cd49 1733 KKASSERT((m->m_flags & (M_EXT_CLUSTER)) == 0);
30eeba44 1734
39b3370f
JH
1735 if (mheader != NULL) {
1736 hbytes = mheader->m_pkthdr.len;
30eeba44
JH
1737 mheader->m_pkthdr.len += m->m_pkthdr.len;
1738 m_cat(mheader, m);
1739 m = mheader;
1740 mheader = NULL;
39b3370f
JH
1741 } else
1742 hbytes = 0;
30eeba44 1743
984263bc
MD
1744 /*
1745 * Add the buffer to the socket buffer chain.
1746 */
5fd012e0 1747 crit_enter();
984263bc
MD
1748retry_space:
1749 /*
1750 * Make sure that the socket is still able to take more data.
1751 * CANTSENDMORE being true usually means that the connection
1752 * was closed. so_error is true when an error was sensed after
1753 * a previous send.
1754 * The state is checked after the page mapping and buffer
1755 * allocation above since those operations may block and make
1756 * any socket checks stale. From this point forward, nothing
1757 * blocks before the pru_send (or more accurately, any blocking
1758 * results in a loop back to here to re-check).
1759 */
1760 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1761 if (so->so_state & SS_CANTSENDMORE) {
1762 error = EPIPE;
1763 } else {
1764 error = so->so_error;
1765 so->so_error = 0;
1766 }
1767 m_freem(m);
6d49aa6f 1768 ssb_unlock(&so->so_snd);
5fd012e0 1769 crit_exit();
984263bc
MD
1770 goto done;
1771 }
1772 /*
1773 * Wait for socket space to become available. We do this just
1774 * after checking the connection state above in order to avoid
6d49aa6f 1775 * a race condition with ssb_wait().
984263bc 1776 */
6d49aa6f 1777 if (ssb_space(&so->so_snd) < so->so_snd.ssb_lowat) {
9ba76b73 1778 if (fp->f_flag & FNONBLOCK) {
984263bc 1779 m_freem(m);
6d49aa6f 1780 ssb_unlock(&so->so_snd);
5fd012e0 1781 crit_exit();
984263bc
MD
1782 error = EAGAIN;
1783 goto done;
1784 }
6d49aa6f 1785 error = ssb_wait(&so->so_snd);
984263bc 1786 /*
6d49aa6f 1787 * An error from ssb_wait usually indicates that we've
984263bc
MD
1788 * been interrupted by a signal. If we've sent anything
1789 * then return bytes sent, otherwise return the error.
1790 */
1791 if (error) {
1792 m_freem(m);
6d49aa6f 1793 ssb_unlock(&so->so_snd);
5fd012e0 1794 crit_exit();
984263bc
MD
1795 goto done;
1796 }
1797 goto retry_space;
1798 }
3c6b2883 1799 error = so_pru_send(so, 0, m, NULL, NULL, td);
5fd012e0 1800 crit_exit();
984263bc 1801 if (error) {
6d49aa6f 1802 ssb_unlock(&so->so_snd);
984263bc
MD
1803 goto done;
1804 }
1805 }
d785f69d 1806 if (mheader != NULL) {
39b3370f 1807 *sbytes += mheader->m_pkthdr.len;
d785f69d
JH
1808 error = so_pru_send(so, 0, mheader, NULL, NULL, td);
1809 mheader = NULL;
1810 }
6d49aa6f 1811 ssb_unlock(&so->so_snd);
984263bc 1812
984263bc 1813done:
9f87144f 1814 fdrop(fp);
285332f0 1815done0:
30eeba44
JH
1816 if (mheader != NULL)
1817 m_freem(mheader);
984263bc
MD
1818 return (error);
1819}
78812139 1820
3919ced0
MD
1821/*
1822 * MPALMOSTSAFE
1823 */
78812139 1824int
753fd850 1825sys_sctp_peeloff(struct sctp_peeloff_args *uap)
78812139
EN
1826{
1827#ifdef SCTP
1828 struct thread *td = curthread;
f3a2d8c4 1829 struct filedesc *fdp = td->td_proc->p_fd;
78812139
EN
1830 struct file *lfp = NULL;
1831 struct file *nfp = NULL;
1832 int error;
1833 struct socket *head, *so;
1834 caddr_t assoc_id;
1835 int fd;
1836 short fflag; /* type must match fp->f_flag */
1837
1838 assoc_id = uap->name;
ef2b8c7d 1839 error = holdsock(td->td_proc->p_fd, uap->sd, &lfp);
3919ced0 1840 if (error)
78812139 1841 return (error);
3919ced0
MD
1842
1843 get_mplock();
78812139
EN
1844 crit_enter();
1845 head = (struct socket *)lfp->f_data;
1846 error = sctp_can_peel_off(head, assoc_id);
1847 if (error) {
1848 crit_exit();
1849 goto done;
1850 }
1851 /*
1852 * At this point we know we do have a assoc to pull
1853 * we proceed to get the fd setup. This may block
1854 * but that is ok.
1855 */
1856
1857 fflag = lfp->f_flag;
f3a2d8c4 1858 error = falloc(td->td_lwp, &nfp, &fd);
78812139
EN
1859 if (error) {
1860 /*
1861 * Probably ran out of file descriptors. Put the
1862 * unaccepted connection back onto the queue and
1863 * do another wakeup so some other process might
1864 * have a chance at it.
1865 */
1866 crit_exit();
1867 goto done;
1868 }
e54488bb 1869 uap->sysmsg_iresult = fd;
78812139
EN
1870
1871 so = sctp_get_peeloff(head, assoc_id, &error);
1872 if (so == NULL) {
1873 /*
1874 * Either someone else peeled it off OR
1875 * we can't get a socket.
1876 */
1877 goto noconnection;
1878 }
1879 so->so_state &= ~SS_COMP;
1880 so->so_state &= ~SS_NOFDREF;
1881 so->so_head = NULL;
1882 if (head->so_sigio != NULL)
1883 fsetown(fgetown(head->so_sigio), &so->so_sigio);
1884
fbb4eeab 1885 nfp->f_type = DTYPE_SOCKET;
78812139
EN
1886 nfp->f_flag = fflag;
1887 nfp->f_ops = &socketops;
fbb4eeab 1888 nfp->f_data = so;
78812139
EN
1889
1890noconnection:
1891 /*
259b8ea0
MD
1892 * Assign the file pointer to the reserved descriptor, or clear
1893 * the reserved descriptor if an error occured.
78812139 1894 */
fa541be6 1895 if (error)
f3a2d8c4 1896 fsetfd(fdp, NULL, fd);
259b8ea0 1897 else
f3a2d8c4 1898 fsetfd(fdp, nfp, fd);
78812139
EN
1899 crit_exit();
1900 /*
1901 * Release explicitly held references before returning.
1902 */
1903done:
3919ced0 1904 rel_mplock();
78812139 1905 if (nfp != NULL)
9f87144f
MD
1906 fdrop(nfp);
1907 fdrop(lfp);
78812139
EN
1908 return (error);
1909#else /* SCTP */
1910 return(EOPNOTSUPP);
1911#endif /* SCTP */
1912}