Register keyword removal
[dragonfly.git] / sys / kern / uipc_socket.c
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.22 2002/12/15 09:24:23 maxim Exp $
 * $DragonFly: src/sys/kern/uipc_socket.c,v 1.7 2003/07/19 21:14:39 dillon Exp $
 */

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/file.h>			/* for struct knote */
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/jail.h>
#include <vm/vm_zone.h>

#include <machine/limits.h>

#ifdef INET
static int	do_setopt_accept_filter(struct socket *so, struct sockopt *sopt);
#endif /* INET */

static void	filt_sordetach(struct knote *kn);
static int	filt_soread(struct knote *kn, long hint);
static void	filt_sowdetach(struct knote *kn);
static int	filt_sowrite(struct knote *kn, long hint);
static int	filt_solisten(struct knote *kn, long hint);

static struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
static struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
static struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };

struct	vm_zone *socket_zone;
so_gen_t	so_gencnt;	/* generation count for sockets */

MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");

SYSCTL_DECL(_kern_ipc);

static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW,
    &somaxconn, 0, "Maximum pending socket connection queue size");

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
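
/*
 * Illustrative sketch (not compiled into the kernel): every soxxx()
 * entry point below follows the same dispatch pattern -- resolve the
 * protocol switch hung off the socket and call the matching pru_xxx
 * handler under splnet().  The pru_bind handler name is real; the
 * wrapper function itself is hypothetical.
 */
#if 0
static int
so_dispatch_example(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int s = splnet();	/* block network interrupts */
	int error;

	/* pru_connect, pru_listen, etc. are dispatched the same way */
	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td);
	splx(s);
	return (error);
}
#endif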

/*
 * Get a socket structure from our zone, and initialize it.
 * We don't implement `waitok' yet (see comments in uipc_domain.c).
 * Note that it would probably be better to allocate socket
 * and PCB at the same time, but I'm not convinced that all
 * the protocols can be easily modified to do this.
 */
struct socket *
soalloc(int waitok)
{
	struct socket *so;

	so = zalloc(socket_zone);
	if (so) {
		/* XXX race condition for reentrant kernel */
		bzero(so, sizeof *so);
		so->so_gencnt = ++so_gencnt;
		TAILQ_INIT(&so->so_aiojobq);
	}
	return so;
}

int
socreate(int dom, struct socket **aso, int type,
	int proto, struct thread *td)
{
	struct proc *p = td->td_proc;
	struct protosw *prp;
	struct socket *so;
	int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);

	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
		return (EPROTONOSUPPORT);

	if (p->p_ucred->cr_prison && jail_socket_unixiproute_only &&
	    prp->pr_domain->dom_family != PF_LOCAL &&
	    prp->pr_domain->dom_family != PF_INET &&
	    prp->pr_domain->dom_family != PF_ROUTE) {
		return (EPROTONOSUPPORT);
	}

	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(p != 0);
	if (so == 0)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	so->so_cred = crhold(p->p_ucred);
	so->so_proto = prp;
	error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		return (error);
	}
	*aso = so;
	return (0);
}
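
/*
 * Illustrative sketch (not compiled into the kernel): a typical
 * in-kernel caller of socreate(), along the lines of the socket(2)
 * system call path.  The wrapper function is hypothetical.
 */
#if 0
static int
socreate_usage_example(struct thread *td)
{
	struct socket *so;
	int error;

	/* PF_INET/SOCK_STREAM resolves the TCP protosw via pffindtype() */
	error = socreate(PF_INET, &so, SOCK_STREAM, 0, td);
	if (error)
		return (error);
	/* ... use the socket ... */
	return (soclose(so));
}
#endif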

int
sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int s = splnet();
	int error;

	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td);
	splx(s);
	return (error);
}

void
sodealloc(struct socket *so)
{
	so->so_gencnt = ++so_gencnt;
	if (so->so_rcv.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
	if (so->so_snd.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
#ifdef INET
	if (so->so_accf != NULL) {
		if (so->so_accf->so_accept_filter != NULL &&
		    so->so_accf->so_accept_filter->accf_destroy != NULL) {
			so->so_accf->so_accept_filter->accf_destroy(so);
		}
		if (so->so_accf->so_accept_filter_str != NULL)
			FREE(so->so_accf->so_accept_filter_str, M_ACCF);
		FREE(so->so_accf, M_ACCF);
	}
#endif /* INET */
	crfree(so->so_cred);
	zfree(socket_zone, so);
}

int
solisten(struct socket *so, int backlog, struct thread *td)
{
	int s, error;

	s = splnet();
	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, td);
	if (error) {
		splx(s);
		return (error);
	}
	if (TAILQ_EMPTY(&so->so_comp))
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}
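
/*
 * Note on the backlog clamp above: a negative or oversized backlog is
 * silently limited to the kern.ipc.somaxconn sysctl.  Hypothetical
 * userland illustration:
 *
 *	listen(s, -1);			-> so_qlimit = somaxconn
 *	listen(s, 10 * somaxconn);	-> so_qlimit = somaxconn
 */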

void
sofree(struct socket *so)
{
	struct socket *head = so->so_head;

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (head != NULL) {
		if (so->so_state & SS_INCOMP) {
			TAILQ_REMOVE(&head->so_incomp, so, so_list);
			head->so_incqlen--;
		} else if (so->so_state & SS_COMP) {
			/*
			 * We must not decommission a socket that's
			 * on the accept(2) queue.  If we do, then
			 * accept(2) may hang after select(2) indicated
			 * that the listening socket was ready.
			 */
			return;
		} else {
			panic("sofree: not queued");
		}
		so->so_state &= ~SS_INCOMP;
		so->so_head = NULL;
	}
	sbrelease(&so->so_snd, so);
	sorflush(so);
	sodealloc(so);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so)
{
	int s = splnet();		/* conservative */
	int error = 0;

	funsetown(so->so_sigio);
	if (so->so_options & SO_ACCEPTCONN) {
		struct socket *sp, *sonext;

		sp = TAILQ_FIRST(&so->so_incomp);
		for (; sp != NULL; sp = sonext) {
			sonext = TAILQ_NEXT(sp, so_list);
			(void) soabort(sp);
		}
		for (sp = TAILQ_FIRST(&so->so_comp); sp != NULL; sp = sonext) {
			sonext = TAILQ_NEXT(sp, so_list);
			/* Dequeue from so_comp since sofree() won't do it */
			TAILQ_REMOVE(&so->so_comp, sp, so_list);
			so->so_qlen--;
			sp->so_state &= ~SS_COMP;
			sp->so_head = NULL;
			(void) soabort(sp);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PCATCH, "soclos", so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splnet...
 */
int
soabort(struct socket *so)
{
	int error;

	error = (*so->so_proto->pr_usrreqs->pru_abort)(so);
	if (error) {
		sofree(so);
		return error;
	}
	return (0);
}

int
soaccept(struct socket *so, struct sockaddr **nam)
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
	splx(s);
	return (error);
}

int
soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td);
	splx(s);
	return (error);
}

int
soconnect2(struct socket *so1, struct socket *so2)
{
	int s = splnet();
	int error;

	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
	splx(s);
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int s = splnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
bad:
	splx(s);
	return (error);
}

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
	struct mbuf *top, struct mbuf *control, int flags,
	struct thread *td)
{
	struct mbuf **mp;
	struct mbuf *m;
	long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (td->td_proc && td->td_proc->p_stats)
		td->td_proc->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
	if (error)
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * `sendto' and `sendmsg' is allowed on a connection-
			 * based socket if it supports implied connect.
			 * Return ENOTCONN if not connected and no address is
			 * supplied.
			 */
			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
				    ENOTCONN : EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
		    if (uio == NULL) {
			/*
			 * Data is prepackaged in "top".
			 */
			resid = 0;
			if (flags & MSG_EOR)
				top->m_flags |= M_EOR;
		    } else do {
			if (top == 0) {
				MGETHDR(m, M_WAIT, MT_DATA);
				if (m == NULL) {
					error = ENOBUFS;
					goto release;
				}
				mlen = MHLEN;
				m->m_pkthdr.len = 0;
				m->m_pkthdr.rcvif = (struct ifnet *)0;
			} else {
				MGET(m, M_WAIT, MT_DATA);
				if (m == NULL) {
					error = ENOBUFS;
					goto release;
				}
				mlen = MLEN;
			}
			if (resid >= MINCLSIZE) {
				MCLGET(m, M_WAIT);
				if ((m->m_flags & M_EXT) == 0)
					goto nopages;
				mlen = MCLBYTES;
				len = min(min(mlen, resid), space);
			} else {
nopages:
				len = min(min(mlen, resid), space);
				/*
				 * For datagram protocols, leave room
				 * for protocol headers in first mbuf.
				 */
				if (atomic && top == 0 && len < mlen)
					MH_ALIGN(m, len);
			}
			space -= len;
			error = uiomove(mtod(m, caddr_t), (int)len, uio);
			resid = uio->uio_resid;
			m->m_len = len;
			*mp = m;
			top->m_pkthdr.len += len;
			if (error)
				goto release;
			mp = &m->m_next;
			if (resid <= 0) {
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
				break;
			}
		    } while (space > 0 && atomic);
		    if (dontroute)
			    so->so_options |= SO_DONTROUTE;
		    s = splnet();				/* XXX */
		    /*
		     * XXX all the SS_CANTSENDMORE checks previously
		     * done could be out of date.  We could have received
		     * a reset packet in an interrupt or maybe we slept
		     * while doing page faults in uiomove() etc.  We could
		     * probably recheck again inside the splnet() protection
		     * here, but there are probably other places that this
		     * also happens.  We must rethink this.
		     */
		    error = (*so->so_proto->pr_usrreqs->pru_send)(so,
			(flags & MSG_OOB) ? PRUS_OOB :
			/*
			 * If the user set MSG_EOF, the protocol
			 * understands this flag and nothing left to
			 * send then use PRU_SEND_EOF instead of PRU_SEND.
			 */
			((flags & MSG_EOF) &&
			 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			 (resid <= 0)) ?
				PRUS_EOF :
			/* If there is more to send set PRUS_MORETOCOME */
			(resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
			top, addr, control, td);
		    splx(s);
		    if (dontroute)
			    so->so_options &= ~SO_DONTROUTE;
		    clen = 0;
		    control = 0;
		    top = 0;
		    mp = &top;
		    if (error)
			    goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
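
/*
 * Illustrative sketch (not compiled into the kernel): the two ways to
 * hand data to sosend() per the comment above -- either a uio
 * describing caller buffers (top == NULL), or a prepackaged mbuf chain
 * (uio == NULL).  The wrapper function is hypothetical.
 */
#if 0
static int
sosend_usage_example(struct socket *so, struct uio *uio, struct mbuf *top,
    struct thread *td)
{
	int error;

	if (uio != NULL)
		/* copy from the uio; may block and send in pieces */
		error = sosend(so, NULL, uio, NULL, NULL, 0, td);
	else
		/* atomic mbuf-chain send; the chain is freed on return */
		error = sosend(so, NULL, NULL, top, NULL, 0, td);
	return (error);
}
#endif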

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
	struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	struct mbuf *m, **mp;
	int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		if (m == NULL)
			return (ENOBUFS);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
		KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1"));
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_td && uio->uio_td->td_proc)
		uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
		KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
		orig_resid = 0;
		if (psa)
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
			    mp0 == 0);
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			so->so_rcv.sb_mb = m_free(m);
			m = so->so_rcv.sb_mb;
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				so->so_rcv.sb_mb = m_free(m);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
		else
			KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
			    ("receive 3"));
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					so->so_rcv.sb_mb = m = m_free(m);
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			/*
			 * The window might have closed to zero, make
			 * sure we send an ack now that we've drained
			 * the buffer or we might end up blocking until
			 * the idle takes over (5 seconds).
			 */
			if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
				(*pr->pr_usrreqs->pru_rcvd)(so, flags);
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}
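
/*
 * Record layout reminder for soreceive() (see the comment above it):
 * each record in so_rcv is an mbuf chain linked through m_next, and
 * records are linked to each other through m_nextpkt:
 *
 *	sb_mb -> [MT_SONAME] -> [MT_CONTROL] -> [MT_DATA] -> ...  (m_next)
 *	             |
 *	         m_nextpkt
 *	             v
 *	         next record
 *
 * The MT_SONAME mbuf is present only for PR_ADDR protocols, and the
 * MT_CONTROL mbufs only when ancillary data was queued.
 */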

int
soshutdown(struct socket *so, int how)
{
	struct protosw *pr = so->so_proto;

	if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
		return (EINVAL);

	if (how != SHUT_WR)
		sorflush(so);
	if (how != SHUT_RD)
		return ((*pr->pr_usrreqs->pru_shutdown)(so));
	return (0);
}

void
sorflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	struct protosw *pr = so->so_proto;
	int s;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	if (asb.sb_flags & SB_KNOTE) {
		sb->sb_sel.si_note = asb.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	}
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb, so);
}

#ifdef INET
static int
do_setopt_accept_filter(struct socket *so, struct sockopt *sopt)
{
	struct accept_filter_arg *afap = NULL;
	struct accept_filter *afp;
	struct so_accf *af = so->so_accf;
	int error = 0;

	/* do not set/remove accept filters on non listen sockets */
	if ((so->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto out;
	}

	/* removing the filter */
	if (sopt == NULL) {
		if (af != NULL) {
			if (af->so_accept_filter != NULL &&
			    af->so_accept_filter->accf_destroy != NULL) {
				af->so_accept_filter->accf_destroy(so);
			}
			if (af->so_accept_filter_str != NULL) {
				FREE(af->so_accept_filter_str, M_ACCF);
			}
			FREE(af, M_ACCF);
			so->so_accf = NULL;
		}
		so->so_options &= ~SO_ACCEPTFILTER;
		return (0);
	}
	/* adding a filter */
	/* must remove previous filter first */
	if (af != NULL) {
		error = EINVAL;
		goto out;
	}
	/* don't put large objects on the kernel stack */
	MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), M_TEMP, M_WAITOK);
	error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap);
	if (error)
		goto out;
	afap->af_name[sizeof(afap->af_name)-1] = '\0';
	afap->af_arg[sizeof(afap->af_arg)-1] = '\0';
	afp = accept_filt_get(afap->af_name);
	if (afp == NULL) {
		error = ENOENT;
		goto out;
	}
	MALLOC(af, struct so_accf *, sizeof(*af), M_ACCF, M_WAITOK);
	bzero(af, sizeof(*af));
	if (afp->accf_create != NULL) {
		if (afap->af_name[0] != '\0') {
			int len = strlen(afap->af_name) + 1;

			MALLOC(af->so_accept_filter_str, char *, len, M_ACCF, M_WAITOK);
			strcpy(af->so_accept_filter_str, afap->af_name);
		}
		af->so_accept_filter_arg = afp->accf_create(so, afap->af_arg);
		if (af->so_accept_filter_arg == NULL) {
			FREE(af->so_accept_filter_str, M_ACCF);
			FREE(af, M_ACCF);
			so->so_accf = NULL;
			error = EINVAL;
			goto out;
		}
	}
	af->so_accept_filter = afp;
	so->so_accf = af;
	so->so_options |= SO_ACCEPTFILTER;
out:
	if (afap != NULL)
		FREE(afap, M_TEMP);
	return (error);
}
#endif /* INET */
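
/*
 * Hypothetical userland illustration of do_setopt_accept_filter(): the
 * filter is attached to a listening socket with setsockopt(2), e.g. to
 * defer accept(2) until a full HTTP request has arrived:
 *
 *	struct accept_filter_arg afa;
 *
 *	bzero(&afa, sizeof(afa));
 *	strcpy(afa.af_name, "httpready");
 *	listen(s, -1);
 *	setsockopt(s, SOL_SOCKET, SO_ACCEPTFILTER, &afa, sizeof(afa));
 *
 * Setting the option on a non-listening socket fails with EINVAL, as
 * enforced above.
 */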

/*
 * Perhaps this routine, and sooptcopyout(), below, ought to come in
 * an additional variant to handle the case where the option value needs
 * to be some kind of integer, but not a specific size.
 * In addition to their use here, these functions are also called by the
 * protocol-level pr_ctloutput() routines.
 */
int
sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
{
	size_t	valsize;

	/*
	 * If the user gives us more than we wanted, we ignore it,
	 * but if we don't get the minimum length the caller
	 * wants, we return EINVAL.  On success, sopt->sopt_valsize
	 * is set to however much we actually retrieved.
	 */
	if ((valsize = sopt->sopt_valsize) < minlen)
		return EINVAL;
	if (valsize > len)
		sopt->sopt_valsize = valsize = len;

	if (sopt->sopt_td != NULL)
		return (copyin(sopt->sopt_val, buf, valsize));

	bcopy(sopt->sopt_val, buf, valsize);
	return 0;
}
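
/*
 * Illustrative sketch (not compiled into the kernel): how a protocol
 * pr_ctloutput() handler typically uses sooptcopyin() to fetch an
 * integer option value.  The handler is hypothetical.
 */
#if 0
static int
example_ctloutput_set(struct sockopt *sopt)
{
	int optval, error;

	/* require at least sizeof(int); any excess is ignored */
	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
	if (error)
		return (error);
	/* ... apply optval ... */
	return (0);
}
#endif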

int
sosetopt(struct socket *so, struct sockopt *sopt)
{
	int	error, optval;
	struct	linger l;
	struct	timeval tv;
	u_long	val;

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
#ifdef INET
		case SO_ACCEPTFILTER:
			error = do_setopt_accept_filter(so, sopt);
			if (error)
				goto bad;
			break;
#endif /* INET */
		case SO_LINGER:
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			so->so_linger = l.l_linger;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv, (u_long)optval,
				    so, curproc) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			/* assert(hz > 0); */
			if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz ||
			    tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
				error = EDOM;
				goto bad;
			}
			/* assert(tick > 0); */
			/* assert(ULONG_MAX - SHRT_MAX >= 1000000); */
			val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
			if (val > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}
			if (val == 0 && tv.tv_usec != 0)
				val = 1;

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		}
	}
bad:
	return (error);
}
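
/*
 * Worked example of the SO_SNDTIMEO/SO_RCVTIMEO conversion above,
 * assuming hz = 100 and therefore tick = 10000 (1/hz in microseconds):
 *
 *	tv = { 2, 500000 }  ->  val = 2 * 100 + 500000 / 10000 = 250 ticks
 *	tv = { 0, 1 }       ->  val = 0, bumped to the 1-tick minimum
 *
 * Values that would not fit in the sb_timeo field (val > SHRT_MAX)
 * are rejected with EDOM.
 */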

/* Helper routine for getsockopt */
int
sooptcopyout(struct sockopt *sopt, void *buf, size_t len)
{
	int	error;
	size_t	valsize;

	error = 0;

	/*
	 * Documented get behavior is that we always return a value,
	 * possibly truncated to fit in the user's buffer.
	 * Traditional behavior is that we always tell the user
	 * precisely how much we copied, rather than something useful
	 * like the total amount we had available for her.
	 * Note that this interface is not idempotent; the entire answer must
	 * be generated ahead of time.
	 */
	valsize = min(len, sopt->sopt_valsize);
	sopt->sopt_valsize = valsize;
	if (sopt->sopt_val != 0) {
		if (sopt->sopt_td != NULL)
			error = copyout(buf, sopt->sopt_val, valsize);
		else
			bcopy(buf, sopt->sopt_val, valsize);
	}
	return error;
}
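
/*
 * Illustrative sketch (not compiled into the kernel): the getsockopt
 * side of the pr_ctloutput() pattern, mirroring the sooptcopyin()
 * example above.  The handler and value are hypothetical.
 */
#if 0
static int
example_ctloutput_get(struct sockopt *sopt)
{
	int optval = 42;	/* hypothetical current option value */

	/* copies min(len, sopt->sopt_valsize) bytes, truncating if needed */
	return (sooptcopyout(sopt, &optval, sizeof optval));
}
#endif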

int
sogetopt(struct socket *so, struct sockopt *sopt)
{
	int	error, optval;
	struct	linger l;
	struct	timeval tv;
	struct accept_filter_arg *afap;

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		} else
			return (ENOPROTOOPT);
	} else {
		switch (sopt->sopt_name) {
#ifdef INET
		case SO_ACCEPTFILTER:
			if ((so->so_options & SO_ACCEPTCONN) == 0)
				return (EINVAL);
			MALLOC(afap, struct accept_filter_arg *, sizeof(*afap),
			       M_TEMP, M_WAITOK);
			bzero(afap, sizeof(*afap));
			if ((so->so_options & SO_ACCEPTFILTER) != 0) {
				strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name);
				if (so->so_accf->so_accept_filter_str != NULL)
					strcpy(afap->af_arg, so->so_accf->so_accept_filter_str);
			}
			error = sooptcopyout(sopt, afap, sizeof(*afap));
			FREE(afap, M_TEMP);
			break;
#endif /* INET */

		case SO_LINGER:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = so->so_linger;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			optval = so->so_options & sopt->sopt_name;
integer:
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_ERROR:
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.sb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			optval = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			tv.tv_sec = optval / hz;
			tv.tv_usec = (optval % hz) * tick;
			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		return (error);
	}
}

/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
int
soopt_getm(struct sockopt *sopt, struct mbuf **mp)
{
	struct mbuf *m, *m_prev;
	int sopt_size = sopt->sopt_valsize;

	MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
	if (m == 0)
		return ENOBUFS;
	if (sopt_size > MLEN) {
		MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return ENOBUFS;
		}
		m->m_len = min(MCLBYTES, sopt_size);
	} else {
		m->m_len = min(MLEN, sopt_size);
	}
	sopt_size -= m->m_len;
	*mp = m;
	m_prev = m;

	while (sopt_size) {
		MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
		if (m == 0) {
			m_freem(*mp);
			return ENOBUFS;
		}
		if (sopt_size > MLEN) {
			MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_freem(*mp);
				return ENOBUFS;
			}
			m->m_len = min(MCLBYTES, sopt_size);
		} else {
			m->m_len = min(MLEN, sopt_size);
		}
		sopt_size -= m->m_len;
		m_prev->m_next = m;
		m_prev = m;
	}
	return 0;
}
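
/*
 * Sizing example for soopt_getm() above, assuming for illustration
 * MLEN = 96 and MCLBYTES = 2048 (both are configuration dependent): a
 * 5000 byte option value yields a chain of three cluster mbufs holding
 * 2048, 2048, and 904 bytes, since each iteration takes
 * min(MCLBYTES, remaining) once the remainder exceeds MLEN.
 */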

/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;

	if (sopt->sopt_val == NULL)
		return 0;
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_td != NULL) {
			int error;

			error = copyin(sopt->sopt_val, mtod(m, char *),
				       m->m_len);
			if (error != 0) {
				m_freem(m0);
				return(error);
			}
		} else
			bcopy(sopt->sopt_val, mtod(m, char *), m->m_len);
		sopt->sopt_valsize -= m->m_len;
		(caddr_t)sopt->sopt_val += m->m_len;
		m = m->m_next;
	}
	if (m != NULL) /* should have been allocated large enough in ip6_sooptmcopyin() */
		panic("ip6_sooptmcopyin");
	return 0;
}

/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
	struct mbuf *m0 = m;
	size_t valsize = 0;

	if (sopt->sopt_val == NULL)
		return 0;
	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
		if (sopt->sopt_td != NULL) {
			int error;

			error = copyout(mtod(m, char *), sopt->sopt_val,
				        m->m_len);
			if (error != 0) {
				m_freem(m0);
				return(error);
			}
		} else
			bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
		sopt->sopt_valsize -= m->m_len;
		(caddr_t)sopt->sopt_val += m->m_len;
		valsize += m->m_len;
		m = m->m_next;
	}
	if (m != NULL) {
		/* the caller should have supplied a large enough soopt buffer */
		m_freem(m0);
		return(EINVAL);
	}
	sopt->sopt_valsize = valsize;
	return 0;
}

void
sohasoutofband(struct socket *so)
{
	if (so->so_sigio != NULL)
		pgsigio(so->so_sigio, SIGURG, 0);
	selwakeup(&so->so_rcv.sb_sel);
}

int
sopoll(struct socket *so, int events, struct ucred *cred, struct thread *td)
{
	int revents = 0;
	int s = splnet();

	if (events & (POLLIN | POLLRDNORM))
		if (soreadable(so))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

	if (revents == 0) {
		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			selrecord(td, &so->so_rcv.sb_sel);
			so->so_rcv.sb_flags |= SB_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(td, &so->so_snd.sb_sel);
			so->so_snd.sb_flags |= SB_SEL;
		}
	}

	splx(s);
	return (revents);
}

int
sokqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	struct sockbuf *sb;
	int s;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (1);
	}

	s = splnet();
	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;
	splx(s);
	return (0);
}
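
/*
 * Hypothetical userland illustration of sokqfilter(): registering for
 * read events on a socket selects soread_filtops above (or
 * solisten_filtops on a listening socket):
 *
 *	struct kevent kev;
 *
 *	EV_SET(&kev, s, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *
 * The filter routines below then report so_qlen in kn_data for a
 * listening socket (connections ready to accept), or the bytes
 * available in so_rcv otherwise.
 */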

static void
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*ARGSUSED*/
static int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_rcv.sb_cc;
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_rcv.sb_lowat);
}

static void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
		so->so_snd.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*ARGSUSED*/
static int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}

/*ARGSUSED*/
static int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (! TAILQ_EMPTY(&so->so_comp));
}