AMD64 - Refactor uio_resid and size_t assumptions.
[dragonfly.git] / sys / kern / uipc_socket.c
CommitLineData
984263bc 1/*
6ea1e9b9 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved.
66d6c637
JH
3 * Copyright (c) 2004 The DragonFly Project. All rights reserved.
4 *
5 * This code is derived from software contributed to The DragonFly Project
6 * by Jeffrey M. Hsu.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of The DragonFly Project nor the names of its
17 * contributors may be used to endorse or promote products derived
18 * from this software without specific, prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34/*
984263bc
MD
35 * Copyright (c) 1982, 1986, 1988, 1990, 1993
36 * The Regents of the University of California. All rights reserved.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
7405c902 67 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.24 2003/11/11 17:18:18 silby Exp $
9116be8e 68 * $DragonFly: src/sys/kern/uipc_socket.c,v 1.55 2008/09/02 16:17:52 dillon Exp $
984263bc
MD
69 */
70
71#include "opt_inet.h"
78812139 72#include "opt_sctp.h"
984263bc
MD
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/fcntl.h>
77#include <sys/malloc.h>
78#include <sys/mbuf.h>
79#include <sys/domain.h>
80#include <sys/file.h> /* for struct knote */
81#include <sys/kernel.h>
82#include <sys/malloc.h>
83#include <sys/event.h>
84#include <sys/poll.h>
85#include <sys/proc.h>
86#include <sys/protosw.h>
87#include <sys/socket.h>
88#include <sys/socketvar.h>
6b6e0885 89#include <sys/socketops.h>
984263bc
MD
90#include <sys/resourcevar.h>
91#include <sys/signalvar.h>
92#include <sys/sysctl.h>
93#include <sys/uio.h>
94#include <sys/jail.h>
95#include <vm/vm_zone.h>
e71a125f 96#include <vm/pmap.h>
984263bc 97
e43a034f 98#include <sys/thread2.h>
d6cb521d 99#include <sys/socketvar2.h>
e43a034f 100
984263bc
MD
101#include <machine/limits.h>
102
#ifdef INET
/*
 * Accept-filter socket option handler, only built when INET is configured.
 * sodealloc() calls it with a NULL sopt to remove an installed filter.
 */
static int do_setopt_accept_filter(struct socket *so, struct sockopt *sopt);
#endif /* INET */

/* Knote filter callbacks referenced by the filterops tables below. */
static void filt_sordetach(struct knote *kn);
static int filt_soread(struct knote *kn, long hint);
static void filt_sowdetach(struct knote *kn);
static int filt_sowrite(struct knote *kn, long hint);
static int filt_solisten(struct knote *kn, long hint);

/*
 * Filter operation tables for listening, readable and writable sockets.
 * The listen and read tables share filt_sordetach; the write table uses
 * filt_sowdetach.  The initializers are positional, see the definition
 * of struct filterops for the meaning of each slot.
 */
static struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
static struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
static struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };

/*
 * Malloc types declared by this file.  M_SOCKET is the type used by
 * soalloc() and sodealloc() for the socket structure itself.
 */
MALLOC_DEFINE(M_SOCKET, "socket", "socket struct");
MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");


/*
 * Upper bound that solisten() applies to the requested backlog.
 * Exported read/write through the sysctl declared below.
 */
static int somaxconn = SOMAXCONN;
SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW,
    &somaxconn, 0, "Maximum pending socket connection queue size");
128
129/*
130 * Socket operation routines.
131 * These routines are called by the routines in
132 * sys_socket.c or from a system process, and
133 * implement the semantics of socket operations by
134 * switching out to the protocol specific routines.
135 */
136
137/*
69ea5b8d 138 * Get a socket structure, and initialize it.
984263bc
MD
139 * Note that it would probably be better to allocate socket
140 * and PCB at the same time, but I'm not convinced that all
141 * the protocols can be easily modified to do this.
142 */
143struct socket *
c972a82f 144soalloc(int waitok)
984263bc
MD
145{
146 struct socket *so;
69ea5b8d 147 unsigned waitmask;
984263bc 148
69ea5b8d
NT
149 waitmask = waitok ? M_WAITOK : M_NOWAIT;
150 so = kmalloc(sizeof(struct socket), M_SOCKET, M_ZERO|waitmask);
984263bc
MD
151 if (so) {
152 /* XXX race condition for reentrant kernel */
984263bc 153 TAILQ_INIT(&so->so_aiojobq);
6d49aa6f
MD
154 TAILQ_INIT(&so->so_rcv.ssb_sel.si_mlist);
155 TAILQ_INIT(&so->so_snd.ssb_sel.si_mlist);
984263bc
MD
156 }
157 return so;
158}
159
160int
dadab5e9
MD
161socreate(int dom, struct socket **aso, int type,
162 int proto, struct thread *td)
984263bc 163{
dadab5e9
MD
164 struct proc *p = td->td_proc;
165 struct protosw *prp;
166 struct socket *so;
e4700d00 167 struct pru_attach_info ai;
dadab5e9 168 int error;
984263bc
MD
169
170 if (proto)
171 prp = pffindproto(dom, proto, type);
172 else
173 prp = pffindtype(dom, type);
174
175 if (prp == 0 || prp->pr_usrreqs->pru_attach == 0)
176 return (EPROTONOSUPPORT);
177
41c20dac 178 if (p->p_ucred->cr_prison && jail_socket_unixiproute_only &&
984263bc
MD
179 prp->pr_domain->dom_family != PF_LOCAL &&
180 prp->pr_domain->dom_family != PF_INET &&
3e4150ef 181 prp->pr_domain->dom_family != PF_INET6 &&
984263bc
MD
182 prp->pr_domain->dom_family != PF_ROUTE) {
183 return (EPROTONOSUPPORT);
184 }
185
186 if (prp->pr_type != type)
187 return (EPROTOTYPE);
188 so = soalloc(p != 0);
189 if (so == 0)
190 return (ENOBUFS);
191
192 TAILQ_INIT(&so->so_incomp);
193 TAILQ_INIT(&so->so_comp);
194 so->so_type = type;
e9a372eb 195 so->so_cred = crhold(p->p_ucred);
984263bc 196 so->so_proto = prp;
e4700d00
JH
197 ai.sb_rlimit = &p->p_rlimit[RLIMIT_SBSIZE];
198 ai.p_ucred = p->p_ucred;
199 ai.fd_rdir = p->p_fd->fd_rdir;
5b0b9fa5
PA
200 /*
201 * Auto-sizing of socket buffers is managed by the protocols and
202 * the appropriate flags must be set in the pru_attach function.
203 */
e4700d00 204 error = so_pru_attach(so, proto, &ai);
984263bc
MD
205 if (error) {
206 so->so_state |= SS_NOFDREF;
207 sofree(so);
208 return (error);
209 }
210 *aso = so;
211 return (0);
212}
213
/*
 * Bind a socket to the address nam by calling the protocol's bind
 * routine inside a critical section.  Returns the protocol's result.
 */
int
sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int rc;

	crit_enter();
	rc = so_pru_bind(so, nam, td);
	crit_exit();

	return (rc);
}
224
225void
dadab5e9 226sodealloc(struct socket *so)
984263bc 227{
6d49aa6f 228 if (so->so_rcv.ssb_hiwat)
984263bc 229 (void)chgsbsize(so->so_cred->cr_uidinfo,
6d49aa6f
MD
230 &so->so_rcv.ssb_hiwat, 0, RLIM_INFINITY);
231 if (so->so_snd.ssb_hiwat)
984263bc 232 (void)chgsbsize(so->so_cred->cr_uidinfo,
6d49aa6f 233 &so->so_snd.ssb_hiwat, 0, RLIM_INFINITY);
984263bc 234#ifdef INET
81d59d3d
HP
235 /* remove accept filter if present */
236 if (so->so_accf != NULL)
237 do_setopt_accept_filter(so, NULL);
984263bc
MD
238#endif /* INET */
239 crfree(so->so_cred);
69ea5b8d 240 kfree(so, M_SOCKET);
984263bc
MD
241}
242
243int
dadab5e9 244solisten(struct socket *so, int backlog, struct thread *td)
984263bc 245{
e43a034f 246 int error;
78812139
EN
247#ifdef SCTP
248 short oldopt, oldqlimit;
249#endif /* SCTP */
984263bc 250
e43a034f 251 crit_enter();
78812139 252 if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) {
e43a034f 253 crit_exit();
78812139 254 return (EINVAL);
984263bc 255 }
78812139
EN
256
257#ifdef SCTP
258 oldopt = so->so_options;
259 oldqlimit = so->so_qlimit;
260#endif /* SCTP */
261
984263bc
MD
262 if (TAILQ_EMPTY(&so->so_comp))
263 so->so_options |= SO_ACCEPTCONN;
264 if (backlog < 0 || backlog > somaxconn)
265 backlog = somaxconn;
266 so->so_qlimit = backlog;
78812139
EN
267 /* SCTP needs to look at tweak both the inbound backlog parameter AND
268 * the so_options (UDP model both connect's and gets inbound
269 * connections .. implicitly).
270 */
271 error = so_pru_listen(so, td);
272 if (error) {
273#ifdef SCTP
274 /* Restore the params */
275 so->so_options = oldopt;
276 so->so_qlimit = oldqlimit;
277#endif /* SCTP */
278 crit_exit();
279 return (error);
280 }
e43a034f 281 crit_exit();
984263bc
MD
282 return (0);
283}
284
4402d8a2
MD
285/*
286 * Destroy a disconnected socket. This routine is a NOP if entities
287 * still have a reference on the socket:
288 *
289 * so_pcb - The protocol stack still has a reference
290 * SS_NOFDREF - There is no longer a file pointer reference
291 * SS_ABORTING - An abort netmsg is in-flight
292 */
984263bc 293void
dadab5e9 294sofree(struct socket *so)
984263bc
MD
295{
296 struct socket *head = so->so_head;
297
298 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
299 return;
4402d8a2
MD
300 if (so->so_state & SS_ABORTING)
301 return;
984263bc
MD
302 if (head != NULL) {
303 if (so->so_state & SS_INCOMP) {
304 TAILQ_REMOVE(&head->so_incomp, so, so_list);
305 head->so_incqlen--;
306 } else if (so->so_state & SS_COMP) {
307 /*
308 * We must not decommission a socket that's
309 * on the accept(2) queue. If we do, then
310 * accept(2) may hang after select(2) indicated
311 * that the listening socket was ready.
312 */
313 return;
314 } else {
315 panic("sofree: not queued");
316 }
317 so->so_state &= ~SS_INCOMP;
318 so->so_head = NULL;
319 }
6d49aa6f 320 ssb_release(&so->so_snd, so);
984263bc
MD
321 sorflush(so);
322 sodealloc(so);
323}
324
325/*
326 * Close a socket on last file table reference removal.
327 * Initiate disconnect if connected.
328 * Free socket when disconnect complete.
329 */
330int
9ba76b73 331soclose(struct socket *so, int fflag)
984263bc 332{
984263bc
MD
333 int error = 0;
334
e43a034f 335 crit_enter();
984263bc 336 funsetown(so->so_sigio);
19be7d32 337 if (so->so_pcb == NULL)
984263bc
MD
338 goto discard;
339 if (so->so_state & SS_ISCONNECTED) {
340 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
341 error = sodisconnect(so);
342 if (error)
343 goto drop;
344 }
345 if (so->so_options & SO_LINGER) {
346 if ((so->so_state & SS_ISDISCONNECTING) &&
9ba76b73 347 (fflag & FNONBLOCK))
984263bc
MD
348 goto drop;
349 while (so->so_state & SS_ISCONNECTED) {
350 error = tsleep((caddr_t)&so->so_timeo,
377d4740 351 PCATCH, "soclos", so->so_linger * hz);
984263bc
MD
352 if (error)
353 break;
354 }
355 }
356 }
357drop:
358 if (so->so_pcb) {
6b6e0885
JH
359 int error2;
360
361 error2 = so_pru_detach(so);
984263bc
MD
362 if (error == 0)
363 error = error2;
364 }
365discard:
19be7d32 366 if (so->so_options & SO_ACCEPTCONN) {
4402d8a2 367 struct socket *sp;
19be7d32 368
4402d8a2
MD
369 while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
370 TAILQ_REMOVE(&so->so_incomp, sp, so_list);
371 sp->so_state &= ~SS_INCOMP;
372 sp->so_head = NULL;
373 so->so_incqlen--;
9116be8e 374 soaborta(sp);
19be7d32 375 }
4402d8a2 376 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
19be7d32 377 TAILQ_REMOVE(&so->so_comp, sp, so_list);
19be7d32
MD
378 sp->so_state &= ~SS_COMP;
379 sp->so_head = NULL;
4402d8a2 380 so->so_qlen--;
9116be8e 381 soaborta(sp);
19be7d32
MD
382 }
383 }
984263bc
MD
384 if (so->so_state & SS_NOFDREF)
385 panic("soclose: NOFDREF");
386 so->so_state |= SS_NOFDREF;
387 sofree(so);
e43a034f 388 crit_exit();
984263bc
MD
389 return (error);
390}
391
392/*
9116be8e
MD
393 * Abort and destroy a socket. Only one abort can be in progress
394 * at any given moment.
984263bc 395 */
4402d8a2 396void
c972a82f 397soabort(struct socket *so)
984263bc 398{
9116be8e
MD
399 if ((so->so_state & SS_ABORTING) == 0) {
400 so->so_state |= SS_ABORTING;
401 so_pru_abort(so);
402 }
4402d8a2 403}
984263bc 404
4402d8a2
MD
405void
406soaborta(struct socket *so)
407{
9116be8e
MD
408 if ((so->so_state & SS_ABORTING) == 0) {
409 so->so_state |= SS_ABORTING;
410 so_pru_aborta(so);
411 }
984263bc
MD
412}
413
fd86a41c
SZ
414void
415soabort_oncpu(struct socket *so)
416{
417 if ((so->so_state & SS_ABORTING) == 0) {
418 so->so_state |= SS_ABORTING;
419 so_pru_abort_oncpu(so);
420 }
421}
422
984263bc 423int
dadab5e9 424soaccept(struct socket *so, struct sockaddr **nam)
984263bc 425{
984263bc
MD
426 int error;
427
e43a034f 428 crit_enter();
984263bc
MD
429 if ((so->so_state & SS_NOFDREF) == 0)
430 panic("soaccept: !NOFDREF");
431 so->so_state &= ~SS_NOFDREF;
6b6e0885 432 error = so_pru_accept(so, nam);
e43a034f 433 crit_exit();
984263bc
MD
434 return (error);
435}
436
437int
dadab5e9 438soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
984263bc 439{
984263bc
MD
440 int error;
441
442 if (so->so_options & SO_ACCEPTCONN)
443 return (EOPNOTSUPP);
e43a034f 444 crit_enter();
984263bc
MD
445 /*
446 * If protocol is connection-based, can only connect once.
447 * Otherwise, if connected, try to disconnect first.
448 * This allows user to disconnect by connecting to, e.g.,
449 * a null address.
450 */
451 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
452 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
59429d28 453 (error = sodisconnect(so)))) {
984263bc 454 error = EISCONN;
59429d28
MD
455 } else {
456 /*
457 * Prevent accumulated error from previous connection
458 * from biting us.
459 */
460 so->so_error = 0;
6b6e0885 461 error = so_pru_connect(so, nam, td);
59429d28 462 }
e43a034f 463 crit_exit();
984263bc
MD
464 return (error);
465}
466
/*
 * Connect two sockets to each other by calling the protocol's connect2
 * routine inside a critical section.  Returns the protocol's result.
 */
int
soconnect2(struct socket *so1, struct socket *so2)
{
	int rc;

	crit_enter();
	rc = so_pru_connect2(so1, so2);
	crit_exit();

	return (rc);
}
477
478int
dadab5e9 479sodisconnect(struct socket *so)
984263bc 480{
984263bc
MD
481 int error;
482
e43a034f 483 crit_enter();
984263bc
MD
484 if ((so->so_state & SS_ISCONNECTED) == 0) {
485 error = ENOTCONN;
486 goto bad;
487 }
488 if (so->so_state & SS_ISDISCONNECTING) {
489 error = EALREADY;
490 goto bad;
491 }
6b6e0885 492 error = so_pru_disconnect(so);
984263bc 493bad:
e43a034f 494 crit_exit();
984263bc
MD
495 return (error);
496}
497
498#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
499/*
500 * Send on a socket.
501 * If send must go all at once and message is larger than
502 * send buffering, then hard error.
503 * Lock against other senders.
504 * If must go all at once and not enough room now, then
505 * inform user that this would block and do nothing.
506 * Otherwise, if nonblocking, send as much as possible.
507 * The data to be sent is described by "uio" if nonzero,
508 * otherwise by the mbuf chain "top" (which must be null
509 * if uio is not). Data provided in mbuf chain must be small
510 * enough to send all at once.
511 *
512 * Returns nonzero on error, timeout or signal; callers
513 * must check for short counts if EINTR/ERESTART are returned.
514 * Data and control buffers are freed on return.
515 */
516int
dadab5e9
MD
517sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
518 struct mbuf *top, struct mbuf *control, int flags,
519 struct thread *td)
984263bc
MD
520{
521 struct mbuf **mp;
dadab5e9 522 struct mbuf *m;
e54488bb
MD
523 size_t resid;
524 int space, len;
e43a034f 525 int clen = 0, error, dontroute, mlen;
984263bc 526 int atomic = sosendallatonce(so) || top;
6b6e0885 527 int pru_flags;
984263bc
MD
528
529 if (uio)
530 resid = uio->uio_resid;
531 else
e54488bb 532 resid = (size_t)top->m_pkthdr.len;
984263bc 533 /*
e54488bb
MD
534 * WARNING! resid is unsigned, space and len are signed. space
535 * can wind up negative if the sockbuf is overcommitted.
984263bc
MD
536 *
537 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
538 * type sockets since that's an error.
539 */
e54488bb 540 if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
984263bc
MD
541 error = EINVAL;
542 goto out;
543 }
544
545 dontroute =
546 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
547 (so->so_proto->pr_flags & PR_ATOMIC);
fde7ac71
SS
548 if (td->td_lwp != NULL)
549 td->td_lwp->lwp_ru.ru_msgsnd++;
984263bc
MD
550 if (control)
551 clen = control->m_len;
71f385dc 552#define gotoerr(errcode) { error = errcode; crit_exit(); goto release; }
984263bc
MD
553
554restart:
6d49aa6f 555 error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags));
984263bc
MD
556 if (error)
557 goto out;
558 do {
e43a034f 559 crit_enter();
984263bc 560 if (so->so_state & SS_CANTSENDMORE)
6ea1e9b9 561 gotoerr(EPIPE);
984263bc
MD
562 if (so->so_error) {
563 error = so->so_error;
564 so->so_error = 0;
e43a034f 565 crit_exit();
984263bc
MD
566 goto release;
567 }
568 if ((so->so_state & SS_ISCONNECTED) == 0) {
569 /*
570 * `sendto' and `sendmsg' is allowed on a connection-
571 * based socket if it supports implied connect.
572 * Return ENOTCONN if not connected and no address is
573 * supplied.
574 */
575 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
576 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
577 if ((so->so_state & SS_ISCONFIRMING) == 0 &&
578 !(resid == 0 && clen != 0))
6ea1e9b9 579 gotoerr(ENOTCONN);
984263bc 580 } else if (addr == 0)
6ea1e9b9 581 gotoerr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
984263bc
MD
582 ENOTCONN : EDESTADDRREQ);
583 }
3a6117bb
MD
584 if ((atomic && resid > so->so_snd.ssb_hiwat) ||
585 clen > so->so_snd.ssb_hiwat) {
586 gotoerr(EMSGSIZE);
587 }
6d49aa6f 588 space = ssb_space(&so->so_snd);
984263bc
MD
589 if (flags & MSG_OOB)
590 space += 1024;
e54488bb 591 if ((space < 0 || (size_t)space < resid + clen) && uio &&
6d49aa6f 592 (atomic || space < so->so_snd.ssb_lowat || space < clen)) {
9ba76b73 593 if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT))
6ea1e9b9 594 gotoerr(EWOULDBLOCK);
6d49aa6f
MD
595 ssb_unlock(&so->so_snd);
596 error = ssb_wait(&so->so_snd);
e43a034f 597 crit_exit();
984263bc
MD
598 if (error)
599 goto out;
600 goto restart;
601 }
e43a034f 602 crit_exit();
984263bc
MD
603 mp = &top;
604 space -= clen;
605 do {
606 if (uio == NULL) {
607 /*
608 * Data is prepackaged in "top".
609 */
610 resid = 0;
611 if (flags & MSG_EOR)
612 top->m_flags |= M_EOR;
613 } else do {
e54488bb
MD
614 if (resid > INT_MAX)
615 resid = INT_MAX;
616 m = m_getl((int)resid, MB_WAIT, MT_DATA,
50503f0f
JH
617 top == NULL ? M_PKTHDR : 0, &mlen);
618 if (top == NULL) {
984263bc 619 m->m_pkthdr.len = 0;
60233e58 620 m->m_pkthdr.rcvif = NULL;
984263bc 621 }
e54488bb 622 len = imin((int)szmin(mlen, resid), space);
50503f0f 623 if (resid < MINCLSIZE) {
984263bc
MD
624 /*
625 * For datagram protocols, leave room
626 * for protocol headers in first mbuf.
627 */
628 if (atomic && top == 0 && len < mlen)
629 MH_ALIGN(m, len);
630 }
631 space -= len;
e54488bb 632 error = uiomove(mtod(m, caddr_t), (size_t)len, uio);
984263bc
MD
633 resid = uio->uio_resid;
634 m->m_len = len;
635 *mp = m;
636 top->m_pkthdr.len += len;
637 if (error)
638 goto release;
639 mp = &m->m_next;
e54488bb 640 if (resid == 0) {
984263bc
MD
641 if (flags & MSG_EOR)
642 top->m_flags |= M_EOR;
643 break;
644 }
645 } while (space > 0 && atomic);
646 if (dontroute)
647 so->so_options |= SO_DONTROUTE;
6b6e0885
JH
648 if (flags & MSG_OOB) {
649 pru_flags = PRUS_OOB;
650 } else if ((flags & MSG_EOF) &&
651 (so->so_proto->pr_flags & PR_IMPLOPCL) &&
e54488bb 652 (resid == 0)) {
6b6e0885
JH
653 /*
654 * If the user set MSG_EOF, the protocol
655 * understands this flag and nothing left to
656 * send then use PRU_SEND_EOF instead of PRU_SEND.
657 */
658 pru_flags = PRUS_EOF;
659 } else if (resid > 0 && space > 0) {
660 /* If there is more to send, set PRUS_MORETOCOME */
661 pru_flags = PRUS_MORETOCOME;
662 } else {
663 pru_flags = 0;
664 }
e43a034f 665 crit_enter();
984263bc
MD
666 /*
667 * XXX all the SS_CANTSENDMORE checks previously
668 * done could be out of date. We could have recieved
669 * a reset packet in an interrupt or maybe we slept
670 * while doing page faults in uiomove() etc. We could
671 * probably recheck again inside the splnet() protection
672 * here, but there are probably other places that this
673 * also happens. We must rethink this.
674 */
6b6e0885 675 error = so_pru_send(so, pru_flags, top, addr, control, td);
e43a034f 676 crit_exit();
984263bc
MD
677 if (dontroute)
678 so->so_options &= ~SO_DONTROUTE;
679 clen = 0;
680 control = 0;
681 top = 0;
682 mp = &top;
683 if (error)
6b6e0885 684 goto release;
984263bc
MD
685 } while (resid && space > 0);
686 } while (resid);
687
688release:
6d49aa6f 689 ssb_unlock(&so->so_snd);
984263bc
MD
690out:
691 if (top)
692 m_freem(top);
693 if (control)
694 m_freem(control);
695 return (error);
696}
697
698/*
6ea1e9b9
JH
699 * A specialization of sosend() for UDP based on protocol-specific knowledge:
700 * so->so_proto->pr_flags has the PR_ATOMIC field set. This means that
701 * sosendallatonce() returns true,
702 * the "atomic" variable is true,
703 * and sosendudp() blocks until space is available for the entire send.
704 * so->so_proto->pr_flags does not have the PR_CONNREQUIRED or
705 * PR_IMPLOPCL flags set.
706 * UDP has no out-of-band data.
707 * UDP has no control data.
708 * UDP does not support MSG_EOR.
709 */
710int
711sosendudp(struct socket *so, struct sockaddr *addr, struct uio *uio,
712 struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
713{
6ea1e9b9 714 boolean_t dontroute; /* temporary SO_DONTROUTE setting */
e54488bb
MD
715 size_t resid;
716 int error;
717 int space;
6ea1e9b9 718
fde7ac71
SS
719 if (td->td_lwp != NULL)
720 td->td_lwp->lwp_ru.ru_msgsnd++;
6ea1e9b9
JH
721 if (control)
722 m_freem(control);
723
724 KASSERT((uio && !top) || (top && !uio), ("bad arguments to sosendudp"));
e54488bb 725 resid = uio ? uio->uio_resid : (size_t)top->m_pkthdr.len;
6ea1e9b9
JH
726
727restart:
6d49aa6f 728 error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags));
6ea1e9b9
JH
729 if (error)
730 goto out;
731
e43a034f 732 crit_enter();
6ea1e9b9
JH
733 if (so->so_state & SS_CANTSENDMORE)
734 gotoerr(EPIPE);
735 if (so->so_error) {
736 error = so->so_error;
737 so->so_error = 0;
e43a034f 738 crit_exit();
6ea1e9b9
JH
739 goto release;
740 }
741 if (!(so->so_state & SS_ISCONNECTED) && addr == NULL)
742 gotoerr(EDESTADDRREQ);
6d49aa6f 743 if (resid > so->so_snd.ssb_hiwat)
6ea1e9b9 744 gotoerr(EMSGSIZE);
e54488bb
MD
745 space = ssb_space(&so->so_snd);
746 if (uio && (space < 0 || (size_t)space < resid)) {
9ba76b73 747 if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT))
6ea1e9b9 748 gotoerr(EWOULDBLOCK);
6d49aa6f
MD
749 ssb_unlock(&so->so_snd);
750 error = ssb_wait(&so->so_snd);
e43a034f 751 crit_exit();
6ea1e9b9
JH
752 if (error)
753 goto out;
754 goto restart;
755 }
e43a034f 756 crit_exit();
6ea1e9b9
JH
757
758 if (uio) {
e12241e1 759 top = m_uiomove(uio);
6ea1e9b9
JH
760 if (top == NULL)
761 goto release;
762 }
763
764 dontroute = (flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE);
765 if (dontroute)
766 so->so_options |= SO_DONTROUTE;
767
768 error = so_pru_send(so, 0, top, addr, NULL, td);
769 top = NULL; /* sent or freed in lower layer */
770
771 if (dontroute)
772 so->so_options &= ~SO_DONTROUTE;
773
774release:
6d49aa6f 775 ssb_unlock(&so->so_snd);
6ea1e9b9
JH
776out:
777 if (top)
778 m_freem(top);
779 return (error);
780}
781
782/*
984263bc 783 * Implement receive operations on a socket.
6d49aa6f 784 * We depend on the way that records are added to the signalsockbuf
984263bc
MD
785 * by sbappend*. In particular, each record (mbufs linked through m_next)
786 * must begin with an address if the protocol so specifies,
787 * followed by an optional mbuf or mbufs containing ancillary data,
788 * and then zero or more mbufs of data.
789 * In order to avoid blocking network interrupts for the entire time here,
e43a034f 790 * we exit the critical section while doing the actual copy to user space.
6d49aa6f
MD
791 * Although the signalsockbuf is locked, new data may still be appended,
792 * and thus we must maintain consistency of the signalsockbuf during that time.
984263bc
MD
793 *
794 * The caller may receive the data as a single mbuf chain by supplying
795 * an mbuf **mp0 for use in returning the chain. The uio is then used
796 * only for the count in uio_resid.
797 */
798int
c972a82f 799soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
6d49aa6f 800 struct sockbuf *sio, struct mbuf **controlp, int *flagsp)
984263bc 801{
d8a9a23b 802 struct mbuf *m, *n;
857caa4a 803 struct mbuf *free_chain = NULL;
e43a034f 804 int flags, len, error, offset;
984263bc 805 struct protosw *pr = so->so_proto;
984263bc 806 int moff, type = 0;
e54488bb 807 size_t resid, orig_resid;
d8a9a23b
MD
808
809 if (uio)
810 resid = uio->uio_resid;
811 else
e54488bb 812 resid = (size_t)(sio->sb_climit - sio->sb_cc);
d8a9a23b 813 orig_resid = resid;
984263bc 814
984263bc 815 if (psa)
857caa4a 816 *psa = NULL;
984263bc 817 if (controlp)
857caa4a 818 *controlp = NULL;
984263bc
MD
819 if (flagsp)
820 flags = *flagsp &~ MSG_EOR;
821 else
822 flags = 0;
823 if (flags & MSG_OOB) {
74f1caca 824 m = m_get(MB_WAIT, MT_DATA);
984263bc
MD
825 if (m == NULL)
826 return (ENOBUFS);
6b6e0885 827 error = so_pru_rcvoob(so, m, flags & MSG_PEEK);
984263bc
MD
828 if (error)
829 goto bad;
d8a9a23b
MD
830 if (sio) {
831 do {
6d49aa6f 832 sbappend(sio, m);
e54488bb
MD
833 KKASSERT(resid >= (size_t)m->m_len);
834 resid -= (size_t)m->m_len;
d8a9a23b
MD
835 } while (resid > 0 && m);
836 } else {
837 do {
838 uio->uio_resid = resid;
839 error = uiomove(mtod(m, caddr_t),
e54488bb
MD
840 (int)szmin(resid, m->m_len),
841 uio);
d8a9a23b
MD
842 resid = uio->uio_resid;
843 m = m_free(m);
844 } while (uio->uio_resid && error == 0 && m);
845 }
984263bc
MD
846bad:
847 if (m)
848 m_freem(m);
849 return (error);
850 }
e54488bb 851 if ((so->so_state & SS_ISCONFIRMING) && resid)
6b6e0885 852 so_pru_rcvd(so, 0);
984263bc
MD
853
854restart:
857caa4a 855 crit_enter();
6d49aa6f 856 error = ssb_lock(&so->so_rcv, SBLOCKWAIT(flags));
984263bc 857 if (error)
857caa4a 858 goto done;
984263bc 859
6d49aa6f 860 m = so->so_rcv.ssb_mb;
984263bc
MD
861 /*
862 * If we have less data than requested, block awaiting more
863 * (subject to any timeout) if:
864 * 1. the current count is less than the low water mark, or
865 * 2. MSG_WAITALL is set, and it is possible to do the entire
866 * receive operation at once if we block (resid <= hiwat).
867 * 3. MSG_DONTWAIT is not set
868 * If MSG_WAITALL is set but resid is larger than the receive buffer,
869 * we have to do the receive in sections, and thus risk returning
870 * a short count if a timeout or signal occurs after we start.
871 */
857caa4a 872 if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
e54488bb 873 (size_t)so->so_rcv.ssb_cc < resid) &&
6d49aa6f 874 (so->so_rcv.ssb_cc < so->so_rcv.ssb_lowat ||
e54488bb 875 ((flags & MSG_WAITALL) && resid <= (size_t)so->so_rcv.ssb_hiwat)) &&
984263bc 876 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
6d49aa6f 877 KASSERT(m != NULL || !so->so_rcv.ssb_cc, ("receive 1"));
984263bc
MD
878 if (so->so_error) {
879 if (m)
880 goto dontblock;
881 error = so->so_error;
882 if ((flags & MSG_PEEK) == 0)
883 so->so_error = 0;
884 goto release;
885 }
886 if (so->so_state & SS_CANTRCVMORE) {
887 if (m)
888 goto dontblock;
889 else
890 goto release;
891 }
857caa4a 892 for (; m; m = m->m_next) {
984263bc 893 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
6d49aa6f 894 m = so->so_rcv.ssb_mb;
984263bc
MD
895 goto dontblock;
896 }
857caa4a 897 }
984263bc 898 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
6b6e0885 899 (pr->pr_flags & PR_CONNREQUIRED)) {
984263bc
MD
900 error = ENOTCONN;
901 goto release;
902 }
d8a9a23b 903 if (resid == 0)
984263bc 904 goto release;
9ba76b73 905 if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) {
984263bc
MD
906 error = EWOULDBLOCK;
907 goto release;
908 }
6d49aa6f
MD
909 ssb_unlock(&so->so_rcv);
910 error = ssb_wait(&so->so_rcv);
984263bc 911 if (error)
857caa4a
MD
912 goto done;
913 crit_exit();
984263bc
MD
914 goto restart;
915 }
916dontblock:
d8a9a23b 917 if (uio && uio->uio_td && uio->uio_td->td_proc)
fde7ac71 918 uio->uio_td->td_lwp->lwp_ru.ru_msgrcv++;
857caa4a
MD
919
920 /*
921 * note: m should be == sb_mb here. Cache the next record while
922 * cleaning up. Note that calling m_free*() will break out critical
923 * section.
924 */
6d49aa6f 925 KKASSERT(m == so->so_rcv.ssb_mb);
857caa4a
MD
926
927 /*
928 * Skip any address mbufs prepending the record.
929 */
984263bc
MD
930 if (pr->pr_flags & PR_ADDR) {
931 KASSERT(m->m_type == MT_SONAME, ("receive 1a"));
932 orig_resid = 0;
933 if (psa)
cfa2ba21 934 *psa = dup_sockaddr(mtod(m, struct sockaddr *));
857caa4a 935 if (flags & MSG_PEEK)
984263bc 936 m = m->m_next;
857caa4a 937 else
6d49aa6f 938 m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain);
984263bc 939 }
857caa4a
MD
940
941 /*
942 * Skip any control mbufs prepending the record.
943 */
78812139
EN
944#ifdef SCTP
945 if (pr->pr_flags & PR_ADDR_OPT) {
946 /*
947 * For SCTP we may be getting a
948 * whole message OR a partial delivery.
949 */
857caa4a 950 if (m && m->m_type == MT_SONAME) {
78812139
EN
951 orig_resid = 0;
952 if (psa)
953 *psa = dup_sockaddr(mtod(m, struct sockaddr *));
857caa4a 954 if (flags & MSG_PEEK)
78812139 955 m = m->m_next;
857caa4a 956 else
6d49aa6f 957 m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain);
78812139
EN
958 }
959 }
960#endif /* SCTP */
984263bc
MD
961 while (m && m->m_type == MT_CONTROL && error == 0) {
962 if (flags & MSG_PEEK) {
963 if (controlp)
964 *controlp = m_copy(m, 0, m->m_len);
857caa4a 965 m = m->m_next; /* XXX race */
984263bc 966 } else {
984263bc 967 if (controlp) {
6d49aa6f 968 n = sbunlinkmbuf(&so->so_rcv.sb, m, NULL);
984263bc
MD
969 if (pr->pr_domain->dom_externalize &&
970 mtod(m, struct cmsghdr *)->cmsg_type ==
971 SCM_RIGHTS)
972 error = (*pr->pr_domain->dom_externalize)(m);
973 *controlp = m;
857caa4a 974 m = n;
984263bc 975 } else {
6d49aa6f 976 m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain);
984263bc
MD
977 }
978 }
857caa4a 979 if (controlp && *controlp) {
984263bc
MD
980 orig_resid = 0;
981 controlp = &(*controlp)->m_next;
982 }
983 }
857caa4a
MD
984
985 /*
986 * flag OOB data.
987 */
984263bc 988 if (m) {
984263bc
MD
989 type = m->m_type;
990 if (type == MT_OOBDATA)
991 flags |= MSG_OOB;
992 }
857caa4a
MD
993
994 /*
995 * Copy to the UIO or mbuf return chain (*mp).
996 */
984263bc
MD
997 moff = 0;
998 offset = 0;
d8a9a23b 999 while (m && resid > 0 && error == 0) {
984263bc
MD
1000 if (m->m_type == MT_OOBDATA) {
1001 if (type != MT_OOBDATA)
1002 break;
1003 } else if (type == MT_OOBDATA)
1004 break;
1005 else
1006 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER,
1007 ("receive 3"));
1008 so->so_state &= ~SS_RCVATMARK;
e54488bb 1009 len = (resid > INT_MAX) ? INT_MAX : resid;
984263bc
MD
1010 if (so->so_oobmark && len > so->so_oobmark - offset)
1011 len = so->so_oobmark - offset;
1012 if (len > m->m_len - moff)
1013 len = m->m_len - moff;
d8a9a23b 1014
984263bc 1015 /*
d8a9a23b
MD
1016 * Copy out to the UIO or pass the mbufs back to the SIO.
1017 * The SIO is dealt with when we eat the mbuf, but deal
1018 * with the resid here either way.
984263bc 1019 */
d8a9a23b 1020 if (uio) {
e43a034f 1021 crit_exit();
d8a9a23b
MD
1022 uio->uio_resid = resid;
1023 error = uiomove(mtod(m, caddr_t) + moff, len, uio);
1024 resid = uio->uio_resid;
e43a034f 1025 crit_enter();
984263bc
MD
1026 if (error)
1027 goto release;
857caa4a 1028 } else {
e54488bb 1029 resid -= (size_t)len;
857caa4a
MD
1030 }
1031
1032 /*
1033 * Eat the entire mbuf or just a piece of it
1034 */
984263bc
MD
1035 if (len == m->m_len - moff) {
1036 if (m->m_flags & M_EOR)
1037 flags |= MSG_EOR;
78812139
EN
1038#ifdef SCTP
1039 if (m->m_flags & M_NOTIFICATION)
1040 flags |= MSG_NOTIFICATION;
1041#endif /* SCTP */
984263bc
MD
1042 if (flags & MSG_PEEK) {
1043 m = m->m_next;
1044 moff = 0;
1045 } else {
d8a9a23b 1046 if (sio) {
6d49aa6f
MD
1047 n = sbunlinkmbuf(&so->so_rcv.sb, m, NULL);
1048 sbappend(sio, m);
857caa4a 1049 m = n;
984263bc 1050 } else {
6d49aa6f 1051 m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain);
984263bc 1052 }
984263bc
MD
1053 }
1054 } else {
857caa4a 1055 if (flags & MSG_PEEK) {
984263bc 1056 moff += len;
857caa4a 1057 } else {
d8a9a23b 1058 if (sio) {
6d49aa6f
MD
1059 n = m_copym(m, 0, len, MB_WAIT);
1060 if (n)
1061 sbappend(sio, n);
d8a9a23b 1062 }
984263bc
MD
1063 m->m_data += len;
1064 m->m_len -= len;
6d49aa6f 1065 so->so_rcv.ssb_cc -= len;
984263bc
MD
1066 }
1067 }
1068 if (so->so_oobmark) {
1069 if ((flags & MSG_PEEK) == 0) {
1070 so->so_oobmark -= len;
1071 if (so->so_oobmark == 0) {
1072 so->so_state |= SS_RCVATMARK;
1073 break;
1074 }
1075 } else {
1076 offset += len;
1077 if (offset == so->so_oobmark)
1078 break;
1079 }
1080 }
1081 if (flags & MSG_EOR)
1082 break;
1083 /*
1084 * If the MSG_WAITALL flag is set (for non-atomic socket),
d8a9a23b 1085 * we must not quit until resid == 0 or an error
984263bc
MD
1086 * termination. If a signal/timeout occurs, return
1087 * with a short count but without error.
6d49aa6f 1088 * Keep signalsockbuf locked against other readers.
984263bc 1089 */
d8a9a23b
MD
1090 while ((flags & MSG_WAITALL) && m == NULL &&
1091 resid > 0 && !sosendallatonce(so) &&
6d49aa6f 1092 so->so_rcv.ssb_mb == NULL) {
984263bc
MD
1093 if (so->so_error || so->so_state & SS_CANTRCVMORE)
1094 break;
1095 /*
1096 * The window might have closed to zero, make
1097 * sure we send an ack now that we've drained
1098 * the buffer or we might end up blocking until
1099 * the idle takes over (5 seconds).
1100 */
1101 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
6b6e0885 1102 so_pru_rcvd(so, flags);
6d49aa6f 1103 error = ssb_wait(&so->so_rcv);
984263bc 1104 if (error) {
6d49aa6f 1105 ssb_unlock(&so->so_rcv);
857caa4a
MD
1106 error = 0;
1107 goto done;
984263bc 1108 }
6d49aa6f 1109 m = so->so_rcv.ssb_mb;
984263bc
MD
1110 }
1111 }
1112
857caa4a
MD
1113 /*
1114 * If an atomic read was requested but unread data still remains
1115 * in the record, set MSG_TRUNC.
1116 */
bf8a9a6f 1117 if (m && pr->pr_flags & PR_ATOMIC)
984263bc 1118 flags |= MSG_TRUNC;
857caa4a
MD
1119
1120 /*
1121 * Cleanup. If an atomic read was requested drop any unread data.
1122 */
1123 if ((flags & MSG_PEEK) == 0) {
1124 if (m && (pr->pr_flags & PR_ATOMIC))
6d49aa6f 1125 sbdroprecord(&so->so_rcv.sb);
857caa4a 1126 if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb)
6b6e0885 1127 so_pru_rcvd(so, flags);
984263bc 1128 }
bf8a9a6f 1129
d8a9a23b 1130 if (orig_resid == resid && orig_resid &&
984263bc 1131 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
6d49aa6f 1132 ssb_unlock(&so->so_rcv);
e43a034f 1133 crit_exit();
984263bc
MD
1134 goto restart;
1135 }
1136
1137 if (flagsp)
1138 *flagsp |= flags;
1139release:
6d49aa6f 1140 ssb_unlock(&so->so_rcv);
857caa4a 1141done:
e43a034f 1142 crit_exit();
857caa4a
MD
1143 if (free_chain)
1144 m_freem(free_chain);
984263bc
MD
1145 return (error);
1146}
1147
1148int
c972a82f 1149soshutdown(struct socket *so, int how)
984263bc 1150{
984263bc
MD
1151 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
1152 return (EINVAL);
1153
1154 if (how != SHUT_WR)
1155 sorflush(so);
1156 if (how != SHUT_RD)
6b6e0885 1157 return (so_pru_shutdown(so));
984263bc
MD
1158 return (0);
1159}
1160
1161void
c972a82f 1162sorflush(struct socket *so)
984263bc 1163{
6d49aa6f 1164 struct signalsockbuf *ssb = &so->so_rcv;
1fd87d54 1165 struct protosw *pr = so->so_proto;
6d49aa6f 1166 struct signalsockbuf asb;
984263bc 1167
6d49aa6f
MD
1168 ssb->ssb_flags |= SSB_NOINTR;
1169 (void) ssb_lock(ssb, M_WAITOK);
e43a034f
MD
1170
1171 crit_enter();
984263bc 1172 socantrcvmore(so);
6d49aa6f
MD
1173 ssb_unlock(ssb);
1174 asb = *ssb;
1175 bzero((caddr_t)ssb, sizeof (*ssb));
1176 if (asb.ssb_flags & SSB_KNOTE) {
1177 ssb->ssb_sel.si_note = asb.ssb_sel.si_note;
1178 ssb->ssb_flags = SSB_KNOTE;
984263bc 1179 }
e43a034f
MD
1180 crit_exit();
1181
984263bc 1182 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
6d49aa6f
MD
1183 (*pr->pr_domain->dom_dispose)(asb.ssb_mb);
1184 ssb_release(&asb, so);
984263bc
MD
1185}
1186
1187#ifdef INET
1188static int
c972a82f 1189do_setopt_accept_filter(struct socket *so, struct sockopt *sopt)
984263bc
MD
1190{
1191 struct accept_filter_arg *afap = NULL;
1192 struct accept_filter *afp;
1193 struct so_accf *af = so->so_accf;
1194 int error = 0;
1195
1196 /* do not set/remove accept filters on non listen sockets */
1197 if ((so->so_options & SO_ACCEPTCONN) == 0) {
1198 error = EINVAL;
1199 goto out;
1200 }
1201
1202 /* removing the filter */
1203 if (sopt == NULL) {
1204 if (af != NULL) {
1205 if (af->so_accept_filter != NULL &&
1206 af->so_accept_filter->accf_destroy != NULL) {
1207 af->so_accept_filter->accf_destroy(so);
1208 }
1209 if (af->so_accept_filter_str != NULL) {
1210 FREE(af->so_accept_filter_str, M_ACCF);
1211 }
1212 FREE(af, M_ACCF);
1213 so->so_accf = NULL;
1214 }
1215 so->so_options &= ~SO_ACCEPTFILTER;
1216 return (0);
1217 }
1218 /* adding a filter */
1219 /* must remove previous filter first */
1220 if (af != NULL) {
1221 error = EINVAL;
1222 goto out;
1223 }
1224 /* don't put large objects on the kernel stack */
1225 MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), M_TEMP, M_WAITOK);
1226 error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap);
1227 afap->af_name[sizeof(afap->af_name)-1] = '\0';
1228 afap->af_arg[sizeof(afap->af_arg)-1] = '\0';
1229 if (error)
1230 goto out;
1231 afp = accept_filt_get(afap->af_name);
1232 if (afp == NULL) {
1233 error = ENOENT;
1234 goto out;
1235 }
e7b4468c 1236 MALLOC(af, struct so_accf *, sizeof(*af), M_ACCF, M_WAITOK | M_ZERO);
984263bc
MD
1237 if (afp->accf_create != NULL) {
1238 if (afap->af_name[0] != '\0') {
1239 int len = strlen(afap->af_name) + 1;
1240
1241 MALLOC(af->so_accept_filter_str, char *, len, M_ACCF, M_WAITOK);
1242 strcpy(af->so_accept_filter_str, afap->af_name);
1243 }
1244 af->so_accept_filter_arg = afp->accf_create(so, afap->af_arg);
1245 if (af->so_accept_filter_arg == NULL) {
1246 FREE(af->so_accept_filter_str, M_ACCF);
1247 FREE(af, M_ACCF);
1248 so->so_accf = NULL;
1249 error = EINVAL;
1250 goto out;
1251 }
1252 }
1253 af->so_accept_filter = afp;
1254 so->so_accf = af;
1255 so->so_options |= SO_ACCEPTFILTER;
1256out:
1257 if (afap != NULL)
1258 FREE(afap, M_TEMP);
1259 return (error);
1260}
1261#endif /* INET */
1262
/*
 * Copy a socket option value into a kernel buffer.
 *
 * Perhaps this routine, and sooptcopyout() below, ought to come in an
 * additional variant to handle the case where the option value needs to
 * be some kind of integer, but not a specific size.  In addition to
 * their use here, these functions are also called by the protocol-level
 * pr_ctloutput() routines.
 *
 * This is a thin wrapper that forwards all arguments to soopt_to_kbuf()
 * and returns its result.
 */
int
sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
{
	int error;

	error = soopt_to_kbuf(sopt, buf, len, minlen);
	return (error);
}
1275
1276int
1277soopt_to_kbuf(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
1278{
984263bc
MD
1279 size_t valsize;
1280
792239df 1281 KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
de0003fe
AE
1282 KKASSERT(kva_p(buf));
1283
984263bc
MD
1284 /*
1285 * If the user gives us more than we wanted, we ignore it,
1286 * but if we don't get the minimum length the caller
1287 * wants, we return EINVAL. On success, sopt->sopt_valsize
1288 * is set to however much we actually retrieved.
1289 */
1290 if ((valsize = sopt->sopt_valsize) < minlen)
1291 return EINVAL;
1292 if (valsize > len)
1293 sopt->sopt_valsize = valsize = len;
1294
984263bc
MD
1295 bcopy(sopt->sopt_val, buf, valsize);
1296 return 0;
1297}
1298
e71a125f
AE
1299
1300int
c972a82f 1301sosetopt(struct socket *so, struct sockopt *sopt)
984263bc
MD
1302{
1303 int error, optval;
1304 struct linger l;
1305 struct timeval tv;
1306 u_long val;
1307
1308 error = 0;
e79d388f 1309 sopt->sopt_dir = SOPT_SET;
984263bc 1310 if (sopt->sopt_level != SOL_SOCKET) {
6b6e0885 1311 if (so->so_proto && so->so_proto->pr_ctloutput) {
e71a125f 1312 return (so_pru_ctloutput(so, sopt));
6b6e0885 1313 }
984263bc
MD
1314 error = ENOPROTOOPT;
1315 } else {
1316 switch (sopt->sopt_name) {
1317#ifdef INET
1318 case SO_ACCEPTFILTER:
1319 error = do_setopt_accept_filter(so, sopt);
1320 if (error)
1321 goto bad;
1322 break;
1323#endif /* INET */
1324 case SO_LINGER:
1325 error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
1326 if (error)
1327 goto bad;
1328
1329 so->so_linger = l.l_linger;
1330 if (l.l_onoff)
1331 so->so_options |= SO_LINGER;
1332 else
1333 so->so_options &= ~SO_LINGER;
1334 break;
1335
1336 case SO_DEBUG:
1337 case SO_KEEPALIVE:
1338 case SO_DONTROUTE:
1339 case SO_USELOOPBACK:
1340 case SO_BROADCAST:
1341 case SO_REUSEADDR:
1342 case SO_REUSEPORT:
1343 case SO_OOBINLINE:
1344 case SO_TIMESTAMP:
1345 error = sooptcopyin(sopt, &optval, sizeof optval,
1346 sizeof optval);
1347 if (error)
1348 goto bad;
1349 if (optval)
1350 so->so_options |= sopt->sopt_name;
1351 else
1352 so->so_options &= ~sopt->sopt_name;
1353 break;
1354
1355 case SO_SNDBUF:
1356 case SO_RCVBUF:
1357 case SO_SNDLOWAT:
1358 case SO_RCVLOWAT:
1359 error = sooptcopyin(sopt, &optval, sizeof optval,
1360 sizeof optval);
1361 if (error)
1362 goto bad;
1363
1364 /*
1365 * Values < 1 make no sense for any of these
1366 * options, so disallow them.
1367 */
1368 if (optval < 1) {
1369 error = EINVAL;
1370 goto bad;
1371 }
1372
1373 switch (sopt->sopt_name) {
1374 case SO_SNDBUF:
1375 case SO_RCVBUF:
6d49aa6f 1376 if (ssb_reserve(sopt->sopt_name == SO_SNDBUF ?
984263bc 1377 &so->so_snd : &so->so_rcv, (u_long)optval,
e4700d00
JH
1378 so,
1379 &curproc->p_rlimit[RLIMIT_SBSIZE]) == 0) {
984263bc
MD
1380 error = ENOBUFS;
1381 goto bad;
1382 }
5b0b9fa5
PA
1383 (sopt->sopt_name == SO_SNDBUF ? &so->so_snd :
1384 &so->so_rcv)->ssb_flags &= ~SSB_AUTOSIZE;
984263bc
MD
1385 break;
1386
1387 /*
1388 * Make sure the low-water is never greater than
1389 * the high-water.
1390 */
1391 case SO_SNDLOWAT:
6d49aa6f
MD
1392 so->so_snd.ssb_lowat =
1393 (optval > so->so_snd.ssb_hiwat) ?
1394 so->so_snd.ssb_hiwat : optval;
984263bc
MD
1395 break;
1396 case SO_RCVLOWAT:
6d49aa6f
MD
1397 so->so_rcv.ssb_lowat =
1398 (optval > so->so_rcv.ssb_hiwat) ?
1399 so->so_rcv.ssb_hiwat : optval;
984263bc
MD
1400 break;
1401 }
1402 break;
1403
1404 case SO_SNDTIMEO:
1405 case SO_RCVTIMEO:
1406 error = sooptcopyin(sopt, &tv, sizeof tv,
1407 sizeof tv);
1408 if (error)
1409 goto bad;
1410
1411 /* assert(hz > 0); */
1412 if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz ||
1413 tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
1414 error = EDOM;
1415 goto bad;
1416 }
1417 /* assert(tick > 0); */
1418 /* assert(ULONG_MAX - SHRT_MAX >= 1000000); */
1419 val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick;
1420 if (val > SHRT_MAX) {
1421 error = EDOM;
1422 goto bad;
1423 }
1424 if (val == 0 && tv.tv_usec != 0)
1425 val = 1;
1426
1427 switch (sopt->sopt_name) {
1428 case SO_SNDTIMEO:
6d49aa6f 1429 so->so_snd.ssb_timeo = val;
984263bc
MD
1430 break;
1431 case SO_RCVTIMEO:
6d49aa6f 1432 so->so_rcv.ssb_timeo = val;
984263bc
MD
1433 break;
1434 }
1435 break;
1436 default:
1437 error = ENOPROTOOPT;
1438 break;
1439 }
1440 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
e71a125f 1441 (void) so_pru_ctloutput(so, sopt);
984263bc
MD
1442 }
1443 }
1444bad:
1445 return (error);
1446}
1447
/*
 * Helper routine for getsockopt.
 *
 * Hands 'len' bytes at 'buf' to soopt_from_kbuf() for copying into the
 * option descriptor.  Always returns 0.
 */
int
sooptcopyout(struct sockopt *sopt, const void *buf, size_t len)
{
	soopt_from_kbuf(sopt, buf, len);

	return (0);
}
1455
1456void
1457soopt_from_kbuf(struct sockopt *sopt, const void *buf, size_t len)
1458{
984263bc
MD
1459 size_t valsize;
1460
792239df 1461 KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
de0003fe 1462 KKASSERT(kva_p(buf));
984263bc
MD
1463
1464 /*
1465 * Documented get behavior is that we always return a value,
1466 * possibly truncated to fit in the user's buffer.
1467 * Traditional behavior is that we always tell the user
1468 * precisely how much we copied, rather than something useful
1469 * like the total amount we had available for her.
1470 * Note that this interface is not idempotent; the entire answer must
1471 * generated ahead of time.
1472 */
1473 valsize = min(len, sopt->sopt_valsize);
1474 sopt->sopt_valsize = valsize;
1475 if (sopt->sopt_val != 0) {
de0003fe 1476 bcopy(buf, sopt->sopt_val, valsize);
984263bc 1477 }
e71a125f
AE
1478}
1479
984263bc 1480int
c972a82f 1481sogetopt(struct socket *so, struct sockopt *sopt)
984263bc
MD
1482{
1483 int error, optval;
1484 struct linger l;
1485 struct timeval tv;
51f4ca92 1486#ifdef INET
984263bc 1487 struct accept_filter_arg *afap;
51f4ca92 1488#endif
984263bc
MD
1489
1490 error = 0;
e79d388f 1491 sopt->sopt_dir = SOPT_GET;
984263bc
MD
1492 if (sopt->sopt_level != SOL_SOCKET) {
1493 if (so->so_proto && so->so_proto->pr_ctloutput) {
e71a125f 1494 return (so_pru_ctloutput(so, sopt));
984263bc
MD
1495 } else
1496 return (ENOPROTOOPT);
1497 } else {
1498 switch (sopt->sopt_name) {
1499#ifdef INET
1500 case SO_ACCEPTFILTER:
1501 if ((so->so_options & SO_ACCEPTCONN) == 0)
1502 return (EINVAL);
1503 MALLOC(afap, struct accept_filter_arg *, sizeof(*afap),
e7b4468c 1504 M_TEMP, M_WAITOK | M_ZERO);
984263bc
MD
1505 if ((so->so_options & SO_ACCEPTFILTER) != 0) {
1506 strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name);
1507 if (so->so_accf->so_accept_filter_str != NULL)
1508 strcpy(afap->af_arg, so->so_accf->so_accept_filter_str);
1509 }
1510 error = sooptcopyout(sopt, afap, sizeof(*afap));
1511 FREE(afap, M_TEMP);
1512 break;
1513#endif /* INET */
1514
1515 case SO_LINGER:
1516 l.l_onoff = so->so_options & SO_LINGER;
1517 l.l_linger = so->so_linger;
1518 error = sooptcopyout(sopt, &l, sizeof l);
1519 break;
1520
1521 case SO_USELOOPBACK:
1522 case SO_DONTROUTE:
1523 case SO_DEBUG:
1524 case SO_KEEPALIVE:
1525 case SO_REUSEADDR:
1526 case SO_REUSEPORT:
1527 case SO_BROADCAST:
1528 case SO_OOBINLINE:
1529 case SO_TIMESTAMP:
1530 optval = so->so_options & sopt->sopt_name;
1531integer:
1532 error = sooptcopyout(sopt, &optval, sizeof optval);
1533 break;
1534
1535 case SO_TYPE:
1536 optval = so->so_type;
1537 goto integer;
1538
1539 case SO_ERROR:
1540 optval = so->so_error;
1541 so->so_error = 0;
1542 goto integer;
1543
1544 case SO_SNDBUF:
6d49aa6f 1545 optval = so->so_snd.ssb_hiwat;
984263bc
MD
1546 goto integer;
1547
1548 case SO_RCVBUF:
6d49aa6f 1549 optval = so->so_rcv.ssb_hiwat;
984263bc
MD
1550 goto integer;
1551
1552 case SO_SNDLOWAT:
6d49aa6f 1553 optval = so->so_snd.ssb_lowat;
984263bc
MD
1554 goto integer;
1555
1556 case SO_RCVLOWAT:
6d49aa6f 1557 optval = so->so_rcv.ssb_lowat;
984263bc
MD
1558 goto integer;
1559
1560 case SO_SNDTIMEO:
1561 case SO_RCVTIMEO:
1562 optval = (sopt->sopt_name == SO_SNDTIMEO ?
6d49aa6f 1563 so->so_snd.ssb_timeo : so->so_rcv.ssb_timeo);
984263bc
MD
1564
1565 tv.tv_sec = optval / hz;
1566 tv.tv_usec = (optval % hz) * tick;
1567 error = sooptcopyout(sopt, &tv, sizeof tv);
1568 break;
1569
1570 default:
1571 error = ENOPROTOOPT;
1572 break;
1573 }
1574 return (error);
1575 }
1576}
1577
1578/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
1579int
1580soopt_getm(struct sockopt *sopt, struct mbuf **mp)
1581{
1582 struct mbuf *m, *m_prev;
bf6ac9fa
JH
1583 int sopt_size = sopt->sopt_valsize, msize;
1584
1585 m = m_getl(sopt_size, sopt->sopt_td ? MB_WAIT : MB_DONTWAIT, MT_DATA,
1586 0, &msize);
1587 if (m == NULL)
1588 return (ENOBUFS);
1589 m->m_len = min(msize, sopt_size);
984263bc
MD
1590 sopt_size -= m->m_len;
1591 *mp = m;
1592 m_prev = m;
1593
bf6ac9fa
JH
1594 while (sopt_size > 0) {
1595 m = m_getl(sopt_size, sopt->sopt_td ? MB_WAIT : MB_DONTWAIT,
1596 MT_DATA, 0, &msize);
1597 if (m == NULL) {
984263bc 1598 m_freem(*mp);
bf6ac9fa 1599 return (ENOBUFS);
984263bc 1600 }
bf6ac9fa 1601 m->m_len = min(msize, sopt_size);
984263bc
MD
1602 sopt_size -= m->m_len;
1603 m_prev->m_next = m;
1604 m_prev = m;
1605 }
bf6ac9fa 1606 return (0);
984263bc
MD
1607}
1608
/*
 * XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines.
 *
 * Wrapper around soopt_to_mbuf(); always returns 0.
 */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
	soopt_to_mbuf(sopt, m);

	return (0);
}
1616
1617void
1618soopt_to_mbuf(struct sockopt *sopt, struct mbuf *m)
1619{
c3e742f9
NT
1620 size_t valsize;
1621 void *val;
984263bc 1622
792239df 1623 KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
de0003fe 1624 KKASSERT(kva_p(m));
984263bc 1625 if (sopt->sopt_val == NULL)
792239df 1626 return;
c3e742f9
NT
1627 val = sopt->sopt_val;
1628 valsize = sopt->sopt_valsize;
1629 while (m != NULL && valsize >= m->m_len) {
de0003fe 1630 bcopy(val, mtod(m, char *), m->m_len);
c3e742f9
NT
1631 valsize -= m->m_len;
1632 val = (caddr_t)val + m->m_len;
984263bc
MD
1633 m = m->m_next;
1634 }
1635 if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */
1636 panic("ip6_sooptmcopyin");
984263bc
MD
1637}
1638
de0003fe
AE
/*
 * XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines.
 *
 * Wrapper around soopt_from_mbuf(); returns its result unchanged.
 */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
	int error;

	error = soopt_from_mbuf(sopt, m);
	return (error);
}
1645
984263bc 1646int
de0003fe 1647soopt_from_mbuf(struct sockopt *sopt, struct mbuf *m)
984263bc
MD
1648{
1649 struct mbuf *m0 = m;
1650 size_t valsize = 0;
c3e742f9
NT
1651 size_t maxsize;
1652 void *val;
984263bc 1653
792239df 1654 KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val));
de0003fe 1655 KKASSERT(kva_p(m));
984263bc
MD
1656 if (sopt->sopt_val == NULL)
1657 return 0;
c3e742f9
NT
1658 val = sopt->sopt_val;
1659 maxsize = sopt->sopt_valsize;
1660 while (m != NULL && maxsize >= m->m_len) {
de0003fe 1661 bcopy(mtod(m, char *), val, m->m_len);
c3e742f9
NT
1662 maxsize -= m->m_len;
1663 val = (caddr_t)val + m->m_len;
984263bc
MD
1664 valsize += m->m_len;
1665 m = m->m_next;
1666 }
1667 if (m != NULL) {
1668 /* enough soopt buffer should be given from user-land */
1669 m_freem(m0);
bf6ac9fa 1670 return (EINVAL);
984263bc
MD
1671 }
1672 sopt->sopt_valsize = valsize;
1673 return 0;
1674}
1675
1676void
c972a82f 1677sohasoutofband(struct socket *so)
984263bc
MD
1678{
1679 if (so->so_sigio != NULL)
1680 pgsigio(so->so_sigio, SIGURG, 0);
6d49aa6f 1681 selwakeup(&so->so_rcv.ssb_sel);
984263bc
MD
1682}
1683
1684int
dadab5e9 1685sopoll(struct socket *so, int events, struct ucred *cred, struct thread *td)
984263bc
MD
1686{
1687 int revents = 0;
e43a034f
MD
1688
1689 crit_enter();
984263bc
MD
1690
1691 if (events & (POLLIN | POLLRDNORM))
1692 if (soreadable(so))
1693 revents |= events & (POLLIN | POLLRDNORM);
1694
d08a3c4d 1695 if (events & POLLINIGNEOF)
6d49aa6f 1696 if (so->so_rcv.ssb_cc >= so->so_rcv.ssb_lowat ||
d08a3c4d
HP
1697 !TAILQ_EMPTY(&so->so_comp) || so->so_error)
1698 revents |= POLLINIGNEOF;
1699
984263bc
MD
1700 if (events & (POLLOUT | POLLWRNORM))
1701 if (sowriteable(so))
1702 revents |= events & (POLLOUT | POLLWRNORM);
1703
1704 if (events & (POLLPRI | POLLRDBAND))
1705 if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
1706 revents |= events & (POLLPRI | POLLRDBAND);
1707
1708 if (revents == 0) {
d08a3c4d 1709 if (events &
d99a0cbe 1710 (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM |
d08a3c4d 1711 POLLRDBAND)) {
6d49aa6f
MD
1712 selrecord(td, &so->so_rcv.ssb_sel);
1713 so->so_rcv.ssb_flags |= SSB_SEL;
984263bc
MD
1714 }
1715
1716 if (events & (POLLOUT | POLLWRNORM)) {
6d49aa6f
MD
1717 selrecord(td, &so->so_snd.ssb_sel);
1718 so->so_snd.ssb_flags |= SSB_SEL;
984263bc
MD
1719 }
1720 }
1721
e43a034f 1722 crit_exit();
984263bc
MD
1723 return (revents);
1724}
1725
1726int
1727sokqfilter(struct file *fp, struct knote *kn)
1728{
1729 struct socket *so = (struct socket *)kn->kn_fp->f_data;
6d49aa6f 1730 struct signalsockbuf *ssb;
984263bc
MD
1731
1732 switch (kn->kn_filter) {
1733 case EVFILT_READ:
1734 if (so->so_options & SO_ACCEPTCONN)
1735 kn->kn_fop = &solisten_filtops;
1736 else
1737 kn->kn_fop = &soread_filtops;
6d49aa6f 1738 ssb = &so->so_rcv;
984263bc
MD
1739 break;
1740 case EVFILT_WRITE:
1741 kn->kn_fop = &sowrite_filtops;
6d49aa6f 1742 ssb = &so->so_snd;
984263bc
MD
1743 break;
1744 default:
1745 return (1);
1746 }
1747
e43a034f 1748 crit_enter();
6d49aa6f
MD
1749 SLIST_INSERT_HEAD(&ssb->ssb_sel.si_note, kn, kn_selnext);
1750 ssb->ssb_flags |= SSB_KNOTE;
e43a034f 1751 crit_exit();
984263bc
MD
1752 return (0);
1753}
1754
1755static void
1756filt_sordetach(struct knote *kn)
1757{
1758 struct socket *so = (struct socket *)kn->kn_fp->f_data;
984263bc 1759
e43a034f 1760 crit_enter();
6d49aa6f
MD
1761 SLIST_REMOVE(&so->so_rcv.ssb_sel.si_note, kn, knote, kn_selnext);
1762 if (SLIST_EMPTY(&so->so_rcv.ssb_sel.si_note))
1763 so->so_rcv.ssb_flags &= ~SSB_KNOTE;
e43a034f 1764 crit_exit();
984263bc
MD
1765}
1766
1767/*ARGSUSED*/
1768static int
1769filt_soread(struct knote *kn, long hint)
1770{
1771 struct socket *so = (struct socket *)kn->kn_fp->f_data;
1772
6d49aa6f 1773 kn->kn_data = so->so_rcv.ssb_cc;
984263bc
MD
1774 if (so->so_state & SS_CANTRCVMORE) {
1775 kn->kn_flags |= EV_EOF;
1776 kn->kn_fflags = so->so_error;
1777 return (1);
1778 }
1779 if (so->so_error) /* temporary udp error */
1780 return (1);
1781 if (kn->kn_sfflags & NOTE_LOWAT)
1782 return (kn->kn_data >= kn->kn_sdata);
6d49aa6f 1783 return (kn->kn_data >= so->so_rcv.ssb_lowat);
984263bc
MD
1784}
1785
1786static void
1787filt_sowdetach(struct knote *kn)
1788{
1789 struct socket *so = (struct socket *)kn->kn_fp->f_data;
984263bc 1790
e43a034f 1791 crit_enter();
6d49aa6f
MD
1792 SLIST_REMOVE(&so->so_snd.ssb_sel.si_note, kn, knote, kn_selnext);
1793 if (SLIST_EMPTY(&so->so_snd.ssb_sel.si_note))
1794 so->so_snd.ssb_flags &= ~SSB_KNOTE;
e43a034f 1795 crit_exit();
984263bc
MD
1796}
1797
1798/*ARGSUSED*/
1799static int
1800filt_sowrite(struct knote *kn, long hint)
1801{
1802 struct socket *so = (struct socket *)kn->kn_fp->f_data;
1803
6d49aa6f 1804 kn->kn_data = ssb_space(&so->so_snd);
984263bc
MD
1805 if (so->so_state & SS_CANTSENDMORE) {
1806 kn->kn_flags |= EV_EOF;
1807 kn->kn_fflags = so->so_error;
1808 return (1);
1809 }
1810 if (so->so_error) /* temporary udp error */
1811 return (1);
1812 if (((so->so_state & SS_ISCONNECTED) == 0) &&
1813 (so->so_proto->pr_flags & PR_CONNREQUIRED))
1814 return (0);
1815 if (kn->kn_sfflags & NOTE_LOWAT)
1816 return (kn->kn_data >= kn->kn_sdata);
6d49aa6f 1817 return (kn->kn_data >= so->so_snd.ssb_lowat);
984263bc
MD
1818}
1819
1820/*ARGSUSED*/
1821static int
1822filt_solisten(struct knote *kn, long hint)
1823{
1824 struct socket *so = (struct socket *)kn->kn_fp->f_data;
1825
1826 kn->kn_data = so->so_qlen;
1827 return (! TAILQ_EMPTY(&so->so_comp));
1828}