Commit | Line | Data |
---|---|---|
984263bc | 1 | /* |
6ea1e9b9 | 2 | * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. |
66d6c637 | 3 | * Copyright (c) 2004 The DragonFly Project. All rights reserved. |
b272101a | 4 | * |
66d6c637 JH |
5 | * This code is derived from software contributed to The DragonFly Project |
6 | * by Jeffrey M. Hsu. | |
b272101a | 7 | * |
66d6c637 JH |
8 | * Redistribution and use in source and binary forms, with or without |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * 1. Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * 2. Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
16 | * 3. Neither the name of The DragonFly Project nor the names of its | |
17 | * contributors may be used to endorse or promote products derived | |
18 | * from this software without specific, prior written permission. | |
b272101a | 19 | * |
66d6c637 JH |
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | |
23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | |
24 | * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | |
25 | * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, | |
26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | |
28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
29 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
30 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
31 | * SUCH DAMAGE. | |
32 | */ | |
33 | ||
66d6c637 | 34 | /* |
984263bc MD |
35 | * Copyright (c) 1982, 1986, 1988, 1990, 1993 |
36 | * The Regents of the University of California. All rights reserved. | |
37 | * | |
38 | * Redistribution and use in source and binary forms, with or without | |
39 | * modification, are permitted provided that the following conditions | |
40 | * are met: | |
41 | * 1. Redistributions of source code must retain the above copyright | |
42 | * notice, this list of conditions and the following disclaimer. | |
43 | * 2. Redistributions in binary form must reproduce the above copyright | |
44 | * notice, this list of conditions and the following disclaimer in the | |
45 | * documentation and/or other materials provided with the distribution. | |
dc71b7ab | 46 | * 3. Neither the name of the University nor the names of its contributors |
984263bc MD |
47 | * may be used to endorse or promote products derived from this software |
48 | * without specific prior written permission. | |
49 | * | |
50 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
51 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
52 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
53 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
54 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
55 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
56 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
57 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
58 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
59 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
60 | * SUCH DAMAGE. | |
61 | * | |
62 | * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 | |
7405c902 | 63 | * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.24 2003/11/11 17:18:18 silby Exp $ |
984263bc MD |
64 | */ |
65 | ||
66 | #include "opt_inet.h" | |
67 | ||
68 | #include <sys/param.h> | |
69 | #include <sys/systm.h> | |
70 | #include <sys/fcntl.h> | |
71 | #include <sys/malloc.h> | |
72 | #include <sys/mbuf.h> | |
73 | #include <sys/domain.h> | |
74 | #include <sys/file.h> /* for struct knote */ | |
75 | #include <sys/kernel.h> | |
984263bc | 76 | #include <sys/event.h> |
984263bc MD |
77 | #include <sys/proc.h> |
78 | #include <sys/protosw.h> | |
79 | #include <sys/socket.h> | |
80 | #include <sys/socketvar.h> | |
6b6e0885 | 81 | #include <sys/socketops.h> |
984263bc MD |
82 | #include <sys/resourcevar.h> |
83 | #include <sys/signalvar.h> | |
84 | #include <sys/sysctl.h> | |
85 | #include <sys/uio.h> | |
86 | #include <sys/jail.h> | |
87 | #include <vm/vm_zone.h> | |
e71a125f | 88 | #include <vm/pmap.h> |
acd31a69 | 89 | #include <net/netmsg2.h> |
5337421c | 90 | #include <net/netisr2.h> |
984263bc | 91 | |
d6cb521d | 92 | #include <sys/socketvar2.h> |
96c6eb29 | 93 | #include <sys/spinlock2.h> |
e43a034f | 94 | |
984263bc MD |
95 | #include <machine/limits.h> |
96 | ||
#ifdef INET
/* Integer knobs defined outside this file; only declared here. */
extern int	tcp_sosend_agglim;
extern int	tcp_sosend_async;
extern int	tcp_sosend_jcluster;
extern int	udp_sosend_async;
extern int	udp_sosend_prepend;

/* Used by sodealloc() to drop an installed accept filter. */
static int	do_setopt_accept_filter(struct socket *so,
		    struct sockopt *sopt);
#endif /* INET */

/* Callbacks referenced by the filterops tables below. */
static void	filt_sordetach(struct knote *kn);
static void	filt_sowdetach(struct knote *kn);
static int	filt_soread(struct knote *kn, long hint);
static int	filt_sowrite(struct knote *kn, long hint);
static int	filt_solisten(struct knote *kn, long hint);

/* The two back ends of soclose(). */
static int	soclose_sync(struct socket *so, int fflag);
static void	soclose_fast(struct socket *so);
115 | ||
b272101a | 116 | static struct filterops solisten_filtops = |
8d1b9f93 | 117 | { FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, filt_sordetach, filt_solisten }; |
984263bc | 118 | static struct filterops soread_filtops = |
8d1b9f93 | 119 | { FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, filt_sordetach, filt_soread }; |
b272101a | 120 | static struct filterops sowrite_filtops = |
8d1b9f93 | 121 | { FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, filt_sowdetach, filt_sowrite }; |
73c344d3 | 122 | static struct filterops soexcept_filtops = |
8d1b9f93 | 123 | { FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, filt_sordetach, filt_soread }; |
984263bc | 124 | |
69ea5b8d | 125 | MALLOC_DEFINE(M_SOCKET, "socket", "socket struct"); |
984263bc MD |
126 | MALLOC_DEFINE(M_SONAME, "soname", "socket name"); |
127 | MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); | |
128 | ||
984263bc MD |
129 | |
130 | static int somaxconn = SOMAXCONN; | |
131 | SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, | |
132 | &somaxconn, 0, "Maximum pending socket connection queue size"); | |
133 | ||
acd31a69 SZ |
134 | static int use_soclose_fast = 1; |
135 | SYSCTL_INT(_kern_ipc, OID_AUTO, soclose_fast, CTLFLAG_RW, | |
136 | &use_soclose_fast, 0, "Fast socket close"); | |
137 | ||
5e4b3994 SZ |
138 | int use_soaccept_pred_fast = 1; |
139 | SYSCTL_INT(_kern_ipc, OID_AUTO, soaccept_pred_fast, CTLFLAG_RW, | |
140 | &use_soaccept_pred_fast, 0, "Fast socket accept predication"); | |
141 | ||
f786a6ac | 142 | int use_sendfile_async = 1; |
828cf263 SZ |
143 | SYSCTL_INT(_kern_ipc, OID_AUTO, sendfile_async, CTLFLAG_RW, |
144 | &use_sendfile_async, 0, "sendfile uses asynchronized pru_send"); | |
145 | ||
e368a6e9 SZ |
146 | int use_soconnect_async = 1; |
147 | SYSCTL_INT(_kern_ipc, OID_AUTO, soconnect_async, CTLFLAG_RW, | |
148 | &use_soconnect_async, 0, "soconnect uses asynchronized pru_connect"); | |
149 | ||
c892825f SZ |
150 | static int use_socreate_fast = 1; |
151 | SYSCTL_INT(_kern_ipc, OID_AUTO, socreate_fast, CTLFLAG_RW, | |
152 | &use_socreate_fast, 0, "Fast socket creation"); | |
153 | ||
65e531c6 SZ |
154 | static int soavailconn = 32; |
155 | SYSCTL_INT(_kern_ipc, OID_AUTO, soavailconn, CTLFLAG_RW, | |
156 | &soavailconn, 0, "Maximum available socket connection queue size"); | |
157 | ||
984263bc MD |
158 | /* |
159 | * Socket operation routines. | |
160 | * These routines are called by the routines in | |
161 | * sys_socket.c or from a system process, and | |
162 | * implement the semantics of socket operations by | |
163 | * switching out to the protocol specific routines. | |
164 | */ | |
165 | ||
166 | /* | |
69ea5b8d | 167 | * Get a socket structure, and initialize it. |
984263bc MD |
168 | * Note that it would probably be better to allocate socket |
169 | * and PCB at the same time, but I'm not convinced that all | |
170 | * the protocols can be easily modified to do this. | |
171 | */ | |
172 | struct socket * | |
96c6eb29 | 173 | soalloc(int waitok, struct protosw *pr) |
984263bc | 174 | { |
c6e47da6 | 175 | globaldata_t gd = mycpu; |
984263bc | 176 | struct socket *so; |
69ea5b8d | 177 | unsigned waitmask; |
984263bc | 178 | |
69ea5b8d NT |
179 | waitmask = waitok ? M_WAITOK : M_NOWAIT; |
180 | so = kmalloc(sizeof(struct socket), M_SOCKET, M_ZERO|waitmask); | |
984263bc MD |
181 | if (so) { |
182 | /* XXX race condition for reentrant kernel */ | |
96c6eb29 | 183 | so->so_proto = pr; |
984263bc | 184 | TAILQ_INIT(&so->so_aiojobq); |
20faa324 SZ |
185 | TAILQ_INIT(&so->so_rcv.ssb_mlist); |
186 | TAILQ_INIT(&so->so_snd.ssb_mlist); | |
a3c18566 MD |
187 | lwkt_token_init(&so->so_rcv.ssb_token, "rcvtok"); |
188 | lwkt_token_init(&so->so_snd.ssb_token, "sndtok"); | |
ba87a4ab | 189 | spin_init(&so->so_rcvd_spin, "soalloc"); |
96c6eb29 | 190 | netmsg_init(&so->so_rcvd_msg.base, so, &netisr_adone_rport, |
d6b70ca6 SZ |
191 | MSGF_DROPABLE | MSGF_PRIORITY, |
192 | so->so_proto->pr_usrreqs->pru_rcvd); | |
96c6eb29 | 193 | so->so_rcvd_msg.nm_pru_flags |= PRUR_ASYNC; |
6cef7136 MD |
194 | so->so_state = SS_NOFDREF; |
195 | so->so_refs = 1; | |
c6e47da6 | 196 | so->so_inum = gd->gd_anoninum++ * ncpus + gd->gd_cpuid + 2; |
984263bc MD |
197 | } |
198 | return so; | |
199 | } | |
200 | ||
201 | int | |
dadab5e9 MD |
202 | socreate(int dom, struct socket **aso, int type, |
203 | int proto, struct thread *td) | |
984263bc | 204 | { |
dadab5e9 MD |
205 | struct proc *p = td->td_proc; |
206 | struct protosw *prp; | |
207 | struct socket *so; | |
e4700d00 | 208 | struct pru_attach_info ai; |
7104f312 | 209 | struct prison *pr = p->p_ucred->cr_prison; |
dadab5e9 | 210 | int error; |
984263bc MD |
211 | |
212 | if (proto) | |
213 | prp = pffindproto(dom, proto, type); | |
214 | else | |
215 | prp = pffindtype(dom, type); | |
216 | ||
4090d6ff | 217 | if (prp == NULL || prp->pr_usrreqs->pru_attach == 0) |
984263bc MD |
218 | return (EPROTONOSUPPORT); |
219 | ||
25e27214 | 220 | if (pr && PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_NET_UNIXIPROUTE) && |
984263bc MD |
221 | prp->pr_domain->dom_family != PF_LOCAL && |
222 | prp->pr_domain->dom_family != PF_INET && | |
3e4150ef | 223 | prp->pr_domain->dom_family != PF_INET6 && |
984263bc MD |
224 | prp->pr_domain->dom_family != PF_ROUTE) { |
225 | return (EPROTONOSUPPORT); | |
226 | } | |
227 | ||
228 | if (prp->pr_type != type) | |
229 | return (EPROTOTYPE); | |
96c6eb29 | 230 | so = soalloc(p != NULL, prp); |
6cef7136 | 231 | if (so == NULL) |
984263bc MD |
232 | return (ENOBUFS); |
233 | ||
6cef7136 MD |
234 | /* |
235 | * Callers of socreate() presumably will connect up a descriptor | |
236 | * and call soclose() if they cannot. This represents our so_refs | |
237 | * (which should be 1) from soalloc(). | |
238 | */ | |
239 | soclrstate(so, SS_NOFDREF); | |
240 | ||
48e7b118 MD |
241 | /* |
242 | * Set a default port for protocol processing. No action will occur | |
243 | * on the socket on this port until an inpcb is attached to it and | |
244 | * is able to match incoming packets, or until the socket becomes | |
245 | * available to userland. | |
002c1265 | 246 | * |
ee0be9ca SZ |
247 | * We normally default the socket to the protocol thread on cpu 0, |
248 | * if protocol does not provide its own method to initialize the | |
249 | * default port. | |
250 | * | |
002c1265 MD |
251 | * If PR_SYNC_PORT is set (unix domain sockets) there is no protocol |
252 | * thread and all pr_*()/pru_*() calls are executed synchronously. | |
48e7b118 | 253 | */ |
ee0be9ca | 254 | if (prp->pr_flags & PR_SYNC_PORT) |
002c1265 | 255 | so->so_port = &netisr_sync_port; |
ee0be9ca SZ |
256 | else if (prp->pr_initport != NULL) |
257 | so->so_port = prp->pr_initport(); | |
258 | else | |
ec7f7fc8 | 259 | so->so_port = netisr_cpuport(0); |
48e7b118 | 260 | |
984263bc MD |
261 | TAILQ_INIT(&so->so_incomp); |
262 | TAILQ_INIT(&so->so_comp); | |
263 | so->so_type = type; | |
e9a372eb | 264 | so->so_cred = crhold(p->p_ucred); |
e4700d00 JH |
265 | ai.sb_rlimit = &p->p_rlimit[RLIMIT_SBSIZE]; |
266 | ai.p_ucred = p->p_ucred; | |
267 | ai.fd_rdir = p->p_fd->fd_rdir; | |
48e7b118 | 268 | |
5b0b9fa5 PA |
269 | /* |
270 | * Auto-sizing of socket buffers is managed by the protocols and | |
271 | * the appropriate flags must be set in the pru_attach function. | |
272 | */ | |
c892825f SZ |
273 | if (use_socreate_fast && prp->pr_usrreqs->pru_preattach) |
274 | error = so_pru_attach_fast(so, proto, &ai); | |
275 | else | |
276 | error = so_pru_attach(so, proto, &ai); | |
984263bc | 277 | if (error) { |
6cef7136 MD |
278 | sosetstate(so, SS_NOFDREF); |
279 | sofree(so); /* from soalloc */ | |
280 | return error; | |
984263bc | 281 | } |
48e7b118 | 282 | |
6cef7136 MD |
283 | /* |
284 | * NOTE: Returns referenced socket. | |
285 | */ | |
984263bc MD |
286 | *aso = so; |
287 | return (0); | |
288 | } | |
289 | ||
/*
 * Bind the socket to nam; a thin wrapper that returns whatever
 * so_pru_bind() returns.
 */
int
sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	return (so_pru_bind(so, nam, td));
}
298 | ||
6cef7136 | 299 | static void |
dadab5e9 | 300 | sodealloc(struct socket *so) |
984263bc | 301 | { |
ee39a18e | 302 | KKASSERT((so->so_state & (SS_INCOMP | SS_COMP)) == 0); |
a6974ef8 SZ |
303 | |
304 | #ifdef INVARIANTS | |
305 | if (so->so_options & SO_ACCEPTCONN) { | |
306 | KASSERT(TAILQ_EMPTY(&so->so_comp), ("so_comp is not empty")); | |
307 | KASSERT(TAILQ_EMPTY(&so->so_incomp), | |
308 | ("so_incomp is not empty")); | |
309 | } | |
310 | #endif | |
1bbcd2ba | 311 | |
6d49aa6f | 312 | if (so->so_rcv.ssb_hiwat) |
984263bc | 313 | (void)chgsbsize(so->so_cred->cr_uidinfo, |
6d49aa6f MD |
314 | &so->so_rcv.ssb_hiwat, 0, RLIM_INFINITY); |
315 | if (so->so_snd.ssb_hiwat) | |
984263bc | 316 | (void)chgsbsize(so->so_cred->cr_uidinfo, |
6d49aa6f | 317 | &so->so_snd.ssb_hiwat, 0, RLIM_INFINITY); |
984263bc | 318 | #ifdef INET |
81d59d3d HP |
319 | /* remove accept filter if present */ |
320 | if (so->so_accf != NULL) | |
321 | do_setopt_accept_filter(so, NULL); | |
984263bc MD |
322 | #endif /* INET */ |
323 | crfree(so->so_cred); | |
88da6203 SZ |
324 | if (so->so_faddr != NULL) |
325 | kfree(so->so_faddr, M_SONAME); | |
69ea5b8d | 326 | kfree(so, M_SOCKET); |
984263bc MD |
327 | } |
328 | ||
329 | int | |
dadab5e9 | 330 | solisten(struct socket *so, int backlog, struct thread *td) |
984263bc | 331 | { |
6cef7136 | 332 | if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) |
78812139 | 333 | return (EINVAL); |
78812139 | 334 | |
6cef7136 | 335 | lwkt_gettoken(&so->so_rcv.ssb_token); |
984263bc MD |
336 | if (TAILQ_EMPTY(&so->so_comp)) |
337 | so->so_options |= SO_ACCEPTCONN; | |
6cef7136 | 338 | lwkt_reltoken(&so->so_rcv.ssb_token); |
984263bc MD |
339 | if (backlog < 0 || backlog > somaxconn) |
340 | backlog = somaxconn; | |
341 | so->so_qlimit = backlog; | |
60a260ad | 342 | return so_pru_listen(so, td); |
984263bc MD |
343 | } |
344 | ||
1bbcd2ba SZ |
345 | static void |
346 | soqflush(struct socket *so) | |
347 | { | |
348 | lwkt_getpooltoken(so); | |
349 | if (so->so_options & SO_ACCEPTCONN) { | |
350 | struct socket *sp; | |
351 | ||
352 | while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) { | |
353 | KKASSERT((sp->so_state & (SS_INCOMP | SS_COMP)) == | |
354 | SS_INCOMP); | |
355 | TAILQ_REMOVE(&so->so_incomp, sp, so_list); | |
356 | so->so_incqlen--; | |
357 | soclrstate(sp, SS_INCOMP); | |
358 | soabort_async(sp, TRUE); | |
359 | } | |
360 | while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) { | |
361 | KKASSERT((sp->so_state & (SS_INCOMP | SS_COMP)) == | |
362 | SS_COMP); | |
363 | TAILQ_REMOVE(&so->so_comp, sp, so_list); | |
364 | so->so_qlen--; | |
365 | soclrstate(sp, SS_COMP); | |
366 | soabort_async(sp, TRUE); | |
367 | } | |
368 | } | |
369 | lwkt_relpooltoken(so); | |
370 | } | |
371 | ||
4402d8a2 MD |
372 | /* |
373 | * Destroy a disconnected socket. This routine is a NOP if entities | |
374 | * still have a reference on the socket: | |
375 | * | |
376 | * so_pcb - The protocol stack still has a reference | |
377 | * SS_NOFDREF - There is no longer a file pointer reference | |
4402d8a2 | 378 | */ |
984263bc | 379 | void |
dadab5e9 | 380 | sofree(struct socket *so) |
984263bc | 381 | { |
5217bcbc MD |
382 | struct socket *head; |
383 | ||
384 | /* | |
385 | * This is a bit hackish at the moment. We need to interlock | |
386 | * any accept queue we are on before we potentially lose the | |
387 | * last reference to avoid races against a re-reference from | |
388 | * someone operating on the queue. | |
389 | */ | |
390 | while ((head = so->so_head) != NULL) { | |
391 | lwkt_getpooltoken(head); | |
392 | if (so->so_head == head) | |
393 | break; | |
394 | lwkt_relpooltoken(head); | |
395 | } | |
984263bc | 396 | |
6cef7136 MD |
397 | /* |
398 | * Arbitrage the last free. | |
399 | */ | |
400 | KKASSERT(so->so_refs > 0); | |
5217bcbc MD |
401 | if (atomic_fetchadd_int(&so->so_refs, -1) != 1) { |
402 | if (head) | |
403 | lwkt_relpooltoken(head); | |
4402d8a2 | 404 | return; |
5217bcbc | 405 | } |
6cef7136 MD |
406 | |
407 | KKASSERT(so->so_pcb == NULL && (so->so_state & SS_NOFDREF)); | |
e28d8186 | 408 | KKASSERT((so->so_state & SS_ASSERTINPROG) == 0); |
6cef7136 | 409 | |
984263bc | 410 | if (head != NULL) { |
1bbcd2ba SZ |
411 | /* |
412 | * We're done, remove ourselves from the accept queue we are | |
413 | * on, if we are on one. | |
414 | */ | |
984263bc | 415 | if (so->so_state & SS_INCOMP) { |
ee39a18e SZ |
416 | KKASSERT((so->so_state & (SS_INCOMP | SS_COMP)) == |
417 | SS_INCOMP); | |
984263bc MD |
418 | TAILQ_REMOVE(&head->so_incomp, so, so_list); |
419 | head->so_incqlen--; | |
420 | } else if (so->so_state & SS_COMP) { | |
421 | /* | |
422 | * We must not decommission a socket that's | |
423 | * on the accept(2) queue. If we do, then | |
424 | * accept(2) may hang after select(2) indicated | |
425 | * that the listening socket was ready. | |
426 | */ | |
ee39a18e SZ |
427 | KKASSERT((so->so_state & (SS_INCOMP | SS_COMP)) == |
428 | SS_COMP); | |
5217bcbc | 429 | lwkt_relpooltoken(head); |
984263bc MD |
430 | return; |
431 | } else { | |
432 | panic("sofree: not queued"); | |
433 | } | |
6cef7136 | 434 | soclrstate(so, SS_INCOMP); |
984263bc | 435 | so->so_head = NULL; |
5217bcbc | 436 | lwkt_relpooltoken(head); |
1bbcd2ba SZ |
437 | } else { |
438 | /* Flush accept queues, if we are accepting. */ | |
439 | soqflush(so); | |
984263bc | 440 | } |
6d49aa6f | 441 | ssb_release(&so->so_snd, so); |
984263bc MD |
442 | sorflush(so); |
443 | sodealloc(so); | |
444 | } | |
445 | ||
446 | /* | |
447 | * Close a socket on last file table reference removal. | |
448 | * Initiate disconnect if connected. | |
449 | * Free socket when disconnect complete. | |
450 | */ | |
451 | int | |
9ba76b73 | 452 | soclose(struct socket *so, int fflag) |
984263bc | 453 | { |
acd31a69 | 454 | int error; |
984263bc | 455 | |
58c2553a | 456 | funsetown(&so->so_sigio); |
be4519a2 | 457 | sosetstate(so, SS_ISCLOSING); |
acd31a69 SZ |
458 | if (!use_soclose_fast || |
459 | (so->so_proto->pr_flags & PR_SYNC_PORT) || | |
de5833ad | 460 | ((so->so_state & SS_ISCONNECTED) && |
27d2f9e2 SZ |
461 | (so->so_options & SO_LINGER) && |
462 | so->so_linger != 0)) { | |
acd31a69 SZ |
463 | error = soclose_sync(so, fflag); |
464 | } else { | |
465 | soclose_fast(so); | |
466 | error = 0; | |
467 | } | |
468 | return error; | |
469 | } | |
470 | ||
be4519a2 | 471 | void |
acd31a69 SZ |
472 | sodiscard(struct socket *so) |
473 | { | |
acd31a69 SZ |
474 | if (so->so_state & SS_NOFDREF) |
475 | panic("soclose: NOFDREF"); | |
476 | sosetstate(so, SS_NOFDREF); /* take ref */ | |
477 | } | |
478 | ||
838d2682 SZ |
479 | /* |
480 | * Append the completed queue of head to head_inh (inherting listen socket). | |
481 | */ | |
02ad2f0b | 482 | void |
838d2682 | 483 | soinherit(struct socket *head, struct socket *head_inh) |
02ad2f0b | 484 | { |
838d2682 | 485 | boolean_t do_wakeup = FALSE; |
02ad2f0b | 486 | |
838d2682 SZ |
487 | KASSERT(head->so_options & SO_ACCEPTCONN, |
488 | ("head does not accept connection")); | |
489 | KASSERT(head_inh->so_options & SO_ACCEPTCONN, | |
490 | ("head_inh does not accept connection")); | |
02ad2f0b | 491 | |
838d2682 SZ |
492 | lwkt_getpooltoken(head); |
493 | lwkt_getpooltoken(head_inh); | |
02ad2f0b | 494 | |
838d2682 SZ |
495 | if (head->so_qlen > 0) |
496 | do_wakeup = TRUE; | |
02ad2f0b | 497 | |
838d2682 SZ |
498 | while (!TAILQ_EMPTY(&head->so_comp)) { |
499 | struct ucred *old_cr; | |
500 | struct socket *sp; | |
02ad2f0b | 501 | |
838d2682 | 502 | sp = TAILQ_FIRST(&head->so_comp); |
ee39a18e | 503 | KKASSERT((sp->so_state & (SS_INCOMP | SS_COMP)) == SS_COMP); |
02ad2f0b | 504 | |
838d2682 SZ |
505 | /* |
506 | * Remove this socket from the current listen socket | |
507 | * completed queue. | |
508 | */ | |
509 | TAILQ_REMOVE(&head->so_comp, sp, so_list); | |
510 | head->so_qlen--; | |
02ad2f0b | 511 | |
838d2682 SZ |
512 | /* Save the old ucred for later free. */ |
513 | old_cr = sp->so_cred; | |
514 | ||
515 | /* | |
516 | * Install this socket to the inheriting listen socket | |
517 | * completed queue. | |
518 | */ | |
519 | sp->so_cred = crhold(head_inh->so_cred); /* non-blocking */ | |
520 | sp->so_head = head_inh; | |
02ad2f0b | 521 | |
838d2682 SZ |
522 | TAILQ_INSERT_TAIL(&head_inh->so_comp, sp, so_list); |
523 | head_inh->so_qlen++; | |
02ad2f0b | 524 | |
838d2682 SZ |
525 | /* |
526 | * NOTE: | |
527 | * crfree() may block and release the tokens temporarily. | |
528 | * However, we are fine here, since the transition is done. | |
529 | */ | |
530 | crfree(old_cr); | |
531 | } | |
02ad2f0b | 532 | |
838d2682 SZ |
533 | lwkt_relpooltoken(head_inh); |
534 | lwkt_relpooltoken(head); | |
02ad2f0b | 535 | |
838d2682 | 536 | if (do_wakeup) { |
02ad2f0b SZ |
537 | /* |
538 | * "New" connections have arrived | |
539 | */ | |
838d2682 SZ |
540 | sorwakeup(head_inh); |
541 | wakeup(&head_inh->so_timeo); | |
02ad2f0b SZ |
542 | } |
543 | } | |
544 | ||
acd31a69 SZ |
545 | static int |
546 | soclose_sync(struct socket *so, int fflag) | |
547 | { | |
548 | int error = 0; | |
549 | ||
f5c5b724 SZ |
550 | if ((so->so_proto->pr_flags & PR_SYNC_PORT) == 0) |
551 | so_pru_sync(so); /* unpend async prus */ | |
552 | ||
19be7d32 | 553 | if (so->so_pcb == NULL) |
984263bc | 554 | goto discard; |
f5c5b724 | 555 | |
984263bc MD |
556 | if (so->so_state & SS_ISCONNECTED) { |
557 | if ((so->so_state & SS_ISDISCONNECTING) == 0) { | |
558 | error = sodisconnect(so); | |
559 | if (error) | |
560 | goto drop; | |
561 | } | |
562 | if (so->so_options & SO_LINGER) { | |
563 | if ((so->so_state & SS_ISDISCONNECTING) && | |
9ba76b73 | 564 | (fflag & FNONBLOCK)) |
984263bc MD |
565 | goto drop; |
566 | while (so->so_state & SS_ISCONNECTED) { | |
6cef7136 MD |
567 | error = tsleep(&so->so_timeo, PCATCH, |
568 | "soclos", so->so_linger * hz); | |
984263bc MD |
569 | if (error) |
570 | break; | |
571 | } | |
572 | } | |
573 | } | |
574 | drop: | |
575 | if (so->so_pcb) { | |
6b6e0885 JH |
576 | int error2; |
577 | ||
578 | error2 = so_pru_detach(so); | |
be4519a2 SZ |
579 | if (error2 == EJUSTRETURN) { |
580 | /* | |
581 | * Protocol will call sodiscard() | |
582 | * and sofree() for us. | |
583 | */ | |
584 | return error; | |
585 | } | |
984263bc MD |
586 | if (error == 0) |
587 | error = error2; | |
588 | } | |
589 | discard: | |
acd31a69 | 590 | sodiscard(so); |
acd31a69 | 591 | sofree(so); /* dispose of ref */ |
19be7d32 | 592 | |
acd31a69 SZ |
593 | return (error); |
594 | } | |
595 | ||
596 | static void | |
f5c5b724 | 597 | soclose_fast_handler(netmsg_t msg) |
acd31a69 SZ |
598 | { |
599 | struct socket *so = msg->base.nm_so; | |
600 | ||
f5c5b724 SZ |
601 | if (so->so_pcb == NULL) |
602 | goto discard; | |
603 | ||
acd31a69 SZ |
604 | if ((so->so_state & SS_ISCONNECTED) && |
605 | (so->so_state & SS_ISDISCONNECTING) == 0) | |
606 | so_pru_disconnect_direct(so); | |
607 | ||
be4519a2 SZ |
608 | if (so->so_pcb) { |
609 | int error; | |
610 | ||
611 | error = so_pru_detach_direct(so); | |
612 | if (error == EJUSTRETURN) { | |
613 | /* | |
614 | * Protocol will call sodiscard() | |
615 | * and sofree() for us. | |
616 | */ | |
617 | return; | |
618 | } | |
619 | } | |
f5c5b724 | 620 | discard: |
acd31a69 SZ |
621 | sodiscard(so); |
622 | sofree(so); | |
623 | } | |
624 | ||
625 | static void | |
f5c5b724 | 626 | soclose_fast(struct socket *so) |
acd31a69 SZ |
627 | { |
628 | struct netmsg_base *base = &so->so_clomsg; | |
629 | ||
630 | netmsg_init(base, so, &netisr_apanic_rport, 0, | |
f5c5b724 | 631 | soclose_fast_handler); |
0ebcb559 SZ |
632 | if (so->so_port == netisr_curport()) |
633 | lwkt_sendmsg_oncpu(so->so_port, &base->lmsg); | |
634 | else | |
635 | lwkt_sendmsg(so->so_port, &base->lmsg); | |
acd31a69 SZ |
636 | } |
637 | ||
984263bc | 638 | /* |
9116be8e MD |
639 | * Abort and destroy a socket. Only one abort can be in progress |
640 | * at any given moment. | |
984263bc | 641 | */ |
4402d8a2 | 642 | void |
3735a885 | 643 | soabort_async(struct socket *so, boolean_t clr_head) |
4402d8a2 | 644 | { |
3735a885 SZ |
645 | /* |
646 | * Keep a reference before clearing the so_head | |
647 | * to avoid racing socket close in netisr. | |
648 | */ | |
6cef7136 | 649 | soreference(so); |
3735a885 SZ |
650 | if (clr_head) |
651 | so->so_head = NULL; | |
fd27efb4 | 652 | so_pru_abort_async(so); |
984263bc MD |
653 | } |
654 | ||
fd86a41c | 655 | void |
94aba184 | 656 | soabort_direct(struct socket *so) |
fd86a41c | 657 | { |
6cef7136 | 658 | soreference(so); |
2deaa561 | 659 | so_pru_abort_direct(so); |
fd86a41c SZ |
660 | } |
661 | ||
c19fdb0e MD |
662 | /* |
663 | * so is passed in ref'd, which becomes owned by | |
664 | * the cleared SS_NOFDREF flag. | |
665 | */ | |
88da6203 SZ |
666 | void |
667 | soaccept_generic(struct socket *so) | |
668 | { | |
669 | if ((so->so_state & SS_NOFDREF) == 0) | |
670 | panic("soaccept: !NOFDREF"); | |
671 | soclrstate(so, SS_NOFDREF); /* owned by lack of SS_NOFDREF */ | |
672 | } | |
673 | ||
/*
 * Accept a connection on so: clear SS_NOFDREF via soaccept_generic()
 * and return whatever so_pru_accept() returns.
 */
int
soaccept(struct socket *so, struct sockaddr **nam)
{
	soaccept_generic(so);

	return (so_pru_accept(so, nam));
}
683 | ||
684 | int | |
e368a6e9 SZ |
685 | soconnect(struct socket *so, struct sockaddr *nam, struct thread *td, |
686 | boolean_t sync) | |
984263bc | 687 | { |
984263bc MD |
688 | int error; |
689 | ||
690 | if (so->so_options & SO_ACCEPTCONN) | |
691 | return (EOPNOTSUPP); | |
984263bc MD |
692 | /* |
693 | * If protocol is connection-based, can only connect once. | |
694 | * Otherwise, if connected, try to disconnect first. | |
695 | * This allows user to disconnect by connecting to, e.g., | |
696 | * a null address. | |
697 | */ | |
698 | if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && | |
699 | ((so->so_proto->pr_flags & PR_CONNREQUIRED) || | |
59429d28 | 700 | (error = sodisconnect(so)))) { |
984263bc | 701 | error = EISCONN; |
59429d28 MD |
702 | } else { |
703 | /* | |
704 | * Prevent accumulated error from previous connection | |
705 | * from biting us. | |
706 | */ | |
707 | so->so_error = 0; | |
e368a6e9 SZ |
708 | if (!sync && so->so_proto->pr_usrreqs->pru_preconnect) |
709 | error = so_pru_connect_async(so, nam, td); | |
710 | else | |
711 | error = so_pru_connect(so, nam, td); | |
59429d28 | 712 | } |
984263bc MD |
713 | return (error); |
714 | } | |
715 | ||
/*
 * Connect two sockets to each other; a thin wrapper that returns
 * whatever so_pru_connect2() returns.
 */
int
soconnect2(struct socket *so1, struct socket *so2, struct ucred *cred)
{
	return (so_pru_connect2(so1, so2, cred));
}
724 | ||
725 | int | |
dadab5e9 | 726 | sodisconnect(struct socket *so) |
984263bc | 727 | { |
984263bc MD |
728 | int error; |
729 | ||
730 | if ((so->so_state & SS_ISCONNECTED) == 0) { | |
731 | error = ENOTCONN; | |
732 | goto bad; | |
733 | } | |
734 | if (so->so_state & SS_ISDISCONNECTING) { | |
735 | error = EALREADY; | |
736 | goto bad; | |
737 | } | |
6b6e0885 | 738 | error = so_pru_disconnect(so); |
984263bc | 739 | bad: |
984263bc MD |
740 | return (error); |
741 | } | |
742 | ||
743 | #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) | |
744 | /* | |
745 | * Send on a socket. | |
746 | * If send must go all at once and message is larger than | |
747 | * send buffering, then hard error. | |
748 | * Lock against other senders. | |
749 | * If must go all at once and not enough room now, then | |
750 | * inform user that this would block and do nothing. | |
751 | * Otherwise, if nonblocking, send as much as possible. | |
752 | * The data to be sent is described by "uio" if nonzero, | |
753 | * otherwise by the mbuf chain "top" (which must be null | |
754 | * if uio is not). Data provided in mbuf chain must be small | |
755 | * enough to send all at once. | |
756 | * | |
757 | * Returns nonzero on error, timeout or signal; callers | |
758 | * must check for short counts if EINTR/ERESTART are returned. | |
759 | * Data and control buffers are freed on return. | |
760 | */ | |
761 | int | |
dadab5e9 MD |
762 | sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, |
763 | struct mbuf *top, struct mbuf *control, int flags, | |
764 | struct thread *td) | |
984263bc MD |
765 | { |
766 | struct mbuf **mp; | |
dadab5e9 | 767 | struct mbuf *m; |
e54488bb MD |
768 | size_t resid; |
769 | int space, len; | |
e43a034f | 770 | int clen = 0, error, dontroute, mlen; |
984263bc | 771 | int atomic = sosendallatonce(so) || top; |
6b6e0885 | 772 | int pru_flags; |
984263bc | 773 | |
5bd48c1d | 774 | if (uio) { |
984263bc | 775 | resid = uio->uio_resid; |
5bd48c1d | 776 | } else { |
e54488bb | 777 | resid = (size_t)top->m_pkthdr.len; |
5bd48c1d MD |
778 | #ifdef INVARIANTS |
779 | len = 0; | |
780 | for (m = top; m; m = m->m_next) | |
781 | len += m->m_len; | |
782 | KKASSERT(top->m_pkthdr.len == len); | |
783 | #endif | |
784 | } | |
48e7b118 | 785 | |
984263bc | 786 | /* |
e54488bb MD |
787 | * WARNING! resid is unsigned, space and len are signed. space |
788 | * can wind up negative if the sockbuf is overcommitted. | |
984263bc MD |
789 | * |
790 | * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM | |
791 | * type sockets since that's an error. | |
792 | */ | |
e54488bb | 793 | if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) { |
984263bc MD |
794 | error = EINVAL; |
795 | goto out; | |
796 | } | |
797 | ||
798 | dontroute = | |
799 | (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && | |
800 | (so->so_proto->pr_flags & PR_ATOMIC); | |
fde7ac71 SS |
801 | if (td->td_lwp != NULL) |
802 | td->td_lwp->lwp_ru.ru_msgsnd++; | |
984263bc MD |
803 | if (control) |
804 | clen = control->m_len; | |
6cef7136 | 805 | #define gotoerr(errcode) { error = errcode; goto release; } |
984263bc MD |
806 | |
807 | restart: | |
6d49aa6f | 808 | error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags)); |
984263bc MD |
809 | if (error) |
810 | goto out; | |
48e7b118 | 811 | |
984263bc | 812 | do { |
984263bc | 813 | if (so->so_state & SS_CANTSENDMORE) |
6ea1e9b9 | 814 | gotoerr(EPIPE); |
984263bc MD |
815 | if (so->so_error) { |
816 | error = so->so_error; | |
817 | so->so_error = 0; | |
984263bc MD |
818 | goto release; |
819 | } | |
820 | if ((so->so_state & SS_ISCONNECTED) == 0) { | |
821 | /* | |
822 | * `sendto' and `sendmsg' is allowed on a connection- | |
823 | * based socket if it supports implied connect. | |
824 | * Return ENOTCONN if not connected and no address is | |
825 | * supplied. | |
826 | */ | |
827 | if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && | |
828 | (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { | |
829 | if ((so->so_state & SS_ISCONFIRMING) == 0 && | |
830 | !(resid == 0 && clen != 0)) | |
6ea1e9b9 | 831 | gotoerr(ENOTCONN); |
4090d6ff | 832 | } else if (addr == NULL) |
6ea1e9b9 | 833 | gotoerr(so->so_proto->pr_flags & PR_CONNREQUIRED ? |
984263bc MD |
834 | ENOTCONN : EDESTADDRREQ); |
835 | } | |
3a6117bb MD |
836 | if ((atomic && resid > so->so_snd.ssb_hiwat) || |
837 | clen > so->so_snd.ssb_hiwat) { | |
838 | gotoerr(EMSGSIZE); | |
839 | } | |
6d49aa6f | 840 | space = ssb_space(&so->so_snd); |
984263bc MD |
841 | if (flags & MSG_OOB) |
842 | space += 1024; | |
e54488bb | 843 | if ((space < 0 || (size_t)space < resid + clen) && uio && |
6d49aa6f | 844 | (atomic || space < so->so_snd.ssb_lowat || space < clen)) { |
9ba76b73 | 845 | if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) |
6ea1e9b9 | 846 | gotoerr(EWOULDBLOCK); |
6d49aa6f MD |
847 | ssb_unlock(&so->so_snd); |
848 | error = ssb_wait(&so->so_snd); | |
984263bc MD |
849 | if (error) |
850 | goto out; | |
851 | goto restart; | |
852 | } | |
984263bc MD |
853 | mp = ⊤ |
854 | space -= clen; | |
855 | do { | |
856 | if (uio == NULL) { | |
857 | /* | |
858 | * Data is prepackaged in "top". | |
859 | */ | |
860 | resid = 0; | |
861 | if (flags & MSG_EOR) | |
862 | top->m_flags |= M_EOR; | |
863 | } else do { | |
e54488bb MD |
864 | if (resid > INT_MAX) |
865 | resid = INT_MAX; | |
b5523eac | 866 | m = m_getl((int)resid, M_WAITOK, MT_DATA, |
50503f0f JH |
867 | top == NULL ? M_PKTHDR : 0, &mlen); |
868 | if (top == NULL) { | |
984263bc | 869 | m->m_pkthdr.len = 0; |
60233e58 | 870 | m->m_pkthdr.rcvif = NULL; |
984263bc | 871 | } |
e54488bb | 872 | len = imin((int)szmin(mlen, resid), space); |
50503f0f | 873 | if (resid < MINCLSIZE) { |
984263bc MD |
874 | /* |
875 | * For datagram protocols, leave room | |
876 | * for protocol headers in first mbuf. | |
877 | */ | |
4090d6ff | 878 | if (atomic && top == NULL && len < mlen) |
984263bc MD |
879 | MH_ALIGN(m, len); |
880 | } | |
881 | space -= len; | |
e54488bb | 882 | error = uiomove(mtod(m, caddr_t), (size_t)len, uio); |
984263bc MD |
883 | resid = uio->uio_resid; |
884 | m->m_len = len; | |
885 | *mp = m; | |
886 | top->m_pkthdr.len += len; | |
887 | if (error) | |
888 | goto release; | |
889 | mp = &m->m_next; | |
e54488bb | 890 | if (resid == 0) { |
984263bc MD |
891 | if (flags & MSG_EOR) |
892 | top->m_flags |= M_EOR; | |
893 | break; | |
894 | } | |
895 | } while (space > 0 && atomic); | |
896 | if (dontroute) | |
897 | so->so_options |= SO_DONTROUTE; | |
6b6e0885 JH |
898 | if (flags & MSG_OOB) { |
899 | pru_flags = PRUS_OOB; | |
900 | } else if ((flags & MSG_EOF) && | |
901 | (so->so_proto->pr_flags & PR_IMPLOPCL) && | |
e54488bb | 902 | (resid == 0)) { |
6b6e0885 JH |
903 | /* |
904 | * If the user set MSG_EOF, the protocol | |
905 | * understands this flag and nothing left to | |
906 | * send then use PRU_SEND_EOF instead of PRU_SEND. | |
907 | */ | |
908 | pru_flags = PRUS_EOF; | |
909 | } else if (resid > 0 && space > 0) { | |
910 | /* If there is more to send, set PRUS_MORETOCOME */ | |
911 | pru_flags = PRUS_MORETOCOME; | |
912 | } else { | |
913 | pru_flags = 0; | |
914 | } | |
984263bc MD |
915 | /* |
916 | * XXX all the SS_CANTSENDMORE checks previously | |
917 | * done could be out of date. We could have recieved | |
918 | * a reset packet in an interrupt or maybe we slept | |
919 | * while doing page faults in uiomove() etc. We could | |
920 | * probably recheck again inside the splnet() protection | |
921 | * here, but there are probably other places that this | |
922 | * also happens. We must rethink this. | |
923 | */ | |
6b6e0885 | 924 | error = so_pru_send(so, pru_flags, top, addr, control, td); |
984263bc MD |
925 | if (dontroute) |
926 | so->so_options &= ~SO_DONTROUTE; | |
927 | clen = 0; | |
4090d6ff | 928 | control = NULL; |
e28d8186 | 929 | top = NULL; |
984263bc MD |
930 | mp = ⊤ |
931 | if (error) | |
6b6e0885 | 932 | goto release; |
984263bc MD |
933 | } while (resid && space > 0); |
934 | } while (resid); | |
935 | ||
936 | release: | |
6d49aa6f | 937 | ssb_unlock(&so->so_snd); |
984263bc MD |
938 | out: |
939 | if (top) | |
940 | m_freem(top); | |
941 | if (control) | |
942 | m_freem(control); | |
943 | return (error); | |
944 | } | |
945 | ||
a6ae3d3b | 946 | #ifdef INET |
6ea1e9b9 JH |
947 | /* |
948 | * A specialization of sosend() for UDP based on protocol-specific knowledge: | |
949 | * so->so_proto->pr_flags has the PR_ATOMIC field set. This means that | |
950 | * sosendallatonce() returns true, | |
951 | * the "atomic" variable is true, | |
952 | * and sosendudp() blocks until space is available for the entire send. | |
953 | * so->so_proto->pr_flags does not have the PR_CONNREQUIRED or | |
954 | * PR_IMPLOPCL flags set. | |
955 | * UDP has no out-of-band data. | |
956 | * UDP has no control data. | |
957 | * UDP does not support MSG_EOR. | |
958 | */ | |
959 | int | |
960 | sosendudp(struct socket *so, struct sockaddr *addr, struct uio *uio, | |
961 | struct mbuf *top, struct mbuf *control, int flags, struct thread *td) | |
962 | { | |
e54488bb | 963 | size_t resid; |
47d61296 | 964 | int error, pru_flags = 0; |
e54488bb | 965 | int space; |
6ea1e9b9 | 966 | |
fde7ac71 SS |
967 | if (td->td_lwp != NULL) |
968 | td->td_lwp->lwp_ru.ru_msgsnd++; | |
6ea1e9b9 JH |
969 | |
970 | KASSERT((uio && !top) || (top && !uio), ("bad arguments to sosendudp")); | |
e54488bb | 971 | resid = uio ? uio->uio_resid : (size_t)top->m_pkthdr.len; |
6ea1e9b9 JH |
972 | |
973 | restart: | |
6d49aa6f | 974 | error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags)); |
6ea1e9b9 JH |
975 | if (error) |
976 | goto out; | |
977 | ||
6ea1e9b9 JH |
978 | if (so->so_state & SS_CANTSENDMORE) |
979 | gotoerr(EPIPE); | |
980 | if (so->so_error) { | |
981 | error = so->so_error; | |
982 | so->so_error = 0; | |
6ea1e9b9 JH |
983 | goto release; |
984 | } | |
985 | if (!(so->so_state & SS_ISCONNECTED) && addr == NULL) | |
986 | gotoerr(EDESTADDRREQ); | |
6d49aa6f | 987 | if (resid > so->so_snd.ssb_hiwat) |
6ea1e9b9 | 988 | gotoerr(EMSGSIZE); |
e54488bb MD |
989 | space = ssb_space(&so->so_snd); |
990 | if (uio && (space < 0 || (size_t)space < resid)) { | |
9ba76b73 | 991 | if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) |
6ea1e9b9 | 992 | gotoerr(EWOULDBLOCK); |
6d49aa6f MD |
993 | ssb_unlock(&so->so_snd); |
994 | error = ssb_wait(&so->so_snd); | |
6ea1e9b9 JH |
995 | if (error) |
996 | goto out; | |
997 | goto restart; | |
998 | } | |
6ea1e9b9 JH |
999 | |
1000 | if (uio) { | |
f5991d1f SZ |
1001 | int hdrlen = max_hdr; |
1002 | ||
1003 | /* | |
1004 | * We try to optimize out the additional mbuf | |
1005 | * allocations in M_PREPEND() on output path, e.g. | |
1006 | * - udp_output(), when it tries to prepend protocol | |
1007 | * headers. | |
1008 | * - Link layer output function, when it tries to | |
1009 | * prepend link layer header. | |
1010 | * | |
1011 | * This probably will not benefit any data that will | |
1012 | * be fragmented, so this optimization is only performed | |
1013 | * when the size of data and max size of protocol+link | |
1014 | * headers fit into one mbuf cluster. | |
1015 | */ | |
1016 | if (uio->uio_resid > MCLBYTES - hdrlen || | |
1017 | !udp_sosend_prepend) { | |
1018 | top = m_uiomove(uio); | |
1019 | if (top == NULL) | |
1020 | goto release; | |
1021 | } else { | |
1022 | int nsize; | |
1023 | ||
b5523eac | 1024 | top = m_getl(uio->uio_resid + hdrlen, M_WAITOK, |
f5991d1f SZ |
1025 | MT_DATA, M_PKTHDR, &nsize); |
1026 | KASSERT(nsize >= uio->uio_resid + hdrlen, | |
1027 | ("sosendudp invalid nsize %d, " | |
1028 | "resid %zu, hdrlen %d", | |
1029 | nsize, uio->uio_resid, hdrlen)); | |
1030 | ||
1031 | top->m_len = uio->uio_resid; | |
1032 | top->m_pkthdr.len = uio->uio_resid; | |
1033 | top->m_data += hdrlen; | |
1034 | ||
1035 | error = uiomove(mtod(top, caddr_t), top->m_len, uio); | |
1036 | if (error) | |
1037 | goto out; | |
1038 | } | |
6ea1e9b9 JH |
1039 | } |
1040 | ||
47d61296 SZ |
1041 | if (flags & MSG_DONTROUTE) |
1042 | pru_flags |= PRUS_DONTROUTE; | |
6ea1e9b9 | 1043 | |
0df7608b | 1044 | if (udp_sosend_async && (flags & MSG_SYNC) == 0) { |
14886aca | 1045 | so_pru_send_async(so, pru_flags, top, addr, control, td); |
6ce03112 SZ |
1046 | error = 0; |
1047 | } else { | |
14886aca | 1048 | error = so_pru_send(so, pru_flags, top, addr, control, td); |
6ce03112 | 1049 | } |
14886aca SZ |
1050 | |
1051 | /* sent or freed in lower layer */ | |
1052 | control = NULL; | |
1053 | top = NULL; | |
6ea1e9b9 | 1054 | |
6ea1e9b9 | 1055 | release: |
6d49aa6f | 1056 | ssb_unlock(&so->so_snd); |
6ea1e9b9 JH |
1057 | out: |
1058 | if (top) | |
1059 | m_freem(top); | |
14886aca SZ |
1060 | if (control) |
1061 | m_freem(control); | |
6ea1e9b9 JH |
1062 | return (error); |
1063 | } | |
1064 | ||
5bc42dd1 SZ |
1065 | int |
1066 | sosendtcp(struct socket *so, struct sockaddr *addr, struct uio *uio, | |
1067 | struct mbuf *top, struct mbuf *control, int flags, | |
1068 | struct thread *td) | |
1069 | { | |
1070 | struct mbuf **mp; | |
1071 | struct mbuf *m; | |
1072 | size_t resid; | |
1073 | int space, len; | |
1074 | int error, mlen; | |
1075 | int allatonce; | |
1076 | int pru_flags; | |
1077 | ||
1078 | if (uio) { | |
1079 | KKASSERT(top == NULL); | |
1080 | allatonce = 0; | |
1081 | resid = uio->uio_resid; | |
1082 | } else { | |
1083 | allatonce = 1; | |
1084 | resid = (size_t)top->m_pkthdr.len; | |
1085 | #ifdef INVARIANTS | |
1086 | len = 0; | |
1087 | for (m = top; m; m = m->m_next) | |
1088 | len += m->m_len; | |
1089 | KKASSERT(top->m_pkthdr.len == len); | |
1090 | #endif | |
1091 | } | |
1092 | ||
1093 | /* | |
1094 | * WARNING! resid is unsigned, space and len are signed. space | |
1095 | * can wind up negative if the sockbuf is overcommitted. | |
1096 | * | |
1097 | * Also check to make sure that MSG_EOR isn't used on TCP | |
1098 | */ | |
1099 | if (flags & MSG_EOR) { | |
1100 | error = EINVAL; | |
1101 | goto out; | |
1102 | } | |
1103 | ||
1104 | if (control) { | |
1105 | /* TCP doesn't do control messages (rights, creds, etc) */ | |
1106 | if (control->m_len) { | |
1107 | error = EINVAL; | |
1108 | goto out; | |
1109 | } | |
1110 | m_freem(control); /* empty control, just free it */ | |
1111 | control = NULL; | |
1112 | } | |
1113 | ||
1114 | if (td->td_lwp != NULL) | |
1115 | td->td_lwp->lwp_ru.ru_msgsnd++; | |
1116 | ||
1117 | #define gotoerr(errcode) { error = errcode; goto release; } | |
1118 | ||
1119 | restart: | |
1120 | error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags)); | |
1121 | if (error) | |
1122 | goto out; | |
1123 | ||
1124 | do { | |
1125 | if (so->so_state & SS_CANTSENDMORE) | |
1126 | gotoerr(EPIPE); | |
1127 | if (so->so_error) { | |
1128 | error = so->so_error; | |
1129 | so->so_error = 0; | |
1130 | goto release; | |
1131 | } | |
1132 | if ((so->so_state & SS_ISCONNECTED) == 0 && | |
1133 | (so->so_state & SS_ISCONFIRMING) == 0) | |
1134 | gotoerr(ENOTCONN); | |
1135 | if (allatonce && resid > so->so_snd.ssb_hiwat) | |
1136 | gotoerr(EMSGSIZE); | |
1137 | ||
f786a6ac | 1138 | space = ssb_space_prealloc(&so->so_snd); |
5bc42dd1 SZ |
1139 | if (flags & MSG_OOB) |
1140 | space += 1024; | |
1141 | if ((space < 0 || (size_t)space < resid) && !allatonce && | |
1142 | space < so->so_snd.ssb_lowat) { | |
1143 | if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) | |
1144 | gotoerr(EWOULDBLOCK); | |
1145 | ssb_unlock(&so->so_snd); | |
1146 | error = ssb_wait(&so->so_snd); | |
1147 | if (error) | |
1148 | goto out; | |
1149 | goto restart; | |
1150 | } | |
1151 | mp = ⊤ | |
1152 | do { | |
f2a3782e | 1153 | int cnt = 0, async = 0; |
c16aca65 | 1154 | |
5bc42dd1 SZ |
1155 | if (uio == NULL) { |
1156 | /* | |
1157 | * Data is prepackaged in "top". | |
1158 | */ | |
1159 | resid = 0; | |
1160 | } else do { | |
1161 | if (resid > INT_MAX) | |
1162 | resid = INT_MAX; | |
ef82c254 | 1163 | if (tcp_sosend_jcluster) { |
b5523eac | 1164 | m = m_getlj((int)resid, M_WAITOK, MT_DATA, |
ef82c254 SZ |
1165 | top == NULL ? M_PKTHDR : 0, &mlen); |
1166 | } else { | |
b5523eac | 1167 | m = m_getl((int)resid, M_WAITOK, MT_DATA, |
ef82c254 SZ |
1168 | top == NULL ? M_PKTHDR : 0, &mlen); |
1169 | } | |
5bc42dd1 SZ |
1170 | if (top == NULL) { |
1171 | m->m_pkthdr.len = 0; | |
1172 | m->m_pkthdr.rcvif = NULL; | |
1173 | } | |
1174 | len = imin((int)szmin(mlen, resid), space); | |
1175 | space -= len; | |
1176 | error = uiomove(mtod(m, caddr_t), (size_t)len, uio); | |
1177 | resid = uio->uio_resid; | |
1178 | m->m_len = len; | |
1179 | *mp = m; | |
1180 | top->m_pkthdr.len += len; | |
1181 | if (error) | |
1182 | goto release; | |
1183 | mp = &m->m_next; | |
1184 | if (resid == 0) | |
1185 | break; | |
c16aca65 | 1186 | ++cnt; |
0df7608b | 1187 | } while (space > 0 && cnt < tcp_sosend_agglim); |
5bc42dd1 | 1188 | |
0df7608b | 1189 | if (tcp_sosend_async) |
6d618102 SZ |
1190 | async = 1; |
1191 | ||
5bc42dd1 SZ |
1192 | if (flags & MSG_OOB) { |
1193 | pru_flags = PRUS_OOB; | |
6d618102 SZ |
1194 | async = 0; |
1195 | } else if ((flags & MSG_EOF) && resid == 0) { | |
1196 | pru_flags = PRUS_EOF; | |
5bc42dd1 SZ |
1197 | } else if (resid > 0 && space > 0) { |
1198 | /* If there is more to send, set PRUS_MORETOCOME */ | |
1199 | pru_flags = PRUS_MORETOCOME; | |
f2a3782e | 1200 | async = 1; |
5bc42dd1 SZ |
1201 | } else { |
1202 | pru_flags = 0; | |
1203 | } | |
1204 | ||
1af30d61 | 1205 | if (flags & MSG_SYNC) |
6d618102 | 1206 | async = 0; |
1af30d61 | 1207 | |
5bc42dd1 SZ |
1208 | /* |
1209 | * XXX all the SS_CANTSENDMORE checks previously | |
1210 | * done could be out of date. We could have recieved | |
1211 | * a reset packet in an interrupt or maybe we slept | |
1212 | * while doing page faults in uiomove() etc. We could | |
1213 | * probably recheck again inside the splnet() protection | |
1214 | * here, but there are probably other places that this | |
1215 | * also happens. We must rethink this. | |
1216 | */ | |
f786a6ac SZ |
1217 | for (m = top; m; m = m->m_next) |
1218 | ssb_preallocstream(&so->so_snd, m); | |
f2a3782e | 1219 | if (!async) { |
0ad8e15e SZ |
1220 | error = so_pru_send(so, pru_flags, top, |
1221 | NULL, NULL, td); | |
1222 | } else { | |
1223 | so_pru_send_async(so, pru_flags, top, | |
1224 | NULL, NULL, td); | |
1225 | error = 0; | |
1226 | } | |
5bc42dd1 SZ |
1227 | |
1228 | top = NULL; | |
1229 | mp = ⊤ | |
1230 | if (error) | |
1231 | goto release; | |
1232 | } while (resid && space > 0); | |
1233 | } while (resid); | |
1234 | ||
1235 | release: | |
1236 | ssb_unlock(&so->so_snd); | |
1237 | out: | |
1238 | if (top) | |
1239 | m_freem(top); | |
1240 | if (control) | |
1241 | m_freem(control); | |
1242 | return (error); | |
1243 | } | |
a6ae3d3b | 1244 | #endif |
5bc42dd1 | 1245 | |
984263bc MD |
1246 | /* |
1247 | * Implement receive operations on a socket. | |
6cef7136 | 1248 | * |
6d49aa6f | 1249 | * We depend on the way that records are added to the signalsockbuf |
984263bc MD |
1250 | * by sbappend*. In particular, each record (mbufs linked through m_next) |
1251 | * must begin with an address if the protocol so specifies, | |
1252 | * followed by an optional mbuf or mbufs containing ancillary data, | |
1253 | * and then zero or more mbufs of data. | |
6cef7136 MD |
1254 | * |
1255 | * Although the signalsockbuf is locked, new data may still be appended. | |
1256 | * A token inside the ssb_lock deals with MP issues and still allows | |
1257 | * the network to access the socket if we block in a uio. | |
984263bc MD |
1258 | * |
1259 | * The caller may receive the data as a single mbuf chain by supplying | |
387ce9a6 | 1260 | * sio for use in returning the chain. |
984263bc MD |
1261 | */ |
1262 | int | |
c972a82f | 1263 | soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, |
6d49aa6f | 1264 | struct sockbuf *sio, struct mbuf **controlp, int *flagsp) |
984263bc | 1265 | { |
d8a9a23b | 1266 | struct mbuf *m, *n; |
857caa4a | 1267 | struct mbuf *free_chain = NULL; |
e43a034f | 1268 | int flags, len, error, offset; |
984263bc | 1269 | struct protosw *pr = so->so_proto; |
984263bc | 1270 | int moff, type = 0; |
e54488bb | 1271 | size_t resid, orig_resid; |
e62cfe62 | 1272 | boolean_t free_rights = FALSE; |
d8a9a23b MD |
1273 | |
1274 | if (uio) | |
1275 | resid = uio->uio_resid; | |
1276 | else | |
e54488bb | 1277 | resid = (size_t)(sio->sb_climit - sio->sb_cc); |
d8a9a23b | 1278 | orig_resid = resid; |
984263bc | 1279 | |
9510c423 SZ |
1280 | if (psa) |
1281 | *psa = NULL; | |
1282 | if (controlp) | |
1283 | *controlp = NULL; | |
1284 | if (flagsp) | |
1285 | flags = *flagsp &~ MSG_EOR; | |
1286 | else | |
1287 | flags = 0; | |
1288 | if (flags & MSG_OOB) { | |
b5523eac | 1289 | m = m_get(M_WAITOK, MT_DATA); |
9510c423 SZ |
1290 | error = so_pru_rcvoob(so, m, flags & MSG_PEEK); |
1291 | if (error) | |
1292 | goto bad; | |
1293 | if (sio) { | |
1294 | do { | |
1295 | sbappend(sio, m); | |
1296 | KKASSERT(resid >= (size_t)m->m_len); | |
1297 | resid -= (size_t)m->m_len; | |
387ce9a6 | 1298 | m = m_free(m); |
9510c423 SZ |
1299 | } while (resid > 0 && m); |
1300 | } else { | |
1301 | do { | |
1302 | uio->uio_resid = resid; | |
1303 | error = uiomove(mtod(m, caddr_t), | |
1304 | (int)szmin(resid, m->m_len), | |
1305 | uio); | |
1306 | resid = uio->uio_resid; | |
1307 | m = m_free(m); | |
1308 | } while (uio->uio_resid && error == 0 && m); | |
1309 | } | |
1310 | bad: | |
1311 | if (m) | |
1312 | m_freem(m); | |
1313 | return (error); | |
1314 | } | |
1315 | if ((so->so_state & SS_ISCONFIRMING) && resid) | |
1316 | so_pru_rcvd(so, 0); | |
1317 | ||
1318 | /* | |
1319 | * The token interlocks against the protocol thread while | |
1320 | * ssb_lock is a blocking lock against other userland entities. | |
1321 | */ | |
1322 | lwkt_gettoken(&so->so_rcv.ssb_token); | |
1323 | restart: | |
1324 | error = ssb_lock(&so->so_rcv, SBLOCKWAIT(flags)); | |
1325 | if (error) | |
1326 | goto done; | |
1327 | ||
1328 | m = so->so_rcv.ssb_mb; | |
1329 | /* | |
1330 | * If we have less data than requested, block awaiting more | |
1331 | * (subject to any timeout) if: | |
1332 | * 1. the current count is less than the low water mark, or | |
1333 | * 2. MSG_WAITALL is set, and it is possible to do the entire | |
1334 | * receive operation at once if we block (resid <= hiwat). | |
1335 | * 3. MSG_DONTWAIT is not set | |
1336 | * If MSG_WAITALL is set but resid is larger than the receive buffer, | |
1337 | * we have to do the receive in sections, and thus risk returning | |
1338 | * a short count if a timeout or signal occurs after we start. | |
1339 | */ | |
b272101a AL |
1340 | if (m == NULL || |
1341 | (((flags & MSG_DONTWAIT) == 0 && | |
1342 | (size_t)so->so_rcv.ssb_cc < resid) && | |
1343 | (so->so_rcv.ssb_cc < so->so_rcv.ssb_lowat || | |
1344 | ((flags & MSG_WAITALL) && | |
1345 | resid <= (size_t)so->so_rcv.ssb_hiwat)) && | |
1346 | m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) | |
1347 | { | |
9510c423 | 1348 | KASSERT(m != NULL || !so->so_rcv.ssb_cc, ("receive 1")); |
7eaeff3d | 1349 | if (so->so_error || so->so_rerror) { |
9510c423 SZ |
1350 | if (m) |
1351 | goto dontblock; | |
7eaeff3d RM |
1352 | if (so->so_error) |
1353 | error = so->so_error; | |
1354 | else | |
1355 | error = so->so_rerror; | |
1356 | if ((flags & MSG_PEEK) == 0) { | |
1357 | if (so->so_error) | |
1358 | so->so_error = 0; | |
1359 | else | |
1360 | so->so_rerror = 0; | |
1361 | } | |
9510c423 SZ |
1362 | goto release; |
1363 | } | |
1364 | if (so->so_state & SS_CANTRCVMORE) { | |
1365 | if (m) | |
1366 | goto dontblock; | |
1367 | else | |
1368 | goto release; | |
1369 | } | |
1370 | for (; m; m = m->m_next) { | |
1371 | if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { | |
1372 | m = so->so_rcv.ssb_mb; | |
1373 | goto dontblock; | |
1374 | } | |
1375 | } | |
1376 | if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && | |
1377 | (pr->pr_flags & PR_CONNREQUIRED)) { | |
1378 | error = ENOTCONN; | |
1379 | goto release; | |
1380 | } | |
1381 | if (resid == 0) | |
1382 | goto release; | |
1383 | if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) { | |
1384 | error = EWOULDBLOCK; | |
1385 | goto release; | |
1386 | } | |
1387 | ssb_unlock(&so->so_rcv); | |
1388 | error = ssb_wait(&so->so_rcv); | |
1389 | if (error) | |
1390 | goto done; | |
1391 | goto restart; | |
1392 | } | |
1393 | dontblock: | |
1394 | if (uio && uio->uio_td && uio->uio_td->td_proc) | |
1395 | uio->uio_td->td_lwp->lwp_ru.ru_msgrcv++; | |
1396 | ||
1397 | /* | |
1398 | * note: m should be == sb_mb here. Cache the next record while | |
1399 | * cleaning up. Note that calling m_free*() will break out critical | |
1400 | * section. | |
1401 | */ | |
1402 | KKASSERT(m == so->so_rcv.ssb_mb); | |
1403 | ||
1404 | /* | |
1405 | * Skip any address mbufs prepending the record. | |
1406 | */ | |
1407 | if (pr->pr_flags & PR_ADDR) { | |
1408 | KASSERT(m->m_type == MT_SONAME, ("receive 1a")); | |
1409 | orig_resid = 0; | |
1410 | if (psa) | |
1411 | *psa = dup_sockaddr(mtod(m, struct sockaddr *)); | |
1412 | if (flags & MSG_PEEK) | |
1413 | m = m->m_next; | |
1414 | else | |
1415 | m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain); | |
1416 | } | |
1417 | ||
1418 | /* | |
1419 | * Skip any control mbufs prepending the record. | |
1420 | */ | |
9510c423 SZ |
1421 | while (m && m->m_type == MT_CONTROL && error == 0) { |
1422 | if (flags & MSG_PEEK) { | |
1423 | if (controlp) | |
410f8572 | 1424 | *controlp = m_copym(m, 0, m->m_len, M_NOWAIT); |
9510c423 SZ |
1425 | m = m->m_next; /* XXX race */ |
1426 | } else { | |
e62cfe62 SZ |
1427 | const struct cmsghdr *cm = mtod(m, struct cmsghdr *); |
1428 | ||
9510c423 SZ |
1429 | if (controlp) { |
1430 | n = sbunlinkmbuf(&so->so_rcv.sb, m, NULL); | |
1431 | if (pr->pr_domain->dom_externalize && | |
e62cfe62 SZ |
1432 | cm->cmsg_level == SOL_SOCKET && |
1433 | cm->cmsg_type == SCM_RIGHTS) { | |
1434 | error = pr->pr_domain->dom_externalize | |
1435 | (m, flags); | |
1436 | } | |
9510c423 SZ |
1437 | *controlp = m; |
1438 | m = n; | |
1439 | } else { | |
e62cfe62 SZ |
1440 | if (cm->cmsg_level == SOL_SOCKET && |
1441 | cm->cmsg_type == SCM_RIGHTS) | |
1442 | free_rights = TRUE; | |
9510c423 SZ |
1443 | m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain); |
1444 | } | |
1445 | } | |
1446 | if (controlp && *controlp) { | |
1447 | orig_resid = 0; | |
1448 | controlp = &(*controlp)->m_next; | |
1449 | } | |
1450 | } | |
1451 | ||
1452 | /* | |
1453 | * flag OOB data. | |
1454 | */ | |
1455 | if (m) { | |
1456 | type = m->m_type; | |
1457 | if (type == MT_OOBDATA) | |
1458 | flags |= MSG_OOB; | |
1459 | } | |
1460 | ||
1461 | /* | |
1462 | * Copy to the UIO or mbuf return chain (*mp). | |
1463 | */ | |
1464 | moff = 0; | |
1465 | offset = 0; | |
1466 | while (m && resid > 0 && error == 0) { | |
1467 | if (m->m_type == MT_OOBDATA) { | |
1468 | if (type != MT_OOBDATA) | |
1469 | break; | |
b272101a | 1470 | } else if (type == MT_OOBDATA) { |
9510c423 | 1471 | break; |
b272101a AL |
1472 | } else { |
1473 | KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER, | |
1474 | ("receive 3")); | |
1475 | } | |
9510c423 SZ |
1476 | soclrstate(so, SS_RCVATMARK); |
1477 | len = (resid > INT_MAX) ? INT_MAX : resid; | |
1478 | if (so->so_oobmark && len > so->so_oobmark - offset) | |
1479 | len = so->so_oobmark - offset; | |
1480 | if (len > m->m_len - moff) | |
1481 | len = m->m_len - moff; | |
1482 | ||
1483 | /* | |
1484 | * Copy out to the UIO or pass the mbufs back to the SIO. | |
1485 | * The SIO is dealt with when we eat the mbuf, but deal | |
1486 | * with the resid here either way. | |
1487 | */ | |
1488 | if (uio) { | |
1489 | uio->uio_resid = resid; | |
1490 | error = uiomove(mtod(m, caddr_t) + moff, len, uio); | |
1491 | resid = uio->uio_resid; | |
1492 | if (error) | |
1493 | goto release; | |
1494 | } else { | |
1495 | resid -= (size_t)len; | |
1496 | } | |
1497 | ||
1498 | /* | |
1499 | * Eat the entire mbuf or just a piece of it | |
1500 | */ | |
1501 | if (len == m->m_len - moff) { | |
1502 | if (m->m_flags & M_EOR) | |
1503 | flags |= MSG_EOR; | |
9510c423 SZ |
1504 | if (flags & MSG_PEEK) { |
1505 | m = m->m_next; | |
1506 | moff = 0; | |
1507 | } else { | |
1508 | if (sio) { | |
b272101a AL |
1509 | n = sbunlinkmbuf(&so->so_rcv.sb, m, |
1510 | NULL); | |
9510c423 SZ |
1511 | sbappend(sio, m); |
1512 | m = n; | |
1513 | } else { | |
b272101a AL |
1514 | m = sbunlinkmbuf(&so->so_rcv.sb, m, |
1515 | &free_chain); | |
9510c423 SZ |
1516 | } |
1517 | } | |
1518 | } else { | |
1519 | if (flags & MSG_PEEK) { | |
1520 | moff += len; | |
1521 | } else { | |
1522 | if (sio) { | |
b5523eac | 1523 | n = m_copym(m, 0, len, M_WAITOK); |
9510c423 SZ |
1524 | if (n) |
1525 | sbappend(sio, n); | |
1526 | } | |
1527 | m->m_data += len; | |
1528 | m->m_len -= len; | |
1529 | so->so_rcv.ssb_cc -= len; | |
1530 | } | |
1531 | } | |
1532 | if (so->so_oobmark) { | |
1533 | if ((flags & MSG_PEEK) == 0) { | |
1534 | so->so_oobmark -= len; | |
1535 | if (so->so_oobmark == 0) { | |
1536 | sosetstate(so, SS_RCVATMARK); | |
1537 | break; | |
1538 | } | |
1539 | } else { | |
1540 | offset += len; | |
1541 | if (offset == so->so_oobmark) | |
1542 | break; | |
1543 | } | |
1544 | } | |
1545 | if (flags & MSG_EOR) | |
1546 | break; | |
1547 | /* | |
1548 | * If the MSG_WAITALL flag is set (for non-atomic socket), | |
1549 | * we must not quit until resid == 0 or an error | |
1550 | * termination. If a signal/timeout occurs, return | |
1551 | * with a short count but without error. | |
1552 | * Keep signalsockbuf locked against other readers. | |
1553 | */ | |
b272101a AL |
1554 | while ((flags & MSG_WAITALL) && m == NULL && |
1555 | resid > 0 && !sosendallatonce(so) && | |
1556 | so->so_rcv.ssb_mb == NULL) | |
1557 | { | |
7eaeff3d RM |
1558 | if (so->so_error || so->so_rerror || |
1559 | so->so_state & SS_CANTRCVMORE) | |
9510c423 SZ |
1560 | break; |
1561 | /* | |
1562 | * The window might have closed to zero, make | |
1563 | * sure we send an ack now that we've drained | |
1564 | * the buffer or we might end up blocking until | |
1565 | * the idle takes over (5 seconds). | |
1566 | */ | |
1567 | if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) | |
1568 | so_pru_rcvd(so, flags); | |
1569 | error = ssb_wait(&so->so_rcv); | |
1570 | if (error) { | |
1571 | ssb_unlock(&so->so_rcv); | |
1572 | error = 0; | |
1573 | goto done; | |
1574 | } | |
1575 | m = so->so_rcv.ssb_mb; | |
1576 | } | |
1577 | } | |
1578 | ||
1579 | /* | |
1580 | * If an atomic read was requested but unread data still remains | |
1581 | * in the record, set MSG_TRUNC. | |
1582 | */ | |
1583 | if (m && pr->pr_flags & PR_ATOMIC) | |
1584 | flags |= MSG_TRUNC; | |
1585 | ||
1586 | /* | |
1587 | * Cleanup. If an atomic read was requested drop any unread data. | |
1588 | */ | |
1589 | if ((flags & MSG_PEEK) == 0) { | |
1590 | if (m && (pr->pr_flags & PR_ATOMIC)) | |
1591 | sbdroprecord(&so->so_rcv.sb); | |
1592 | if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb) | |
1593 | so_pru_rcvd(so, flags); | |
1594 | } | |
1595 | ||
1596 | if (orig_resid == resid && orig_resid && | |
1597 | (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { | |
1598 | ssb_unlock(&so->so_rcv); | |
1599 | goto restart; | |
1600 | } | |
1601 | ||
1602 | if (flagsp) | |
1603 | *flagsp |= flags; | |
1604 | release: | |
1605 | ssb_unlock(&so->so_rcv); | |
1606 | done: | |
1607 | lwkt_reltoken(&so->so_rcv.ssb_token); | |
e62cfe62 SZ |
1608 | if (free_chain) { |
1609 | if (free_rights && (pr->pr_flags & PR_RIGHTS) && | |
1610 | pr->pr_domain->dom_dispose) | |
1611 | pr->pr_domain->dom_dispose(free_chain); | |
9510c423 | 1612 | m_freem(free_chain); |
e62cfe62 | 1613 | } |
9510c423 SZ |
1614 | return (error); |
1615 | } | |
1616 | ||
1617 | int | |
1618 | sorecvtcp(struct socket *so, struct sockaddr **psa, struct uio *uio, | |
1619 | struct sockbuf *sio, struct mbuf **controlp, int *flagsp) | |
1620 | { | |
1621 | struct mbuf *m, *n; | |
1622 | struct mbuf *free_chain = NULL; | |
1623 | int flags, len, error, offset; | |
1624 | struct protosw *pr = so->so_proto; | |
fd600585 | 1625 | int moff; |
11b81f5d MD |
1626 | int didoob; |
1627 | size_t resid, orig_resid, restmp; | |
9510c423 SZ |
1628 | |
1629 | if (uio) | |
1630 | resid = uio->uio_resid; | |
1631 | else | |
1632 | resid = (size_t)(sio->sb_climit - sio->sb_cc); | |
1633 | orig_resid = resid; | |
1634 | ||
984263bc | 1635 | if (psa) |
857caa4a | 1636 | *psa = NULL; |
984263bc | 1637 | if (controlp) |
857caa4a | 1638 | *controlp = NULL; |
984263bc MD |
1639 | if (flagsp) |
1640 | flags = *flagsp &~ MSG_EOR; | |
1641 | else | |
1642 | flags = 0; | |
1643 | if (flags & MSG_OOB) { | |
b5523eac | 1644 | m = m_get(M_WAITOK, MT_DATA); |
6b6e0885 | 1645 | error = so_pru_rcvoob(so, m, flags & MSG_PEEK); |
984263bc MD |
1646 | if (error) |
1647 | goto bad; | |
d8a9a23b MD |
1648 | if (sio) { |
1649 | do { | |
6d49aa6f | 1650 | sbappend(sio, m); |
e54488bb MD |
1651 | KKASSERT(resid >= (size_t)m->m_len); |
1652 | resid -= (size_t)m->m_len; | |
d8a9a23b MD |
1653 | } while (resid > 0 && m); |
1654 | } else { | |
1655 | do { | |
1656 | uio->uio_resid = resid; | |
1657 | error = uiomove(mtod(m, caddr_t), | |
e54488bb MD |
1658 | (int)szmin(resid, m->m_len), |
1659 | uio); | |
d8a9a23b MD |
1660 | resid = uio->uio_resid; |
1661 | m = m_free(m); | |
1662 | } while (uio->uio_resid && error == 0 && m); | |
1663 | } | |
984263bc MD |
1664 | bad: |
1665 | if (m) | |
1666 | m_freem(m); | |
1667 | return (error); | |
1668 | } | |
984263bc | 1669 | |
20156c7a MD |
1670 | /* |
1671 | * The token interlocks against the protocol thread while | |
1672 | * ssb_lock is a blocking lock against other userland entities. | |
11b81f5d MD |
1673 | * |
1674 | * Lock a limited number of mbufs (not all, so sbcompress() still | |
1675 | * works well). The token is used as an interlock for sbwait() so | |
1676 | * release it afterwords. | |
20156c7a | 1677 | */ |
984263bc | 1678 | restart: |
6d49aa6f | 1679 | error = ssb_lock(&so->so_rcv, SBLOCKWAIT(flags)); |
984263bc | 1680 | if (error) |
857caa4a | 1681 | goto done; |
984263bc | 1682 | |
11b81f5d | 1683 | lwkt_gettoken(&so->so_rcv.ssb_token); |
6d49aa6f | 1684 | m = so->so_rcv.ssb_mb; |
11b81f5d | 1685 | |
984263bc MD |
1686 | /* |
1687 | * If we have less data than requested, block awaiting more | |
1688 | * (subject to any timeout) if: | |
1689 | * 1. the current count is less than the low water mark, or | |
1690 | * 2. MSG_WAITALL is set, and it is possible to do the entire | |
1691 | * receive operation at once if we block (resid <= hiwat). | |
1692 | * 3. MSG_DONTWAIT is not set | |
1693 | * If MSG_WAITALL is set but resid is larger than the receive buffer, | |
1694 | * we have to do the receive in sections, and thus risk returning | |
1695 | * a short count if a timeout or signal occurs after we start. | |
1696 | */ | |
857caa4a | 1697 | if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && |
e54488bb | 1698 | (size_t)so->so_rcv.ssb_cc < resid) && |
6d49aa6f | 1699 | (so->so_rcv.ssb_cc < so->so_rcv.ssb_lowat || |
fd600585 | 1700 | ((flags & MSG_WAITALL) && resid <= (size_t)so->so_rcv.ssb_hiwat)))) { |
6d49aa6f | 1701 | KASSERT(m != NULL || !so->so_rcv.ssb_cc, ("receive 1")); |
984263bc MD |
1702 | if (so->so_error) { |
1703 | if (m) | |
1704 | goto dontblock; | |
11b81f5d | 1705 | lwkt_reltoken(&so->so_rcv.ssb_token); |
984263bc MD |
1706 | error = so->so_error; |
1707 | if ((flags & MSG_PEEK) == 0) | |
1708 | so->so_error = 0; | |
1709 | goto release; | |
1710 | } | |
1711 | if (so->so_state & SS_CANTRCVMORE) { | |
1712 | if (m) | |
1713 | goto dontblock; | |
11b81f5d MD |
1714 | lwkt_reltoken(&so->so_rcv.ssb_token); |
1715 | goto release; | |
984263bc | 1716 | } |
984263bc | 1717 | if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && |
6b6e0885 | 1718 | (pr->pr_flags & PR_CONNREQUIRED)) { |
11b81f5d | 1719 | lwkt_reltoken(&so->so_rcv.ssb_token); |
984263bc MD |
1720 | error = ENOTCONN; |
1721 | goto release; | |
1722 | } | |
11b81f5d MD |
1723 | if (resid == 0) { |
1724 | lwkt_reltoken(&so->so_rcv.ssb_token); | |
984263bc | 1725 | goto release; |
11b81f5d | 1726 | } |
9ba76b73 | 1727 | if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) { |
11b81f5d | 1728 | lwkt_reltoken(&so->so_rcv.ssb_token); |
984263bc MD |
1729 | error = EWOULDBLOCK; |
1730 | goto release; | |
1731 | } | |
6d49aa6f MD |
1732 | ssb_unlock(&so->so_rcv); |
1733 | error = ssb_wait(&so->so_rcv); | |
11b81f5d | 1734 | lwkt_reltoken(&so->so_rcv.ssb_token); |
984263bc | 1735 | if (error) |
857caa4a | 1736 | goto done; |
984263bc MD |
1737 | goto restart; |
1738 | } | |
11b81f5d MD |
1739 | |
1740 | /* | |
1741 | * Token still held | |
1742 | */ | |
984263bc | 1743 | dontblock: |
11b81f5d MD |
1744 | n = m; |
1745 | restmp = 0; | |
1746 | while (n && restmp < resid) { | |
1747 | n->m_flags |= M_SOLOCKED; | |
1748 | restmp += n->m_len; | |
1749 | if (n->m_next == NULL) | |
1750 | n = n->m_nextpkt; | |
1751 | else | |
1752 | n = n->m_next; | |
1753 | } | |
1754 | ||
1755 | /* | |
1756 | * Release token for loop | |
1757 | */ | |
1758 | lwkt_reltoken(&so->so_rcv.ssb_token); | |
d8a9a23b | 1759 | if (uio && uio->uio_td && uio->uio_td->td_proc) |
fde7ac71 | 1760 | uio->uio_td->td_lwp->lwp_ru.ru_msgrcv++; |
857caa4a MD |
1761 | |
1762 | /* | |
1763 | * note: m should be == sb_mb here. Cache the next record while | |
1764 | * cleaning up. Note that calling m_free*() will break our critical | |
1765 | * section. | |
1766 | */ | |
6d49aa6f | 1767 | KKASSERT(m == so->so_rcv.ssb_mb); |
857caa4a | 1768 | |
857caa4a MD |
1769 | /* |
1770 | * Copy to the UIO or mbuf return chain (*mp). | |
11b81f5d MD |
1771 | * |
1772 | * NOTE: Token is not held for loop | |
857caa4a | 1773 | */ |
984263bc MD |
1774 | moff = 0; |
1775 | offset = 0; | |
11b81f5d MD |
1776 | didoob = 0; |
1777 | ||
1778 | while (m && (m->m_flags & M_SOLOCKED) && resid > 0 && error == 0) { | |
fd600585 SZ |
1779 | KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER, |
1780 | ("receive 3")); | |
1781 | ||
6cef7136 | 1782 | soclrstate(so, SS_RCVATMARK); |
e54488bb | 1783 | len = (resid > INT_MAX) ? INT_MAX : resid; |
984263bc MD |
1784 | if (so->so_oobmark && len > so->so_oobmark - offset) |
1785 | len = so->so_oobmark - offset; | |
1786 | if (len > m->m_len - moff) | |
1787 | len = m->m_len - moff; | |
d8a9a23b | 1788 | |
984263bc | 1789 | /* |
d8a9a23b MD |
1790 | * Copy out to the UIO or pass the mbufs back to the SIO. |
1791 | * The SIO is dealt with when we eat the mbuf, but deal | |
1792 | * with the resid here either way. | |
984263bc | 1793 | */ |
d8a9a23b | 1794 | if (uio) { |
d8a9a23b MD |
1795 | uio->uio_resid = resid; |
1796 | error = uiomove(mtod(m, caddr_t) + moff, len, uio); | |
1797 | resid = uio->uio_resid; | |
984263bc MD |
1798 | if (error) |
1799 | goto release; | |
857caa4a | 1800 | } else { |
e54488bb | 1801 | resid -= (size_t)len; |
857caa4a MD |
1802 | } |
1803 | ||
1804 | /* | |
1805 | * Eat the entire mbuf or just a piece of it | |
1806 | */ | |
11b81f5d | 1807 | offset += len; |
984263bc | 1808 | if (len == m->m_len - moff) { |
11b81f5d MD |
1809 | m = m->m_next; |
1810 | moff = 0; | |
1811 | } else { | |
1812 | moff += len; | |
1813 | } | |
1814 | ||
1815 | /* | |
1816 | * Check oobmark | |
1817 | */ | |
1818 | if (so->so_oobmark && offset == so->so_oobmark) { | |
1819 | didoob = 1; | |
1820 | break; | |
1821 | } | |
1822 | } | |
1823 | ||
1824 | /* | |
1825 | * Synchronize sockbuf with data we read. | |
1826 | * | |
1827 | * NOTE: (m) is junk on entry (it could be left over from the | |
1828 | * previous loop). | |
1829 | */ | |
1830 | if ((flags & MSG_PEEK) == 0) { | |
1831 | lwkt_gettoken(&so->so_rcv.ssb_token); | |
1832 | m = so->so_rcv.ssb_mb; | |
1833 | while (m && offset >= m->m_len) { | |
1834 | if (so->so_oobmark) { | |
1835 | so->so_oobmark -= m->m_len; | |
1836 | if (so->so_oobmark == 0) { | |
1837 | sosetstate(so, SS_RCVATMARK); | |
1838 | didoob = 1; | |
984263bc | 1839 | } |
984263bc | 1840 | } |
11b81f5d MD |
1841 | offset -= m->m_len; |
1842 | if (sio) { | |
1843 | n = sbunlinkmbuf(&so->so_rcv.sb, m, NULL); | |
1844 | sbappend(sio, m); | |
1845 | m = n; | |
857caa4a | 1846 | } else { |
11b81f5d MD |
1847 | m = sbunlinkmbuf(&so->so_rcv.sb, |
1848 | m, &free_chain); | |
984263bc MD |
1849 | } |
1850 | } | |
11b81f5d MD |
1851 | if (offset) { |
1852 | KKASSERT(m); | |
1853 | if (sio) { | |
b5523eac | 1854 | n = m_copym(m, 0, offset, M_WAITOK); |
11b81f5d MD |
1855 | if (n) |
1856 | sbappend(sio, n); | |
1857 | } | |
1858 | m->m_data += offset; | |
1859 | m->m_len -= offset; | |
1860 | so->so_rcv.ssb_cc -= offset; | |
1861 | if (so->so_oobmark) { | |
1862 | so->so_oobmark -= offset; | |
984263bc | 1863 | if (so->so_oobmark == 0) { |
6cef7136 | 1864 | sosetstate(so, SS_RCVATMARK); |
11b81f5d | 1865 | didoob = 1; |
984263bc | 1866 | } |
984263bc | 1867 | } |
11b81f5d | 1868 | offset = 0; |
984263bc | 1869 | } |
11b81f5d MD |
1870 | lwkt_reltoken(&so->so_rcv.ssb_token); |
1871 | } | |
1872 | ||
1873 | /* | |
1874 | * If the MSG_WAITALL flag is set (for non-atomic socket), | |
1875 | * we must not quit until resid == 0 or an error termination. | |
1876 | * | |
1877 | * If a signal/timeout occurs, return with a short count but without | |
1878 | * error. | |
1879 | * | |
1880 | * Keep signalsockbuf locked against other readers. | |
1881 | * | |
1882 | * XXX if MSG_PEEK we currently do quit. | |
1883 | */ | |
1884 | if ((flags & MSG_WAITALL) && !(flags & MSG_PEEK) && | |
1885 | didoob == 0 && resid > 0 && | |
1886 | !sosendallatonce(so)) { | |
1887 | lwkt_gettoken(&so->so_rcv.ssb_token); | |
1888 | error = 0; | |
1889 | while ((m = so->so_rcv.ssb_mb) == NULL) { | |
1890 | if (so->so_error || (so->so_state & SS_CANTRCVMORE)) { | |
1891 | error = so->so_error; | |
984263bc | 1892 | break; |
11b81f5d | 1893 | } |
984263bc MD |
1894 | /* |
1895 | * The window might have closed to zero, make | |
1896 | * sure we send an ack now that we've drained | |
1897 | * the buffer or we might end up blocking until | |
1898 | * the idle takes over (5 seconds). | |
1899 | */ | |
fd600585 | 1900 | if (so->so_pcb) |
96c6eb29 | 1901 | so_pru_rcvd_async(so); |
11b81f5d MD |
1902 | if (so->so_rcv.ssb_mb == NULL) |
1903 | error = ssb_wait(&so->so_rcv); | |
984263bc | 1904 | if (error) { |
11b81f5d | 1905 | lwkt_reltoken(&so->so_rcv.ssb_token); |
6d49aa6f | 1906 | ssb_unlock(&so->so_rcv); |
857caa4a MD |
1907 | error = 0; |
1908 | goto done; | |
984263bc | 1909 | } |
984263bc | 1910 | } |
11b81f5d MD |
1911 | if (m && error == 0) |
1912 | goto dontblock; | |
1913 | lwkt_reltoken(&so->so_rcv.ssb_token); | |
984263bc MD |
1914 | } |
1915 | ||
857caa4a | 1916 | /* |
11b81f5d MD |
1917 | * Token not held here. |
1918 | * | |
1919 | * Cleanup. If an atomic read was requested drop any unread data XXX | |
857caa4a MD |
1920 | */ |
1921 | if ((flags & MSG_PEEK) == 0) { | |
fd600585 | 1922 | if (so->so_pcb) |
96c6eb29 | 1923 | so_pru_rcvd_async(so); |
984263bc | 1924 | } |
bf8a9a6f | 1925 | |
d8a9a23b | 1926 | if (orig_resid == resid && orig_resid && |
fd600585 | 1927 | (so->so_state & SS_CANTRCVMORE) == 0) { |
6d49aa6f | 1928 | ssb_unlock(&so->so_rcv); |
984263bc MD |
1929 | goto restart; |
1930 | } | |
1931 | ||
1932 | if (flagsp) | |
1933 | *flagsp |= flags; | |
1934 | release: | |
6d49aa6f | 1935 | ssb_unlock(&so->so_rcv); |
857caa4a | 1936 | done: |
857caa4a MD |
1937 | if (free_chain) |
1938 | m_freem(free_chain); | |
984263bc MD |
1939 | return (error); |
1940 | } | |
1941 | ||
edf5c732 MD |
1942 | /* |
1943 | * Shut a socket down. Note that we do not get a frontend lock as we | |
1944 | * want to be able to shut the socket down even if another thread is | |
1945 | * blocked in a read(), thus waking it up. | |
1946 | */ | |
984263bc | 1947 | int |
c972a82f | 1948 | soshutdown(struct socket *so, int how) |
984263bc | 1949 | { |
984263bc MD |
1950 | if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) |
1951 | return (EINVAL); | |
1952 | ||
ff518922 | 1953 | if (how != SHUT_WR) { |
edf5c732 | 1954 | /*ssb_lock(&so->so_rcv, M_WAITOK);*/ |
984263bc | 1955 | sorflush(so); |
edf5c732 | 1956 | /*ssb_unlock(&so->so_rcv);*/ |
ff518922 | 1957 | } |
0cd6e642 | 1958 | if (how != SHUT_RD) |
6b6e0885 | 1959 | return (so_pru_shutdown(so)); |
984263bc MD |
1960 | return (0); |
1961 | } | |
1962 | ||
/*
 * Flush the receive side of a socket.
 *
 * Sets SSB_NOINTR on so_rcv, then, while holding the receive buffer token:
 * calls socantrcvmore(), takes a by-value snapshot of the whole
 * signalsockbuf, resets the live buffer (sb contents, timeout, water marks,
 * mbmax and the SSB_CLEAR_MASK flags), and finally disposes of the data that
 * was queued using the snapshot.
 */
void
sorflush(struct socket *so)
{
	struct signalsockbuf *ssb = &so->so_rcv;
	struct protosw *pr = so->so_proto;
	struct signalsockbuf asb;

	atomic_set_int(&ssb->ssb_flags, SSB_NOINTR);

	lwkt_gettoken(&ssb->ssb_token);
	socantrcvmore(so);
	/* Snapshot the buffer so the queued mbufs can be released below. */
	asb = *ssb;

	/*
	 * Can't just blow up the ssb structure here
	 *
	 * Only the embedded sb and the individual limit fields are reset;
	 * the remaining members of *ssb (e.g. the token we are holding) are
	 * left untouched.
	 */
	bzero(&ssb->sb, sizeof(ssb->sb));
	ssb->ssb_timeo = 0;
	ssb->ssb_lowat = 0;
	ssb->ssb_hiwat = 0;
	ssb->ssb_mbmax = 0;
	atomic_clear_int(&ssb->ssb_flags, SSB_CLEAR_MASK);

	/*
	 * Protocols flagged PR_RIGHTS get their domain's dispose hook run on
	 * the snapshot's mbuf chain before the snapshot is released.
	 */
	if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.ssb_mb);
	ssb_release(&asb, so);

	lwkt_reltoken(&ssb->ssb_token);
}
1992 | ||
#ifdef INET
/*
 * Install or remove the accept filter of a listening socket.
 *
 * so	- socket to operate on; must have SO_ACCEPTCONN set, otherwise
 *	  EINVAL is returned.
 * sopt	- NULL to remove the current filter (always returns 0), otherwise
 *	  the option carrying a struct accept_filter_arg to install.
 *
 * Returns 0 on success, EINVAL if the socket is not listening, if a filter
 * is already installed, or if the filter's accf_create hook fails, ENOENT
 * if no filter with the requested name exists, or the error from
 * sooptcopyin().
 */
static int
do_setopt_accept_filter(struct socket *so, struct sockopt *sopt)
{
	struct accept_filter_arg *afap = NULL;
	struct accept_filter *afp;
	struct so_accf *af = so->so_accf;
	int error = 0;

	/* do not set/remove accept filters on non listen sockets */
	if ((so->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto out;
	}

	/* removing the filter */
	if (sopt == NULL) {
		if (af != NULL) {
			if (af->so_accept_filter != NULL &&
			    af->so_accept_filter->accf_destroy != NULL) {
				af->so_accept_filter->accf_destroy(so);
			}
			if (af->so_accept_filter_str != NULL) {
				kfree(af->so_accept_filter_str, M_ACCF);
			}
			kfree(af, M_ACCF);
			so->so_accf = NULL;
		}
		so->so_options &= ~SO_ACCEPTFILTER;
		return (0);
	}

	/* adding a filter */
	/* must remove previous filter first */
	if (af != NULL) {
		error = EINVAL;
		goto out;
	}

	/* don't put large objects on the kernel stack */
	afap = kmalloc(sizeof(*afap), M_TEMP, M_WAITOK);
	error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap);
	if (error)
		goto out;

	/* Never trust the caller to have NUL-terminated the strings. */
	afap->af_name[sizeof(afap->af_name)-1] = '\0';
	afap->af_arg[sizeof(afap->af_arg)-1] = '\0';

	afp = accept_filt_get(afap->af_name);
	if (afp == NULL) {
		error = ENOENT;
		goto out;
	}

	/* M_ZERO: so_accept_filter_str starts out NULL. */
	af = kmalloc(sizeof(*af), M_ACCF, M_WAITOK | M_ZERO);
	if (afp->accf_create != NULL) {
		if (afap->af_name[0] != '\0') {
			int len = strlen(afap->af_name) + 1;

			af->so_accept_filter_str = kmalloc(len, M_ACCF,
							   M_WAITOK);
			strcpy(af->so_accept_filter_str, afap->af_name);
		}
		af->so_accept_filter_arg = afp->accf_create(so, afap->af_arg);
		if (af->so_accept_filter_arg == NULL) {
			/*
			 * The name copy is only allocated for a non-empty
			 * af_name, so guard the free exactly like the
			 * removal path above does.
			 */
			if (af->so_accept_filter_str != NULL)
				kfree(af->so_accept_filter_str, M_ACCF);
			kfree(af, M_ACCF);
			so->so_accf = NULL;
			error = EINVAL;
			goto out;
		}
	}
	af->so_accept_filter = afp;
	so->so_accf = af;
	so->so_options |= SO_ACCEPTFILTER;
out:
	if (afap != NULL)
		kfree(afap, M_TEMP);
	return (error);
}
#endif /* INET */
2069 | ||
2070 | /* | |
2071 | * Perhaps this routine, and sooptcopyout(), below, ought to come in | |
2072 | * an additional variant to handle the case where the option value needs | |
2073 | * to be some kind of integer, but not a specific size. | |
2074 | * In addition to their use here, these functions are also called by the | |
2075 | * protocol-level pr_ctloutput() routines. | |
2076 | */ | |
2077 | int | |
c972a82f | 2078 | sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen) |
de0003fe AE |
2079 | { |
2080 | return soopt_to_kbuf(sopt, buf, len, minlen); | |
2081 | } | |
2082 | ||
2083 | int | |
2084 | soopt_to_kbuf(struct sockopt *sopt, void *buf, size_t len, size_t minlen) | |
984263bc MD |
2085 | { |
2086 | size_t valsize; | |
2087 | ||
792239df | 2088 | KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val)); |
de0003fe AE |
2089 | KKASSERT(kva_p(buf)); |
2090 | ||
984263bc MD |
2091 | /* |
2092 | * If the user gives us more than we wanted, we ignore it, | |
2093 | * but if we don't get the minimum length the caller | |
2094 | * wants, we return EINVAL. On success, sopt->sopt_valsize | |
2095 | * is set to however much we actually retrieved. | |
2096 | */ | |
2097 | if ((valsize = sopt->sopt_valsize) < minlen) | |
2098 | return EINVAL; | |
2099 | if (valsize > len) | |
2100 | sopt->sopt_valsize = valsize = len; | |
2101 | ||
984263bc MD |
2102 | bcopy(sopt->sopt_val, buf, valsize); |
2103 | return 0; | |
2104 | } | |
2105 | ||
e71a125f | 2106 | |
/*
 * Set a socket option.
 *
 * Options whose level is not SOL_SOCKET are handed straight to the
 * protocol's ctloutput routine (ENOPROTOOPT if the protocol has none).
 * SOL_SOCKET options are handled by the switch below; when one of them is
 * applied successfully the request is additionally passed to the protocol's
 * ctloutput routine, whose return value is ignored.
 *
 * Returns 0 on success or an errno value.
 */
int
sosetopt(struct socket *so, struct sockopt *sopt)
{
	int	error, optval;
	struct	linger l;
	struct	timeval tv;
	u_long  val;
	uint32_t val32;
	struct signalsockbuf *sotmp;

	error = 0;
	sopt->sopt_dir = SOPT_SET;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return (so_pr_ctloutput(so, sopt));
		}
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
#ifdef INET
		case SO_ACCEPTFILTER:
			error = do_setopt_accept_filter(so, sopt);
			if (error)
				goto bad;
			break;
#endif /* INET */
		case SO_LINGER:
			/* Requires a complete struct linger. */
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			so->so_linger = l.l_linger;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		/*
		 * Boolean options: the option name doubles as the bit that
		 * is set or cleared in so_options.
		 */
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_NOSIGPIPE:
		case SO_RERROR:
		case SO_PASSCRED:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				/*
				 * ssb_reserve() returning 0 is treated as
				 * failure.  An explicit size also turns off
				 * SSB_AUTOSIZE for that buffer.
				 */
				if (ssb_reserve(sopt->sopt_name == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv, (u_long)optval,
				    so,
				    &curproc->p_rlimit[RLIMIT_SBSIZE]) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				sotmp = (sopt->sopt_name == SO_SNDBUF) ?
						&so->so_snd : &so->so_rcv;
				atomic_clear_int(&sotmp->ssb_flags,
						 SSB_AUTOSIZE);
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.ssb_lowat =
				    (optval > so->so_snd.ssb_hiwat) ?
				    so->so_snd.ssb_hiwat : optval;
				atomic_clear_int(&so->so_snd.ssb_flags,
						 SSB_AUTOLOWAT);
				break;
			case SO_RCVLOWAT:
				so->so_rcv.ssb_lowat =
				    (optval > so->so_rcv.ssb_hiwat) ?
				    so->so_rcv.ssb_hiwat : optval;
				atomic_clear_int(&so->so_rcv.ssb_flags,
						 SSB_AUTOLOWAT);
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			/*
			 * Reject timevals that are negative, denormalized,
			 * or whose seconds part would overflow INT_MAX once
			 * multiplied by hz.
			 */
			/* assert(hz > 0); */
			if (tv.tv_sec < 0 || tv.tv_sec > INT_MAX / hz ||
			    tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
				error = EDOM;
				goto bad;
			}
			/* assert(tick > 0); */
			/* assert(ULONG_MAX - INT_MAX >= 1000000); */
			val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / ustick;
			if (val > INT_MAX) {
				error = EDOM;
				goto bad;
			}
			/*
			 * A non-zero sub-tick timeout is rounded up to 1 so
			 * that it does not collapse to 0.
			 */
			if (val == 0 && tv.tv_usec != 0)
				val = 1;

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.ssb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.ssb_timeo = val;
				break;
			}
			break;

		case SO_USER_COOKIE:
			error = sooptcopyin(sopt, &val32, sizeof val32,
					    sizeof val32);
			if (error)
				goto bad;
			so->so_user_cookie = val32;
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		/*
		 * Let the protocol see successfully applied socket-level
		 * options as well; its result is deliberately discarded.
		 */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) so_pr_ctloutput(so, sopt);
		}
	}
bad:
	return (error);
}
2274 | ||
2275 | /* Helper routine for getsockopt */ | |
2276 | int | |
f1f552f6 | 2277 | sooptcopyout(struct sockopt *sopt, const void *buf, size_t len) |
984263bc | 2278 | { |
de0003fe AE |
2279 | soopt_from_kbuf(sopt, buf, len); |
2280 | return 0; | |
2281 | } | |
2282 | ||
2283 | void | |
2284 | soopt_from_kbuf(struct sockopt *sopt, const void *buf, size_t len) | |
2285 | { | |
984263bc MD |
2286 | size_t valsize; |
2287 | ||
565d9f6f SZ |
2288 | if (len == 0) { |
2289 | sopt->sopt_valsize = 0; | |
2290 | return; | |
2291 | } | |
2292 | ||
792239df | 2293 | KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val)); |
de0003fe | 2294 | KKASSERT(kva_p(buf)); |
984263bc MD |
2295 | |
2296 | /* | |
2297 | * Documented get behavior is that we always return a value, | |
2298 | * possibly truncated to fit in the user's buffer. | |
2299 | * Traditional behavior is that we always tell the user | |
2300 | * precisely how much we copied, rather than something useful | |
2301 | * like the total amount we had available for her. | |
2302 | * Note that this interface is not idempotent; the entire answer must | |
2303 | * generated ahead of time. | |
2304 | */ | |
231d276b | 2305 | valsize = szmin(len, sopt->sopt_valsize); |
984263bc MD |
2306 | sopt->sopt_valsize = valsize; |
2307 | if (sopt->sopt_val != 0) { | |
de0003fe | 2308 | bcopy(buf, sopt->sopt_val, valsize); |
984263bc | 2309 | } |
e71a125f AE |
2310 | } |
2311 | ||
/*
 * Get a socket option.
 *
 * Options whose level is not SOL_SOCKET are handed straight to the
 * protocol's ctloutput routine (ENOPROTOOPT if the protocol has none).
 * SOL_SOCKET options are answered from the socket itself; when that
 * succeeds the request is additionally passed to the protocol's ctloutput
 * routine, whose return value is ignored.
 *
 * Returns 0 on success or an errno value.
 */
int
sogetopt(struct socket *so, struct sockopt *sopt)
{
	int	error, optval;
	long	optval_l;
	struct	linger l;
	struct	timeval tv;
	uint32_t val32;
#ifdef INET
	struct accept_filter_arg *afap;
#endif

	error = 0;
	sopt->sopt_dir = SOPT_GET;
	if (sopt->sopt_level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return (so_pr_ctloutput(so, sopt));
		} else
			return (ENOPROTOOPT);
	} else {
		switch (sopt->sopt_name) {
#ifdef INET
		case SO_ACCEPTFILTER:
			/* Only meaningful on listening sockets. */
			if ((so->so_options & SO_ACCEPTCONN) == 0)
				return (EINVAL);
			/*
			 * Zeroed scratch structure; it stays all-zero when
			 * no filter is installed.
			 */
			afap = kmalloc(sizeof(*afap), M_TEMP,
				       M_WAITOK | M_ZERO);
			if ((so->so_options & SO_ACCEPTFILTER) != 0) {
				strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name);
				/* The saved filter string is reported in af_arg. */
				if (so->so_accf->so_accept_filter_str != NULL)
					strcpy(afap->af_arg, so->so_accf->so_accept_filter_str);
			}
			error = sooptcopyout(sopt, afap, sizeof(*afap));
			kfree(afap, M_TEMP);
			break;
#endif /* INET */

		case SO_LINGER:
			l.l_onoff = so->so_options & SO_LINGER;
			l.l_linger = so->so_linger;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		/*
		 * Boolean options: report the option's bit as found in
		 * so_options (zero or the bit value itself).
		 */
		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_NOSIGPIPE:
		case SO_RERROR:
		case SO_PASSCRED:
			optval = so->so_options & sopt->sopt_name;
			/* Shared exit for every option returned as an int. */
integer:
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

		case SO_ERROR:
			/*
			 * Reading the error clears it.  so_error takes
			 * precedence; so_rerror is only reported (and
			 * cleared) when so_error is zero.
			 */
			if (so->so_error) {
				optval = so->so_error;
				so->so_error = 0;
			} else {
				optval = so->so_rerror;
				so->so_rerror = 0;
			}
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.ssb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.ssb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.ssb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.ssb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			optval = (sopt->sopt_name == SO_SNDTIMEO ?
				  so->so_snd.ssb_timeo : so->so_rcv.ssb_timeo);

			/* Convert the stored value back into a timeval. */
			tv.tv_sec = optval / hz;
			tv.tv_usec = (optval % hz) * ustick;
			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		case SO_SNDSPACE:
			/* Returned as a long, unlike the int options above. */
			optval_l = ssb_space(&so->so_snd);
			error = sooptcopyout(sopt, &optval_l, sizeof(optval_l));
			break;

		case SO_CPUHINT:
			optval = -1; /* no hint */
			goto integer;

		case SO_USER_COOKIE:
			val32 = so->so_user_cookie;
			error = sooptcopyout(sopt, &val32, sizeof(val32));
			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		/*
		 * Give the protocol a look at successfully answered
		 * socket-level requests; its result is not propagated.
		 */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput)
			so_pr_ctloutput(so, sopt);
		return (error);
	}
}
2435 | ||
2436 | /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */ | |
2437 | int | |
2438 | soopt_getm(struct sockopt *sopt, struct mbuf **mp) | |
2439 | { | |
2440 | struct mbuf *m, *m_prev; | |
bf6ac9fa JH |
2441 | int sopt_size = sopt->sopt_valsize, msize; |
2442 | ||
b5523eac | 2443 | m = m_getl(sopt_size, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA, |
bf6ac9fa JH |
2444 | 0, &msize); |
2445 | if (m == NULL) | |
2446 | return (ENOBUFS); | |
2447 | m->m_len = min(msize, sopt_size); | |
984263bc MD |
2448 | sopt_size -= m->m_len; |
2449 | *mp = m; | |
2450 | m_prev = m; | |
2451 | ||
bf6ac9fa | 2452 | while (sopt_size > 0) { |
b5523eac | 2453 | m = m_getl(sopt_size, sopt->sopt_td ? M_WAITOK : M_NOWAIT, |
bf6ac9fa JH |
2454 | MT_DATA, 0, &msize); |
2455 | if (m == NULL) { | |
984263bc | 2456 | m_freem(*mp); |
bf6ac9fa | 2457 | return (ENOBUFS); |
984263bc | 2458 | } |
bf6ac9fa | 2459 | m->m_len = min(msize, sopt_size); |
984263bc MD |
2460 | sopt_size -= m->m_len; |
2461 | m_prev->m_next = m; | |
2462 | m_prev = m; | |
2463 | } | |
bf6ac9fa | 2464 | return (0); |
984263bc MD |
2465 | } |
2466 | ||
/*
 * XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines.
 *
 * Compatibility wrapper around soopt_to_mbuf(); always returns 0.
 */
int
soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
{
	soopt_to_mbuf(sopt, m);

	return (0);
}
2474 | ||
2475 | void | |
2476 | soopt_to_mbuf(struct sockopt *sopt, struct mbuf *m) | |
984263bc | 2477 | { |
c3e742f9 NT |
2478 | size_t valsize; |
2479 | void *val; | |
984263bc | 2480 | |
792239df | 2481 | KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val)); |
de0003fe | 2482 | KKASSERT(kva_p(m)); |
984263bc | 2483 | if (sopt->sopt_val == NULL) |
792239df | 2484 | return; |
c3e742f9 NT |
2485 | val = sopt->sopt_val; |
2486 | valsize = sopt->sopt_valsize; | |
2487 | while (m != NULL && valsize >= m->m_len) { | |
de0003fe | 2488 | bcopy(val, mtod(m, char *), m->m_len); |
c3e742f9 NT |
2489 | valsize -= m->m_len; |
2490 | val = (caddr_t)val + m->m_len; | |
984263bc MD |
2491 | m = m->m_next; |
2492 | } | |
2493 | if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */ | |
2494 | panic("ip6_sooptmcopyin"); | |
984263bc MD |
2495 | } |
2496 | ||
de0003fe AE |
/*
 * XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines.
 *
 * Compatibility wrapper: returns the result of soopt_from_mbuf().
 */
int
soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
{
	int rc;

	rc = soopt_from_mbuf(sopt, m);
	return (rc);
}
2503 | ||
984263bc | 2504 | int |
de0003fe | 2505 | soopt_from_mbuf(struct sockopt *sopt, struct mbuf *m) |
984263bc MD |
2506 | { |
2507 | struct mbuf *m0 = m; | |
2508 | size_t valsize = 0; | |
c3e742f9 NT |
2509 | size_t maxsize; |
2510 | void *val; | |
984263bc | 2511 | |
792239df | 2512 | KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val)); |
de0003fe | 2513 | KKASSERT(kva_p(m)); |
984263bc MD |
2514 | if (sopt->sopt_val == NULL) |
2515 | return 0; | |
c3e742f9 NT |
2516 | val = sopt->sopt_val; |
2517 | maxsize = sopt->sopt_valsize; | |
2518 | while (m != NULL && maxsize >= m->m_len) { | |
de0003fe | 2519 | bcopy(mtod(m, char *), val, m->m_len); |
c3e742f9 NT |
2520 | maxsize -= m->m_len; |
2521 | val = (caddr_t)val + m->m_len; | |
984263bc MD |
2522 | valsize += m->m_len; |
2523 | m = m->m_next; | |
2524 | } | |
2525 | if (m != NULL) { | |
2526 | /* enough soopt buffer should be given from user-land */ | |
2527 | m_freem(m0); | |
bf6ac9fa | 2528 | return (EINVAL); |
984263bc MD |
2529 | } |
2530 | sopt->sopt_valsize = valsize; | |
2531 | return 0; | |
2532 | } | |
2533 | ||
2534 | void | |
c972a82f | 2535 | sohasoutofband(struct socket *so) |
984263bc MD |
2536 | { |
2537 | if (so->so_sigio != NULL) | |
2538 | pgsigio(so->so_sigio, SIGURG, 0); | |
5d83d150 SZ |
2539 | /* |
2540 | * NOTE: | |
2541 | * There is no need to use NOTE_OOB as KNOTE hint here: | |
2542 | * soread filter depends on so_oobmark and SS_RCVATMARK | |
2543 | * so_state. NOTE_OOB would cause unnecessary penalty | |
2544 | * in KNOTE, if there was knote processing contention. | |
2545 | */ | |
2546 | KNOTE(&so->so_rcv.ssb_kq.ki_note, 0); | |
984263bc MD |
2547 | } |
2548 | ||
984263bc MD |
2549 | int |
2550 | sokqfilter(struct file *fp, struct knote *kn) | |
2551 | { | |
2552 | struct socket *so = (struct socket *)kn->kn_fp->f_data; | |
6d49aa6f | 2553 | struct signalsockbuf *ssb; |
984263bc MD |
2554 | |
2555 | switch (kn->kn_filter) { | |
2556 | case EVFILT_READ: | |
2557 | if (so->so_options & SO_ACCEPTCONN) | |
2558 | kn->kn_fop = &solisten_filtops; | |
2559 | else | |
2560 | kn->kn_fop = &soread_filtops; | |
6d49aa6f | 2561 | ssb = &so->so_rcv; |
984263bc MD |
2562 | break; |
2563 | case EVFILT_WRITE: | |
2564 | kn->kn_fop = &sowrite_filtops; | |
6d49aa6f | 2565 | ssb = &so->so_snd; |
984263bc | 2566 | break; |
73c344d3 SG |
2567 | case EVFILT_EXCEPT: |
2568 | kn->kn_fop = &soexcept_filtops; | |
2569 | ssb = &so->so_rcv; | |
2570 | break; | |
984263bc | 2571 | default: |
b287d649 | 2572 | return (EOPNOTSUPP); |
984263bc MD |
2573 | } |
2574 | ||
5b22f1a7 | 2575 | knote_insert(&ssb->ssb_kq.ki_note, kn); |
14343ad3 | 2576 | atomic_set_int(&ssb->ssb_flags, SSB_KNOTE); |
984263bc MD |
2577 | return (0); |
2578 | } | |
2579 | ||
2580 | static void | |
2581 | filt_sordetach(struct knote *kn) | |
2582 | { | |
2583 | struct socket *so = (struct socket *)kn->kn_fp->f_data; | |
984263bc | 2584 | |
5b22f1a7 SG |
2585 | knote_remove(&so->so_rcv.ssb_kq.ki_note, kn); |
2586 | if (SLIST_EMPTY(&so->so_rcv.ssb_kq.ki_note)) | |
14343ad3 | 2587 | atomic_clear_int(&so->so_rcv.ssb_flags, SSB_KNOTE); |
984263bc MD |
2588 | } |
2589 | ||
2590 | /*ARGSUSED*/ | |
2591 | static int | |
aea5d1bb | 2592 | filt_soread(struct knote *kn, long hint __unused) |
984263bc MD |
2593 | { |
2594 | struct socket *so = (struct socket *)kn->kn_fp->f_data; | |
2595 | ||
73c344d3 SG |
2596 | if (kn->kn_sfflags & NOTE_OOB) { |
2597 | if ((so->so_oobmark || (so->so_state & SS_RCVATMARK))) { | |
2598 | kn->kn_fflags |= NOTE_OOB; | |
2599 | return (1); | |
2600 | } | |
2601 | return (0); | |
70a4a30f | 2602 | } |
6d49aa6f | 2603 | kn->kn_data = so->so_rcv.ssb_cc; |
8c4ed426 | 2604 | |
3bcb6e5e SZ |
2605 | if (so->so_state & SS_CANTRCVMORE) { |
2606 | /* | |
2607 | * Only set NODATA if all data has been exhausted. | |
6df899ee MD |
2608 | * |
2609 | * If HUPONLY is flagged, linux only issues the HUP on | |
2610 | * a fully closed socket, not a half-closed socket. | |
2611 | * | |
2612 | * LOWAT is not applicable with a pending EOF. | |
2613 | * | |
2614 | * WARNING: If we issue a spurious event to poll() it will | |
2615 | * de-register the event. | |
3bcb6e5e SZ |
2616 | */ |
2617 | if (kn->kn_data == 0) | |
2618 | kn->kn_flags |= EV_NODATA; | |
b272101a | 2619 | kn->kn_flags |= EV_EOF; |
984263bc | 2620 | kn->kn_fflags = so->so_error; |
6df899ee MD |
2621 | if (so->so_state & SS_CANTSENDMORE) { |
2622 | kn->kn_flags |= EV_HUP; | |
2623 | return (1); | |
2624 | } | |
2625 | if ((kn->kn_sfflags & NOTE_HUPONLY) == 0) | |
2626 | return (1); | |
2627 | return 0; | |
984263bc | 2628 | } |
7eaeff3d | 2629 | if (so->so_error || so->so_rerror) |
984263bc | 2630 | return (1); |
6df899ee MD |
2631 | |
2632 | /* | |
2633 | * Normal operation if HUPONLY is not set. If HUPONLY is set | |
2634 | * we only return positive on EOF/HUP above. | |
2635 | * | |
2636 | * WARNING: If we issue a spurious event to poll() it will de-register | |
2637 | * the event. | |
2638 | */ | |
2639 | if ((kn->kn_sfflags & NOTE_HUPONLY) == 0) { | |
2640 | if (kn->kn_sfflags & NOTE_LOWAT) | |
2641 | return (kn->kn_data >= kn->kn_sdata); | |
2642 | return ((kn->kn_data >= so->so_rcv.ssb_lowat) || | |
2643 | !TAILQ_EMPTY(&so->so_comp)); | |
2644 | } | |
2645 | return 0; | |
984263bc MD |
2646 | } |
2647 | ||
2648 | static void | |
2649 | filt_sowdetach(struct knote *kn) | |
2650 | { | |
2651 | struct socket *so = (struct socket *)kn->kn_fp->f_data; | |
984263bc | 2652 | |
5b22f1a7 SG |
2653 | knote_remove(&so->so_snd.ssb_kq.ki_note, kn); |
2654 | if (SLIST_EMPTY(&so->so_snd.ssb_kq.ki_note)) | |
14343ad3 | 2655 | atomic_clear_int(&so->so_snd.ssb_flags, SSB_KNOTE); |
984263bc MD |
2656 | } |
2657 | ||
2658 | /*ARGSUSED*/ | |
2659 | static int | |
aea5d1bb | 2660 | filt_sowrite(struct knote *kn, long hint __unused) |
984263bc MD |
2661 | { |
2662 | struct socket *so = (struct socket *)kn->kn_fp->f_data; | |
2663 | ||
d54c9898 SZ |
2664 | if (so->so_snd.ssb_flags & SSB_PREALLOC) |
2665 | kn->kn_data = ssb_space_prealloc(&so->so_snd); | |
2666 | else | |
2667 | kn->kn_data = ssb_space(&so->so_snd); | |
2668 | ||
984263bc | 2669 | if (so->so_state & SS_CANTSENDMORE) { |
3bcb6e5e | 2670 | kn->kn_flags |= (EV_EOF | EV_NODATA); |
7323bef7 MD |
2671 | if (so->so_state & SS_CANTRCVMORE) |
2672 | kn->kn_flags |= EV_HUP; | |
984263bc MD |
2673 | kn->kn_fflags = so->so_error; |
2674 | return (1); | |
2675 | } | |
2676 | if (so->so_error) /* temporary udp error */ | |
2677 | return (1); | |
2678 | if (((so->so_state & SS_ISCONNECTED) == 0) && | |
2679 | (so->so_proto->pr_flags & PR_CONNREQUIRED)) | |
2680 | return (0); | |
2681 | if (kn->kn_sfflags & NOTE_LOWAT) | |
2682 | return (kn->kn_data >= kn->kn_sdata); | |
6d49aa6f | 2683 | return (kn->kn_data >= so->so_snd.ssb_lowat); |
984263bc MD |
2684 | } |
2685 | ||
2686 | /*ARGSUSED*/ | |
2687 | static int | |
aea5d1bb | 2688 | filt_solisten(struct knote *kn, long hint __unused) |
984263bc MD |
2689 | { |
2690 | struct socket *so = (struct socket *)kn->kn_fp->f_data; | |
65e531c6 SZ |
2691 | int qlen = so->so_qlen; |
2692 | ||
2693 | if (soavailconn > 0 && qlen > soavailconn) | |
2694 | qlen = soavailconn; | |
2695 | kn->kn_data = qlen; | |
984263bc | 2696 | |
65e531c6 | 2697 | return (!TAILQ_EMPTY(&so->so_comp)); |
984263bc | 2698 | } |