network - Tokenize NFS, fix MP races
[dragonfly.git] / sys / vfs / nfs / nfs_socket.c
CommitLineData
984263bc
MD
1/*
2 * Copyright (c) 1989, 1991, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
37 * $FreeBSD: src/sys/nfs/nfs_socket.c,v 1.60.2.6 2003/03/26 01:44:46 alfred Exp $
e5d03018 38 * $DragonFly: src/sys/vfs/nfs/nfs_socket.c,v 1.45 2007/05/18 17:05:13 dillon Exp $
984263bc
MD
39 */
40
41/*
42 * Socket operations for use by nfs
43 */
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/proc.h>
48#include <sys/malloc.h>
49#include <sys/mount.h>
50#include <sys/kernel.h>
51#include <sys/mbuf.h>
52#include <sys/vnode.h>
9ba76b73 53#include <sys/fcntl.h>
984263bc 54#include <sys/protosw.h>
e4700d00 55#include <sys/resourcevar.h>
984263bc
MD
56#include <sys/socket.h>
57#include <sys/socketvar.h>
6b6e0885 58#include <sys/socketops.h>
984263bc 59#include <sys/syslog.h>
e4700d00 60#include <sys/thread.h>
984263bc
MD
61#include <sys/tprintf.h>
62#include <sys/sysctl.h>
63#include <sys/signalvar.h>
8684e6f9
MD
64#include <sys/mutex.h>
65
b1b4e5a6 66#include <sys/signal2.h>
8684e6f9 67#include <sys/mutex2.h>
6cef7136 68#include <sys/socketvar2.h>
984263bc
MD
69
70#include <netinet/in.h>
71#include <netinet/tcp.h>
54938b92 72#include <sys/thread2.h>
984263bc 73
1f2de5d4
MD
74#include "rpcv2.h"
75#include "nfsproto.h"
76#include "nfs.h"
77#include "xdr_subs.h"
78#include "nfsm_subs.h"
79#include "nfsmount.h"
80#include "nfsnode.h"
81#include "nfsrtt.h"
984263bc
MD
82
#define TRUE	1
#define FALSE	0

/*
 * RTT calculations are scaled by 256 (8 bits).  A proper fractional
 * RTT will still be calculated even with a slow NFS timer.
 *
 * NFS_SRTT/NFS_SDRTT select the per-mount smoothed RTT / RTT deviation
 * slot for a request, indexed by the request's procedure class (proct[]).
 */
#define NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum]]
#define NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum]]
#define NFS_RTT_SCALE_BITS	8	/* bits */
#define NFS_RTT_SCALE		256	/* value */

/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 *
 * NOTE(review): the table below also contains the value 5 (for procnum 21,
 * presumably NFSPROC_COMMIT) which is not described in the list above --
 * confirm against nm_srtt[]/nm_sdrtt[] array sizing in nfsmount.h.
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0,	/* 00-09	*/
	0, 0, 0, 0, 0, 0, 3, 3, 0, 0,	/* 10-19	*/
	0, 5, 0, 0, 0, 0,		/* 20-29	*/
};

/*
 * Per-procedure timeout multiplier; procnum 21 (commit) gets 2x,
 * everything else 1x.
 */
static int multt[NFS_NPROCS] = {
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 00-09	*/
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 10-19	*/
	1, 2, 1, 1, 1, 1,		/* 20-29	*/
};
/* Retransmit backoff multipliers (fibonacci-like progression). */
static int nfs_backoff[8] = { 2, 3, 5, 8, 13, 21, 34, 55 };

/* Counters for nfs_realign(): packets tested / packets actually copied. */
static int nfs_realign_test;
static int nfs_realign_count;

/* Debug knobs: print RTT info / retransmit events to the console. */
static int nfs_showrtt;
static int nfs_showrexmit;

/* Maximum async BIOs in flight; tunable via vfs.nfs.maxasyncbio. */
int nfs_maxasyncbio = NFS_MAXASYNCBIO;

SYSCTL_DECL(_vfs_nfs);

SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0, "");
SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0, "");
SYSCTL_INT(_vfs_nfs, OID_AUTO, showrtt, CTLFLAG_RW, &nfs_showrtt, 0, "");
SYSCTL_INT(_vfs_nfs, OID_AUTO, showrexmit, CTLFLAG_RW, &nfs_showrexmit, 0, "");
SYSCTL_INT(_vfs_nfs, OID_AUTO, maxasyncbio, CTLFLAG_RW, &nfs_maxasyncbio, 0, "");

/* Request state-machine stages (implemented later in this file). */
static int nfs_request_setup(nfsm_info_t info);
static int nfs_request_auth(struct nfsreq *rep);
static int nfs_request_try(struct nfsreq *rep);
static int nfs_request_waitreply(struct nfsreq *rep);
static int nfs_request_processreply(nfsm_info_t info, int);

int nfsrtton = 0;			/* enable RTT logging into nfsrtt */
struct nfsrtt nfsrtt;			/* circular RTT log */
struct callout nfs_timer_handle;	/* periodic nfs_timer() callout */

static int nfs_msg (struct thread *,char *,char *);
static int nfs_rcvlock (struct nfsmount *nmp, struct nfsreq *myreq);
static void nfs_rcvunlock (struct nfsmount *nmp);
static void nfs_realign (struct mbuf **pm, int hsiz);
static int nfs_receive (struct nfsmount *nmp, struct nfsreq *rep,
			struct sockaddr **aname, struct mbuf **mp);
static void nfs_softterm (struct nfsreq *rep, int islocked);
static void nfs_hardterm (struct nfsreq *rep, int islocked);
static int nfs_reconnect (struct nfsmount *nmp, struct nfsreq *rep);
#ifndef NFS_NOSERVER
static int nfsrv_getstream (struct nfssvc_sock *, int, int *);
static void nfs_timer_req(struct nfsreq *req);
static void nfs_checkpkt(struct mbuf *m, int len);
984263bc 153
/*
 * Server-side RPC dispatch table, indexed by NFS procedure number.
 * Entry order must exactly match the NFSPROC_* numbering; unsupported
 * trailing procedures are mapped to nfsrv_noop.
 */
int (*nfsrv3_procs[NFS_NPROCS]) (struct nfsrv_descript *nd,
				 struct nfssvc_sock *slp,
				 struct thread *td,
				 struct mbuf **mreqp) = {
	nfsrv_null,
	nfsrv_getattr,
	nfsrv_setattr,
	nfsrv_lookup,
	nfsrv3_access,
	nfsrv_readlink,
	nfsrv_read,
	nfsrv_write,
	nfsrv_create,
	nfsrv_mkdir,
	nfsrv_symlink,
	nfsrv_mknod,
	nfsrv_remove,
	nfsrv_rmdir,
	nfsrv_rename,
	nfsrv_link,
	nfsrv_readdir,
	nfsrv_readdirplus,
	nfsrv_statfs,
	nfsrv_fsinfo,
	nfsrv_pathconf,
	nfsrv_commit,
	nfsrv_noop,
	nfsrv_noop,
	nfsrv_noop,
	nfsrv_noop
};
#endif /* NFS_NOSERVER */
186
/*
 * Initialize sockets and congestion for a new NFS connection.
 * We do not free the sockaddr if error.
 *
 * nmp - mount point being (re)connected; nm_so is set only on success.
 * rep - optional request used for interruptibility checks while the
 *       connect is in progress (may be NULL).
 *
 * Returns 0 on success or an errno.  On failure the partially set up
 * socket is shut down and closed at the 'bad' label.
 */
int
nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
{
	struct socket *so;
	int error;
	struct sockaddr *saddr;
	struct sockaddr_in *sin;
	struct thread *td = &thread0; /* only used for socreate and sobind */

	nmp->nm_so = so = NULL;
	/* A forced unmount in progress makes any reconnect pointless. */
	if (nmp->nm_flag & NFSMNT_FORCE)
		return (EINVAL);
	saddr = nmp->nm_nam;
	error = socreate(saddr->sa_family, &so, nmp->nm_sotype,
			 nmp->nm_soproto, td);
	if (error)
		goto bad;
	nmp->nm_soflags = so->so_proto->pr_flags;

	/*
	 * Some servers require that the client port be a reserved port number.
	 * Temporarily switch the socket to IP_PORTRANGE_LOW, bind an
	 * anonymous port in that range, then restore the default range.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		struct sockopt sopt;
		int ip;
		struct sockaddr_in ssin;

		bzero(&sopt, sizeof sopt);
		ip = IP_PORTRANGE_LOW;
		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = IP_PORTRANGE;
		sopt.sopt_val = (void *)&ip;
		sopt.sopt_valsize = sizeof(ip);
		sopt.sopt_td = NULL;
		error = sosetopt(so, &sopt);
		if (error)
			goto bad;
		bzero(&ssin, sizeof ssin);
		sin = &ssin;
		sin->sin_len = sizeof (struct sockaddr_in);
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = INADDR_ANY;
		sin->sin_port = htons(0);
		error = sobind(so, (struct sockaddr *)sin, td);
		if (error)
			goto bad;
		bzero(&sopt, sizeof sopt);
		ip = IP_PORTRANGE_DEFAULT;
		sopt.sopt_level = IPPROTO_IP;
		sopt.sopt_name = IP_PORTRANGE;
		sopt.sopt_val = (void *)&ip;
		sopt.sopt_valsize = sizeof(ip);
		sopt.sopt_td = NULL;
		error = sosetopt(so, &sopt);
		if (error)
			goto bad;
	}

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a port other than NFS_PORT.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_soflags & PR_CONNREQUIRED) {
			error = ENOTCONN;
			goto bad;
		}
	} else {
		error = soconnect(so, nmp->nm_nam, td);
		if (error)
			goto bad;

		/*
		 * Wait for the connection to complete. Cribbed from the
		 * connect system call but with the wait timing out so
		 * that interruptible mounts don't hang here for a long time.
		 *
		 * The crit section guards the SS_ISCONNECTING test/sleep
		 * against the protocol side completing the connect.
		 */
		crit_enter();
		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
			(void) tsleep((caddr_t)&so->so_timeo, 0,
				      "nfscon", 2 * hz);
			if ((so->so_state & SS_ISCONNECTING) &&
			    so->so_error == 0 && rep &&
			    (error = nfs_sigintr(nmp, rep, rep->r_td)) != 0){
				soclrstate(so, SS_ISCONNECTING);
				crit_exit();
				goto bad;
			}
		}
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			crit_exit();
			goto bad;
		}
		crit_exit();
	}
	so->so_rcv.ssb_timeo = (5 * hz);
	so->so_snd.ssb_timeo = (5 * hz);

	/*
	 * Get buffer reservation size from sysctl, but impose reasonable
	 * limits.  For TCP enable keepalive and disable Nagle.
	 */
	if (nmp->nm_sotype == SOCK_STREAM) {
		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
			struct sockopt sopt;
			int val;

			bzero(&sopt, sizeof sopt);
			sopt.sopt_level = SOL_SOCKET;
			sopt.sopt_name = SO_KEEPALIVE;
			sopt.sopt_val = &val;
			sopt.sopt_valsize = sizeof val;
			val = 1;
			sosetopt(so, &sopt);
		}
		if (so->so_proto->pr_protocol == IPPROTO_TCP) {
			struct sockopt sopt;
			int val;

			bzero(&sopt, sizeof sopt);
			sopt.sopt_level = IPPROTO_TCP;
			sopt.sopt_name = TCP_NODELAY;
			sopt.sopt_val = &val;
			sopt.sopt_valsize = sizeof val;
			val = 1;
			sosetopt(so, &sopt);
		}
	}
	error = soreserve(so, nfs_soreserve, nfs_soreserve, NULL);
	if (error)
		goto bad;
	atomic_set_int(&so->so_rcv.ssb_flags, SSB_NOINTR);
	atomic_set_int(&so->so_snd.ssb_flags, SSB_NOINTR);

	/* Initialize other non-zero congestion variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
		nmp->nm_srtt[3] = (NFS_TIMEO << NFS_RTT_SCALE_BITS);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
		nmp->nm_sdrtt[3] = 0;
	nmp->nm_maxasync_scaled = NFS_MINASYNC_SCALED;
	nmp->nm_timeouts = 0;

	/*
	 * Assign nm_so last.  The moment nm_so is assigned the nfs_timer()
	 * can mess with the socket.
	 */
	nmp->nm_so = so;
	return (0);

bad:
	if (so) {
		soshutdown(so, SHUT_RDWR);
		soclose(so, FNONBLOCK);
	}
	return (error);
}
349
/*
 * Reconnect routine:
 * Called when a connection is broken on a reliable protocol.
 * - clean up the old socket
 * - nfs_connect() again
 * - set R_NEEDSXMIT for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 * nb: Must be called with the nfs_sndlock() set on the mount point.
 *
 * Returns 0 on success, EINTR if interrupted or the receiver thread is
 * stopping, or EINVAL (propagated from nfs_connect on a forced unmount).
 */
static int
nfs_reconnect(struct nfsmount *nmp, struct nfsreq *rep)
{
	struct nfsreq *req;
	int error;

	nfs_disconnect(nmp);
	if (nmp->nm_rxstate >= NFSSVC_STOPPING)
		return (EINTR);
	/* Retry the connect until it succeeds or becomes fatal. */
	while ((error = nfs_connect(nmp, rep)) != 0) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		if (error == EINVAL)
			return (error);
		if (nmp->nm_rxstate >= NFSSVC_STOPPING)
			return (EINTR);
		/* Wait roughly one second (lbolt) between attempts. */
		(void) tsleep((caddr_t)&lbolt, 0, "nfscon", 0);
	}

	/*
	 * Loop through outstanding request list and fix up all requests
	 * on old socket.
	 */
	crit_enter();
	TAILQ_FOREACH(req, &nmp->nm_reqq, r_chain) {
		KKASSERT(req->r_nmp == nmp);
		req->r_flags |= R_NEEDSXMIT;
	}
	crit_exit();
	return (0);
}
390
/*
 * NFS disconnect.  Clean up and unlink.
 *
 * nm_so is cleared before the socket is shut down so that other code
 * paths testing nm_so stop using it immediately.
 */
void
nfs_disconnect(struct nfsmount *nmp)
{
	struct socket *so;

	if (nmp->nm_so) {
		so = nmp->nm_so;
		nmp->nm_so = NULL;
		soshutdown(so, SHUT_RDWR);
		soclose(so, FNONBLOCK);
	}
}
406
/*
 * Disconnect while holding the receive lock, so no receiver can be
 * blocked inside soreceive on the socket being torn down.
 */
void
nfs_safedisconnect(struct nfsmount *nmp)
{
	nfs_rcvlock(nmp, NULL);
	nfs_disconnect(nmp);
	nfs_rcvunlock(nmp);
}
414
/*
 * This is the nfs send routine. For connection based socket types, it
 * must be called with an nfs_sndlock() on the socket.
 * "rep == NULL" indicates that it has been called from a server.
 * For the client side:
 * - return EINTR if the RPC is terminated, 0 otherwise
 * - set R_NEEDSXMIT if the send fails for any reason
 * - do any cleanup required by recoverable socket errors (?)
 * For the server side:
 * - return EINTR or ERESTART if interrupted by a signal
 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
 * - do any cleanup required by recoverable socket errors (?)
 *
 * The mbuf chain 'top' is always consumed: either by so_pru_sosend() or
 * by the explicit m_freem() calls on the early-return paths.
 */
int
nfs_send(struct socket *so, struct sockaddr *nam, struct mbuf *top,
	 struct nfsreq *rep)
{
	struct sockaddr *sendnam;
	int error, soflags, flags;

	if (rep) {
		if (rep->r_flags & R_SOFTTERM) {
			m_freem(top);
			return (EINTR);
		}
		/* No socket: mark for retransmit and pretend success. */
		if ((so = rep->r_nmp->nm_so) == NULL) {
			rep->r_flags |= R_NEEDSXMIT;
			m_freem(top);
			return (0);
		}
		rep->r_flags &= ~R_NEEDSXMIT;
		soflags = rep->r_nmp->nm_soflags;
	} else {
		soflags = so->so_proto->pr_flags;
	}
	/* Connected (or connection-oriented) sockets need no address. */
	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
		sendnam = NULL;
	else
		sendnam = nam;
	if (so->so_type == SOCK_SEQPACKET)
		flags = MSG_EOR;
	else
		flags = 0;

	/*
	 * calls pru_sosend -> sosend -> so_pru_send -> netrpc
	 */
	error = so_pru_sosend(so, sendnam, NULL, top, NULL, flags,
			      curthread /*XXX*/);
	/*
	 * ENOBUFS for dgram sockets is transient and non fatal.
	 * No need to log, and no need to break a soft mount.
	 */
	if (error == ENOBUFS && so->so_type == SOCK_DGRAM) {
		error = 0;
		/*
		 * do backoff retransmit on client
		 */
		if (rep) {
			if ((rep->r_nmp->nm_state & NFSSTA_SENDSPACE) == 0) {
				rep->r_nmp->nm_state |= NFSSTA_SENDSPACE;
				kprintf("Warning: NFS: Insufficient sendspace "
					"(%lu),\n"
					"\t You must increase vfs.nfs.soreserve"
					"or decrease vfs.nfs.maxasyncbio\n",
					so->so_snd.ssb_hiwat);
			}
			rep->r_flags |= R_NEEDSXMIT;
		}
	}

	if (error) {
		if (rep) {
			log(LOG_INFO, "nfs send error %d for server %s\n",error,
			    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			/*
			 * Deal with errors for the client side.
			 */
			if (rep->r_flags & R_SOFTTERM)
				error = EINTR;
			else
				rep->r_flags |= R_NEEDSXMIT;
		} else {
			log(LOG_INFO, "nfsd send error %d\n", error);
		}

		/*
		 * Handle any recoverable (soft) socket errors here. (?)
		 */
		if (error != EINTR && error != ERESTART &&
		    error != EWOULDBLOCK && error != EPIPE)
			error = 0;
	}
	return (error);
}
510
/*
 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
 * Mark and consolidate the data into a new mbuf list.
 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
 * small mbufs.
 * For SOCK_STREAM we must be very careful to read an entire record once
 * we have read any of it, even if the system call has been interrupted.
 *
 * On success *mp receives the (realigned) mbuf chain and, for
 * unconnected datagram sockets, *aname the sender's address (caller
 * frees both).  Both are NULL on error.
 */
static int
nfs_receive(struct nfsmount *nmp, struct nfsreq *rep,
	    struct sockaddr **aname, struct mbuf **mp)
{
	struct socket *so;
	struct sockbuf sio;
	struct uio auio;
	struct iovec aio;
	struct mbuf *m;
	struct mbuf *control;
	u_int32_t len;
	struct sockaddr **getnam;
	int error, sotype, rcvflg;
	struct thread *td = curthread;	/* XXX */

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = NULL;
	*aname = NULL;
	sotype = nmp->nm_sotype;

	/*
	 * For reliable protocols, lock against other senders/receivers
	 * in case a reconnect is necessary.
	 * For SOCK_STREAM, first get the Record Mark to find out how much
	 * more there is to get.
	 * We must lock the socket against other receivers
	 * until we have an entire rpc request/reply.
	 */
	if (sotype != SOCK_DGRAM) {
		error = nfs_sndlock(nmp, rep);
		if (error)
			return (error);
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		/*
		 * Ugh: If a reconnect attempt just happened, nm_so
		 * would have changed. NULL indicates a failed
		 * attempt that has essentially shut down this
		 * mount point.
		 */
		if (rep && (rep->r_mrep || (rep->r_flags & R_SOFTTERM))) {
			nfs_sndunlock(nmp);
			return (EINTR);
		}
		so = nmp->nm_so;
		if (so == NULL) {
			error = nfs_reconnect(nmp, rep);
			if (error) {
				nfs_sndunlock(nmp);
				return (error);
			}
			goto tryagain;
		}
		/* Retransmit our request (copy, original kept) if flagged. */
		while (rep && (rep->r_flags & R_NEEDSXMIT)) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, MB_WAIT);
			nfsstats.rpcretries++;
			error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
			if (error) {
				if (error == EINTR || error == ERESTART ||
				    (error = nfs_reconnect(nmp, rep)) != 0) {
					nfs_sndunlock(nmp);
					return (error);
				}
				goto tryagain;
			}
		}
		nfs_sndunlock(nmp);
		if (sotype == SOCK_STREAM) {
			/*
			 * Get the length marker from the stream
			 */
			aio.iov_base = (caddr_t)&len;
			aio.iov_len = sizeof(u_int32_t);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_int32_t);
			auio.uio_td = td;
			do {
				rcvflg = MSG_WAITALL;
				error = so_pru_soreceive(so, NULL, &auio, NULL,
							 NULL, &rcvflg);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
			} while (error == EWOULDBLOCK);

			if (error == 0 && auio.uio_resid > 0) {
				/*
				 * Only log short packets if not EOF
				 */
				if (auio.uio_resid != sizeof(u_int32_t))
					log(LOG_INFO,
					    "short receive (%d/%d) from nfs server %s\n",
					    (int)(sizeof(u_int32_t) - auio.uio_resid),
					    (int)sizeof(u_int32_t),
					    nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
			if (error)
				goto errout;
			/* Strip the last-fragment bit from the record mark. */
			len = ntohl(len) & ~0x80000000;
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			if (len > NFS_MAXPACKET) {
				log(LOG_ERR, "%s (%d) from nfs server %s\n",
				    "impossible packet length",
				    len,
				    nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EFBIG;
				goto errout;
			}

			/*
			 * Get the rest of the packet as an mbuf chain
			 */
			sbinit(&sio, len);
			do {
				rcvflg = MSG_WAITALL;
				error = so_pru_soreceive(so, NULL, NULL, &sio,
							 NULL, &rcvflg);
			} while (error == EWOULDBLOCK || error == EINTR ||
				 error == ERESTART);
			if (error == 0 && sio.sb_cc != len) {
				/*
				 * NOTE(review): the byte count logged below
				 * is computed from auio.uio_resid, which is
				 * left over from the record-mark read above;
				 * sio.sb_cc looks like the intended value --
				 * confirm before relying on this log line.
				 */
				if (sio.sb_cc != 0)
					log(LOG_INFO,
					    "short receive (%zu/%d) from nfs server %s\n",
					    (size_t)len - auio.uio_resid, len,
					    nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
			*mp = sio.sb_mb;
		} else {
			/*
			 * Non-stream, so get the whole packet by not
			 * specifying MSG_WAITALL and by specifying a large
			 * length.
			 *
			 * We have no use for control msg., but must grab them
			 * and then throw them away so we know what is going
			 * on.
			 */
			sbinit(&sio, 100000000);
			do {
				rcvflg = 0;
				error = so_pru_soreceive(so, NULL, NULL, &sio,
							 &control, &rcvflg);
				if (control)
					m_freem(control);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM) {
						m_freem(sio.sb_mb);
						return (EINTR);
					}
				}
			} while (error == EWOULDBLOCK ||
				 (error == 0 && sio.sb_mb == NULL && control));
			if ((rcvflg & MSG_EOR) == 0)
				kprintf("Egad!!\n");
			if (error == 0 && sio.sb_mb == NULL)
				error = EPIPE;
			len = sio.sb_cc;
			*mp = sio.sb_mb;
		}
errout:
		if (error && error != EINTR && error != ERESTART) {
			m_freem(*mp);
			*mp = NULL;
			if (error != EPIPE) {
				log(LOG_INFO,
				    "receive error %d from nfs server %s\n",
				    error,
				    nmp->nm_mountp->mnt_stat.f_mntfromname);
			}
			/* Try to re-establish the connection and restart. */
			error = nfs_sndlock(nmp, rep);
			if (!error) {
				error = nfs_reconnect(nmp, rep);
				if (!error)
					goto tryagain;
				else
					nfs_sndunlock(nmp);
			}
		}
	} else {
		if ((so = nmp->nm_so) == NULL)
			return (EACCES);
		if (so->so_state & SS_ISCONNECTED)
			getnam = NULL;
		else
			getnam = aname;
		sbinit(&sio, 100000000);
		do {
			rcvflg = 0;
			error = so_pru_soreceive(so, getnam, NULL, &sio,
						 NULL, &rcvflg);
			if (error == EWOULDBLOCK && rep &&
			    (rep->r_flags & R_SOFTTERM)) {
				m_freem(sio.sb_mb);
				return (EINTR);
			}
		} while (error == EWOULDBLOCK);

		len = sio.sb_cc;
		*mp = sio.sb_mb;

		/*
		 * A shutdown may result in no error and no mbuf.
		 * Convert to EPIPE.
		 */
		if (*mp == NULL && error == 0)
			error = EPIPE;
	}
	if (error) {
		m_freem(*mp);
		*mp = NULL;
	}

	/*
	 * Search for any mbufs that are not a multiple of 4 bytes long
	 * or with m_data not longword aligned.
	 * These could cause pointer alignment problems, so copy them to
	 * well aligned mbufs.
	 */
	nfs_realign(mp, 5 * NFSX_UNSIGNED);
	return (error);
}
755
/*
 * Implement receipt of reply on a socket.
 *
 * We must search through the list of received datagrams matching them
 * with outstanding requests using the xid, until ours is found.
 *
 * If myrep is NULL we process packets on the socket until
 * interrupted or until nm_reqrxq is non-empty.
 *
 * Returns 0 once our own reply (or an EALREADY from nfs_rcvlock,
 * meaning another thread already received it) is in hand, EWOULDBLOCK
 * for the helper-thread yield case, or an errno.
 */
/* ARGSUSED */
int
nfs_reply(struct nfsmount *nmp, struct nfsreq *myrep)
{
	struct nfsreq *rep;
	struct sockaddr *nam;
	u_int32_t rxid;
	u_int32_t *tl;
	int error;
	struct nfsm_info info;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 *
		 * If nfs_rcvlock() returns EALREADY, that means that
		 * the reply has already been recieved by another
		 * process and we can return immediately. In this
		 * case, the lock is not taken to avoid races with
		 * other processes.
		 */
		info.mrep = NULL;

		error = nfs_rcvlock(nmp, myrep);
		if (error == EALREADY)
			return (0);
		if (error)
			return (error);

		/*
		 * If myrep is NULL we are the receiver helper thread.
		 * Stop waiting for incoming replies if there are
		 * messages sitting on reqrxq that we need to process,
		 * or if a shutdown request is pending.
		 */
		if (myrep == NULL && (TAILQ_FIRST(&nmp->nm_reqrxq) ||
		    nmp->nm_rxstate > NFSSVC_PENDING)) {
			nfs_rcvunlock(nmp);
			return(EWOULDBLOCK);
		}

		/*
		 * Get the next Rpc reply off the socket
		 *
		 * We cannot release the receive lock until we've
		 * filled in rep->r_mrep, otherwise a waiting
		 * thread may deadlock in soreceive with no incoming
		 * packets expected.
		 */
		error = nfs_receive(nmp, myrep, &nam, &info.mrep);
		if (error) {
			/*
			 * Ignore routing errors on connectionless protocols??
			 */
			nfs_rcvunlock(nmp);
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
				if (nmp->nm_so == NULL)
					return (error);
				nmp->nm_so->so_error = 0;
				continue;
			}
			return (error);
		}
		if (nam)
			FREE(nam, M_SONAME);

		/*
		 * Get the xid and check that it is an rpc reply.
		 * NULLOUT's failure path jumps to the nfsmout label below.
		 */
		info.md = info.mrep;
		info.dpos = mtod(info.md, caddr_t);
		NULLOUT(tl = nfsm_dissect(&info, 2*NFSX_UNSIGNED));
		rxid = *tl++;
		if (*tl != rpc_reply) {
			nfsstats.rpcinvalid++;
			m_freem(info.mrep);
			info.mrep = NULL;
nfsmout:
			nfs_rcvunlock(nmp);
			continue;
		}

		/*
		 * Loop through the request list to match up the reply
		 * Iff no match, just drop the datagram. On match, set
		 * r_mrep atomically to prevent the timer from messing
		 * around with the request after we have exited the critical
		 * section.
		 */
		crit_enter();
		TAILQ_FOREACH(rep, &nmp->nm_reqq, r_chain) {
			if (rep->r_mrep == NULL && rxid == rep->r_xid)
				break;
		}

		/*
		 * Fill in the rest of the reply if we found a match.
		 *
		 * Deal with duplicate responses if there was no match.
		 */
		if (rep) {
			rep->r_md = info.md;
			rep->r_dpos = info.dpos;
			if (nfsrtton) {
				struct rttl *rt;

				rt = &nfsrtt.rttl[nfsrtt.pos];
				rt->proc = rep->r_procnum;
				rt->rto = 0;
				rt->sent = 0;
				rt->cwnd = nmp->nm_maxasync_scaled;
				rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
				rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
				rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
				getmicrotime(&rt->tstamp);
				if (rep->r_flags & R_TIMING)
					rt->rtt = rep->r_rtt;
				else
					rt->rtt = 1000000;
				nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
			}

			/*
			 * New congestion control is based only on async
			 * requests.
			 */
			if (nmp->nm_maxasync_scaled < NFS_MAXASYNC_SCALED)
				++nmp->nm_maxasync_scaled;
			if (rep->r_flags & R_SENT) {
				rep->r_flags &= ~R_SENT;
			}
			/*
			 * Update rtt using a gain of 0.125 on the mean
			 * and a gain of 0.25 on the deviation.
			 *
			 * NOTE SRTT/SDRTT are only good if R_TIMING is set.
			 */
			if ((rep->r_flags & R_TIMING) && rep->r_rexmit == 0) {
				/*
				 * Since the timer resolution of
				 * NFS_HZ is so course, it can often
				 * result in r_rtt == 0. Since
				 * r_rtt == N means that the actual
				 * rtt is between N+dt and N+2-dt ticks,
				 * add 1.
				 */
				int n;
				int d;

#define NFSRSB	NFS_RTT_SCALE_BITS
				n = ((NFS_SRTT(rep) * 7) +
				     (rep->r_rtt << NFSRSB)) >> 3;
				d = n - NFS_SRTT(rep);
				NFS_SRTT(rep) = n;

				/*
				 * Don't let the jitter calculation decay
				 * too quickly, but we want a fast rampup.
				 */
				if (d < 0)
					d = -d;
				d <<= NFSRSB;
				if (d < NFS_SDRTT(rep))
					n = ((NFS_SDRTT(rep) * 15) + d) >> 4;
				else
					n = ((NFS_SDRTT(rep) * 3) + d) >> 2;
				NFS_SDRTT(rep) = n;
#undef NFSRSB
			}
			nmp->nm_timeouts = 0;
			/* Publishing r_mrep hands the request to its waiter. */
			rep->r_mrep = info.mrep;
			nfs_hardterm(rep, 0);
		} else {
			/*
			 * Extract vers, prog, nfsver, procnum. A duplicate
			 * response means we didn't wait long enough so
			 * we increase the SRTT to avoid future spurious
			 * timeouts.
			 */
			u_int procnum = nmp->nm_lastreprocnum;
			int n;

			if (procnum < NFS_NPROCS && proct[procnum]) {
				if (nfs_showrexmit)
					kprintf("D");
				n = nmp->nm_srtt[proct[procnum]];
				n += NFS_ASYSCALE * NFS_HZ;
				if (n < NFS_ASYSCALE * NFS_HZ * 10)
					n = NFS_ASYSCALE * NFS_HZ * 10;
				nmp->nm_srtt[proct[procnum]] = n;
			}
		}
		/* Release receive lock before leaving the crit section. */
		nfs_rcvunlock(nmp);
		crit_exit();

		/*
		 * If not matched to a request, drop it.
		 * If it's mine, get out.
		 */
		if (rep == NULL) {
			nfsstats.rpcunexpected++;
			m_freem(info.mrep);
			info.mrep = NULL;
		} else if (rep == myrep) {
			if (rep->r_mrep == NULL)
				panic("nfsreply nil");
			return (0);
		}
	}
}
981
92540a7e
MD
982/*
983 * Run the request state machine until the target state is reached
984 * or a fatal error occurs. The target state is not run. Specifying
985 * a target of NFSM_STATE_DONE runs the state machine until the rpc
986 * is complete.
987 *
988 * EINPROGRESS is returned for all states other then the DONE state,
989 * indicating that the rpc is still in progress.
990 */
int
nfs_request(struct nfsm_info *info, nfsm_state_t bstate, nfsm_state_t estate)
{
	struct nfsreq *req;

	/*
	 * Advance the state machine one state at a time until we reach
	 * estate (which is not executed) or the RPC terminates.  bstate
	 * bounds re-entry from below for async continuation.
	 */
	while (info->state >= bstate && info->state < estate) {
		switch(info->state) {
		case NFSM_STATE_SETUP:
			/*
			 * Setup the nfsreq.  Any error which occurs during
			 * this state is fatal.
			 */
			info->error = nfs_request_setup(info);
			if (info->error) {
				info->state = NFSM_STATE_DONE;
				return (info->error);
			} else {
				req = info->req;
				/*
				 * Point the request's reply fields back into
				 * info so the receiver can deliver directly.
				 */
				req->r_mrp = &info->mrep;
				req->r_mdp = &info->md;
				req->r_dposp = &info->dpos;
				info->state = NFSM_STATE_AUTH;
			}
			break;
		case NFSM_STATE_AUTH:
			/*
			 * Authenticate the nfsreq.  Any error which occurs
			 * during this state is fatal.
			 */
			info->error = nfs_request_auth(info->req);
			if (info->error) {
				info->state = NFSM_STATE_DONE;
				return (info->error);
			} else {
				info->state = NFSM_STATE_TRY;
			}
			break;
		case NFSM_STATE_TRY:
			/*
			 * Transmit or retransmit attempt.  An error in this
			 * state is ignored and we always move on to the
			 * next state.
			 *
			 * This can trivially race the receiver if the
			 * request is asynchronous.  nfs_request_try()
			 * will thus set the state for us and we
			 * must also return immediately if we are
			 * running an async state machine, because
			 * info can become invalid due to races after
			 * try() returns.
			 */
			if (info->req->r_flags & R_ASYNC) {
				nfs_request_try(info->req);
				/* do NOT touch info after this point (async) */
				if (estate == NFSM_STATE_WAITREPLY)
					return (EINPROGRESS);
			} else {
				nfs_request_try(info->req);
				info->state = NFSM_STATE_WAITREPLY;
			}
			break;
		case NFSM_STATE_WAITREPLY:
			/*
			 * Wait for a reply or timeout and move on to the
			 * next state.  The error returned by this state
			 * is passed to the processing code in the next
			 * state.
			 */
			info->error = nfs_request_waitreply(info->req);
			info->state = NFSM_STATE_PROCESSREPLY;
			break;
		case NFSM_STATE_PROCESSREPLY:
			/*
			 * Process the reply or timeout.  Errors which occur
			 * in this state may cause the state machine to
			 * go back to an earlier state, and are fatal
			 * otherwise.
			 */
			info->error = nfs_request_processreply(info,
							       info->error);
			switch(info->error) {
			case ENEEDAUTH:
				/* authentication retry requested */
				info->state = NFSM_STATE_AUTH;
				break;
			case EAGAIN:
				/* server asked us to retry the transmit */
				info->state = NFSM_STATE_TRY;
				break;
			default:
				/*
				 * Operation complete, with or without an
				 * error.  We are done.
				 */
				info->req = NULL;
				info->state = NFSM_STATE_DONE;
				return (info->error);
			}
			break;
		case NFSM_STATE_DONE:
			/*
			 * Shouldn't be reached
			 */
			return (info->error);
			/* NOT REACHED */
		}
	}

	/*
	 * If we are done return the error code (if any).
	 * Otherwise return EINPROGRESS.
	 */
	if (info->state == NFSM_STATE_DONE)
		return (info->error);
	return (EINPROGRESS);
}
1104
984263bc
MD
1105/*
1106 * nfs_request - goes something like this
1107 * - fill in request struct
1108 * - links it into list
1109 * - calls nfs_send() for first transmit
1110 * - calls nfs_receive() to get reply
1111 * - break down rpc header and return with nfs reply pointed to
1112 * by mrep or error
1113 * nb: always frees up mreq mbuf list
1114 */
92540a7e
MD
1115static int
1116nfs_request_setup(nfsm_info_t info)
984263bc 1117{
92540a7e 1118 struct nfsreq *req;
984263bc 1119 struct nfsmount *nmp;
8684e6f9
MD
1120 struct mbuf *m;
1121 int i;
984263bc 1122
92540a7e
MD
1123 /*
1124 * Reject requests while attempting a forced unmount.
1125 */
1126 if (info->vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) {
1127 m_freem(info->mreq);
1128 info->mreq = NULL;
984263bc
MD
1129 return (ESTALE);
1130 }
92540a7e
MD
1131 nmp = VFSTONFS(info->vp->v_mount);
1132 req = kmalloc(sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
1133 req->r_nmp = nmp;
1134 req->r_vp = info->vp;
1135 req->r_td = info->td;
1136 req->r_procnum = info->procnum;
1137 req->r_mreq = NULL;
f8565b0f
MD
1138 req->r_cred = info->cred;
1139
984263bc 1140 i = 0;
92540a7e 1141 m = info->mreq;
984263bc
MD
1142 while (m) {
1143 i += m->m_len;
1144 m = m->m_next;
1145 }
92540a7e
MD
1146 req->r_mrest = info->mreq;
1147 req->r_mrest_len = i;
edb90c22
MD
1148
1149 /*
1150 * The presence of a non-NULL r_info in req indicates
1151 * async completion via our helper threads. See the receiver
1152 * code.
1153 */
f8565b0f
MD
1154 if (info->bio) {
1155 req->r_info = info;
1156 req->r_flags = R_ASYNC;
1157 } else {
1158 req->r_info = NULL;
1159 req->r_flags = 0;
1160 }
92540a7e 1161 info->req = req;
8684e6f9
MD
1162 return(0);
1163}
1164
92540a7e 1165static int
8684e6f9
MD
1166nfs_request_auth(struct nfsreq *rep)
1167{
1168 struct nfsmount *nmp = rep->r_nmp;
1169 struct mbuf *m;
1170 char nickv[RPCX_NICKVERF];
1171 int error = 0, auth_len, auth_type;
1172 int verf_len;
1173 u_int32_t xid;
1174 char *auth_str, *verf_str;
1175 struct ucred *cred;
1176
1177 cred = rep->r_cred;
1178 rep->r_failed_auth = 0;
984263bc
MD
1179
1180 /*
1181 * Get the RPC header with authorization.
1182 */
60233e58 1183 verf_str = auth_str = NULL;
984263bc
MD
1184 if (nmp->nm_flag & NFSMNT_KERB) {
1185 verf_str = nickv;
1186 verf_len = sizeof (nickv);
1187 auth_type = RPCAUTH_KERB4;
8684e6f9
MD
1188 bzero((caddr_t)rep->r_key, sizeof(rep->r_key));
1189 if (rep->r_failed_auth ||
1190 nfs_getnickauth(nmp, cred, &auth_str, &auth_len,
1191 verf_str, verf_len)) {
984263bc 1192 error = nfs_getauth(nmp, rep, cred, &auth_str,
8684e6f9 1193 &auth_len, verf_str, &verf_len, rep->r_key);
984263bc 1194 if (error) {
8684e6f9
MD
1195 m_freem(rep->r_mrest);
1196 rep->r_mrest = NULL;
efda3bd0 1197 kfree((caddr_t)rep, M_NFSREQ);
984263bc
MD
1198 return (error);
1199 }
1200 }
1201 } else {
1202 auth_type = RPCAUTH_UNIX;
1203 if (cred->cr_ngroups < 1)
1204 panic("nfsreq nogrps");
1205 auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
1206 nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
1207 5 * NFSX_UNSIGNED;
1208 }
7f3ffbb4
MD
1209 if (rep->r_mrest)
1210 nfs_checkpkt(rep->r_mrest, rep->r_mrest_len);
8684e6f9
MD
1211 m = nfsm_rpchead(cred, nmp->nm_flag, rep->r_procnum, auth_type,
1212 auth_len, auth_str, verf_len, verf_str,
1213 rep->r_mrest, rep->r_mrest_len, &rep->r_mheadend, &xid);
1214 rep->r_mrest = NULL;
984263bc 1215 if (auth_str)
efda3bd0 1216 kfree(auth_str, M_TEMP);
984263bc
MD
1217
1218 /*
1219 * For stream protocols, insert a Sun RPC Record Mark.
1220 */
1221 if (nmp->nm_sotype == SOCK_STREAM) {
74f1caca 1222 M_PREPEND(m, NFSX_UNSIGNED, MB_WAIT);
68db5ea4 1223 if (m == NULL) {
efda3bd0 1224 kfree(rep, M_NFSREQ);
1cee5817 1225 return (ENOBUFS);
68db5ea4 1226 }
984263bc
MD
1227 *mtod(m, u_int32_t *) = htonl(0x80000000 |
1228 (m->m_pkthdr.len - NFSX_UNSIGNED));
1229 }
7f3ffbb4
MD
1230
1231 nfs_checkpkt(m, m->m_pkthdr.len);
1232
984263bc
MD
1233 rep->r_mreq = m;
1234 rep->r_xid = xid;
8684e6f9
MD
1235 return (0);
1236}
1237
92540a7e 1238static int
8684e6f9
MD
1239nfs_request_try(struct nfsreq *rep)
1240{
1241 struct nfsmount *nmp = rep->r_nmp;
1242 struct mbuf *m2;
1243 int error;
1244
f8565b0f
MD
1245 /*
1246 * Request is not on any queue, only the owner has access to it
1247 * so it should not be locked by anyone atm.
1248 *
1249 * Interlock to prevent races. While locked the only remote
1250 * action possible is for r_mrep to be set (once we enqueue it).
1251 */
1252 if (rep->r_flags == 0xdeadc0de) {
7ce2998e 1253 print_backtrace(-1);
f8565b0f
MD
1254 panic("flags nbad\n");
1255 }
1256 KKASSERT((rep->r_flags & (R_LOCKED | R_ONREQQ)) == 0);
984263bc
MD
1257 if (nmp->nm_flag & NFSMNT_SOFT)
1258 rep->r_retry = nmp->nm_retry;
1259 else
1260 rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
1261 rep->r_rtt = rep->r_rexmit = 0;
8684e6f9 1262 if (proct[rep->r_procnum] > 0)
f8565b0f 1263 rep->r_flags |= R_TIMING | R_LOCKED;
984263bc 1264 else
f8565b0f 1265 rep->r_flags |= R_LOCKED;
984263bc
MD
1266 rep->r_mrep = NULL;
1267
1268 /*
1269 * Do the client side RPC.
1270 */
1271 nfsstats.rpcrequests++;
54938b92 1272
d9adbeaf
MD
1273 if (nmp->nm_flag & NFSMNT_FORCE) {
1274 rep->r_flags |= R_SOFTTERM;
1275 rep->r_flags &= ~R_LOCKED;
1276 return (0);
1277 }
1278
984263bc
MD
1279 /*
1280 * Chain request into list of outstanding requests. Be sure
54938b92 1281 * to put it LAST so timer finds oldest requests first. Note
f8565b0f
MD
1282 * that our control of R_LOCKED prevents the request from
1283 * getting ripped out from under us or transmitted by the
1284 * timer code.
1285 *
1286 * For requests with info structures we must atomically set the
1287 * info's state because the structure could become invalid upon
1288 * return due to races (i.e., if async)
984263bc 1289 */
165dba55 1290 crit_enter();
8684e6f9 1291 mtx_link_init(&rep->r_link);
f8565b0f
MD
1292 TAILQ_INSERT_TAIL(&nmp->nm_reqq, rep, r_chain);
1293 rep->r_flags |= R_ONREQQ;
edb90c22 1294 ++nmp->nm_reqqlen;
f8565b0f
MD
1295 if (rep->r_flags & R_ASYNC)
1296 rep->r_info->state = NFSM_STATE_WAITREPLY;
1297 crit_exit();
8684e6f9
MD
1298
1299 error = 0;
984263bc 1300
984263bc 1301 /*
f8565b0f
MD
1302 * Send if we can. Congestion control is not handled here any more
1303 * becausing trying to defer the initial send based on the nfs_timer
1304 * requires having a very fast nfs_timer, which is silly.
984263bc 1305 */
f8565b0f 1306 if (nmp->nm_so) {
984263bc 1307 if (nmp->nm_soflags & PR_CONNREQUIRED)
edb90c22 1308 error = nfs_sndlock(nmp, rep);
f8565b0f 1309 if (error == 0) {
8684e6f9 1310 m2 = m_copym(rep->r_mreq, 0, M_COPYALL, MB_WAIT);
984263bc
MD
1311 error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep);
1312 if (nmp->nm_soflags & PR_CONNREQUIRED)
edb90c22 1313 nfs_sndunlock(nmp);
f8565b0f
MD
1314 rep->r_flags &= ~R_NEEDSXMIT;
1315 if ((rep->r_flags & R_SENT) == 0) {
1316 rep->r_flags |= R_SENT;
1317 }
1318 } else {
1319 rep->r_flags |= R_NEEDSXMIT;
984263bc
MD
1320 }
1321 } else {
f8565b0f 1322 rep->r_flags |= R_NEEDSXMIT;
984263bc
MD
1323 rep->r_rtt = -1;
1324 }
8684e6f9
MD
1325 if (error == EPIPE)
1326 error = 0;
f8565b0f 1327
984263bc 1328 /*
f8565b0f
MD
1329 * Release the lock. The only remote action that may have occurred
1330 * would have been the setting of rep->r_mrep. If this occured
1331 * and the request was async we have to move it to the reader
1332 * thread's queue for action.
1333 *
1334 * For async requests also make sure the reader is woken up so
1335 * it gets on the socket to read responses.
984263bc 1336 */
f8565b0f
MD
1337 crit_enter();
1338 if (rep->r_flags & R_ASYNC) {
1339 if (rep->r_mrep)
1340 nfs_hardterm(rep, 1);
1341 rep->r_flags &= ~R_LOCKED;
1342 nfssvc_iod_reader_wakeup(nmp);
1343 } else {
1344 rep->r_flags &= ~R_LOCKED;
1345 }
1346 if (rep->r_flags & R_WANTED) {
1347 rep->r_flags &= ~R_WANTED;
1348 wakeup(rep);
1349 }
8684e6f9
MD
1350 crit_exit();
1351 return (error);
1352}
1353
f8565b0f
MD
1354/*
1355 * This code is only called for synchronous requests. Completed synchronous
1356 * requests are left on reqq and we remove them before moving on to the
1357 * processing state.
1358 */
static int
nfs_request_waitreply(struct nfsreq *rep)
{
	struct nfsmount *nmp = rep->r_nmp;
	int error;

	/* async requests complete via the iod threads, never here */
	KKASSERT((rep->r_flags & R_ASYNC) == 0);

	/*
	 * Wait until the request is finished.
	 */
	error = nfs_reply(nmp, rep);

	/*
	 * RPC done, unlink the request, but don't rip it out from under
	 * the callout timer.
	 *
	 * Once unlinked no other receiver or the timer will have
	 * visibility, so we do not have to set R_LOCKED.
	 */
	crit_enter();
	/* the timer holds R_LOCKED while retransmitting; wait it out */
	while (rep->r_flags & R_LOCKED) {
		rep->r_flags |= R_WANTED;
		tsleep(rep, 0, "nfstrac", 0);
	}
	KKASSERT(rep->r_flags & R_ONREQQ);
	TAILQ_REMOVE(&nmp->nm_reqq, rep, r_chain);
	rep->r_flags &= ~R_ONREQQ;
	--nmp->nm_reqqlen;
	/* kick the writer if the request queue has drained enough */
	if (TAILQ_FIRST(&nmp->nm_bioq) &&
	    nmp->nm_reqqlen <= nfs_maxasyncbio * 2 / 3) {
		nfssvc_iod_writer_wakeup(nmp);
	}
	crit_exit();

	/*
	 * Decrement the outstanding request count.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;
	}
	return (error);
}
1402
1403/*
1404 * Process reply with error returned from nfs_requet_waitreply().
1405 *
1406 * Returns EAGAIN if it wants us to loop up to nfs_request_try() again.
1407 * Returns ENEEDAUTH if it wants us to loop up to nfs_request_auth() again.
1408 */
92540a7e
MD
1409static int
1410nfs_request_processreply(nfsm_info_t info, int error)
8684e6f9 1411{
92540a7e
MD
1412 struct nfsreq *req = info->req;
1413 struct nfsmount *nmp = req->r_nmp;
8684e6f9 1414 u_int32_t *tl;
8684e6f9 1415 int verf_type;
8684e6f9
MD
1416 int i;
1417
984263bc
MD
1418 /*
1419 * If there was a successful reply and a tprintf msg.
1420 * tprintf a response.
1421 */
92540a7e
MD
1422 if (error == 0 && (req->r_flags & R_TPRINTFMSG)) {
1423 nfs_msg(req->r_td, nmp->nm_mountp->mnt_stat.f_mntfromname,
984263bc 1424 "is alive again");
92540a7e
MD
1425 }
1426 info->mrep = req->r_mrep;
1427 info->md = req->r_md;
1428 info->dpos = req->r_dpos;
984263bc 1429 if (error) {
92540a7e
MD
1430 m_freem(req->r_mreq);
1431 req->r_mreq = NULL;
1432 kfree(req, M_NFSREQ);
1433 info->req = NULL;
984263bc
MD
1434 return (error);
1435 }
1436
1437 /*
1438 * break down the rpc header and check if ok
1439 */
92540a7e 1440 NULLOUT(tl = nfsm_dissect(info, 3 * NFSX_UNSIGNED));
984263bc 1441 if (*tl++ == rpc_msgdenied) {
8684e6f9 1442 if (*tl == rpc_mismatch) {
984263bc 1443 error = EOPNOTSUPP;
8684e6f9
MD
1444 } else if ((nmp->nm_flag & NFSMNT_KERB) &&
1445 *tl++ == rpc_autherr) {
92540a7e
MD
1446 if (req->r_failed_auth == 0) {
1447 req->r_failed_auth++;
1448 req->r_mheadend->m_next = NULL;
1449 m_freem(info->mrep);
1450 info->mrep = NULL;
1451 m_freem(req->r_mreq);
7f3ffbb4 1452 req->r_mreq = NULL;
8684e6f9
MD
1453 return (ENEEDAUTH);
1454 } else {
984263bc 1455 error = EAUTH;
8684e6f9
MD
1456 }
1457 } else {
984263bc 1458 error = EACCES;
8684e6f9 1459 }
92540a7e
MD
1460 m_freem(info->mrep);
1461 info->mrep = NULL;
1462 m_freem(req->r_mreq);
1463 req->r_mreq = NULL;
1464 kfree(req, M_NFSREQ);
1465 info->req = NULL;
984263bc
MD
1466 return (error);
1467 }
1468
1469 /*
1470 * Grab any Kerberos verifier, otherwise just throw it away.
1471 */
1472 verf_type = fxdr_unsigned(int, *tl++);
1473 i = fxdr_unsigned(int32_t, *tl);
1474 if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
92540a7e
MD
1475 error = nfs_savenickauth(nmp, req->r_cred, i, req->r_key,
1476 &info->md, &info->dpos, info->mrep);
984263bc
MD
1477 if (error)
1478 goto nfsmout;
42edf14f 1479 } else if (i > 0) {
92540a7e 1480 ERROROUT(nfsm_adv(info, nfsm_rndup(i)));
42edf14f 1481 }
92540a7e 1482 NULLOUT(tl = nfsm_dissect(info, NFSX_UNSIGNED));
984263bc
MD
1483 /* 0 == ok */
1484 if (*tl == 0) {
92540a7e 1485 NULLOUT(tl = nfsm_dissect(info, NFSX_UNSIGNED));
984263bc
MD
1486 if (*tl != 0) {
1487 error = fxdr_unsigned(int, *tl);
f8565b0f
MD
1488
1489 /*
1490 * Does anyone even implement this? Just impose
1491 * a 1-second delay.
1492 */
984263bc
MD
1493 if ((nmp->nm_flag & NFSMNT_NFSV3) &&
1494 error == NFSERR_TRYLATER) {
92540a7e
MD
1495 m_freem(info->mrep);
1496 info->mrep = NULL;
984263bc 1497 error = 0;
f8565b0f
MD
1498
1499 tsleep((caddr_t)&lbolt, 0, "nqnfstry", 0);
8684e6f9 1500 return (EAGAIN); /* goto tryagain */
984263bc
MD
1501 }
1502
1503 /*
1504 * If the File Handle was stale, invalidate the
1505 * lookup cache, just in case.
6739ac6b
MD
1506 *
1507 * To avoid namecache<->vnode deadlocks we must
1508 * release the vnode lock if we hold it.
984263bc 1509 */
dc1be39c 1510 if (error == ESTALE) {
92540a7e 1511 struct vnode *vp = req->r_vp;
6739ac6b
MD
1512 int ltype;
1513
1514 ltype = lockstatus(&vp->v_lock, curthread);
1515 if (ltype == LK_EXCLUSIVE || ltype == LK_SHARED)
1516 lockmgr(&vp->v_lock, LK_RELEASE);
6b008938 1517 cache_inval_vp(vp, CINV_CHILDREN);
6739ac6b
MD
1518 if (ltype == LK_EXCLUSIVE || ltype == LK_SHARED)
1519 lockmgr(&vp->v_lock, ltype);
dc1be39c 1520 }
984263bc 1521 if (nmp->nm_flag & NFSMNT_NFSV3) {
92540a7e
MD
1522 KKASSERT(*req->r_mrp == info->mrep);
1523 KKASSERT(*req->r_mdp == info->md);
1524 KKASSERT(*req->r_dposp == info->dpos);
984263bc 1525 error |= NFSERR_RETERR;
42edf14f 1526 } else {
92540a7e
MD
1527 m_freem(info->mrep);
1528 info->mrep = NULL;
42edf14f 1529 }
92540a7e
MD
1530 m_freem(req->r_mreq);
1531 req->r_mreq = NULL;
1532 kfree(req, M_NFSREQ);
1533 info->req = NULL;
984263bc
MD
1534 return (error);
1535 }
1536
92540a7e
MD
1537 KKASSERT(*req->r_mrp == info->mrep);
1538 KKASSERT(*req->r_mdp == info->md);
1539 KKASSERT(*req->r_dposp == info->dpos);
1540 m_freem(req->r_mreq);
1541 req->r_mreq = NULL;
1542 FREE(req, M_NFSREQ);
984263bc
MD
1543 return (0);
1544 }
92540a7e
MD
1545 m_freem(info->mrep);
1546 info->mrep = NULL;
984263bc
MD
1547 error = EPROTONOSUPPORT;
1548nfsmout:
92540a7e
MD
1549 m_freem(req->r_mreq);
1550 req->r_mreq = NULL;
1551 kfree(req, M_NFSREQ);
1552 info->req = NULL;
984263bc
MD
1553 return (error);
1554}
1555
1556#ifndef NFS_NOSERVER
1557/*
1558 * Generate the rpc reply header
1559 * siz arg. is used to decide if adding a cluster is worthwhile
1560 */
1561int
e851b29e 1562nfs_rephead(int siz, struct nfsrv_descript *nd, struct nfssvc_sock *slp,
e07fef60 1563 int err, struct mbuf **mrq, struct mbuf **mbp, caddr_t *bposp)
984263bc 1564{
40393ded 1565 u_int32_t *tl;
42edf14f 1566 struct nfsm_info info;
984263bc 1567
7771277f 1568 siz += RPC_REPLYSIZ;
42edf14f
MD
1569 info.mb = m_getl(max_hdr + siz, MB_WAIT, MT_DATA, M_PKTHDR, NULL);
1570 info.mreq = info.mb;
1571 info.mreq->m_pkthdr.len = 0;
984263bc 1572 /*
7771277f
JH
1573 * If this is not a cluster, try and leave leading space
1574 * for the lower level headers.
984263bc 1575 */
7771277f 1576 if ((max_hdr + siz) < MINCLSIZE)
42edf14f
MD
1577 info.mreq->m_data += max_hdr;
1578 tl = mtod(info.mreq, u_int32_t *);
1579 info.mreq->m_len = 6 * NFSX_UNSIGNED;
1580 info.bpos = ((caddr_t)tl) + info.mreq->m_len;
984263bc
MD
1581 *tl++ = txdr_unsigned(nd->nd_retxid);
1582 *tl++ = rpc_reply;
1583 if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
1584 *tl++ = rpc_msgdenied;
1585 if (err & NFSERR_AUTHERR) {
1586 *tl++ = rpc_autherr;
1587 *tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
42edf14f
MD
1588 info.mreq->m_len -= NFSX_UNSIGNED;
1589 info.bpos -= NFSX_UNSIGNED;
984263bc
MD
1590 } else {
1591 *tl++ = rpc_mismatch;
1592 *tl++ = txdr_unsigned(RPC_VER2);
1593 *tl = txdr_unsigned(RPC_VER2);
1594 }
1595 } else {
1596 *tl++ = rpc_msgaccepted;
1597
1598 /*
1599 * For Kerberos authentication, we must send the nickname
1600 * verifier back, otherwise just RPCAUTH_NULL.
1601 */
1602 if (nd->nd_flag & ND_KERBFULL) {
40393ded 1603 struct nfsuid *nuidp;
984263bc
MD
1604 struct timeval ktvin, ktvout;
1605
1606 for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first;
1607 nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
1608 if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
1609 (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
1610 &nuidp->nu_haddr, nd->nd_nam2)))
1611 break;
1612 }
1613 if (nuidp) {
1614 ktvin.tv_sec =
1615 txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
1616 ktvin.tv_usec =
1617 txdr_unsigned(nuidp->nu_timestamp.tv_usec);
1618
1619 /*
1620 * Encrypt the timestamp in ecb mode using the
1621 * session key.
1622 */
1623#ifdef NFSKERB
1624 XXX
d557216f
MD
1625#else
1626 ktvout.tv_sec = 0;
1627 ktvout.tv_usec = 0;
984263bc
MD
1628#endif
1629
1630 *tl++ = rpc_auth_kerb;
1631 *tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
1632 *tl = ktvout.tv_sec;
42edf14f 1633 tl = nfsm_build(&info, 3 * NFSX_UNSIGNED);
984263bc
MD
1634 *tl++ = ktvout.tv_usec;
1635 *tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
1636 } else {
1637 *tl++ = 0;
1638 *tl++ = 0;
1639 }
1640 } else {
1641 *tl++ = 0;
1642 *tl++ = 0;
1643 }
1644 switch (err) {
1645 case EPROGUNAVAIL:
1646 *tl = txdr_unsigned(RPC_PROGUNAVAIL);
1647 break;
1648 case EPROGMISMATCH:
1649 *tl = txdr_unsigned(RPC_PROGMISMATCH);
42edf14f 1650 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
e07fef60
MD
1651 *tl++ = txdr_unsigned(2);
1652 *tl = txdr_unsigned(3);
984263bc
MD
1653 break;
1654 case EPROCUNAVAIL:
1655 *tl = txdr_unsigned(RPC_PROCUNAVAIL);
1656 break;
1657 case EBADRPC:
1658 *tl = txdr_unsigned(RPC_GARBAGE);
1659 break;
1660 default:
1661 *tl = 0;
1662 if (err != NFSERR_RETVOID) {
42edf14f 1663 tl = nfsm_build(&info, NFSX_UNSIGNED);
984263bc
MD
1664 if (err)
1665 *tl = txdr_unsigned(nfsrv_errmap(nd, err));
1666 else
1667 *tl = 0;
1668 }
1669 break;
1670 };
1671 }
1672
984263bc 1673 if (mrq != NULL)
42edf14f
MD
1674 *mrq = info.mreq;
1675 *mbp = info.mb;
1676 *bposp = info.bpos;
984263bc
MD
1677 if (err != 0 && err != NFSERR_RETVOID)
1678 nfsstats.srvrpc_errs++;
1679 return (0);
1680}
1681
1682
1683#endif /* NFS_NOSERVER */
f8565b0f 1684
984263bc 1685/*
f8565b0f
MD
1686 * Nfs timer routine.
1687 *
984263bc
MD
1688 * Scan the nfsreq list and retranmit any requests that have timed out
1689 * To avoid retransmission attempts on STREAM sockets (in the future) make
1690 * sure to set the r_retry field to 0 (implies nm_retry == 0).
f8565b0f
MD
1691 *
1692 * Requests with attached responses, terminated requests, and
1693 * locked requests are ignored. Locked requests will be picked up
1694 * in a later timer call.
984263bc
MD
1695 */
void
nfs_timer_callout(void *arg /* never used */)
{
	struct nfsmount *nmp;
	struct nfsreq *req;
#ifndef NFS_NOSERVER
	struct nfssvc_sock *slp;
	u_quad_t cur_usec;
#endif /* NFS_NOSERVER */

	/* token order: nfs_token first, then per-mount nm_token */
	lwkt_gettoken(&nfs_token);
	TAILQ_FOREACH(nmp, &nfs_mountq, nm_entry) {
		lwkt_gettoken(&nmp->nm_token);
		TAILQ_FOREACH(req, &nmp->nm_reqq, r_chain) {
			KKASSERT(nmp == req->r_nmp);
			/* skip answered requests */
			if (req->r_mrep)
				continue;
			/* skip terminated or busy requests */
			if (req->r_flags & (R_SOFTTERM | R_LOCKED))
				continue;
			/* hold R_LOCKED across the timeout processing */
			req->r_flags |= R_LOCKED;
			if (nfs_sigintr(nmp, req, req->r_td)) {
				nfs_softterm(req, 1);
			} else {
				nfs_timer_req(req);
			}
			req->r_flags &= ~R_LOCKED;
			/* wake anyone who blocked on R_LOCKED */
			if (req->r_flags & R_WANTED) {
				req->r_flags &= ~R_WANTED;
				wakeup(req);
			}
		}
		lwkt_reltoken(&nmp->nm_token);
	}
#ifndef NFS_NOSERVER

	/*
	 * Scan the write gathering queues for writes that need to be
	 * completed now.
	 */
	cur_usec = nfs_curusec();

	TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
		/* XXX race against removal */
		lwkt_gettoken(&slp->ns_token);
		if (slp->ns_tq.lh_first && slp->ns_tq.lh_first->nd_time<=cur_usec)
			nfsrv_wakenfsd(slp, 1);
		lwkt_reltoken(&slp->ns_token);
	}
#endif /* NFS_NOSERVER */

	/* re-arm ourselves for the next tick */
	callout_reset(&nfs_timer_handle, nfs_ticks, nfs_timer_callout, NULL);
	lwkt_reltoken(&nfs_token);
}
1749
e21aec5b
MD
/*
 * Per-request timeout processing, called from nfs_timer_callout() with
 * the request R_LOCKED.  Computes the adaptive retransmit timeout,
 * soft-terminates requests that have exhausted their retries, and
 * retransmits on the socket when appropriate.
 */
static
void
nfs_timer_req(struct nfsreq *req)
{
	struct thread *td = &thread0; /* XXX for creds, will break if sleep */
	struct nfsmount *nmp = req->r_nmp;
	struct mbuf *m;
	struct socket *so;
	int timeo;
	int error;

	/*
	 * rtt ticks and timeout calculation.  Return if the timeout
	 * has not been reached yet, unless the packet is flagged
	 * for an immediate send.
	 *
	 * The mean rtt doesn't help when we get random I/Os, we have
	 * to multiply by fairly large numbers.
	 */
	if (req->r_rtt >= 0) {
		/*
		 * Calculate the timeout to test against.
		 */
		req->r_rtt++;
		if (nmp->nm_flag & NFSMNT_DUMBTIMR) {
			timeo = nmp->nm_timeo << NFS_RTT_SCALE_BITS;
		} else if (req->r_flags & R_TIMING) {
			/* smoothed rtt + deviation (Van Jacobson style) */
			timeo = NFS_SRTT(req) + NFS_SDRTT(req);
		} else {
			timeo = nmp->nm_timeo << NFS_RTT_SCALE_BITS;
		}
		timeo *= multt[req->r_procnum];
		/* timeo is still scaled by SCALE_BITS */

#define NFSFS	(NFS_RTT_SCALE * NFS_HZ)
		if (req->r_flags & R_TIMING) {
			static long last_time;
			if (nfs_showrtt && last_time != time_second) {
				kprintf("rpccmd %d NFS SRTT %d SDRTT %d "
					"timeo %d.%03d\n",
					proct[req->r_procnum],
					NFS_SRTT(req), NFS_SDRTT(req),
					timeo / NFSFS,
					timeo % NFSFS * 1000 /  NFSFS);
				last_time = time_second;
			}
		}
#undef NFSFS

		/*
		 * deal with nfs_timer jitter.
		 */
		timeo = (timeo >> NFS_RTT_SCALE_BITS) + 1;
		if (timeo < 2)
			timeo = 2;

		/* exponential backoff after consecutive timeouts */
		if (nmp->nm_timeouts > 0)
			timeo *= nfs_backoff[nmp->nm_timeouts - 1];
		if (timeo > NFS_MAXTIMEO)
			timeo = NFS_MAXTIMEO;
		if (req->r_rtt <= timeo) {
			/* not timed out yet, unless flagged for resend */
			if ((req->r_flags & R_NEEDSXMIT) == 0)
				return;
		} else if (nmp->nm_timeouts < 8) {
			nmp->nm_timeouts++;
		}
	}

	/*
	 * Check for server not responding
	 */
	if ((req->r_flags & R_TPRINTFMSG) == 0 &&
	    req->r_rexmit > nmp->nm_deadthresh) {
		nfs_msg(req->r_td, nmp->nm_mountp->mnt_stat.f_mntfromname,
			"not responding");
		req->r_flags |= R_TPRINTFMSG;
	}
	if (req->r_rexmit >= req->r_retry) {	/* too many */
		nfsstats.rpctimeouts++;
		nfs_softterm(req, 1);
		return;
	}

	/*
	 * Generally disable retransmission on reliable sockets,
	 * unless the request is flagged for immediate send.
	 */
	if (nmp->nm_sotype != SOCK_DGRAM) {
		if (++req->r_rexmit > NFS_MAXREXMIT)
			req->r_rexmit = NFS_MAXREXMIT;
		if ((req->r_flags & R_NEEDSXMIT) == 0)
			return;
	}

	/*
	 * Stop here if we do not have a socket!
	 */
	if ((so = nmp->nm_so) == NULL)
		return;

	/*
	 * If there is enough space and the window allows.. resend it.
	 *
	 * r_rtt is left intact in case we get an answer after the
	 * retry that was a reply to the original packet.
	 *
	 * NOTE: so_pru_send()
	 */
	if (ssb_space(&so->so_snd) >= req->r_mreq->m_pkthdr.len &&
	    (req->r_flags & (R_SENT | R_NEEDSXMIT)) &&
	   (m = m_copym(req->r_mreq, 0, M_COPYALL, MB_DONTWAIT))){
		if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
			error = so_pru_send(so, 0, m, NULL, NULL, td);
		else
			error = so_pru_send(so, 0, m, nmp->nm_nam, NULL, td);
		if (error) {
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
				so->so_error = 0;
			/* send failed: flag for another attempt */
			req->r_flags |= R_NEEDSXMIT;
		} else if (req->r_mrep == NULL) {
			/*
			 * Iff first send, start timing
			 * else turn timing off, backoff timer
			 * and divide congestion window by 2.
			 *
			 * It is possible for the so_pru_send() to
			 * block and for us to race a reply so we
			 * only do this if the reply field has not
			 * been filled in.  R_LOCKED will prevent
			 * the request from being ripped out from under
			 * us entirely.
			 *
			 * Record the last resent procnum to aid us
			 * in duplicate detection on receive.
			 */
			if ((req->r_flags & R_NEEDSXMIT) == 0) {
				/* true retransmit: back off and account */
				if (nfs_showrexmit)
					kprintf("X");
				if (++req->r_rexmit > NFS_MAXREXMIT)
					req->r_rexmit = NFS_MAXREXMIT;
				nmp->nm_maxasync_scaled >>= 1;
				if (nmp->nm_maxasync_scaled < NFS_MINASYNC_SCALED)
					nmp->nm_maxasync_scaled = NFS_MINASYNC_SCALED;
				nfsstats.rpcretries++;
				nmp->nm_lastreprocnum = req->r_procnum;
			} else {
				/* deferred first transmit finally went out */
				req->r_flags |= R_SENT;
				req->r_flags &= ~R_NEEDSXMIT;
			}
		}
	}
}
1902
984263bc
MD
1903/*
1904 * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and
1905 * wait for all requests to complete. This is used by forced unmounts
1906 * to terminate any outstanding RPCs.
f8565b0f
MD
1907 *
1908 * Locked requests cannot be canceled but will be marked for
1909 * soft-termination.
984263bc
MD
1910 */
1911int
e851b29e 1912nfs_nmcancelreqs(struct nfsmount *nmp)
984263bc
MD
1913{
1914 struct nfsreq *req;
165dba55 1915 int i;
984263bc 1916
165dba55 1917 crit_enter();
e21aec5b 1918 TAILQ_FOREACH(req, &nmp->nm_reqq, r_chain) {
f8565b0f 1919 if (req->r_mrep != NULL || (req->r_flags & R_SOFTTERM))
984263bc 1920 continue;
f8565b0f 1921 nfs_softterm(req, 0);
984263bc 1922 }
edb90c22 1923 /* XXX the other two queues as well */
165dba55 1924 crit_exit();
984263bc
MD
1925
1926 for (i = 0; i < 30; i++) {
165dba55 1927 crit_enter();
e21aec5b 1928 TAILQ_FOREACH(req, &nmp->nm_reqq, r_chain) {
984263bc
MD
1929 if (nmp == req->r_nmp)
1930 break;
1931 }
165dba55 1932 crit_exit();
984263bc
MD
1933 if (req == NULL)
1934 return (0);
377d4740 1935 tsleep(&lbolt, 0, "nfscancel", 0);
984263bc
MD
1936 }
1937 return (EBUSY);
1938}
1939
f8565b0f
MD
1940/*
1941 * Soft-terminate a request, effectively marking it as failed.
1942 *
1943 * Must be called from within a critical section.
1944 */
static void
nfs_softterm(struct nfsreq *rep, int islocked)
{
	/* mark failed, then run the common termination path */
	rep->r_flags |= R_SOFTTERM;
	nfs_hardterm(rep, islocked);
}
1951
984263bc 1952/*
f8565b0f 1953 * Hard-terminate a request, typically after getting a response.
54938b92 1954 *
f8565b0f
MD
1955 * The state machine can still decide to re-issue it later if necessary.
1956 *
1957 * Must be called from within a critical section.
984263bc 1958 */
static void
nfs_hardterm(struct nfsreq *rep, int islocked)
{
	struct nfsmount *nmp = rep->r_nmp;

	/*
	 * The nm_send count is decremented now to avoid deadlocks
	 * when the process in soreceive() hasn't yet managed to send
	 * its own request.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;
	}

	/*
	 * If we locked the request or nobody else has locked the request,
	 * and the request is async, we can move it to the reader thread's
	 * queue now and fix up the state.
	 *
	 * If we locked the request or nobody else has locked the request,
	 * we can wake up anyone blocked waiting for a response on the
	 * request.
	 */
	if (islocked || (rep->r_flags & R_LOCKED) == 0) {
		if ((rep->r_flags & (R_ONREQQ | R_ASYNC)) ==
		    (R_ONREQQ | R_ASYNC)) {
			/* migrate from the request queue to the rx queue */
			rep->r_flags &= ~R_ONREQQ;
			TAILQ_REMOVE(&nmp->nm_reqq, rep, r_chain);
			--nmp->nm_reqqlen;
			TAILQ_INSERT_TAIL(&nmp->nm_reqrxq, rep, r_chain);
			KKASSERT(rep->r_info->state == NFSM_STATE_TRY ||
				 rep->r_info->state == NFSM_STATE_WAITREPLY);
			rep->r_info->state = NFSM_STATE_PROCESSREPLY;
			nfssvc_iod_reader_wakeup(nmp);
			/* kick the writer if the queue has drained enough */
			if (TAILQ_FIRST(&nmp->nm_bioq) &&
			    nmp->nm_reqqlen <= nfs_maxasyncbio * 2 / 3) {
				nfssvc_iod_writer_wakeup(nmp);
			}
		}
		/* abort any receiver blocked waiting on this request */
		mtx_abort_ex_link(&nmp->nm_rxlock, &rep->r_link);
	}
}
2001
2002/*
2003 * Test for a termination condition pending on the process.
2004 * This is used for NFSMNT_INT mounts.
2005 */
2006int
dadab5e9 2007nfs_sigintr(struct nfsmount *nmp, struct nfsreq *rep, struct thread *td)
984263bc
MD
2008{
2009 sigset_t tmpset;
dadab5e9 2010 struct proc *p;
08f2f1bb 2011 struct lwp *lp;
984263bc
MD
2012
2013 if (rep && (rep->r_flags & R_SOFTTERM))
2014 return (EINTR);
2015 /* Terminate all requests while attempting a forced unmount. */
2016 if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)
2017 return (EINTR);
2018 if (!(nmp->nm_flag & NFSMNT_INT))
2019 return (0);
cd990953
MD
2020 /* td might be NULL YYY */
2021 if (td == NULL || (p = td->td_proc) == NULL)
984263bc
MD
2022 return (0);
2023
08f2f1bb 2024 lp = td->td_lwp;
aa6c3de6 2025 tmpset = lwp_sigpend(lp);
08f2f1bb 2026 SIGSETNAND(tmpset, lp->lwp_sigmask);
984263bc 2027 SIGSETNAND(tmpset, p->p_sigignore);
08f2f1bb 2028 if (SIGNOTEMPTY(tmpset) && NFSINT_SIGMASK(tmpset))
984263bc
MD
2029 return (EINTR);
2030
2031 return (0);
2032}
2033
/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 */
int
nfs_sndlock(struct nfsmount *nmp, struct nfsreq *rep)
{
	mtx_t mtx = &nmp->nm_txlock;
	struct thread *td;
	int slptimeo;
	int slpflag;
	int error;

	slpflag = 0;
	slptimeo = 0;
	td = rep ? rep->r_td : NULL;
	/* Interruptible mounts catch signals while sleeping for the lock. */
	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;

	while ((error = mtx_lock_ex_try(mtx)) != 0) {
		/* Bail out if a termination condition is pending. */
		if (nfs_sigintr(nmp, rep, td)) {
			error = EINTR;
			break;
		}
		error = mtx_lock_ex(mtx, "nfsndlck", slpflag, slptimeo);
		if (error == 0)
			break;
		/*
		 * After the first signal-interruptible sleep, fall back to
		 * an uninterruptible 2-second-tick retry.
		 */
		if (slpflag == PCATCH) {
			slpflag = 0;
			slptimeo = 2 * hz;
		}
	}
	/* Always fail if our request has been cancelled. */
	if (rep && (rep->r_flags & R_SOFTTERM)) {
		if (error == 0)
			mtx_unlock(mtx);
		error = EINTR;
	}
	return (error);
}
2076
2077/*
2078 * Unlock the stream socket for others.
2079 */
2080void
edb90c22 2081nfs_sndunlock(struct nfsmount *nmp)
984263bc 2082{
edb90c22 2083 mtx_unlock(&nmp->nm_txlock);
984263bc
MD
2084}
2085
edb90c22
MD
/*
 * Lock the receiver side of the socket.
 *
 * rep may be NULL.
 */
static int
nfs_rcvlock(struct nfsmount *nmp, struct nfsreq *rep)
{
	mtx_t mtx = &nmp->nm_rxlock;
	int slpflag;
	int slptimeo;
	int error;

	/*
	 * Unconditionally check for completion in case another nfsiod
	 * got the packet while the caller was blocked, before the caller
	 * called us.  Packet reception is handled by mainline code which
	 * is protected by the BGL at the moment.
	 *
	 * We do not strictly need the second check just before the
	 * tsleep(), but it's good defensive programming.
	 */
	if (rep && rep->r_mrep != NULL)
		return (EALREADY);

	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	else
		slpflag = 0;
	slptimeo = 0;

	while ((error = mtx_lock_ex_try(mtx)) != 0) {
		/* Honor pending termination conditions before sleeping. */
		if (nfs_sigintr(nmp, rep, (rep ? rep->r_td : NULL))) {
			error = EINTR;
			break;
		}
		/* Reply may have arrived while we were spinning. */
		if (rep && rep->r_mrep != NULL) {
			error = EALREADY;
			break;
		}

		/*
		 * NOTE: can return ENOLCK, but in that case rep->r_mrep
		 *	 will already be set.
		 */
		if (rep) {
			/*
			 * Use the linked form so nfs_hardterm() can abort
			 * this sleep via mtx_abort_ex_link().
			 */
			error = mtx_lock_ex_link(mtx, &rep->r_link,
						 "nfsrcvlk",
						 slpflag, slptimeo);
		} else {
			error = mtx_lock_ex(mtx, "nfsrcvlk", slpflag, slptimeo);
		}
		if (error == 0)
			break;

		/*
		 * If our reply was received while we were sleeping,
		 * then just return without taking the lock to avoid a
		 * situation where a single iod could 'capture' the
		 * receive lock.
		 */
		if (rep && rep->r_mrep != NULL) {
			error = EALREADY;
			break;
		}
		/* Degrade to an uninterruptible timed retry after PCATCH. */
		if (slpflag == PCATCH) {
			slpflag = 0;
			slptimeo = 2 * hz;
		}
	}
	if (error == 0) {
		/* Got the lock but the reply already arrived; release it. */
		if (rep && rep->r_mrep != NULL) {
			error = EALREADY;
			mtx_unlock(mtx);
		}
	}
	return (error);
}
2164
2165/*
2166 * Unlock the stream socket for others.
2167 */
2168static void
edb90c22 2169nfs_rcvunlock(struct nfsmount *nmp)
984263bc 2170{
edb90c22 2171 mtx_unlock(&nmp->nm_rxlock);
984263bc
MD
2172}
2173
2174/*
146c31a9 2175 * nfs_realign:
984263bc 2176 *
146c31a9
MD
2177 * Check for badly aligned mbuf data and realign by copying the unaligned
2178 * portion of the data into a new mbuf chain and freeing the portions
2179 * of the old chain that were replaced.
984263bc 2180 *
146c31a9
MD
2181 * We cannot simply realign the data within the existing mbuf chain
2182 * because the underlying buffers may contain other rpc commands and
2183 * we cannot afford to overwrite them.
984263bc 2184 *
146c31a9
MD
2185 * We would prefer to avoid this situation entirely. The situation does
2186 * not occur with NFS/UDP and is supposed to only occassionally occur
2187 * with TCP. Use vfs.nfs.realign_count and realign_test to check this.
2188 *
2189 * NOTE! MB_DONTWAIT cannot be used here. The mbufs must be acquired
2190 * because the rpc request OR reply cannot be thrown away. TCP NFS
2191 * mounts do not retry their RPCs unless the TCP connection itself
2192 * is dropped so throwing away a RPC will basically cause the NFS
2193 * operation to lockup indefinitely.
984263bc
MD
2194 */
2195static void
e851b29e 2196nfs_realign(struct mbuf **pm, int hsiz)
984263bc
MD
2197{
2198 struct mbuf *m;
2199 struct mbuf *n = NULL;
984263bc 2200
3bf6fec3
MD
2201 /*
2202 * Check for misalignemnt
2203 */
984263bc 2204 ++nfs_realign_test;
984263bc 2205 while ((m = *pm) != NULL) {
3bf6fec3 2206 if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3))
984263bc 2207 break;
984263bc
MD
2208 pm = &m->m_next;
2209 }
2210
2211 /*
3bf6fec3 2212 * If misalignment found make a completely new copy.
984263bc 2213 */
3bf6fec3 2214 if (m) {
984263bc 2215 ++nfs_realign_count;
3bf6fec3 2216 n = m_dup_data(m, MB_WAIT);
984263bc
MD
2217 m_freem(*pm);
2218 *pm = n;
2219 }
2220}
2221
2222#ifndef NFS_NOSERVER
2223
/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 *
 * Returns 0 on success (possibly with nd_repstat set for an RPC-level
 * error reply) or EBADRPC when the message is too malformed to answer.
 */
int
nfs_getreq(struct nfsrv_descript *nd, struct nfsd *nfsd, int has_header)
{
	int len, i;
	u_int32_t *tl;
	struct uio uio;
	struct iovec iov;
	caddr_t cp;
	u_int32_t nfsvers, auth_type;
	uid_t nickuid;
	int error = 0, ticklen;
	struct nfsuid *nuidp;
	struct timeval tvin, tvout;
	struct nfsm_info info;
#if 0			/* until encrypted keys are implemented */
	NFSKERBKEYSCHED_T keys;	/* stores key schedule */
#endif

	info.mrep = nd->nd_mrep;
	info.md = nd->nd_md;
	info.dpos = nd->nd_dpos;

	/*
	 * Dissect the RPC call header.  With a header we pull the xid and
	 * verify the message is an RPC call before the common fields.
	 */
	if (has_header) {
		NULLOUT(tl = nfsm_dissect(&info, 10 * NFSX_UNSIGNED));
		nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++);
		if (*tl++ != rpc_call) {
			m_freem(info.mrep);
			return (EBADRPC);
		}
	} else {
		NULLOUT(tl = nfsm_dissect(&info, 8 * NFSX_UNSIGNED));
	}
	nd->nd_repstat = 0;
	nd->nd_flag = 0;
	/* RPC version must match; otherwise answer with ERPCMISMATCH. */
	if (*tl++ != rpc_vers) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (*tl != nfs_prog) {
		nd->nd_repstat = EPROGUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	tl++;
	nfsvers = fxdr_unsigned(u_int32_t, *tl++);
	/* Only NFS v2 and v3 are served. */
	if (nfsvers < NFS_VER2 || nfsvers > NFS_VER3) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (nfsvers == NFS_VER3)
		nd->nd_flag = ND_NFSV3;
	nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++);
	if (nd->nd_procnum == NFSPROC_NULL)
		return (0);
	/* Reject out-of-range procedures (including old NQNFS leases). */
	if (nd->nd_procnum >= NFS_NPROCS ||
	    (nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
	    (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	/* Map v2 procedure numbers onto the v3 dispatch table. */
	if ((nd->nd_flag & ND_NFSV3) == 0)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	auth_type = *tl++;
	len = fxdr_unsigned(int, *tl++);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		m_freem(info.mrep);
		return (EBADRPC);
	}

	nd->nd_flag &= ~ND_KERBAUTH;
	/*
	 * Handle auth_unix or auth_kerb.
	 */
	if (auth_type == rpc_auth_unix) {
		/* Skip the machine name, bounded by NFS_MAXNAMLEN. */
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > NFS_MAXNAMLEN) {
			m_freem(info.mrep);
			return (EBADRPC);
		}
		ERROROUT(nfsm_adv(&info, nfsm_rndup(len)));
		NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
		/* Build the request credential from the AUTH_UNIX body. */
		bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
		nd->nd_cr.cr_ref = 1;
		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
		nd->nd_cr.cr_ruid = nd->nd_cr.cr_svuid = nd->nd_cr.cr_uid;
		nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
		nd->nd_cr.cr_rgid = nd->nd_cr.cr_svgid = nd->nd_cr.cr_gid;
		len = fxdr_unsigned(int, *tl);
		if (len < 0 || len > RPCAUTH_UNIXGIDS) {
			m_freem(info.mrep);
			return (EBADRPC);
		}
		NULLOUT(tl = nfsm_dissect(&info, (len + 2) * NFSX_UNSIGNED));
		/* Copy up to NGROUPS-1 supplemental gids; drop the rest. */
		for (i = 1; i <= len; i++)
			if (i < NGROUPS)
				nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
			else
				tl++;
		nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
		if (nd->nd_cr.cr_ngroups > 1)
			nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
		/* Skip the verifier body. */
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > RPCAUTH_MAXSIZ) {
			m_freem(info.mrep);
			return (EBADRPC);
		}
		if (len > 0) {
			ERROROUT(nfsm_adv(&info, nfsm_rndup(len)));
		}
	} else if (auth_type == rpc_auth_kerb) {
		switch (fxdr_unsigned(int, *tl++)) {
		case RPCAKN_FULLNAME:
			/* Stash the ticket for later authentication. */
			ticklen = fxdr_unsigned(int, *tl);
			*((u_int32_t *)nfsd->nfsd_authstr) = *tl;
			uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
			nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
			if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
				m_freem(info.mrep);
				return (EBADRPC);
			}
			uio.uio_offset = 0;
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			uio.uio_segflg = UIO_SYSSPACE;
			iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
			iov.iov_len = RPCAUTH_MAXSIZ - 4;
			ERROROUT(nfsm_mtouio(&info, &uio, uio.uio_resid));
			NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
			if (*tl++ != rpc_auth_kerb ||
			    fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
				kprintf("Bad kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			NULLOUT(cp = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
			tl = (u_int32_t *)cp;
			if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
				kprintf("Not fullname kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			cp += NFSX_UNSIGNED;
			bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
			nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
			nd->nd_flag |= ND_KERBFULL;
			nfsd->nfsd_flag |= NFSD_NEEDAUTH;
			break;
		case RPCAKN_NICKNAME:
			if (len != 2 * NFSX_UNSIGNED) {
				kprintf("Kerb nickname short\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nickuid = fxdr_unsigned(uid_t, *tl);
			NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
			if (*tl++ != rpc_auth_kerb ||
			    fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
				kprintf("Kerb nick verifier bad\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
			tvin.tv_sec = *tl++;
			tvin.tv_usec = *tl;

			/* Look up the cached credential for this nickname. */
			for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
			    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
				if (nuidp->nu_cr.cr_uid == nickuid &&
				    (!nd->nd_nam2 ||
				     netaddr_match(NU_NETFAM(nuidp),
				      &nuidp->nu_haddr, nd->nd_nam2)))
					break;
			}
			if (!nuidp) {
				nd->nd_repstat =
					(NFSERR_AUTHERR|AUTH_REJECTCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}

			/*
			 * Now, decrypt the timestamp using the session key
			 * and validate it.
			 */
#ifdef NFSKERB
			XXX
#else
			tvout.tv_sec = 0;
			tvout.tv_usec = 0;
#endif

			tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
			tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
			/* Reject expired or replayed (stale) timestamps. */
			if (nuidp->nu_expire < time_second ||
			    nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
			    (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
			     nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
				nuidp->nu_expire = 0;
				nd->nd_repstat =
				    (NFSERR_AUTHERR|AUTH_REJECTVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
			nd->nd_flag |= ND_KERBNICK;
		};
	} else {
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}

	/* Hand the parse position back to the caller. */
	nd->nd_md = info.md;
	nd->nd_dpos = info.dpos;
	return (0);
nfsmout:
	return (error);
}
2454
2455#endif
2456
b70ddfbf
MD
2457/*
2458 * Send a message to the originating process's terminal. The thread and/or
2459 * process may be NULL. YYY the thread should not be NULL but there may
2460 * still be some uio_td's that are still being passed as NULL through to
2461 * nfsm_request().
2462 */
984263bc 2463static int
dadab5e9 2464nfs_msg(struct thread *td, char *server, char *msg)
984263bc
MD
2465{
2466 tpr_t tpr;
2467
b70ddfbf 2468 if (td && td->td_proc)
dadab5e9 2469 tpr = tprintf_open(td->td_proc);
984263bc
MD
2470 else
2471 tpr = NULL;
2472 tprintf(tpr, "nfs server %s: %s\n", server, msg);
2473 tprintf_close(tpr);
2474 return (0);
2475}
2476
2477#ifndef NFS_NOSERVER
c6b43e93
MD
2478
2479void
2480nfsrv_rcv_upcall(struct socket *so, void *arg, int waitflag)
2481{
2482 struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
2483
2484 lwkt_gettoken(&slp->ns_token);
2485 nfsrv_rcv(so, arg, waitflag);
2486 lwkt_reltoken(&slp->ns_token);
2487}
2488
984263bc
MD
/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
 * Essentially do as much as possible non-blocking, else punt and it will
 * be called with MB_WAIT from an nfsd.
 *
 * slp->ns_token is held on call
 */
void
nfsrv_rcv(struct socket *so, void *arg, int waitflag)
{
	struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
	struct mbuf *m;
	struct sockaddr *nam;
	struct sockbuf sio;
	int flags, error;
	int nparallel_wakeup = 0;

	ASSERT_LWKT_TOKEN_HELD(&slp->ns_token);

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;

	/*
	 * Do not allow an infinite number of completed RPC records to build 
	 * up before we stop reading data from the socket.  Otherwise we could
	 * end up holding onto an unreasonable number of mbufs for requests
	 * waiting for service.
	 *
	 * This should give pretty good feedback to the TCP
	 * layer and prevents a memory crunch for other protocols.
	 *
	 * Note that the same service socket can be dispatched to several
	 * nfs servers simultaniously.
	 *
	 * the tcp protocol callback calls us with MB_DONTWAIT.  
	 * nfsd calls us with MB_WAIT (typically).
	 */
	if (waitflag == MB_DONTWAIT && slp->ns_numrec >= nfsd_waiting / 2 + 1) {
		slp->ns_flag |= SLP_NEEDQ;
		goto dorecs;
	}

	/*
	 * Handle protocol specifics to parse an RPC request.  We always
	 * pull from the socket using non-blocking I/O.
	 */
	if (so->so_type == SOCK_STREAM) {
		/*
		 * The data has to be read in an orderly fashion from a TCP
		 * stream, unlike a UDP socket.  It is possible for soreceive
		 * and/or nfsrv_getstream() to block, so make sure only one
		 * entity is messing around with the TCP stream at any given
		 * moment.  The receive sockbuf's lock in soreceive is not
		 * sufficient.
		 *
		 * Note that this procedure can be called from any number of
		 * NFS servers *OR* can be upcalled directly from a TCP
		 * protocol thread without the lock.
		 */
		if (slp->ns_flag & SLP_GETSTREAM) {
			/* Another entity owns the stream; queue for later. */
			slp->ns_flag |= SLP_NEEDQ;
			goto dorecs;
		}
		slp->ns_flag |= SLP_GETSTREAM;

		/*
		 * Do soreceive().  Pull out as much data as possible without
		 * blocking.
		 */
		sbinit(&sio, 1000000000);
		flags = MSG_DONTWAIT;
		error = so_pru_soreceive(so, &nam, NULL, &sio, NULL, &flags);
		if (error || sio.sb_mb == NULL) {
			if (error == EWOULDBLOCK)
				slp->ns_flag |= SLP_NEEDQ;
			else
				slp->ns_flag |= SLP_DISCONN;
			slp->ns_flag &= ~SLP_GETSTREAM;
			goto dorecs;
		}
		/* Append the received data to the raw stream buffer. */
		m = sio.sb_mb;
		if (slp->ns_rawend) {
			slp->ns_rawend->m_next = m;
			slp->ns_cc += sio.sb_cc;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = sio.sb_cc;
		}
		while (m->m_next)
			m = m->m_next;
		slp->ns_rawend = m;

		/*
		 * Now try and parse as many record(s) as we can out of the
		 * raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag, &nparallel_wakeup);
		if (error) {
			if (error == EPERM)
				slp->ns_flag |= SLP_DISCONN;
			else
				slp->ns_flag |= SLP_NEEDQ;
		}
		slp->ns_flag &= ~SLP_GETSTREAM;
	} else {
		/*
		 * For UDP soreceive typically pulls just one packet, loop
		 * to get the whole batch.
		 */
		do {
			sbinit(&sio, 1000000000);
			flags = MSG_DONTWAIT;
			error = so_pru_soreceive(so, &nam, NULL, &sio,
						 NULL, &flags);
			if (sio.sb_mb) {
				struct nfsrv_rec *rec;
				int mf = (waitflag & MB_DONTWAIT) ?
					    M_NOWAIT : M_WAITOK;
				rec = kmalloc(sizeof(struct nfsrv_rec),
					     M_NFSRVDESC, mf);
				if (!rec) {
					/* Allocation failed; drop packet. */
					if (nam)
						FREE(nam, M_SONAME);
					m_freem(sio.sb_mb);
					continue;
				}
				nfs_realign(&sio.sb_mb, 10 * NFSX_UNSIGNED);
				rec->nr_address = nam;
				rec->nr_packet = sio.sb_mb;
				STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link);
				++slp->ns_numrec;
				++nparallel_wakeup;
			}
			if (error) {
				if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
				    && error != EWOULDBLOCK) {
					slp->ns_flag |= SLP_DISCONN;
					goto dorecs;
				}
			}
		} while (sio.sb_mb);
	}

	/*
	 * If we were upcalled from the tcp protocol layer and we have
	 * fully parsed records ready to go, or there is new data pending,
	 * or something went wrong, try to wake up an nfsd thread to deal
	 * with it.
	 */
dorecs:
	if (waitflag == MB_DONTWAIT && (slp->ns_numrec > 0
	    || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)))) {
		nfsrv_wakenfsd(slp, nparallel_wakeup);
	}
}
2645
/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket.  The "waitflag" argument indicates whether or not it
 * can sleep.
 *
 * Returns 0 when no complete record marker/record is available yet,
 * EWOULDBLOCK when an mbuf copy failed, or EPERM on an impossible
 * record length (caller disconnects the stream).
 */
static int
nfsrv_getstream(struct nfssvc_sock *slp, int waitflag, int *countp)
{
	struct mbuf *m, **mpp;
	char *cp1, *cp2;
	int len;
	struct mbuf *om, *m2, *recm;
	u_int32_t recmark;

	for (;;) {
		if (slp->ns_reclen == 0) {
			/* Need a full 4-byte RPC record mark first. */
			if (slp->ns_cc < NFSX_UNSIGNED)
				return (0);
			m = slp->ns_raw;
			if (m->m_len >= NFSX_UNSIGNED) {
				bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
				m->m_data += NFSX_UNSIGNED;
				m->m_len -= NFSX_UNSIGNED;
			} else {
				/* Record mark straddles mbufs; gather bytes. */
				cp1 = (caddr_t)&recmark;
				cp2 = mtod(m, caddr_t);
				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
					while (m->m_len == 0) {
						m = m->m_next;
						cp2 = mtod(m, caddr_t);
					}
					*cp1++ = *cp2++;
					m->m_data++;
					m->m_len--;
				}
			}
			slp->ns_cc -= NFSX_UNSIGNED;
			recmark = ntohl(recmark);
			/* High bit of the mark flags the last fragment. */
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			if (slp->ns_reclen > NFS_MAXPACKET || slp->ns_reclen <= 0) {
				log(LOG_ERR, "%s (%d) from nfs client\n",
				    "impossible packet length",
				    slp->ns_reclen);
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 *
		 * Note that slp->ns_reclen may be 0.  Linux sometimes
		 * generates 0-length RPCs
		 */
		recm = NULL;
		if (slp->ns_cc == slp->ns_reclen) {
			/* The buffered data is exactly one fragment. */
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = NULL;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			/* Split the fragment off the front of the stream. */
			len = 0;
			m = slp->ns_raw;
			om = NULL;

			while (len < slp->ns_reclen) {
				if ((len + m->m_len) > slp->ns_reclen) {
					/* Fragment ends mid-mbuf; copy head. */
					m2 = m_copym(m, 0, slp->ns_reclen - len,
					    waitflag);
					if (m2) {
						if (om) {
							om->m_next = m2;
							recm = slp->ns_raw;
						} else
							recm = m2;
						m->m_data += slp->ns_reclen - len;
						m->m_len -= slp->ns_reclen - len;
						len = slp->ns_reclen;
					} else {
						return (EWOULDBLOCK);
					}
				} else if ((len + m->m_len) == slp->ns_reclen) {
					/* Fragment ends on an mbuf boundary. */
					om = m;
					len += m->m_len;
					m = m->m_next;
					recm = slp->ns_raw;
					om->m_next = NULL;
				} else {
					om = m;
					len += m->m_len;
					m = m->m_next;
				}
			}
			slp->ns_raw = m;
			slp->ns_cc -= len;
			slp->ns_reclen = 0;
		} else {
			/* Fragment is not fully buffered yet. */
			return (0);
		}

		/*
		 * Accumulate the fragments into a record.
		 */
		mpp = &slp->ns_frag;
		while (*mpp)
			mpp = &((*mpp)->m_next);
		*mpp = recm;
		if (slp->ns_flag & SLP_LASTFRAG) {
			/* Complete record: queue it for an nfsd. */
			struct nfsrv_rec *rec;
			int mf = (waitflag & MB_DONTWAIT) ? M_NOWAIT : M_WAITOK;
			rec = kmalloc(sizeof(struct nfsrv_rec), M_NFSRVDESC, mf);
			if (!rec) {
				m_freem(slp->ns_frag);
			} else {
				nfs_realign(&slp->ns_frag, 10 * NFSX_UNSIGNED);
				rec->nr_address = NULL;
				rec->nr_packet = slp->ns_frag;
				STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link);
				++slp->ns_numrec;
				++*countp;
			}
			slp->ns_frag = NULL;
		}
	}
}
2773
7f3ffbb4
MD
2774#ifdef INVARIANTS
2775
2776/*
2777 * Sanity check our mbuf chain.
2778 */
2779static void
2780nfs_checkpkt(struct mbuf *m, int len)
2781{
2782 int xlen = 0;
2783 while (m) {
2784 xlen += m->m_len;
2785 m = m->m_next;
2786 }
2787 if (xlen != len) {
2788 panic("nfs_checkpkt: len mismatch %d/%d mbuf %p\n",
2789 xlen, len, m);
2790 }
2791}
2792
2793#else
2794
2795static void
2796nfs_checkpkt(struct mbuf *m __unused, int len __unused)
2797{
2798}
2799
2800#endif
2801
984263bc
MD
/*
 * Parse an RPC header.
 *
 * Dequeues the next completed record from the service socket, builds a
 * request descriptor for it and runs nfs_getreq() on it.  Returns
 * ENOBUFS when no record is available, otherwise the nfs_getreq()
 * result.  On success *ndp receives the new descriptor.
 */
int
nfsrv_dorec(struct nfssvc_sock *slp, struct nfsd *nfsd,
	    struct nfsrv_descript **ndp)
{
	struct nfsrv_rec *rec;
	struct mbuf *m;
	struct sockaddr *nam;
	struct nfsrv_descript *nd;
	int error;

	*ndp = NULL;
	if ((slp->ns_flag & SLP_VALID) == 0 || !STAILQ_FIRST(&slp->ns_rec))
		return (ENOBUFS);
	/* Pop the record and take over ownership of its mbufs/address. */
	rec = STAILQ_FIRST(&slp->ns_rec);
	STAILQ_REMOVE_HEAD(&slp->ns_rec, nr_link);
	KKASSERT(slp->ns_numrec > 0);
	--slp->ns_numrec;
	nam = rec->nr_address;
	m = rec->nr_packet;
	kfree(rec, M_NFSRVDESC);
	MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript),
		M_NFSRVDESC, M_WAITOK);
	nd->nd_md = nd->nd_mrep = m;
	nd->nd_nam2 = nam;
	nd->nd_dpos = mtod(m, caddr_t);
	error = nfs_getreq(nd, nfsd, TRUE);
	if (error) {
		/*
		 * nfs_getreq() freed the mbuf chain on error; release the
		 * address and descriptor here.
		 */
		if (nam) {
			FREE(nam, M_SONAME);
		}
		kfree((caddr_t)nd, M_NFSRVDESC);
		return (error);
	}
	*ndp = nd;
	nfsd->nfsd_nd = nd;
	return (0);
}
2842
2843/*
52553028
MD
2844 * Try to assign service sockets to nfsd threads based on the number
2845 * of new rpc requests that have been queued on the service socket.
2846 *
2847 * If no nfsd's are available or additonal requests are pending, set the
2848 * NFSD_CHECKSLP flag so that one of the running nfsds will go look for
2849 * the work in the nfssvc_sock list when it is finished processing its
2850 * current work. This flag is only cleared when an nfsd can not find
2851 * any new work to perform.
984263bc
MD
2852 */
2853void
52553028 2854nfsrv_wakenfsd(struct nfssvc_sock *slp, int nparallel)
984263bc 2855{
40393ded 2856 struct nfsd *nd;
984263bc
MD
2857
2858 if ((slp->ns_flag & SLP_VALID) == 0)
2859 return;
52553028
MD
2860 if (nparallel <= 1)
2861 nparallel = 1;
ecd80f47 2862 TAILQ_FOREACH(nd, &nfsd_head, nfsd_chain) {
984263bc
MD
2863 if (nd->nfsd_flag & NFSD_WAITING) {
2864 nd->nfsd_flag &= ~NFSD_WAITING;
2865 if (nd->nfsd_slp)
2866 panic("nfsd wakeup");
2867 slp->ns_sref++;
2868 nd->nfsd_slp = slp;
2869 wakeup((caddr_t)nd);
52553028
MD
2870 if (--nparallel == 0)
2871 break;
984263bc
MD
2872 }
2873 }
52553028
MD
2874 if (nparallel) {
2875 slp->ns_flag |= SLP_DOREC;
2876 nfsd_head_flag |= NFSD_CHECKSLP;
2877 }
984263bc
MD
2878}
2879#endif /* NFS_NOSERVER */