2 * Copyright (c) 1989, 1991, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
37 * $FreeBSD: src/sys/nfs/nfs_socket.c,v 1.60.2.6 2003/03/26 01:44:46 alfred Exp $
38 * $DragonFly: src/sys/vfs/nfs/nfs_socket.c,v 1.45 2007/05/18 17:05:13 dillon Exp $
42 * Socket operations for use by nfs
45 #include <sys/param.h>
46 #include <sys/systm.h>
48 #include <sys/malloc.h>
49 #include <sys/mount.h>
50 #include <sys/kernel.h>
52 #include <sys/vnode.h>
53 #include <sys/fcntl.h>
54 #include <sys/protosw.h>
55 #include <sys/resourcevar.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/socketops.h>
59 #include <sys/syslog.h>
60 #include <sys/thread.h>
61 #include <sys/tprintf.h>
62 #include <sys/sysctl.h>
63 #include <sys/signalvar.h>
64 #include <sys/mutex.h>
66 #include <sys/signal2.h>
67 #include <sys/mutex2.h>
69 #include <netinet/in.h>
70 #include <netinet/tcp.h>
71 #include <sys/thread2.h>
77 #include "nfsm_subs.h"
/*
 * NOTE(review): this excerpt is a sampled dump -- each line below carries
 * its original source line number and many intervening lines are missing.
 * Code text is left untouched; only review comments are added.
 */
86 * Estimate rto for an nfs rpc sent via. an unreliable datagram.
87 * Use the mean and mean deviation of rtt for the appropriate type of rpc
88 * for the frequent rpcs and a default for the others.
89 * The justification for doing "other" this way is that these rpcs
90 * happen so infrequently that timer est. would probably be stale.
91 * Also, since many of these rpcs are
92 * non-idempotent, a conservative timeout is desired.
93 * getattr, lookup - A+2D
/*
 * Retransmit-timeout estimate built from the scaled smoothed rtt and
 * deviation (srtt kept <<3, sdrtt <<2 per the gain comments further down);
 * timer class 0 falls back to the mount's static nm_timeo.
 */
97 #define NFS_RTO(n, t) \
98 ((t) == 0 ? (n)->nm_timeo : \
100 (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
101 ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
102 #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
103 #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
106 * Defines which timer to use for the procnum.
/* proct[proc]-1 indexes nm_srtt/nm_sdrtt; 0 means the rpc is not timed. */
113 static int proct[NFS_NPROCS] = {
114 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
/* Tunables and debug counters, exported via sysctl vfs.nfs.* below. */
118 static int nfs_realign_test;
119 static int nfs_realign_count;
120 static int nfs_bufpackets = 4;
121 static int nfs_timer_raced;
123 SYSCTL_DECL(_vfs_nfs);
125 SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0, "");
126 SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0, "");
127 SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, "");
/* Forward declarations for the client rpc state machine (nfs_request_*). */
129 static int nfs_request_setup(nfsm_info_t info);
130 static int nfs_request_auth(struct nfsreq *rep);
131 static int nfs_request_try(struct nfsreq *rep);
132 static int nfs_request_waitreply(struct nfsreq *rep);
133 static int nfs_request_processreply(nfsm_info_t info, int);
134 static void nfs_async_return(struct nfsmount *nmp, struct nfsreq *rep);
137 * There is a congestion window for outstanding rpcs maintained per mount
138 * point. The cwnd size is adjusted in roughly the way that:
139 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
140 * SIGCOMM '88". ACM, August 1988.
141 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
142 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
143 * of rpcs is in progress.
144 * (The sent count and cwnd are scaled for integer arith.)
145 * Variants of "slow start" were tried and were found to be too much of a
146 * performance hit (ave. rtt 3 times larger),
147 * I suspect due to the large rtt that nfs rpcs have.
/* cwnd/nm_sent are scaled by NFS_CWNDSCALE for integer arithmetic. */
149 #define NFS_CWNDSCALE 256
150 #define NFS_MAXCWND (NFS_CWNDSCALE * 32)
/* Exponential backoff multipliers indexed by retry count (see TRYLATER). */
151 static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
153 struct nfsrtt nfsrtt;
154 struct callout nfs_timer_handle;
156 static int nfs_msg (struct thread *,char *,char *);
157 static int nfs_rcvlock (struct nfsmount *nmp, struct nfsreq *myreq);
158 static void nfs_rcvunlock (struct nfsmount *nmp);
159 static void nfs_realign (struct mbuf **pm, int hsiz);
160 static int nfs_receive (struct nfsmount *nmp, struct nfsreq *rep,
161 struct sockaddr **aname, struct mbuf **mp);
162 static void nfs_softterm (struct nfsreq *rep);
163 static int nfs_reconnect (struct nfsmount *nmp, struct nfsreq *rep);
165 static int nfsrv_getstream (struct nfssvc_sock *, int, int *);
166 static void nfs_timer_req(struct nfsreq *req);
/* NFSv3 server dispatch table (initializer not visible in this excerpt). */
168 int (*nfsrv3_procs[NFS_NPROCS]) (struct nfsrv_descript *nd,
169 struct nfssvc_sock *slp,
171 struct mbuf **mreqp) = {
199 #endif /* NFS_NOSERVER */
/*
 * nfs_connect: create, optionally bind/connect, and tune the socket for a
 * mount.  Also seeds the per-mount srtt/sdrtt and congestion window state.
 * NOTE(review): many lines are missing from this excerpt (error checks,
 * closing braces); code text below is untouched.
 */
202 * Initialize sockets and congestion for a new NFS connection.
203 * We do not free the sockaddr if error.
206 nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
209 int error, rcvreserve, sndreserve;
211 struct sockaddr *saddr;
212 struct sockaddr_in *sin;
213 struct thread *td = &thread0; /* only used for socreate and sobind */
217 error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
218 nmp->nm_soproto, td);
222 nmp->nm_soflags = so->so_proto->pr_flags;
/*
 * Reserved-port dance: temporarily set IP_PORTRANGE_LOW, bind to an
 * anonymous port, then restore IP_PORTRANGE_DEFAULT.
 */
225 * Some servers require that the client port be a reserved port number.
227 if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
230 struct sockaddr_in ssin;
232 bzero(&sopt, sizeof sopt);
233 ip = IP_PORTRANGE_LOW;
234 sopt.sopt_level = IPPROTO_IP;
235 sopt.sopt_name = IP_PORTRANGE;
236 sopt.sopt_val = (void *)&ip;
237 sopt.sopt_valsize = sizeof(ip);
239 error = sosetopt(so, &sopt);
242 bzero(&ssin, sizeof ssin);
244 sin->sin_len = sizeof (struct sockaddr_in);
245 sin->sin_family = AF_INET;
246 sin->sin_addr.s_addr = INADDR_ANY;
247 sin->sin_port = htons(0);
248 error = sobind(so, (struct sockaddr *)sin, td);
251 bzero(&sopt, sizeof sopt);
252 ip = IP_PORTRANGE_DEFAULT;
253 sopt.sopt_level = IPPROTO_IP;
254 sopt.sopt_name = IP_PORTRANGE;
255 sopt.sopt_val = (void *)&ip;
256 sopt.sopt_valsize = sizeof(ip);
258 error = sosetopt(so, &sopt);
264 * Protocols that do not require connections may be optionally left
265 * unconnected for servers that reply from a port other than NFS_PORT.
267 if (nmp->nm_flag & NFSMNT_NOCONN) {
268 if (nmp->nm_soflags & PR_CONNREQUIRED) {
273 error = soconnect(so, nmp->nm_nam, td);
278 * Wait for the connection to complete. Cribbed from the
279 * connect system call but with the wait timing out so
280 * that interruptible mounts don't hang here for a long time.
283 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
284 (void) tsleep((caddr_t)&so->so_timeo, 0,
286 if ((so->so_state & SS_ISCONNECTING) &&
287 so->so_error == 0 && rep &&
288 (error = nfs_sigintr(nmp, rep, rep->r_td)) != 0){
289 so->so_state &= ~SS_ISCONNECTING;
295 error = so->so_error;
/* 5 second send/receive timeouts so a dead server can be noticed. */
302 so->so_rcv.ssb_timeo = (5 * hz);
303 so->so_snd.ssb_timeo = (5 * hz);
306 * Get buffer reservation size from sysctl, but impose reasonable
309 pktscale = nfs_bufpackets;
/*
 * Socket buffer reservations: datagram/seqpacket sockets size for whole
 * packets; stream sockets also account for the 32-bit RPC record mark.
 */
315 if (nmp->nm_sotype == SOCK_DGRAM) {
316 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale;
317 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
318 NFS_MAXPKTHDR) * pktscale;
319 } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
320 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale;
321 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
322 NFS_MAXPKTHDR) * pktscale;
324 if (nmp->nm_sotype != SOCK_STREAM)
325 panic("nfscon sotype");
326 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
330 bzero(&sopt, sizeof sopt);
331 sopt.sopt_level = SOL_SOCKET;
332 sopt.sopt_name = SO_KEEPALIVE;
333 sopt.sopt_val = &val;
334 sopt.sopt_valsize = sizeof val;
338 if (so->so_proto->pr_protocol == IPPROTO_TCP) {
342 bzero(&sopt, sizeof sopt);
343 sopt.sopt_level = IPPROTO_TCP;
344 sopt.sopt_name = TCP_NODELAY;
345 sopt.sopt_val = &val;
346 sopt.sopt_valsize = sizeof val;
350 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
351 sizeof (u_int32_t)) * pktscale;
352 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
353 sizeof (u_int32_t)) * pktscale;
355 error = soreserve(so, sndreserve, rcvreserve,
356 &td->td_proc->p_rlimit[RLIMIT_SBSIZE]);
359 so->so_rcv.ssb_flags |= SSB_NOINTR;
360 so->so_snd.ssb_flags |= SSB_NOINTR;
362 /* Initialize other non-zero congestion variables */
363 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
364 nmp->nm_srtt[3] = (NFS_TIMEO << 3);
365 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
366 nmp->nm_sdrtt[3] = 0;
367 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
369 nmp->nm_timeouts = 0;
/*
 * nfs_reconnect: retry nfs_connect() until it succeeds (sleeping on lbolt
 * between attempts, aborting on EINTR/ERESTART), then flag every request
 * still queued on the mount for retransmission.
 */
379 * Called when a connection is broken on a reliable protocol.
380 * - clean up the old socket
381 * - nfs_connect() again
382 * - set R_MUSTRESEND for all outstanding requests on mount point
383 * If this fails the mount point is DEAD!
384 * nb: Must be called with the nfs_sndlock() set on the mount point.
387 nfs_reconnect(struct nfsmount *nmp, struct nfsreq *rep)
393 while ((error = nfs_connect(nmp, rep)) != 0) {
394 if (error == EINTR || error == ERESTART)
396 (void) tsleep((caddr_t)&lbolt, 0, "nfscon", 0);
400 * Loop through outstanding request list and fix up all requests
404 TAILQ_FOREACH(req, &nmp->nm_reqq, r_chain) {
405 KKASSERT(req->r_nmp == nmp);
406 req->r_flags |= R_MUSTRESEND;
/* nfs_disconnect: shut down and close the mount's socket. */
413 * NFS disconnect. Clean up and unlink.
416 nfs_disconnect(struct nfsmount *nmp)
423 soshutdown(so, SHUT_RDWR);
424 soclose(so, FNONBLOCK);
/*
 * nfs_safedisconnect: disconnect while holding the receive lock so no
 * receiver is mid-record (rep == NULL -> unconditional lock acquisition).
 */
429 nfs_safedisconnect(struct nfsmount *nmp)
431 nfs_rcvlock(nmp, NULL);
/*
 * nfs_send: transmit one rpc mbuf chain via so_pru_sosend(), distinguishing
 * client (rep != NULL) from server callers for error handling.  ENOBUFS on
 * a datagram socket is treated as transient; other client-side errors set
 * R_MUSTRESEND so the timer retransmits.
 * NOTE(review): lines are missing from this excerpt; code text untouched.
 */
437 * This is the nfs send routine. For connection based socket types, it
438 * must be called with an nfs_sndlock() on the socket.
439 * "rep == NULL" indicates that it has been called from a server.
440 * For the client side:
441 * - return EINTR if the RPC is terminated, 0 otherwise
442 * - set R_MUSTRESEND if the send fails for any reason
443 * - do any cleanup required by recoverable socket errors (?)
444 * For the server side:
445 * - return EINTR or ERESTART if interrupted by a signal
446 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
447 * - do any cleanup required by recoverable socket errors (?)
450 nfs_send(struct socket *so, struct sockaddr *nam, struct mbuf *top,
453 struct sockaddr *sendnam;
454 int error, soflags, flags;
/* Already terminated rpc: drop the mbufs and get out. */
457 if (rep->r_flags & R_SOFTTERM) {
461 if ((so = rep->r_nmp->nm_so) == NULL) {
462 rep->r_flags |= R_MUSTRESEND;
466 rep->r_flags &= ~R_MUSTRESEND;
467 soflags = rep->r_nmp->nm_soflags;
469 soflags = so->so_proto->pr_flags;
470 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
474 if (so->so_type == SOCK_SEQPACKET)
479 error = so_pru_sosend(so, sendnam, NULL, top, NULL, flags,
482 * ENOBUFS for dgram sockets is transient and non fatal.
483 * No need to log, and no need to break a soft mount.
485 if (error == ENOBUFS && so->so_type == SOCK_DGRAM) {
487 if (rep) /* do backoff retransmit on client */
488 rep->r_flags |= R_MUSTRESEND;
493 log(LOG_INFO, "nfs send error %d for server %s\n",error,
494 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
496 * Deal with errors for the client side.
498 if (rep->r_flags & R_SOFTTERM)
501 rep->r_flags |= R_MUSTRESEND;
503 log(LOG_INFO, "nfsd send error %d\n", error);
506 * Handle any recoverable (soft) socket errors here. (?)
508 if (error != EINTR && error != ERESTART &&
509 error != EWOULDBLOCK && error != EPIPE)
/*
 * nfs_receive: pull one complete rpc record off the socket into *mp.
 * Stream sockets read the 4-byte RPC record mark first, then MSG_WAITALL
 * the remainder; datagram sockets take whole packets.  Handles reconnect
 * and retransmit of pending requests while holding the send lock.
 * NOTE(review): lines are missing from this excerpt; code text untouched.
 */
516 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
517 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
518 * Mark and consolidate the data into a new mbuf list.
519 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
521 * For SOCK_STREAM we must be very careful to read an entire record once
522 * we have read any of it, even if the system call has been interrupted.
525 nfs_receive(struct nfsmount *nmp, struct nfsreq *rep,
526 struct sockaddr **aname, struct mbuf **mp)
533 struct mbuf *control;
535 struct sockaddr **getnam;
536 int error, sotype, rcvflg;
537 struct thread *td = curthread; /* XXX */
540 * Set up arguments for soreceive()
544 sotype = nmp->nm_sotype;
547 * For reliable protocols, lock against other senders/receivers
548 * in case a reconnect is necessary.
549 * For SOCK_STREAM, first get the Record Mark to find out how much
550 * more there is to get.
551 * We must lock the socket against other receivers
552 * until we have an entire rpc request/reply.
554 if (sotype != SOCK_DGRAM) {
555 error = nfs_sndlock(nmp, rep);
560 * Check for fatal errors and resending request.
563 * Ugh: If a reconnect attempt just happened, nm_so
564 * would have changed. NULL indicates a failed
565 * attempt that has essentially shut down this
568 if (rep && (rep->r_mrep || (rep->r_flags & R_SOFTTERM))) {
574 error = nfs_reconnect(nmp, rep);
/* Retransmit a copy of r_mreq for any request flagged R_MUSTRESEND. */
581 while (rep && (rep->r_flags & R_MUSTRESEND)) {
582 m = m_copym(rep->r_mreq, 0, M_COPYALL, MB_WAIT);
583 nfsstats.rpcretries++;
584 error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
586 if (error == EINTR || error == ERESTART ||
587 (error = nfs_reconnect(nmp, rep)) != 0) {
595 if (sotype == SOCK_STREAM) {
597 * Get the length marker from the stream
599 aio.iov_base = (caddr_t)&len;
600 aio.iov_len = sizeof(u_int32_t);
603 auio.uio_segflg = UIO_SYSSPACE;
604 auio.uio_rw = UIO_READ;
606 auio.uio_resid = sizeof(u_int32_t);
609 rcvflg = MSG_WAITALL;
610 error = so_pru_soreceive(so, NULL, &auio, NULL,
612 if (error == EWOULDBLOCK && rep) {
613 if (rep->r_flags & R_SOFTTERM)
616 } while (error == EWOULDBLOCK);
618 if (error == 0 && auio.uio_resid > 0) {
620 * Only log short packets if not EOF
622 if (auio.uio_resid != sizeof(u_int32_t))
624 "short receive (%d/%d) from nfs server %s\n",
625 (int)(sizeof(u_int32_t) - auio.uio_resid),
626 (int)sizeof(u_int32_t),
627 nmp->nm_mountp->mnt_stat.f_mntfromname);
/* Strip the last-fragment bit from the RPC record mark. */
632 len = ntohl(len) & ~0x80000000;
634 * This is SERIOUS! We are out of sync with the sender
635 * and forcing a disconnect/reconnect is all I can do.
637 if (len > NFS_MAXPACKET) {
638 log(LOG_ERR, "%s (%d) from nfs server %s\n",
639 "impossible packet length",
641 nmp->nm_mountp->mnt_stat.f_mntfromname);
647 * Get the rest of the packet as an mbuf chain
651 rcvflg = MSG_WAITALL;
652 error = so_pru_soreceive(so, NULL, NULL, &sio,
654 } while (error == EWOULDBLOCK || error == EINTR ||
656 if (error == 0 && sio.sb_cc != len) {
659 "short receive (%d/%d) from nfs server %s\n",
660 len - auio.uio_resid, len,
661 nmp->nm_mountp->mnt_stat.f_mntfromname);
667 * Non-stream, so get the whole packet by not
668 * specifying MSG_WAITALL and by specifying a large
671 * We have no use for control msg., but must grab them
672 * and then throw them away so we know what is going
675 sbinit(&sio, 100000000);
678 error = so_pru_soreceive(so, NULL, NULL, &sio,
682 if (error == EWOULDBLOCK && rep) {
683 if (rep->r_flags & R_SOFTTERM) {
688 } while (error == EWOULDBLOCK ||
689 (error == 0 && sio.sb_mb == NULL && control));
690 if ((rcvflg & MSG_EOR) == 0)
692 if (error == 0 && sio.sb_mb == NULL)
/* Fatal receive error on a reliable socket: force a reconnect. */
698 if (error && error != EINTR && error != ERESTART) {
701 if (error != EPIPE) {
703 "receive error %d from nfs server %s\n",
705 nmp->nm_mountp->mnt_stat.f_mntfromname);
707 error = nfs_sndlock(nmp, rep);
709 error = nfs_reconnect(nmp, rep);
/* SOCK_DGRAM path: take a whole datagram, no record mark needed. */
717 if ((so = nmp->nm_so) == NULL)
719 if (so->so_state & SS_ISCONNECTED)
723 sbinit(&sio, 100000000);
726 error = so_pru_soreceive(so, getnam, NULL, &sio,
728 if (error == EWOULDBLOCK && rep &&
729 (rep->r_flags & R_SOFTTERM)) {
733 } while (error == EWOULDBLOCK);
742 * Search for any mbufs that are not a multiple of 4 bytes long
743 * or with m_data not longword aligned.
744 * These could cause pointer alignment problems, so copy them to
745 * well aligned mbufs.
747 nfs_realign(mp, 5 * NFSX_UNSIGNED);
/*
 * nfs_reply: receive rpc replies, match them by xid against nm_reqq, and
 * attach the reply mbufs to the matched request.  On a match it also logs
 * rtt statistics, does the additive cwnd increase, and updates the srtt/
 * sdrtt estimators.  Loops until myrep's own reply arrives (or, for the
 * helper thread with myrep == NULL, until nm_reqrxq has work).
 * NOTE(review): lines are missing from this excerpt; code text untouched.
 */
752 * Implement receipt of reply on a socket.
754 * We must search through the list of received datagrams matching them
755 * with outstanding requests using the xid, until ours is found.
757 * If myrep is NULL we process packets on the socket until
758 * interrupted or until nm_reqrxq is non-empty.
762 nfs_reply(struct nfsmount *nmp, struct nfsreq *myrep)
765 struct sockaddr *nam;
769 struct nfsm_info info;
773 * Loop around until we get our own reply
777 * Lock against other receivers so that I don't get stuck in
778 * sbwait() after someone else has received my reply for me.
779 * Also necessary for connection based protocols to avoid
780 * race conditions during a reconnect.
782 * If nfs_rcvlock() returns EALREADY, that means that
783 * the reply has already been recieved by another
784 * process and we can return immediately. In this
785 * case, the lock is not taken to avoid races with
790 error = nfs_rcvlock(nmp, myrep);
791 if (error == EALREADY)
797 * If myrep is NULL we are the receiver helper thread.
798 * Stop waiting for incoming replies if there are
799 * replies sitting on reqrxq.
801 if (myrep == NULL && TAILQ_FIRST(&nmp->nm_reqrxq)) {
807 * Get the next Rpc reply off the socket
809 error = nfs_receive(nmp, myrep, &nam, &info.mrep);
813 * Ignore routing errors on connectionless protocols??
815 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
816 if (nmp->nm_so == NULL)
818 nmp->nm_so->so_error = 0;
827 * Get the xid and check that it is an rpc reply
830 info.dpos = mtod(info.md, caddr_t);
831 NULLOUT(tl = nfsm_dissect(&info, 2*NFSX_UNSIGNED));
833 if (*tl != rpc_reply) {
834 nfsstats.rpcinvalid++;
842 * Loop through the request list to match up the reply
843 * Iff no match, just drop the datagram. On match, set
844 * r_mrep atomically to prevent the timer from messing
845 * around with the request after we have exited the critical
849 TAILQ_FOREACH(rep, &nmp->nm_reqq, r_chain) {
850 if (rep->r_mrep == NULL && rxid == rep->r_xid)
856 * Fill in the rest of the reply if we found a match.
860 rep->r_dpos = info.dpos;
/* Record a sample in the round-trip-time log (nfsrtt ring buffer). */
864 rt = &nfsrtt.rttl[nfsrtt.pos];
865 rt->proc = rep->r_procnum;
866 rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
867 rt->sent = nmp->nm_sent;
868 rt->cwnd = nmp->nm_cwnd;
869 rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
870 rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
871 rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
872 getmicrotime(&rt->tstamp);
873 if (rep->r_flags & R_TIMING)
874 rt->rtt = rep->r_rtt;
877 nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
880 * Update congestion window.
881 * Do the additive increase of
884 if (nmp->nm_cwnd <= nmp->nm_sent) {
886 (NFS_CWNDSCALE * NFS_CWNDSCALE +
887 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
888 if (nmp->nm_cwnd > NFS_MAXCWND)
889 nmp->nm_cwnd = NFS_MAXCWND;
891 crit_enter(); /* nfs_timer interlock for nm_sent */
892 if (rep->r_flags & R_SENT) {
893 rep->r_flags &= ~R_SENT;
894 nmp->nm_sent -= NFS_CWNDSCALE;
898 * Update rtt using a gain of 0.125 on the mean
899 * and a gain of 0.25 on the deviation.
901 if (rep->r_flags & R_TIMING) {
903 * Since the timer resolution of
904 * NFS_HZ is so course, it can often
905 * result in r_rtt == 0. Since
906 * r_rtt == N means that the actual
907 * rtt is between N+dt and N+2-dt ticks,
911 t1 -= (NFS_SRTT(rep) >> 3);
915 t1 -= (NFS_SDRTT(rep) >> 2);
916 NFS_SDRTT(rep) += t1;
918 nmp->nm_timeouts = 0;
919 rep->r_mrep = info.mrep;
922 * Wakeup anyone waiting explicitly for this reply.
924 mtx_abort_ex_link(&rep->r_nmp->nm_rxlock, &rep->r_link);
927 * Asynchronous replies are bound-over to the
928 * rxthread. Note that nmp->nm_reqqlen is not
929 * decremented until the rxthread has finished
932 * async is sometimes temporarily turned off to
935 if (rep->r_info && rep->r_info->async) {
936 KKASSERT(rep->r_info->state ==
937 NFSM_STATE_WAITREPLY ||
938 rep->r_info->state ==
940 nfs_async_return(nmp, rep);
944 * If not matched to a request, drop it.
945 * If it's mine, get out.
948 nfsstats.rpcunexpected++;
951 } else if (rep == myrep) {
952 if (rep->r_mrep == NULL)
953 panic("nfsreply nil");
/*
 * nfs_request: drive the rpc through its state machine
 * (SETUP -> AUTH -> TRY -> WAITREPLY -> PROCESSREPLY -> DONE).
 * PROCESSREPLY may loop back to AUTH (ENEEDAUTH) or TRY (EAGAIN).
 * Returns info->error once DONE, otherwise EINPROGRESS.
 * NOTE(review): lines are missing from this excerpt; code text untouched.
 */
960 * Run the request state machine until the target state is reached
961 * or a fatal error occurs. The target state is not run. Specifying
962 * a target of NFSM_STATE_DONE runs the state machine until the rpc
965 * EINPROGRESS is returned for all states other then the DONE state,
966 * indicating that the rpc is still in progress.
969 nfs_request(struct nfsm_info *info, nfsm_state_t bstate, nfsm_state_t estate)
971 struct nfsmount *nmp = info->nmp;
974 while (info->state >= bstate && info->state < estate) {
975 switch(info->state) {
976 case NFSM_STATE_SETUP:
978 * Setup the nfsreq. Any error which occurs during
979 * this state is fatal.
981 info->error = nfs_request_setup(info);
983 info->state = NFSM_STATE_DONE;
984 return (info->error);
/* Point the request's reply cursors back at the caller's nfsm_info. */
987 req->r_mrp = &info->mrep;
988 req->r_mdp = &info->md;
989 req->r_dposp = &info->dpos;
990 info->state = NFSM_STATE_AUTH;
993 case NFSM_STATE_AUTH:
995 * Authenticate the nfsreq. Any error which occurs
996 * during this state is fatal.
998 info->error = nfs_request_auth(info->req);
1000 info->state = NFSM_STATE_DONE;
1001 return (info->error);
1003 info->state = NFSM_STATE_TRY;
1006 case NFSM_STATE_TRY:
1008 * Transmit or retransmit attempt. An error in this
1009 * state is ignored and we always move on to the
1012 * This can trivially race the receiver if the
1013 * request is asynchronous. Temporarily turn
1014 * off async mode so the structure doesn't get
1015 * ripped out from under us, and resolve the
1020 info->error = nfs_request_try(info->req);
1023 KKASSERT(info->state == NFSM_STATE_TRY);
1024 if (info->req->r_mrep)
1025 nfs_async_return(nmp, info->req);
1027 info->state = NFSM_STATE_WAITREPLY;
1030 info->error = nfs_request_try(info->req);
1031 info->state = NFSM_STATE_WAITREPLY;
1035 * The backend can rip the request out from under
1036 * is at this point. If we were async the estate
1037 * will be set to WAITREPLY. Return immediately.
1039 if (estate == NFSM_STATE_WAITREPLY)
1040 return (EINPROGRESS);
1042 case NFSM_STATE_WAITREPLY:
1044 * Wait for a reply or timeout and move on to the
1045 * next state. The error returned by this state
1046 * is passed to the processing code in the next
1049 info->error = nfs_request_waitreply(info->req);
1050 info->state = NFSM_STATE_PROCESSREPLY;
1052 case NFSM_STATE_PROCESSREPLY:
1054 * Process the reply or timeout. Errors which occur
1055 * in this state may cause the state machine to
1056 * go back to an earlier state, and are fatal
1059 info->error = nfs_request_processreply(info,
1061 switch(info->error) {
1063 info->state = NFSM_STATE_AUTH;
1066 info->state = NFSM_STATE_TRY;
1070 * Operation complete, with or without an
1071 * error. We are done.
1074 info->state = NFSM_STATE_DONE;
1075 return (info->error);
1078 case NFSM_STATE_DONE:
1080 * Shouldn't be reached
1082 return (info->error);
1088 * If we are done return the error code (if any).
1089 * Otherwise return EINPROGRESS.
1091 if (info->state == NFSM_STATE_DONE)
1092 return (info->error);
1093 return (EINPROGRESS);
/*
 * nfs_request_setup: allocate and fill in the nfsreq for info, rejecting
 * the rpc outright if a forced unmount is in progress.  r_info is set only
 * for async requests so replies are completed by the helper threads.
 */
1097 * nfs_request - goes something like this
1098 * - fill in request struct
1099 * - links it into list
1100 * - calls nfs_send() for first transmit
1101 * - calls nfs_receive() to get reply
1102 * - break down rpc header and return with nfs reply pointed to
1104 * nb: always frees up mreq mbuf list
1107 nfs_request_setup(nfsm_info_t info)
1110 struct nfsmount *nmp;
1115 * Reject requests while attempting a forced unmount.
1117 if (info->vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) {
1118 m_freem(info->mreq);
1122 nmp = VFSTONFS(info->vp->v_mount);
1123 req = kmalloc(sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
1125 req->r_vp = info->vp;
1126 req->r_td = info->td;
1127 req->r_procnum = info->procnum;
1135 req->r_mrest = info->mreq;
1136 req->r_mrest_len = i;
1137 req->r_cred = info->cred;
1140 * The presence of a non-NULL r_info in req indicates
1141 * async completion via our helper threads. See the receiver
1144 req->r_info = info->async ? info : NULL;
/*
 * nfs_request_auth: build the RPC header with credentials (Kerberos nick
 * verifier when NFSMNT_KERB, otherwise AUTH_UNIX) via nfsm_rpchead(), and
 * for stream sockets prepend the 4-byte Sun RPC record mark.
 * NOTE(review): lines are missing from this excerpt; code text untouched.
 */
1150 nfs_request_auth(struct nfsreq *rep)
1152 struct nfsmount *nmp = rep->r_nmp;
1154 char nickv[RPCX_NICKVERF];
1155 int error = 0, auth_len, auth_type;
1158 char *auth_str, *verf_str;
1162 rep->r_failed_auth = 0;
1165 * Get the RPC header with authorization.
1167 verf_str = auth_str = NULL;
1168 if (nmp->nm_flag & NFSMNT_KERB) {
1170 verf_len = sizeof (nickv);
1171 auth_type = RPCAUTH_KERB4;
1172 bzero((caddr_t)rep->r_key, sizeof(rep->r_key));
1173 if (rep->r_failed_auth ||
1174 nfs_getnickauth(nmp, cred, &auth_str, &auth_len,
1175 verf_str, verf_len)) {
1176 error = nfs_getauth(nmp, rep, cred, &auth_str,
1177 &auth_len, verf_str, &verf_len, rep->r_key);
1179 m_freem(rep->r_mrest);
1180 rep->r_mrest = NULL;
1181 kfree((caddr_t)rep, M_NFSREQ);
/* AUTH_UNIX: auth_len covers uid/gid plus up to nm_numgrps groups. */
1186 auth_type = RPCAUTH_UNIX;
1187 if (cred->cr_ngroups < 1)
1188 panic("nfsreq nogrps");
1189 auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
1190 nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
1193 m = nfsm_rpchead(cred, nmp->nm_flag, rep->r_procnum, auth_type,
1194 auth_len, auth_str, verf_len, verf_str,
1195 rep->r_mrest, rep->r_mrest_len, &rep->r_mheadend, &xid);
1196 rep->r_mrest = NULL;
1198 kfree(auth_str, M_TEMP);
1201 * For stream protocols, insert a Sun RPC Record Mark.
1203 if (nmp->nm_sotype == SOCK_STREAM) {
1204 M_PREPEND(m, NFSX_UNSIGNED, MB_WAIT);
1206 kfree(rep, M_NFSREQ);
/* Record mark: high bit = last fragment, low 31 bits = fragment length. */
1209 *mtod(m, u_int32_t *) = htonl(0x80000000 |
1210 (m->m_pkthdr.len - NFSX_UNSIGNED));
/*
 * nfs_request_try: queue the request on nm_reqq and attempt the first
 * transmit, honoring the congestion window for datagram mounts.  R_MASKTIMER
 * holds the retransmit timer off until setup completes; R_SENT accounts the
 * request against nm_sent (cwnd scaled units).
 * NOTE(review): lines are missing from this excerpt; code text untouched.
 */
1218 nfs_request_try(struct nfsreq *rep)
1220 struct nfsmount *nmp = rep->r_nmp;
/* Soft mounts get a finite retry count; hard mounts retry forever. */
1224 if (nmp->nm_flag & NFSMNT_SOFT)
1225 rep->r_retry = nmp->nm_retry;
1227 rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
1228 rep->r_rtt = rep->r_rexmit = 0;
1229 if (proct[rep->r_procnum] > 0)
1230 rep->r_flags = R_TIMING | R_MASKTIMER;
1232 rep->r_flags = R_MASKTIMER;
1236 * Do the client side RPC.
1238 nfsstats.rpcrequests++;
1241 * Chain request into list of outstanding requests. Be sure
1242 * to put it LAST so timer finds oldest requests first. Note
1243 * that R_MASKTIMER is set at the moment to prevent any timer
1244 * action on this request while we are still doing processing on
1245 * it below. splsoftclock() primarily protects nm_sent. Note
1246 * that we may block in this code so there is no atomicy guarentee.
1249 mtx_link_init(&rep->r_link);
1250 TAILQ_INSERT_TAIL(&nmp->nm_reqq, rep, r_chain);/* XXX */
1252 nfssvc_iod_reader_wakeup(nmp);
1257 * If backing off another request or avoiding congestion, don't
1258 * send this one now but let timer do it. If not timing a request,
1261 * Even though the timer will not mess with our request there is
1262 * still the possibility that we will race a reply (which clears
1263 * R_SENT), especially on localhost connections, so be very careful
1264 * when setting R_SENT. We could set R_SENT prior to calling
1265 * nfs_send() but why bother if the response occurs that quickly?
1267 if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
1268 (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
1269 nmp->nm_sent < nmp->nm_cwnd)) {
1270 if (nmp->nm_soflags & PR_CONNREQUIRED)
1271 error = nfs_sndlock(nmp, rep);
1273 m2 = m_copym(rep->r_mreq, 0, M_COPYALL, MB_WAIT);
1274 error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep);
1275 if (nmp->nm_soflags & PR_CONNREQUIRED)
1278 if (!error && (rep->r_flags & R_MUSTRESEND) == 0 &&
1279 rep->r_mrep == NULL) {
1280 KASSERT((rep->r_flags & R_SENT) == 0,
1281 ("R_SENT ASSERT %p", rep));
1282 nmp->nm_sent += NFS_CWNDSCALE;
1283 rep->r_flags |= R_SENT;
1291 * Let the timer do what it will with the request, then
1292 * wait for the reply from our send or the timer's.
1295 rep->r_flags &= ~R_MASKTIMER;
/*
 * nfs_request_waitreply: block in nfs_reply() until this request's reply
 * (or a fatal error) arrives, then unlink it from nm_reqq -- spinning on
 * nfs_timer_raced while the callout timer holds it (R_LOCKED) -- and give
 * back its congestion-window slot.
 */
1301 nfs_request_waitreply(struct nfsreq *rep)
1303 struct nfsmount *nmp = rep->r_nmp;
1306 error = nfs_reply(nmp, rep);
1310 * RPC done, unlink the request, but don't rip it out from under
1311 * the callout timer.
1313 while (rep->r_flags & R_LOCKED) {
1314 nfs_timer_raced = 1;
1315 tsleep(&nfs_timer_raced, 0, "nfstrac", 0);
1317 TAILQ_REMOVE(&nmp->nm_reqq, rep, r_chain);
1321 * Decrement the outstanding request count.
1323 if (rep->r_flags & R_SENT) {
1324 rep->r_flags &= ~R_SENT;
1325 nmp->nm_sent -= NFS_CWNDSCALE;
/*
 * nfs_request_processreply: parse the rpc reply header, handle auth
 * failures (ENEEDAUTH -> redo auth), NFSERR_TRYLATER (backoff sleep then
 * EAGAIN -> retransmit), ESTALE cache invalidation, and hand the decoded
 * reply back through the nfsm_info.  Frees req/mbufs on the terminal paths.
 * NOTE(review): lines are missing from this excerpt; code text untouched.
 */
1333 * Process reply with error returned from nfs_requet_waitreply().
1335 * Returns EAGAIN if it wants us to loop up to nfs_request_try() again.
1336 * Returns ENEEDAUTH if it wants us to loop up to nfs_request_auth() again.
1339 nfs_request_processreply(nfsm_info_t info, int error)
1341 struct nfsreq *req = info->req;
1342 struct nfsmount *nmp = req->r_nmp;
1345 int trylater_delay = 15, trylater_cnt = 0;
1350 * If there was a successful reply and a tprintf msg.
1351 * tprintf a response.
1353 if (error == 0 && (req->r_flags & R_TPRINTFMSG)) {
1354 nfs_msg(req->r_td, nmp->nm_mountp->mnt_stat.f_mntfromname,
1357 info->mrep = req->r_mrep;
1358 info->md = req->r_md;
1359 info->dpos = req->r_dpos;
1361 m_freem(req->r_mreq);
1363 kfree(req, M_NFSREQ);
1369 * break down the rpc header and check if ok
1371 NULLOUT(tl = nfsm_dissect(info, 3 * NFSX_UNSIGNED));
1372 if (*tl++ == rpc_msgdenied) {
1373 if (*tl == rpc_mismatch) {
1375 } else if ((nmp->nm_flag & NFSMNT_KERB) &&
1376 *tl++ == rpc_autherr) {
/* First auth failure: strip the old header and retry authentication. */
1377 if (req->r_failed_auth == 0) {
1378 req->r_failed_auth++;
1379 req->r_mheadend->m_next = NULL;
1380 m_freem(info->mrep);
1382 m_freem(req->r_mreq);
1390 m_freem(info->mrep);
1392 m_freem(req->r_mreq);
1394 kfree(req, M_NFSREQ);
1400 * Grab any Kerberos verifier, otherwise just throw it away.
1402 verf_type = fxdr_unsigned(int, *tl++);
1403 i = fxdr_unsigned(int32_t, *tl);
1404 if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
1405 error = nfs_savenickauth(nmp, req->r_cred, i, req->r_key,
1406 &info->md, &info->dpos, info->mrep);
1410 ERROROUT(nfsm_adv(info, nfsm_rndup(i)));
1412 NULLOUT(tl = nfsm_dissect(info, NFSX_UNSIGNED));
1415 NULLOUT(tl = nfsm_dissect(info, NFSX_UNSIGNED));
1417 error = fxdr_unsigned(int, *tl);
1418 if ((nmp->nm_flag & NFSMNT_NFSV3) &&
1419 error == NFSERR_TRYLATER) {
1420 m_freem(info->mrep);
/* Server busy: sleep with exponential backoff (nfs_backoff) and retry. */
1423 waituntil = time_second + trylater_delay;
1424 while (time_second < waituntil)
1425 (void) tsleep((caddr_t)&lbolt,
1427 trylater_delay *= nfs_backoff[trylater_cnt];
1428 if (trylater_cnt < 7)
1430 req->r_flags &= ~R_MASKTIMER;
1431 return (EAGAIN); /* goto tryagain */
1435 * If the File Handle was stale, invalidate the
1436 * lookup cache, just in case.
1438 * To avoid namecache<->vnode deadlocks we must
1439 * release the vnode lock if we hold it.
1441 if (error == ESTALE) {
1442 struct vnode *vp = req->r_vp;
1445 ltype = lockstatus(&vp->v_lock, curthread);
1446 if (ltype == LK_EXCLUSIVE || ltype == LK_SHARED)
1447 lockmgr(&vp->v_lock, LK_RELEASE);
1448 cache_inval_vp(vp, CINV_CHILDREN);
1449 if (ltype == LK_EXCLUSIVE || ltype == LK_SHARED)
1450 lockmgr(&vp->v_lock, ltype);
1452 if (nmp->nm_flag & NFSMNT_NFSV3) {
1453 KKASSERT(*req->r_mrp == info->mrep);
1454 KKASSERT(*req->r_mdp == info->md);
1455 KKASSERT(*req->r_dposp == info->dpos);
1456 error |= NFSERR_RETERR;
1458 m_freem(info->mrep);
1461 m_freem(req->r_mreq);
1463 kfree(req, M_NFSREQ);
1468 KKASSERT(*req->r_mrp == info->mrep);
1469 KKASSERT(*req->r_mdp == info->md);
1470 KKASSERT(*req->r_dposp == info->dpos);
1471 m_freem(req->r_mreq);
/* NOTE(review): legacy FREE() here vs kfree() on the other paths -- consider unifying. */
1473 FREE(req, M_NFSREQ);
1476 m_freem(info->mrep);
1478 error = EPROTONOSUPPORT;
1480 m_freem(req->r_mreq);
1482 kfree(req, M_NFSREQ);
1487 #ifndef NFS_NOSERVER
/*
 * nfs_rephead() -- build the RPC reply header for a server-side reply.
 *
 * NOTE(review): the embedded original line numbers are non-contiguous;
 * intermediate lines (braces, else arms, returns) were lost in extraction,
 * so this listing is incomplete and documented as-is.
 *
 * siz: caller's estimate of the reply payload size; used below to decide
 *      whether m_getl() should hand back a cluster mbuf.
 * nd:  request descriptor supplying the xid, version flags and creds.
 * slp: service socket, used for the Kerberos nickname-verifier lookup.
 * err: RPC/NFS status; selects the denied vs. accepted reply layout.
 * mrq/mbp/bposp: presumably receive the built mbuf chain and build
 *      position -- the stores are in dropped lines; TODO confirm against
 *      the full source.
 */
1489 * Generate the rpc reply header
1490 * siz arg. is used to decide if adding a cluster is worthwhile
1493 nfs_rephead(int siz, struct nfsrv_descript *nd, struct nfssvc_sock *slp,
1494 int err, struct mbuf **mrq, struct mbuf **mbp, caddr_t *bposp)
1497 struct nfsm_info info;
1499 siz += RPC_REPLYSIZ;
1500 info.mb = m_getl(max_hdr + siz, MB_WAIT, MT_DATA, M_PKTHDR, NULL);
1501 info.mreq = info.mb;
1502 info.mreq->m_pkthdr.len = 0;
1504 * If this is not a cluster, try and leave leading space
1505 * for the lower level headers.
1507 if ((max_hdr + siz) < MINCLSIZE)
1508 info.mreq->m_data += max_hdr;
1509 tl = mtod(info.mreq, u_int32_t *);
1510 info.mreq->m_len = 6 * NFSX_UNSIGNED;
1511 info.bpos = ((caddr_t)tl) + info.mreq->m_len;
/* Reply transaction id echoes the request's xid. */
1512 *tl++ = txdr_unsigned(nd->nd_retxid);
/* MSG_DENIED path: authentication error or RPC version mismatch. */
1514 if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
1515 *tl++ = rpc_msgdenied;
1516 if (err & NFSERR_AUTHERR) {
1517 *tl++ = rpc_autherr;
1518 *tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
1519 info.mreq->m_len -= NFSX_UNSIGNED;
1520 info.bpos -= NFSX_UNSIGNED;
1522 *tl++ = rpc_mismatch;
/* Lowest and highest RPC version supported -- both RPC_VER2 here. */
1523 *tl++ = txdr_unsigned(RPC_VER2);
1524 *tl = txdr_unsigned(RPC_VER2);
1527 *tl++ = rpc_msgaccepted;
1530 * For Kerberos authentication, we must send the nickname
1531 * verifier back, otherwise just RPCAUTH_NULL.
1533 if (nd->nd_flag & ND_KERBFULL) {
1534 struct nfsuid *nuidp;
1535 struct timeval ktvin, ktvout;
/* Look up the cached credential for this uid (and peer address). */
1537 for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first;
1538 nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
1539 if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
1540 (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
1541 &nuidp->nu_haddr, nd->nd_nam2)))
1546 txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
1548 txdr_unsigned(nuidp->nu_timestamp.tv_usec);
1551 * Encrypt the timestamp in ecb mode using the
1558 *tl++ = rpc_auth_kerb;
1559 *tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
1560 *tl = ktvout.tv_sec;
1561 tl = nfsm_build(&info, 3 * NFSX_UNSIGNED);
1562 *tl++ = ktvout.tv_usec;
1563 *tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
/* Accepted-status variants (switch arms; case labels were dropped). */
1574 *tl = txdr_unsigned(RPC_PROGUNAVAIL);
1577 *tl = txdr_unsigned(RPC_PROGMISMATCH);
/* Advertise supported NFS program versions 2..3. */
1578 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
1579 *tl++ = txdr_unsigned(2);
1580 *tl = txdr_unsigned(3);
1583 *tl = txdr_unsigned(RPC_PROCUNAVAIL);
1586 *tl = txdr_unsigned(RPC_GARBAGE);
/* Append the NFS-level status word unless the reply is void. */
1590 if (err != NFSERR_RETVOID) {
1591 tl = nfsm_build(&info, NFSX_UNSIGNED);
1593 *tl = txdr_unsigned(nfsrv_errmap(nd, err));
1605 if (err != 0 && err != NFSERR_RETVOID)
1606 nfsstats.srvrpc_errs++;
1611 #endif /* NFS_NOSERVER */
/*
 * nfs_timer() -- periodic callout for the NFS subsystem.
 *
 * Walks every mount's outstanding request queue and hands timed-out
 * requests to retransmit handling, then (server side) kicks nfsd threads
 * for gathered writes whose deadline has passed, and finally re-arms
 * itself via callout_reset().
 *
 * NOTE(review): embedded line numbers are non-contiguous; several
 * statements/braces are missing from this listing.
 */
1614 * Scan the nfsreq list and retransmit any requests that have timed out
1615 * To avoid retransmission attempts on STREAM sockets (in the future) make
1616 * sure to set the r_retry field to 0 (implies nm_retry == 0).
1619 nfs_timer(void *arg /* never used */)
1621 struct nfsmount *nmp;
1623 #ifndef NFS_NOSERVER
1624 struct nfssvc_sock *slp;
1626 #endif /* NFS_NOSERVER */
1629 TAILQ_FOREACH(nmp, &nfs_mountq, nm_entry) {
1630 TAILQ_FOREACH(req, &nmp->nm_reqq, r_chain) {
1631 KKASSERT(nmp == req->r_nmp);
/* Skip requests that are terminating or have the timer masked. */
1633 (req->r_flags & (R_SOFTTERM|R_MASKTIMER))) {
/* R_LOCKED pins the request while we may block inside it. */
1636 req->r_flags |= R_LOCKED;
1637 if (nfs_sigintr(nmp, req, req->r_td)) {
1642 req->r_flags &= ~R_LOCKED;
1645 #ifndef NFS_NOSERVER
1648 * Scan the write gathering queues for writes that need to be
1651 cur_usec = nfs_curusec();
1652 TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
1653 if (slp->ns_tq.lh_first && slp->ns_tq.lh_first->nd_time<=cur_usec)
1654 nfsrv_wakenfsd(slp, 1);
1656 #endif /* NFS_NOSERVER */
1659 * Due to possible blocking, a client operation may be waiting for
1660 * us to finish processing this request so it can remove it.
/* Wake anyone who raced us while we held R_LOCKED (see nfs_timer_raced). */
1662 if (nfs_timer_raced) {
1663 nfs_timer_raced = 0;
1664 wakeup(&nfs_timer_raced);
/* Re-arm: this function is its own callout handler. */
1667 callout_reset(&nfs_timer_handle, nfs_ticks, nfs_timer, NULL);
/*
 * nfs_timer_req() -- per-request timeout/retransmit processing, called
 * from nfs_timer() for each candidate request.
 *
 * Computes the RTO (dumb fixed timeout or smoothed NFS_RTO, scaled by
 * nfs_backoff on repeated timeouts), prints the "server not responding"
 * message once, gives up after r_retry retransmits, and otherwise
 * retransmits over the mount's socket subject to the congestion window.
 *
 * NOTE(review): listing is incomplete (non-contiguous embedded line
 * numbers); some returns/else arms are not visible.
 */
1672 nfs_timer_req(struct nfsreq *req)
1674 struct thread *td = &thread0; /* XXX for creds, will break if sleep */
1675 struct nfsmount *nmp = req->r_nmp;
1681 if (req->r_rtt >= 0) {
1683 if (nmp->nm_flag & NFSMNT_DUMBTIMR)
1684 timeo = nmp->nm_timeo;
1686 timeo = NFS_RTO(nmp, proct[req->r_procnum]);
/* Exponential backoff: scale timeout by the mount's timeout count. */
1687 if (nmp->nm_timeouts > 0)
1688 timeo *= nfs_backoff[nmp->nm_timeouts - 1];
1689 if (req->r_rtt <= timeo)
1691 if (nmp->nm_timeouts < 8)
1695 * Check for server not responding
/* Complain (once per request) after nm_deadthresh retransmits. */
1697 if ((req->r_flags & R_TPRINTFMSG) == 0 &&
1698 req->r_rexmit > nmp->nm_deadthresh) {
1700 nmp->nm_mountp->mnt_stat.f_mntfromname,
1702 req->r_flags |= R_TPRINTFMSG;
1704 if (req->r_rexmit >= req->r_retry) { /* too many */
1705 nfsstats.rpctimeouts++;
/* Non-datagram (TCP) sockets: bump the counter but do not retransmit. */
1709 if (nmp->nm_sotype != SOCK_DGRAM) {
1710 if (++req->r_rexmit > NFS_MAXREXMIT)
1711 req->r_rexmit = NFS_MAXREXMIT;
1714 if ((so = nmp->nm_so) == NULL)
1718 * If there is enough space and the window allows..
1720 * Set r_rtt to -1 in case we fail to send it now.
/* Retransmit a copy of the request mbuf chain, honoring nm_cwnd. */
1723 if (ssb_space(&so->so_snd) >= req->r_mreq->m_pkthdr.len &&
1724 ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
1725 (req->r_flags & R_SENT) ||
1726 nmp->nm_sent < nmp->nm_cwnd) &&
1727 (m = m_copym(req->r_mreq, 0, M_COPYALL, MB_DONTWAIT))){
1728 if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
1729 error = so_pru_send(so, 0, m, NULL, NULL, td);
1731 error = so_pru_send(so, 0, m, nmp->nm_nam,
1734 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
1736 } else if (req->r_mrep == NULL) {
1738 * Iff first send, start timing
1739 * else turn timing off, backoff timer
1740 * and divide congestion window by 2.
1742 * It is possible for the so_pru_send() to
1743 * block and for us to race a reply so we
1744 * only do this if the reply field has not
1745 * been filled in. R_LOCKED will prevent
1746 * the request from being ripped out from under
1749 if (req->r_flags & R_SENT) {
1750 req->r_flags &= ~R_TIMING;
1751 if (++req->r_rexmit > NFS_MAXREXMIT)
1752 req->r_rexmit = NFS_MAXREXMIT;
1754 if (nmp->nm_cwnd < NFS_CWNDSCALE)
1755 nmp->nm_cwnd = NFS_CWNDSCALE;
1756 nfsstats.rpcretries++;
1758 req->r_flags |= R_SENT;
1759 nmp->nm_sent += NFS_CWNDSCALE;
/*
 * nfs_nmcancelreqs() -- cancel all outstanding RPCs on a mount.
 *
 * Used by forced unmounts: flags each pending request (via code in the
 * dropped lines, presumably nfs_softterm -- TODO confirm), then polls up
 * to 30 times, sleeping on lbolt between scans, for the queue to drain.
 *
 * NOTE(review): embedded line numbers are non-contiguous; the bodies of
 * both loops are partially missing from this listing.
 */
1767 * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and
1768 * wait for all requests to complete. This is used by forced unmounts
1769 * to terminate any outstanding RPCs.
1772 nfs_nmcancelreqs(struct nfsmount *nmp)
1778 TAILQ_FOREACH(req, &nmp->nm_reqq, r_chain) {
/* Skip requests already answered or already terminating. */
1779 if (nmp != req->r_nmp || req->r_mrep != NULL ||
1780 (req->r_flags & R_SOFTTERM)) {
1785 /* XXX the other two queues as well */
/* Poll for completion; each iteration waits roughly one lbolt tick. */
1788 for (i = 0; i < 30; i++) {
1790 TAILQ_FOREACH(req, &nmp->nm_reqq, r_chain) {
1791 if (nmp == req->r_nmp)
1797 tsleep(&lbolt, 0, "nfscancel", 0);
/*
 * nfs_async_return() -- hand a completed/terminated async request over
 * to the rxthread for reply processing.
 *
 * Moves the request from the mount's outstanding queue (nm_reqq) to the
 * receive queue (nm_reqrxq), returns its congestion-window contribution
 * if it had been sent, and wakes the iod reader thread.
 */
1803 nfs_async_return(struct nfsmount *nmp, struct nfsreq *rep)
/* Only TRY/WAITREPLY requests may transition to PROCESSREPLY. */
1805 KKASSERT(rep->r_info->state == NFSM_STATE_TRY ||
1806 rep->r_info->state == NFSM_STATE_WAITREPLY);
1807 rep->r_info->state = NFSM_STATE_PROCESSREPLY;
1808 TAILQ_REMOVE(&nmp->nm_reqq, rep, r_chain);
/* Give back the cwnd slice this request was holding. */
1809 if (rep->r_flags & R_SENT) {
1810 rep->r_flags &= ~R_SENT;
1811 nmp->nm_sent -= NFS_CWNDSCALE;
1814 TAILQ_INSERT_TAIL(&nmp->nm_reqrxq, rep, r_chain);
1815 nfssvc_iod_reader_wakeup(nmp);
/*
 * nfs_softterm() -- mark a request as terminating (R_SOFTTERM).
 *
 * Returns the request's congestion-window contribution immediately so a
 * blocked sender is not deadlocked waiting on nm_sent, and bounces async
 * requests to the rxthread for final processing.
 */
1819 * Flag a request as being about to terminate (due to NFSMNT_INT/NFSMNT_SOFT).
1820 * The nm_send count is decremented now to avoid deadlocks when the process in
1821 * soreceive() hasn't yet managed to send its own request.
1823 * This routine must be called at splsoftclock() to protect r_flags and
1827 nfs_softterm(struct nfsreq *rep)
1829 struct nfsmount *nmp = rep->r_nmp;
1831 rep->r_flags |= R_SOFTTERM;
/* Release the cwnd slice now rather than at reply time. */
1833 if (rep->r_flags & R_SENT) {
1834 rep->r_nmp->nm_sent -= NFS_CWNDSCALE;
1835 rep->r_flags &= ~R_SENT;
1839 * Asynchronous replies are bound-over to the
1840 * rxthread. Note that nmp->nm_reqqlen is not
1841 * decremented until the rxthread has finished
1844 if (rep->r_info && rep->r_info->async)
1845 nfs_async_return(nmp, rep);
/*
 * nfs_sigintr() -- should this NFS operation be interrupted?
 *
 * True when the request is soft-terminating, the mount is being forcibly
 * unmounted, or (for NFSMNT_INT mounts) the calling thread has a pending,
 * unmasked, un-ignored signal in NFSINT_SIGMASK. The visible returns were
 * dropped in extraction; the boolean sense follows the checks below.
 */
1849 * Test for a termination condition pending on the process.
1850 * This is used for NFSMNT_INT mounts.
1853 nfs_sigintr(struct nfsmount *nmp, struct nfsreq *rep, struct thread *td)
1859 if (rep && (rep->r_flags & R_SOFTTERM))
1861 /* Terminate all requests while attempting a forced unmount. */
1862 if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)
/* Non-interruptible mounts never abort on signals. */
1864 if (!(nmp->nm_flag & NFSMNT_INT))
1866 /* td might be NULL YYY */
1867 if (td == NULL || (p = td->td_proc) == NULL)
/* Pending signals minus the thread's mask and the process ignore set. */
1871 tmpset = lwp_sigpend(lp);
1872 SIGSETNAND(tmpset, lp->lwp_sigmask);
1873 SIGSETNAND(tmpset, p->p_sigignore);
1874 if (SIGNOTEMPTY(tmpset) && NFSINT_SIGMASK(tmpset))
/*
 * nfs_sndlock() -- acquire the mount's transmit-side mutex (nm_txlock).
 *
 * Spins on mtx_lock_ex_try(), checking nfs_sigintr() between attempts so
 * interruptible mounts can abort, then blocks in mtx_lock_ex(). Fails
 * unconditionally if the request has been cancelled (R_SOFTTERM).
 *
 * NOTE(review): listing is incomplete; the slpflag/slptimeo setup and
 * some returns are in dropped lines.
 */
1881 * Lock a socket against others.
1882 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
1883 * and also to avoid race conditions between the processes with nfs requests
1884 * in progress when a reconnect is necessary.
1887 nfs_sndlock(struct nfsmount *nmp, struct nfsreq *rep)
1889 mtx_t mtx = &nmp->nm_txlock;
1897 td = rep ? rep->r_td : NULL;
/* Interruptible mounts presumably set slpflag = PCATCH here (dropped). */
1898 if (nmp->nm_flag & NFSMNT_INT)
1901 while ((error = mtx_lock_ex_try(mtx)) != 0) {
1902 if (nfs_sigintr(nmp, rep, td)) {
1906 error = mtx_lock_ex(mtx, "nfsndlck", slpflag, slptimeo);
1909 if (slpflag == PCATCH) {
1914 /* Always fail if our request has been cancelled. */
1915 if (rep && (rep->r_flags & R_SOFTTERM)) {
/*
 * nfs_sndunlock() -- release the transmit-side mutex taken by
 * nfs_sndlock().
 */
1924 * Unlock the stream socket for others.
1927 nfs_sndunlock(struct nfsmount *nmp)
1929 mtx_unlock(&nmp->nm_txlock);
/*
 * nfs_rcvlock() -- acquire the mount's receive-side mutex (nm_rxlock).
 *
 * Repeatedly checks rep->r_mrep so a reply that arrives while we are
 * blocked lets us return without the lock, preventing one iod from
 * "capturing" the receive path. Interruptible via nfs_sigintr().
 *
 * NOTE(review): listing is incomplete (returns, ENOLCK path partially
 * dropped).
 */
1933 * Lock the receiver side of the socket.
1938 nfs_rcvlock(struct nfsmount *nmp, struct nfsreq *rep)
1940 mtx_t mtx = &nmp->nm_rxlock;
1946 * Unconditionally check for completion in case another nfsiod
1947 * get the packet while the caller was blocked, before the caller
1948 * called us. Packet reception is handled by mainline code which
1949 * is protected by the BGL at the moment.
1951 * We do not strictly need the second check just before the
1952 * tsleep(), but it's good defensive programming.
1954 if (rep && rep->r_mrep != NULL)
1957 if (nmp->nm_flag & NFSMNT_INT)
1963 while ((error = mtx_lock_ex_try(mtx)) != 0) {
1964 if (nfs_sigintr(nmp, rep, (rep ? rep->r_td : NULL))) {
/* Reply landed while we were spinning -- no lock needed. */
1968 if (rep && rep->r_mrep != NULL) {
1974 * NOTE: can return ENOLCK, but in that case rep->r_mrep
1975 * will already be set.
/* Link-aware blocking lock when we have a request to associate. */
1978 error = mtx_lock_ex_link(mtx, &rep->r_link,
1982 error = mtx_lock_ex(mtx, "nfsrcvlk", slpflag, slptimeo);
1988 * If our reply was received while we were sleeping,
1989 * then just return without taking the lock to avoid a
1990 * situation where a single iod could 'capture' the
1993 if (rep && rep->r_mrep != NULL) {
1997 if (slpflag == PCATCH) {
2003 if (rep && rep->r_mrep != NULL) {
/*
 * nfs_rcvunlock() -- release the receive-side mutex taken by
 * nfs_rcvlock().
 */
2012 * Unlock the stream socket for others.
2015 nfs_rcvunlock(struct nfsmount *nmp)
2017 mtx_unlock(&nmp->nm_rxlock);
/*
 * nfs_realign() -- realign mis-aligned mbuf data by copying.
 *
 * Scans the chain at *pm for any mbuf whose length or data pointer is
 * not 4-byte aligned; when found, copies the remainder of the chain into
 * a freshly allocated, aligned chain and splices it in (the splice and
 * free of the old tail are in dropped lines -- TODO confirm).
 *
 * pm:   in/out pointer to the mbuf chain head.
 * hsiz: header-size hint; its use is not visible in this listing.
 */
2023 * Check for badly aligned mbuf data and realign by copying the unaligned
2024 * portion of the data into a new mbuf chain and freeing the portions
2025 * of the old chain that were replaced.
2027 * We cannot simply realign the data within the existing mbuf chain
2028 * because the underlying buffers may contain other rpc commands and
2029 * we cannot afford to overwrite them.
2031 * We would prefer to avoid this situation entirely. The situation does
2032 * not occur with NFS/UDP and is supposed to only occasionally occur
2033 * with TCP. Use vfs.nfs.realign_count and realign_test to check this.
2036 nfs_realign(struct mbuf **pm, int hsiz)
2039 struct mbuf *n = NULL;
2044 while ((m = *pm) != NULL) {
/* Either the length or the data pointer is not 32-bit aligned. */
2045 if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
2046 n = m_getl(m->m_len, MB_WAIT, MT_DATA, 0, NULL);
2054 * If n is non-NULL, loop on m copying data, then replace the
2055 * portion of the chain that had to be realigned.
2058 ++nfs_realign_count;
2060 m_copyback(n, off, m->m_len, mtod(m, caddr_t));
2069 #ifndef NFS_NOSERVER
/*
 * nfs_getreq() -- parse the RPC call header of an incoming request.
 *
 * Validates the RPC message type/version, program number, NFS version
 * (2 or 3) and procedure number, then decodes the credential:
 * AUTH_UNIX fills in nd->nd_cr directly; AUTH_KERB (fullname/nickname)
 * stashes the ticket for later verification or checks the nickname
 * timestamp against the cached nfsuid. On any failure nd_repstat is set
 * and nd_procnum is forced to NFSPROC_NOOP so a reply is still sent.
 *
 * nd:         request descriptor; mrep/md/dpos are consumed and the
 *             updated md/dpos written back at the end.
 * nfsd:       server thread; receives kerb auth/verifier blobs.
 * has_header: not referenced in the visible lines -- TODO confirm usage.
 *
 * NOTE(review): embedded line numbers are non-contiguous; returns,
 * closing braces and some decode steps are missing from this listing.
 */
2072 * Parse an RPC request
2074 * - fill in the cred struct.
2077 nfs_getreq(struct nfsrv_descript *nd, struct nfsd *nfsd, int has_header)
2084 u_int32_t nfsvers, auth_type;
2086 int error = 0, ticklen;
2087 struct nfsuid *nuidp;
2088 struct timeval tvin, tvout;
2089 struct nfsm_info info;
2090 #if 0 /* until encrypted keys are implemented */
2091 NFSKERBKEYSCHED_T keys; /* stores key schedule */
2094 info.mrep = nd->nd_mrep;
2095 info.md = nd->nd_md;
2096 info.dpos = nd->nd_dpos;
/* Pull the fixed 10-word RPC call header. */
2099 NULLOUT(tl = nfsm_dissect(&info, 10 * NFSX_UNSIGNED));
2100 nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++);
2101 if (*tl++ != rpc_call) {
2106 NULLOUT(tl = nfsm_dissect(&info, 8 * NFSX_UNSIGNED));
2110 if (*tl++ != rpc_vers) {
2111 nd->nd_repstat = ERPCMISMATCH;
2112 nd->nd_procnum = NFSPROC_NOOP;
2115 if (*tl != nfs_prog) {
2116 nd->nd_repstat = EPROGUNAVAIL;
2117 nd->nd_procnum = NFSPROC_NOOP;
2121 nfsvers = fxdr_unsigned(u_int32_t, *tl++);
2122 if (nfsvers < NFS_VER2 || nfsvers > NFS_VER3) {
2123 nd->nd_repstat = EPROGMISMATCH;
2124 nd->nd_procnum = NFSPROC_NOOP;
2127 if (nfsvers == NFS_VER3)
2128 nd->nd_flag = ND_NFSV3;
2129 nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++);
2130 if (nd->nd_procnum == NFSPROC_NULL)
/* Reject out-of-range procedures; V2 tops out at STATFS. */
2132 if (nd->nd_procnum >= NFS_NPROCS ||
2133 (nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
2134 (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
2135 nd->nd_repstat = EPROCUNAVAIL;
2136 nd->nd_procnum = NFSPROC_NOOP;
/* Map V2 procedure numbers onto the V3 dispatch table. */
2139 if ((nd->nd_flag & ND_NFSV3) == 0)
2140 nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
2142 len = fxdr_unsigned(int, *tl++);
2143 if (len < 0 || len > RPCAUTH_MAXSIZ) {
2148 nd->nd_flag &= ~ND_KERBAUTH;
2150 * Handle auth_unix or auth_kerb.
2152 if (auth_type == rpc_auth_unix) {
/* Skip the machine-name string (bounded by NFS_MAXNAMLEN). */
2153 len = fxdr_unsigned(int, *++tl);
2154 if (len < 0 || len > NFS_MAXNAMLEN) {
2158 ERROROUT(nfsm_adv(&info, nfsm_rndup(len)));
2159 NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
2160 bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
2161 nd->nd_cr.cr_ref = 1;
2162 nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
2163 nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
2164 len = fxdr_unsigned(int, *tl);
2165 if (len < 0 || len > RPCAUTH_UNIXGIDS) {
/* Decode the supplemental groups; slot 0 presumably holds cr_gid. */
2169 NULLOUT(tl = nfsm_dissect(&info, (len + 2) * NFSX_UNSIGNED));
2170 for (i = 1; i <= len; i++)
2172 nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
2175 nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
2176 if (nd->nd_cr.cr_ngroups > 1)
2177 nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
/* Skip the verifier opaque body. */
2178 len = fxdr_unsigned(int, *++tl);
2179 if (len < 0 || len > RPCAUTH_MAXSIZ) {
2184 ERROROUT(nfsm_adv(&info, nfsm_rndup(len)));
2186 } else if (auth_type == rpc_auth_kerb) {
2187 switch (fxdr_unsigned(int, *tl++)) {
2188 case RPCAKN_FULLNAME:
2189 ticklen = fxdr_unsigned(int, *tl);
2190 *((u_int32_t *)nfsd->nfsd_authstr) = *tl;
2191 uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
2192 nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
2193 if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
/* Copy the ticket into the nfsd's auth buffer for later verification. */
2200 uio.uio_segflg = UIO_SYSSPACE;
2201 iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
2202 iov.iov_len = RPCAUTH_MAXSIZ - 4;
2203 ERROROUT(nfsm_mtouio(&info, &uio, uio.uio_resid));
2204 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2205 if (*tl++ != rpc_auth_kerb ||
2206 fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
2207 kprintf("Bad kerb verifier\n");
2208 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2209 nd->nd_procnum = NFSPROC_NOOP;
2212 NULLOUT(cp = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
2213 tl = (u_int32_t *)cp;
2214 if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
2215 kprintf("Not fullname kerb verifier\n");
2216 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2217 nd->nd_procnum = NFSPROC_NOOP;
2220 cp += NFSX_UNSIGNED;
2221 bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
2222 nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
/* Defer actual ticket verification to the NFSD_NEEDAUTH path. */
2223 nd->nd_flag |= ND_KERBFULL;
2224 nfsd->nfsd_flag |= NFSD_NEEDAUTH;
2226 case RPCAKN_NICKNAME:
2227 if (len != 2 * NFSX_UNSIGNED) {
2228 kprintf("Kerb nickname short\n");
2229 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
2230 nd->nd_procnum = NFSPROC_NOOP;
2233 nickuid = fxdr_unsigned(uid_t, *tl);
2234 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2235 if (*tl++ != rpc_auth_kerb ||
2236 fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
2237 kprintf("Kerb nick verifier bad\n");
2238 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
2239 nd->nd_procnum = NFSPROC_NOOP;
2242 NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
2243 tvin.tv_sec = *tl++;
/* Look up the cached nickname credential for this uid/peer. */
2246 for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
2247 nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
2248 if (nuidp->nu_cr.cr_uid == nickuid &&
2250 netaddr_match(NU_NETFAM(nuidp),
2251 &nuidp->nu_haddr, nd->nd_nam2)))
2256 (NFSERR_AUTHERR|AUTH_REJECTCRED);
2257 nd->nd_procnum = NFSPROC_NOOP;
2262 * Now, decrypt the timestamp using the session key
/* Reject expired entries and replayed (non-increasing) timestamps. */
2269 tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
2270 tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
2271 if (nuidp->nu_expire < time_second ||
2272 nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
2273 (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
2274 nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
2275 nuidp->nu_expire = 0;
2277 (NFSERR_AUTHERR|AUTH_REJECTVERF);
2278 nd->nd_procnum = NFSPROC_NOOP;
2281 nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
2282 nd->nd_flag |= ND_KERBNICK;
2285 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
2286 nd->nd_procnum = NFSPROC_NOOP;
/* Publish the updated parse position back into the descriptor. */
2290 nd->nd_md = info.md;
2291 nd->nd_dpos = info.dpos;
/*
 * nfs_msg() -- print "nfs server <server>: <msg>" on the originating
 * process's controlling terminal via tprintf(); tolerates a NULL thread
 * or process (tpr stays NULL and tprintf falls back accordingly).
 */
2300 * Send a message to the originating process's terminal. The thread and/or
2301 * process may be NULL. YYY the thread should not be NULL but there may
2302 * still be some uio_td's that are still being passed as NULL through to
2306 nfs_msg(struct thread *td, char *server, char *msg)
2310 if (td && td->td_proc)
2311 tpr = tprintf_open(td->td_proc);
2314 tprintf(tpr, "nfs server %s: %s\n", server, msg);
2319 #ifndef NFS_NOSERVER
/*
 * nfsrv_rcv() -- socket upcall for nfsd service sockets.
 *
 * Pulls data off the socket non-blocking: for TCP, serializes via
 * SLP_GETSTREAM and feeds raw data to nfsrv_getstream(); for UDP, loops
 * so_pru_soreceive() queuing one nfsrv_rec per packet. Sets SLP_NEEDQ /
 * SLP_DISCONN on backpressure or errors and wakes nfsd threads when
 * called from the protocol layer with work ready.
 *
 * NOTE(review): embedded line numbers are non-contiguous; returns and
 * several statements are missing from this listing.
 */
2321 * Socket upcall routine for the nfsd sockets.
2322 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
2323 * Essentially do as much as possible non-blocking, else punt and it will
2324 * be called with MB_WAIT from an nfsd.
2327 nfsrv_rcv(struct socket *so, void *arg, int waitflag)
2329 struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
2331 struct sockaddr *nam;
2334 int nparallel_wakeup = 0;
2336 if ((slp->ns_flag & SLP_VALID) == 0)
2340 * Do not allow an infinite number of completed RPC records to build
2341 * up before we stop reading data from the socket. Otherwise we could
2342 * end up holding onto an unreasonable number of mbufs for requests
2343 * waiting for service.
2345 * This should give pretty good feedback to the TCP
2346 * layer and prevents a memory crunch for other protocols.
2348 * Note that the same service socket can be dispatched to several
2349 * nfs servers simultaneously.
2351 * the tcp protocol callback calls us with MB_DONTWAIT.
2352 * nfsd calls us with MB_WAIT (typically).
/* Backpressure: defer to an nfsd if too many records are queued. */
2354 if (waitflag == MB_DONTWAIT && slp->ns_numrec >= nfsd_waiting / 2 + 1) {
2355 slp->ns_flag |= SLP_NEEDQ;
2360 * Handle protocol specifics to parse an RPC request. We always
2361 * pull from the socket using non-blocking I/O.
2363 if (so->so_type == SOCK_STREAM) {
2365 * The data has to be read in an orderly fashion from a TCP
2366 * stream, unlike a UDP socket. It is possible for soreceive
2367 * and/or nfsrv_getstream() to block, so make sure only one
2368 * entity is messing around with the TCP stream at any given
2369 * moment. The receive sockbuf's lock in soreceive is not
2372 * Note that this procedure can be called from any number of
2373 * NFS servers *OR* can be upcalled directly from a TCP
/* SLP_GETSTREAM is the single-reader flag for the TCP stream. */
2376 if (slp->ns_flag & SLP_GETSTREAM) {
2377 slp->ns_flag |= SLP_NEEDQ;
2380 slp->ns_flag |= SLP_GETSTREAM;
2383 * Do soreceive(). Pull out as much data as possible without
2386 sbinit(&sio, 1000000000);
2387 flags = MSG_DONTWAIT;
2388 error = so_pru_soreceive(so, &nam, NULL, &sio, NULL, &flags);
2389 if (error || sio.sb_mb == NULL) {
2390 if (error == EWOULDBLOCK)
2391 slp->ns_flag |= SLP_NEEDQ;
2393 slp->ns_flag |= SLP_DISCONN;
2394 slp->ns_flag &= ~SLP_GETSTREAM;
/* Append the new data to the socket's raw buffer and byte count. */
2398 if (slp->ns_rawend) {
2399 slp->ns_rawend->m_next = m;
2400 slp->ns_cc += sio.sb_cc;
2403 slp->ns_cc = sio.sb_cc;
2410 * Now try and parse as many record(s) as we can out of the
2413 error = nfsrv_getstream(slp, waitflag, &nparallel_wakeup);
2416 slp->ns_flag |= SLP_DISCONN;
2418 slp->ns_flag |= SLP_NEEDQ;
2420 slp->ns_flag &= ~SLP_GETSTREAM;
2423 * For UDP soreceive typically pulls just one packet, loop
2424 * to get the whole batch.
2427 sbinit(&sio, 1000000000);
2428 flags = MSG_DONTWAIT;
2429 error = so_pru_soreceive(so, &nam, NULL, &sio,
2432 struct nfsrv_rec *rec;
2433 int mf = (waitflag & MB_DONTWAIT) ?
2434 M_NOWAIT : M_WAITOK;
2435 rec = kmalloc(sizeof(struct nfsrv_rec),
/* Allocation failed: drop the sender address (legacy FREE macro). */
2439 FREE(nam, M_SONAME);
/* Realign to word boundaries before the RPC parser touches it. */
2443 nfs_realign(&sio.sb_mb, 10 * NFSX_UNSIGNED);
2444 rec->nr_address = nam;
2445 rec->nr_packet = sio.sb_mb;
2446 STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link);
2451 if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
2452 && error != EWOULDBLOCK) {
2453 slp->ns_flag |= SLP_DISCONN;
2457 } while (sio.sb_mb);
2461 * If we were upcalled from the tcp protocol layer and we have
2462 * fully parsed records ready to go, or there is new data pending,
2463 * or something went wrong, try to wake up an nfsd thread to deal
2467 if (waitflag == MB_DONTWAIT && (slp->ns_numrec > 0
2468 || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)))) {
2469 nfsrv_wakenfsd(slp, nparallel_wakeup);
/*
 * nfsrv_getstream() -- carve RPC records out of a TCP byte stream.
 *
 * Reads the 4-byte record mark (length + last-fragment bit, possibly
 * split across mbufs), then extracts exactly ns_reclen bytes from the
 * raw buffer, accumulating fragments on ns_frag until SLP_LASTFRAG
 * completes a record, which is queued as an nfsrv_rec. Returns
 * EWOULDBLOCK when more stream data is needed.
 *
 * NOTE(review): embedded line numbers are non-contiguous; parts of the
 * byte-gathering loops and the equal-length case body are missing.
 */
2474 * Try and extract an RPC request from the mbuf data list received on a
2475 * stream socket. The "waitflag" argument indicates whether or not it
2479 nfsrv_getstream(struct nfssvc_sock *slp, int waitflag, int *countp)
2481 struct mbuf *m, **mpp;
2484 struct mbuf *om, *m2, *recm;
/* ns_reclen == 0 means we still need to read the next record mark. */
2488 if (slp->ns_reclen == 0) {
2489 if (slp->ns_cc < NFSX_UNSIGNED)
2492 if (m->m_len >= NFSX_UNSIGNED) {
2493 bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
2494 m->m_data += NFSX_UNSIGNED;
2495 m->m_len -= NFSX_UNSIGNED;
/* Record mark straddles mbufs: gather it byte by byte. */
2497 cp1 = (caddr_t)&recmark;
2498 cp2 = mtod(m, caddr_t);
2499 while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
2500 while (m->m_len == 0) {
2502 cp2 = mtod(m, caddr_t);
2509 slp->ns_cc -= NFSX_UNSIGNED;
2510 recmark = ntohl(recmark);
/* High bit flags the final fragment of the record (RFC 1831 framing). */
2511 slp->ns_reclen = recmark & ~0x80000000;
2512 if (recmark & 0x80000000)
2513 slp->ns_flag |= SLP_LASTFRAG;
2515 slp->ns_flag &= ~SLP_LASTFRAG;
2516 if (slp->ns_reclen > NFS_MAXPACKET || slp->ns_reclen <= 0) {
2517 log(LOG_ERR, "%s (%d) from nfs client\n",
2518 "impossible packet length",
2525 * Now get the record part.
2527 * Note that slp->ns_reclen may be 0. Linux sometimes
2528 * generates 0-length RPCs
2531 if (slp->ns_cc == slp->ns_reclen) {
/* Exact fit: consume the whole raw buffer as this record. */
2533 slp->ns_raw = slp->ns_rawend = NULL;
2534 slp->ns_cc = slp->ns_reclen = 0;
2535 } else if (slp->ns_cc > slp->ns_reclen) {
2540 while (len < slp->ns_reclen) {
2541 if ((len + m->m_len) > slp->ns_reclen) {
/* Partial mbuf: copy the prefix, advance the original in place. */
2542 m2 = m_copym(m, 0, slp->ns_reclen - len,
2550 m->m_data += slp->ns_reclen - len;
2551 m->m_len -= slp->ns_reclen - len;
2552 len = slp->ns_reclen;
2554 return (EWOULDBLOCK);
2556 } else if ((len + m->m_len) == slp->ns_reclen) {
2576 * Accumulate the fragments into a record.
2578 mpp = &slp->ns_frag;
2580 mpp = &((*mpp)->m_next);
2582 if (slp->ns_flag & SLP_LASTFRAG) {
2583 struct nfsrv_rec *rec;
2584 int mf = (waitflag & MB_DONTWAIT) ? M_NOWAIT : M_WAITOK;
2585 rec = kmalloc(sizeof(struct nfsrv_rec), M_NFSRVDESC, mf);
2587 m_freem(slp->ns_frag);
2589 nfs_realign(&slp->ns_frag, 10 * NFSX_UNSIGNED);
/* TCP records carry no per-record peer address. */
2590 rec->nr_address = NULL;
2591 rec->nr_packet = slp->ns_frag;
2592 STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link);
2596 slp->ns_frag = NULL;
/*
 * nfsrv_dorec() -- dequeue one completed RPC record from a service
 * socket and parse its header into a freshly allocated descriptor.
 *
 * Pops the head nfsrv_rec, allocates an nfsrv_descript, and runs
 * nfs_getreq() on it; on parse failure frees the address and descriptor.
 * The successful store through *ndp is in dropped lines -- TODO confirm.
 *
 * NOTE(review): uses legacy MALLOC()/FREE() while nearby code uses
 * kmalloc()/kfree() -- inconsistent, but left as-is since this listing
 * is incomplete.
 */
2602 * Parse an RPC header.
2605 nfsrv_dorec(struct nfssvc_sock *slp, struct nfsd *nfsd,
2606 struct nfsrv_descript **ndp)
2608 struct nfsrv_rec *rec;
2610 struct sockaddr *nam;
2611 struct nfsrv_descript *nd;
2615 if ((slp->ns_flag & SLP_VALID) == 0 || !STAILQ_FIRST(&slp->ns_rec))
2617 rec = STAILQ_FIRST(&slp->ns_rec);
2618 STAILQ_REMOVE_HEAD(&slp->ns_rec, nr_link);
2619 KKASSERT(slp->ns_numrec > 0);
2621 nam = rec->nr_address;
/* The record wrapper is no longer needed once unpacked. */
2623 kfree(rec, M_NFSRVDESC);
2624 MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript),
2625 M_NFSRVDESC, M_WAITOK);
2626 nd->nd_md = nd->nd_mrep = m;
2628 nd->nd_dpos = mtod(m, caddr_t);
2629 error = nfs_getreq(nd, nfsd, TRUE);
/* Parse failed: release the peer address and the descriptor. */
2632 FREE(nam, M_SONAME);
2634 kfree((caddr_t)nd, M_NFSRVDESC);
/*
 * nfsrv_wakenfsd() -- wake up to nparallel waiting nfsd threads to
 * service the records queued on slp; if demand remains, flag the socket
 * (SLP_DOREC) and the thread pool (NFSD_CHECKSLP) so a busy nfsd will
 * pick it up later.
 */
2643 * Try to assign service sockets to nfsd threads based on the number
2644 * of new rpc requests that have been queued on the service socket.
2646 * If no nfsd's are available or additional requests are pending, set the
2647 * NFSD_CHECKSLP flag so that one of the running nfsds will go look for
2648 * the work in the nfssvc_sock list when it is finished processing its
2649 * current work. This flag is only cleared when an nfsd can not find
2650 * any new work to perform.
2653 nfsrv_wakenfsd(struct nfssvc_sock *slp, int nparallel)
2657 if ((slp->ns_flag & SLP_VALID) == 0)
2661 TAILQ_FOREACH(nd, &nfsd_head, nfsd_chain) {
2662 if (nd->nfsd_flag & NFSD_WAITING) {
2663 nd->nfsd_flag &= ~NFSD_WAITING;
2665 panic("nfsd wakeup");
2668 wakeup((caddr_t)nd);
/* Stop once we've woken the requested number of threads. */
2669 if (--nparallel == 0)
/* No (or not enough) idle nfsds: leave markers for later pickup. */
2674 slp->ns_flag |= SLP_DOREC;
2675 nfsd_head_flag |= NFSD_CHECKSLP;
2678 #endif /* NFS_NOSERVER */