gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 1989, 1991, 1993, 1995
	3	* The Regents of the University of California. All rights reserved.
	4	*
	5	* This code is derived from software contributed to Berkeley by
	6	* Rick Macklem at The University of Guelph.
	7	*
	8	* Redistribution and use in source and binary forms, with or without
	9	* modification, are permitted provided that the following conditions
	10	* are met:
	11	* 1. Redistributions of source code must retain the above copyright
	12	* notice, this list of conditions and the following disclaimer.
	13	* 2. Redistributions in binary form must reproduce the above copyright
	14	* notice, this list of conditions and the following disclaimer in the
	15	* documentation and/or other materials provided with the distribution.
	16	* 3. All advertising materials mentioning features or use of this software
	17	* must display the following acknowledgement:
	18	* This product includes software developed by the University of
	19	* California, Berkeley and its contributors.
	20	* 4. Neither the name of the University nor the names of its contributors
	21	* may be used to endorse or promote products derived from this software
	22	* without specific prior written permission.
	23	*
	24	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	25	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	26	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	27	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	28	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	29	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	30	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	31	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	32	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	33	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	34	* SUCH DAMAGE.
	35	*
	36	* @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
	37	* $FreeBSD: src/sys/nfs/nfs_socket.c,v 1.60.2.6 2003/03/26 01:44:46 alfred Exp $
	38	* $DragonFly: src/sys/vfs/nfs/nfs_socket.c,v 1.15 2004/04/07 05:15:48 dillon Exp $
	39	*/
	40
	41	/*
	42	* Socket operations for use by nfs
	43	*/
	44
	45	#include <sys/param.h>
	46	#include <sys/systm.h>
	47	#include <sys/proc.h>
	48	#include <sys/malloc.h>
	49	#include <sys/mount.h>
	50	#include <sys/kernel.h>
	51	#include <sys/mbuf.h>
	52	#include <sys/vnode.h>
	53	#include <sys/protosw.h>
	54	#include <sys/resourcevar.h>
	55	#include <sys/socket.h>
	56	#include <sys/socketvar.h>
	57	#include <sys/socketops.h>
	58	#include <sys/syslog.h>
	59	#include <sys/thread.h>
	60	#include <sys/tprintf.h>
	61	#include <sys/sysctl.h>
	62	#include <sys/signalvar.h>
	63
	64	#include <netinet/in.h>
	65	#include <netinet/tcp.h>
	66	#include <sys/thread2.h>
	67
	68	#include "rpcv2.h"
	69	#include "nfsproto.h"
	70	#include "nfs.h"
	71	#include "xdr_subs.h"
	72	#include "nfsm_subs.h"
	73	#include "nfsmount.h"
	74	#include "nfsnode.h"
	75	#include "nfsrtt.h"
	76	#include "nqnfs.h"
	77
	78	#define TRUE 1
	79	#define FALSE 0
	80
	81	/*
	82	* Estimate rto for an nfs rpc sent via. an unreliable datagram.
	83	* Use the mean and mean deviation of rtt for the appropriate type of rpc
	84	* for the frequent rpcs and a default for the others.
	85	* The justification for doing "other" this way is that these rpcs
	86	* happen so infrequently that timer est. would probably be stale.
	87	* Also, since many of these rpcs are
	88	* non-idempotent, a conservative timeout is desired.
	89	* getattr, lookup - A+2D
	90	* read, write - A+4D
	91	* other - nm_timeo
	92	*/
	93	#define NFS_RTO(n, t) \
	94	((t) == 0 ? (n)->nm_timeo : \
	95	((t) < 3 ? \
	96	(((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	97	((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
	98	#define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
	99	#define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
	100	/*
	101	* External data, mostly RPC constants in XDR form
	102	*/
	103	extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers,
	104	rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr,
	105	rpc_auth_kerb;
	106	extern u_int32_t nfs_prog, nqnfs_prog;
	107	extern time_t nqnfsstarttime;
	108	extern struct nfsstats nfsstats;
	109	extern int nfsv3_procid[NFS_NPROCS];
	110	extern int nfs_ticks;
	111
	112	/*
	113	* Defines which timer to use for the procnum.
	114	* 0 - default
	115	* 1 - getattr
	116	* 2 - lookup
	117	* 3 - read
	118	* 4 - write
	119	*/
	120	static int proct[NFS_NPROCS] = {
	121	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
	122	0, 0, 0,
	123	};
	124
	125	static int nfs_realign_test;
	126	static int nfs_realign_count;
	127	static int nfs_bufpackets = 4;
	128
	129	SYSCTL_DECL(_vfs_nfs);
	130
	131	SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0, "");
	132	SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0, "");
	133	SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, "");
	134
	135
	136	/*
	137	* There is a congestion window for outstanding rpcs maintained per mount
	138	* point. The cwnd size is adjusted in roughly the way that:
	139	* Van Jacobson, Congestion avoidance and Control, In "Proceedings of
	140	* SIGCOMM '88". ACM, August 1988.
	141	* describes for TCP. The cwnd size is chopped in half on a retransmit timeout
	142	* and incremented by 1/cwnd when each rpc reply is received and a full cwnd
	143	* of rpcs is in progress.
	144	* (The sent count and cwnd are scaled for integer arith.)
	145	* Variants of "slow start" were tried and were found to be too much of a
	146	* performance hit (ave. rtt 3 times larger),
	147	* I suspect due to the large rtt that nfs rpcs have.
	148	*/
	149	#define NFS_CWNDSCALE 256
	150	#define NFS_MAXCWND (NFS_CWNDSCALE * 32)
	151	static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
	152	int nfsrtton = 0;
	153	struct nfsrtt nfsrtt;
	154	struct callout_handle nfs_timer_handle;
	155
	156	static int nfs_msg (struct thread ,char ,char *);
	157	static int nfs_rcvlock (struct nfsreq *);
	158	static void nfs_rcvunlock (struct nfsreq *);
	159	static void nfs_realign (struct mbuf **pm, int hsiz);
	160	static int nfs_receive (struct nfsreq rep, struct sockaddr *aname,
	161	struct mbuf **mp);
	162	static void nfs_softterm (struct nfsreq *rep);
	163	static int nfs_reconnect (struct nfsreq *rep);
	164	#ifndef NFS_NOSERVER
	165	static int nfsrv_getstream (struct nfssvc_sock *,int);
	166
	167	int (nfsrv3_procs[NFS_NPROCS]) (struct nfsrv_descript nd,
	168	struct nfssvc_sock *slp,
	169	struct thread *td,
	170	struct mbuf **mreqp) = {
	171	nfsrv_null,
	172	nfsrv_getattr,
	173	nfsrv_setattr,
	174	nfsrv_lookup,
	175	nfsrv3_access,
	176	nfsrv_readlink,
	177	nfsrv_read,
	178	nfsrv_write,
	179	nfsrv_create,
	180	nfsrv_mkdir,
	181	nfsrv_symlink,
	182	nfsrv_mknod,
	183	nfsrv_remove,
	184	nfsrv_rmdir,
	185	nfsrv_rename,
	186	nfsrv_link,
	187	nfsrv_readdir,
	188	nfsrv_readdirplus,
	189	nfsrv_statfs,
	190	nfsrv_fsinfo,
	191	nfsrv_pathconf,
	192	nfsrv_commit,
	193	nqnfsrv_getlease,
	194	nqnfsrv_vacated,
	195	nfsrv_noop,
	196	nfsrv_noop
	197	};
	198	#endif /* NFS_NOSERVER */
	199
	200	/*
	201	* Initialize sockets and congestion for a new NFS connection.
	202	* We do not free the sockaddr if error.
	203	*/
	204	int
	205	nfs_connect(struct nfsmount nmp, struct nfsreq rep)
	206	{
	207	struct socket *so;
	208	int s, error, rcvreserve, sndreserve;
	209	int pktscale;
	210	struct sockaddr *saddr;
	211	struct sockaddr_in *sin;
	212	struct thread td = &thread0; / only used for socreate and sobind */
	213
	214	nmp->nm_so = (struct socket *)0;
	215	saddr = nmp->nm_nam;
	216	error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
	217	nmp->nm_soproto, td);
	218	if (error)
	219	goto bad;
	220	so = nmp->nm_so;
	221	nmp->nm_soflags = so->so_proto->pr_flags;
	222
	223	/*
	224	* Some servers require that the client port be a reserved port number.
	225	*/
	226	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
	227	struct sockopt sopt;
	228	int ip;
	229	struct sockaddr_in ssin;
	230
	231	bzero(&sopt, sizeof sopt);
	232	ip = IP_PORTRANGE_LOW;
	233	sopt.sopt_dir = SOPT_SET;
	234	sopt.sopt_level = IPPROTO_IP;
	235	sopt.sopt_name = IP_PORTRANGE;
	236	sopt.sopt_val = (void *)&ip;
	237	sopt.sopt_valsize = sizeof(ip);
	238	sopt.sopt_td = NULL;
	239	error = sosetopt(so, &sopt);
	240	if (error)
	241	goto bad;
	242	bzero(&ssin, sizeof ssin);
	243	sin = &ssin;
	244	sin->sin_len = sizeof (struct sockaddr_in);
	245	sin->sin_family = AF_INET;
	246	sin->sin_addr.s_addr = INADDR_ANY;
	247	sin->sin_port = htons(0);
	248	error = sobind(so, (struct sockaddr *)sin, td);
	249	if (error)
	250	goto bad;
	251	bzero(&sopt, sizeof sopt);
	252	ip = IP_PORTRANGE_DEFAULT;
	253	sopt.sopt_dir = SOPT_SET;
	254	sopt.sopt_level = IPPROTO_IP;
	255	sopt.sopt_name = IP_PORTRANGE;
	256	sopt.sopt_val = (void *)&ip;
	257	sopt.sopt_valsize = sizeof(ip);
	258	sopt.sopt_td = NULL;
	259	error = sosetopt(so, &sopt);
	260	if (error)
	261	goto bad;
	262	}
	263
	264	/*
	265	* Protocols that do not require connections may be optionally left
	266	* unconnected for servers that reply from a port other than NFS_PORT.
	267	*/
	268	if (nmp->nm_flag & NFSMNT_NOCONN) {
	269	if (nmp->nm_soflags & PR_CONNREQUIRED) {
	270	error = ENOTCONN;
	271	goto bad;
	272	}
	273	} else {
	274	error = soconnect(so, nmp->nm_nam, td);
	275	if (error)
	276	goto bad;
	277
	278	/*
	279	* Wait for the connection to complete. Cribbed from the
	280	* connect system call but with the wait timing out so
	281	* that interruptible mounts don't hang here for a long time.
	282	*/
	283	s = splnet();
	284	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
	285	(void) tsleep((caddr_t)&so->so_timeo, 0,
	286	"nfscon", 2 * hz);
	287	if ((so->so_state & SS_ISCONNECTING) &&
	288	so->so_error == 0 && rep &&
	289	(error = nfs_sigintr(nmp, rep, rep->r_td)) != 0){
	290	so->so_state &= ~SS_ISCONNECTING;
	291	splx(s);
	292	goto bad;
	293	}
	294	}
	295	if (so->so_error) {
	296	error = so->so_error;
	297	so->so_error = 0;
	298	splx(s);
	299	goto bad;
	300	}
	301	splx(s);
	302	}
	303	so->so_rcv.sb_timeo = (5 * hz);
	304	so->so_snd.sb_timeo = (5 * hz);
	305
	306	/*
	307	* Get buffer reservation size from sysctl, but impose reasonable
	308	* limits.
	309	*/
	310	pktscale = nfs_bufpackets;
	311	if (pktscale < 2)
	312	pktscale = 2;
	313	if (pktscale > 64)
	314	pktscale = 64;
	315
	316	if (nmp->nm_sotype == SOCK_DGRAM) {
	317	sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale;
	318	rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
	319	NFS_MAXPKTHDR) * pktscale;
	320	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
	321	sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * pktscale;
	322	rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
	323	NFS_MAXPKTHDR) * pktscale;
	324	} else {
	325	if (nmp->nm_sotype != SOCK_STREAM)
	326	panic("nfscon sotype");
	327	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
	328	struct sockopt sopt;
	329	int val;
	330
	331	bzero(&sopt, sizeof sopt);
	332	sopt.sopt_level = SOL_SOCKET;
	333	sopt.sopt_name = SO_KEEPALIVE;
	334	sopt.sopt_val = &val;
	335	sopt.sopt_valsize = sizeof val;
	336	val = 1;
	337	sosetopt(so, &sopt);
	338	}
	339	if (so->so_proto->pr_protocol == IPPROTO_TCP) {
	340	struct sockopt sopt;
	341	int val;
	342
	343	bzero(&sopt, sizeof sopt);
	344	sopt.sopt_level = IPPROTO_TCP;
	345	sopt.sopt_name = TCP_NODELAY;
	346	sopt.sopt_val = &val;
	347	sopt.sopt_valsize = sizeof val;
	348	val = 1;
	349	sosetopt(so, &sopt);
	350	}
	351	sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
	352	sizeof (u_int32_t)) * pktscale;
	353	rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
	354	sizeof (u_int32_t)) * pktscale;
	355	}
	356	error = soreserve(so, sndreserve, rcvreserve,
	357	&td->td_proc->p_rlimit[RLIMIT_SBSIZE]);
	358	if (error)
	359	goto bad;
	360	so->so_rcv.sb_flags \|= SB_NOINTR;
	361	so->so_snd.sb_flags \|= SB_NOINTR;
	362
	363	/* Initialize other non-zero congestion variables */
	364	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
	365	nmp->nm_srtt[3] = (NFS_TIMEO << 3);
	366	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
	367	nmp->nm_sdrtt[3] = 0;
	368	nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
	369	nmp->nm_sent = 0;
	370	nmp->nm_timeouts = 0;
	371	return (0);
	372
	373	bad:
	374	nfs_disconnect(nmp);
	375	return (error);
	376	}
	377
	378	/*
	379	* Reconnect routine:
	380	* Called when a connection is broken on a reliable protocol.
	381	* - clean up the old socket
	382	* - nfs_connect() again
	383	* - set R_MUSTRESEND for all outstanding requests on mount point
	384	* If this fails the mount point is DEAD!
	385	* nb: Must be called with the nfs_sndlock() set on the mount point.
	386	*/
	387	static int
	388	nfs_reconnect(rep)
	389	struct nfsreq *rep;
	390	{
	391	struct nfsreq *rp;
	392	struct nfsmount *nmp = rep->r_nmp;
	393	int error;
	394
	395	nfs_disconnect(nmp);
	396	while ((error = nfs_connect(nmp, rep)) != 0) {
	397	if (error == EINTR \|\| error == ERESTART)
	398	return (EINTR);
	399	(void) tsleep((caddr_t)&lbolt, 0, "nfscon", 0);
	400	}
	401
	402	/*
	403	* Loop through outstanding request list and fix up all requests
	404	* on old socket.
	405	*/
	406	for (rp = nfs_reqq.tqh_first; rp != 0; rp = rp->r_chain.tqe_next) {
	407	if (rp->r_nmp == nmp)
	408	rp->r_flags \|= R_MUSTRESEND;
	409	}
	410	return (0);
	411	}
	412
	413	/*
	414	* NFS disconnect. Clean up and unlink.
	415	*/
	416	void
	417	nfs_disconnect(nmp)
	418	struct nfsmount *nmp;
	419	{
	420	struct socket *so;
	421
	422	if (nmp->nm_so) {
	423	so = nmp->nm_so;
	424	nmp->nm_so = (struct socket *)0;
	425	soshutdown(so, 2);
	426	soclose(so);
	427	}
	428	}
	429
	430	void
	431	nfs_safedisconnect(nmp)
	432	struct nfsmount *nmp;
	433	{
	434	struct nfsreq dummyreq;
	435
	436	bzero(&dummyreq, sizeof(dummyreq));
	437	dummyreq.r_nmp = nmp;
	438	dummyreq.r_td = NULL;
	439	nfs_rcvlock(&dummyreq);
	440	nfs_disconnect(nmp);
	441	nfs_rcvunlock(&dummyreq);
	442	}
	443
	444	/*
	445	* This is the nfs send routine. For connection based socket types, it
	446	* must be called with an nfs_sndlock() on the socket.
	447	* "rep == NULL" indicates that it has been called from a server.
	448	* For the client side:
	449	* - return EINTR if the RPC is terminated, 0 otherwise
	450	* - set R_MUSTRESEND if the send fails for any reason
	451	* - do any cleanup required by recoverable socket errors (?)
	452	* For the server side:
	453	* - return EINTR or ERESTART if interrupted by a signal
	454	* - return EPIPE if a connection is lost for connection based sockets (TCP...)
	455	* - do any cleanup required by recoverable socket errors (?)
	456	*/
	457	int
	458	nfs_send(so, nam, top, rep)
	459	struct socket *so;
	460	struct sockaddr *nam;
	461	struct mbuf *top;
	462	struct nfsreq *rep;
	463	{
	464	struct sockaddr *sendnam;
	465	int error, soflags, flags;
	466
	467	if (rep) {
	468	if (rep->r_flags & R_SOFTTERM) {
	469	m_freem(top);
	470	return (EINTR);
	471	}
	472	if ((so = rep->r_nmp->nm_so) == NULL) {
	473	rep->r_flags \|= R_MUSTRESEND;
	474	m_freem(top);
	475	return (0);
	476	}
	477	rep->r_flags &= ~R_MUSTRESEND;
	478	soflags = rep->r_nmp->nm_soflags;
	479	} else
	480	soflags = so->so_proto->pr_flags;
	481	if ((soflags & PR_CONNREQUIRED) \|\| (so->so_state & SS_ISCONNECTED))
	482	sendnam = (struct sockaddr *)0;
	483	else
	484	sendnam = nam;
	485	if (so->so_type == SOCK_SEQPACKET)
	486	flags = MSG_EOR;
	487	else
	488	flags = 0;
	489
	490	error = so_pru_sosend(so, sendnam, NULL, top, NULL, flags,
	491	curthread /XXX/);
	492	/*
	493	* ENOBUFS for dgram sockets is transient and non fatal.
	494	* No need to log, and no need to break a soft mount.
	495	*/
	496	if (error == ENOBUFS && so->so_type == SOCK_DGRAM) {
	497	error = 0;
	498	if (rep) /* do backoff retransmit on client */
	499	rep->r_flags \|= R_MUSTRESEND;
	500	}
	501
	502	if (error) {
	503	if (rep) {
	504	log(LOG_INFO, "nfs send error %d for server %s\n",error,
	505	rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
	506	/*
	507	* Deal with errors for the client side.
	508	*/
	509	if (rep->r_flags & R_SOFTTERM)
	510	error = EINTR;
	511	else
	512	rep->r_flags \|= R_MUSTRESEND;
	513	} else
	514	log(LOG_INFO, "nfsd send error %d\n", error);
	515
	516	/*
	517	* Handle any recoverable (soft) socket errors here. (?)
	518	*/
	519	if (error != EINTR && error != ERESTART &&
	520	error != EWOULDBLOCK && error != EPIPE)
	521	error = 0;
	522	}
	523	return (error);
	524	}
	525
	526	/*
	527	* Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
	528	* done by soreceive(), but for SOCK_STREAM we must deal with the Record
	529	* Mark and consolidate the data into a new mbuf list.
	530	* nb: Sometimes TCP passes the data up to soreceive() in long lists of
	531	* small mbufs.
	532	* For SOCK_STREAM we must be very careful to read an entire record once
	533	* we have read any of it, even if the system call has been interrupted.
	534	*/
	535	static int
	536	nfs_receive(struct nfsreq rep, struct sockaddr aname, struct mbuf *mp)
	537	{
	538	struct socket *so;
	539	struct uio auio;
	540	struct iovec aio;
	541	struct mbuf *m;
	542	struct mbuf *control;
	543	u_int32_t len;
	544	struct sockaddr **getnam;
	545	int error, sotype, rcvflg;
	546	struct thread td = curthread; / XXX */
	547
	548	/*
	549	* Set up arguments for soreceive()
	550	*/
	551	mp = (struct mbuf )0;
	552	aname = (struct sockaddr )0;
	553	sotype = rep->r_nmp->nm_sotype;
	554
	555	/*
	556	* For reliable protocols, lock against other senders/receivers
	557	* in case a reconnect is necessary.
	558	* For SOCK_STREAM, first get the Record Mark to find out how much
	559	* more there is to get.
	560	* We must lock the socket against other receivers
	561	* until we have an entire rpc request/reply.
	562	*/
	563	if (sotype != SOCK_DGRAM) {
	564	error = nfs_sndlock(rep);
	565	if (error)
	566	return (error);
	567	tryagain:
	568	/*
	569	* Check for fatal errors and resending request.
	570	*/
	571	/*
	572	* Ugh: If a reconnect attempt just happened, nm_so
	573	* would have changed. NULL indicates a failed
	574	* attempt that has essentially shut down this
	575	* mount point.
	576	*/
	577	if (rep->r_mrep \|\| (rep->r_flags & R_SOFTTERM)) {
	578	nfs_sndunlock(rep);
	579	return (EINTR);
	580	}
	581	so = rep->r_nmp->nm_so;
	582	if (!so) {
	583	error = nfs_reconnect(rep);
	584	if (error) {
	585	nfs_sndunlock(rep);
	586	return (error);
	587	}
	588	goto tryagain;
	589	}
	590	while (rep->r_flags & R_MUSTRESEND) {
	591	m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
	592	nfsstats.rpcretries++;
	593	error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
	594	if (error) {
	595	if (error == EINTR \|\| error == ERESTART \|\|
	596	(error = nfs_reconnect(rep)) != 0) {
	597	nfs_sndunlock(rep);
	598	return (error);
	599	}
	600	goto tryagain;
	601	}
	602	}
	603	nfs_sndunlock(rep);
	604	if (sotype == SOCK_STREAM) {
	605	aio.iov_base = (caddr_t) &len;
	606	aio.iov_len = sizeof(u_int32_t);
	607	auio.uio_iov = &aio;
	608	auio.uio_iovcnt = 1;
	609	auio.uio_segflg = UIO_SYSSPACE;
	610	auio.uio_rw = UIO_READ;
	611	auio.uio_offset = 0;
	612	auio.uio_resid = sizeof(u_int32_t);
	613	auio.uio_td = td;
	614	do {
	615	rcvflg = MSG_WAITALL;
	616	error = so_pru_soreceive(so, NULL, &auio, NULL,
	617	NULL, &rcvflg);
	618	if (error == EWOULDBLOCK && rep) {
	619	if (rep->r_flags & R_SOFTTERM)
	620	return (EINTR);
	621	}
	622	} while (error == EWOULDBLOCK);
	623	if (!error && auio.uio_resid > 0) {
	624	/*
	625	* Don't log a 0 byte receive; it means
	626	* that the socket has been closed, and
	627	* can happen during normal operation
	628	* (forcible unmount or Solaris server).
	629	*/
	630	if (auio.uio_resid != sizeof (u_int32_t))
	631	log(LOG_INFO,
	632	"short receive (%d/%d) from nfs server %s\n",
	633	(int)(sizeof(u_int32_t) - auio.uio_resid),
	634	(int)sizeof(u_int32_t),
	635	rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
	636	error = EPIPE;
	637	}
	638	if (error)
	639	goto errout;
	640	len = ntohl(len) & ~0x80000000;
	641	/*
	642	* This is SERIOUS! We are out of sync with the sender
	643	* and forcing a disconnect/reconnect is all I can do.
	644	*/
	645	if (len > NFS_MAXPACKET) {
	646	log(LOG_ERR, "%s (%d) from nfs server %s\n",
	647	"impossible packet length",
	648	len,
	649	rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
	650	error = EFBIG;
	651	goto errout;
	652	}
	653	auio.uio_resid = len;
	654	do {
	655	rcvflg = MSG_WAITALL;
	656	error = so_pru_soreceive(so, NULL, &auio, mp,
	657	NULL, &rcvflg);
	658	} while (error == EWOULDBLOCK \|\| error == EINTR \|\|
	659	error == ERESTART);
	660	if (!error && auio.uio_resid > 0) {
	661	if (len != auio.uio_resid)
	662	log(LOG_INFO,
	663	"short receive (%d/%d) from nfs server %s\n",
	664	len - auio.uio_resid, len,
	665	rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
	666	error = EPIPE;
	667	}
	668	} else {
	669	/*
	670	* NB: Since uio_resid is big, MSG_WAITALL is ignored
	671	* and soreceive() will return when it has either a
	672	* control msg or a data msg.
	673	* We have no use for control msg., but must grab them
	674	* and then throw them away so we know what is going
	675	* on.
	676	*/
	677	auio.uio_resid = len = 100000000; /* Anything Big */
	678	auio.uio_td = td;
	679	do {
	680	rcvflg = 0;
	681	error = so_pru_soreceive(so, NULL, &auio, mp,
	682	&control, &rcvflg);
	683	if (control)
	684	m_freem(control);
	685	if (error == EWOULDBLOCK && rep) {
	686	if (rep->r_flags & R_SOFTTERM)
	687	return (EINTR);
	688	}
	689	} while (error == EWOULDBLOCK \|\|
	690	(!error && *mp == NULL && control));
	691	if ((rcvflg & MSG_EOR) == 0)
	692	printf("Egad!!\n");
	693	if (!error && *mp == NULL)
	694	error = EPIPE;
	695	len -= auio.uio_resid;
	696	}
	697	errout:
	698	if (error && error != EINTR && error != ERESTART) {
	699	m_freem(*mp);
	700	mp = (struct mbuf )0;
	701	if (error != EPIPE)
	702	log(LOG_INFO,
	703	"receive error %d from nfs server %s\n",
	704	error,
	705	rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
	706	error = nfs_sndlock(rep);
	707	if (!error) {
	708	error = nfs_reconnect(rep);
	709	if (!error)
	710	goto tryagain;
	711	else
	712	nfs_sndunlock(rep);
	713	}
	714	}
	715	} else {
	716	if ((so = rep->r_nmp->nm_so) == NULL)
	717	return (EACCES);
	718	if (so->so_state & SS_ISCONNECTED)
	719	getnam = (struct sockaddr **)0;
	720	else
	721	getnam = aname;
	722	auio.uio_resid = len = 1000000;
	723	auio.uio_td = td;
	724	do {
	725	rcvflg = 0;
	726	error = so_pru_soreceive(so, getnam, &auio, mp, NULL,
	727	&rcvflg);
	728	if (error == EWOULDBLOCK &&
	729	(rep->r_flags & R_SOFTTERM))
	730	return (EINTR);
	731	} while (error == EWOULDBLOCK);
	732	len -= auio.uio_resid;
	733	}
	734	if (error) {
	735	m_freem(*mp);
	736	mp = (struct mbuf )0;
	737	}
	738	/*
	739	* Search for any mbufs that are not a multiple of 4 bytes long
	740	* or with m_data not longword aligned.
	741	* These could cause pointer alignment problems, so copy them to
	742	* well aligned mbufs.
	743	*/
	744	nfs_realign(mp, 5 * NFSX_UNSIGNED);
	745	return (error);
	746	}
	747
	748	/*
	749	* Implement receipt of reply on a socket.
	750	* We must search through the list of received datagrams matching them
	751	* with outstanding requests using the xid, until ours is found.
	752	*/
	753	/* ARGSUSED */
	754	int
	755	nfs_reply(myrep)
	756	struct nfsreq *myrep;
	757	{
	758	struct nfsreq *rep;
	759	struct nfsmount *nmp = myrep->r_nmp;
	760	int32_t t1;
	761	struct mbuf mrep, md;
	762	struct sockaddr *nam;
	763	u_int32_t rxid, *tl;
	764	caddr_t dpos, cp2;
	765	int error;
	766
	767	/*
	768	* Loop around until we get our own reply
	769	*/
	770	for (;;) {
	771	/*
	772	* Lock against other receivers so that I don't get stuck in
	773	* sbwait() after someone else has received my reply for me.
	774	* Also necessary for connection based protocols to avoid
	775	* race conditions during a reconnect.
	776	* If nfs_rcvlock() returns EALREADY, that means that
	777	* the reply has already been recieved by another
	778	* process and we can return immediately. In this
	779	* case, the lock is not taken to avoid races with
	780	* other processes.
	781	*/
	782	error = nfs_rcvlock(myrep);
	783	if (error == EALREADY)
	784	return (0);
	785	if (error)
	786	return (error);
	787	/*
	788	* Get the next Rpc reply off the socket
	789	*/
	790	error = nfs_receive(myrep, &nam, &mrep);
	791	nfs_rcvunlock(myrep);
	792	if (error) {
	793
	794	/*
	795	* Ignore routing errors on connectionless protocols??
	796	*/
	797	if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
	798	nmp->nm_so->so_error = 0;
	799	if (myrep->r_flags & R_GETONEREP)
	800	return (0);
	801	continue;
	802	}
	803	return (error);
	804	}
	805	if (nam)
	806	FREE(nam, M_SONAME);
	807
	808	/*
	809	* Get the xid and check that it is an rpc reply
	810	*/
	811	md = mrep;
	812	dpos = mtod(md, caddr_t);
	813	nfsm_dissect(tl, u_int32_t , 2NFSX_UNSIGNED);
	814	rxid = *tl++;
	815	if (*tl != rpc_reply) {
	816	#ifndef NFS_NOSERVER
	817	if (nmp->nm_flag & NFSMNT_NQNFS) {
	818	if (nqnfs_callback(nmp, mrep, md, dpos))
	819	nfsstats.rpcinvalid++;
	820	} else {
	821	nfsstats.rpcinvalid++;
	822	m_freem(mrep);
	823	}
	824	#else
	825	nfsstats.rpcinvalid++;
	826	m_freem(mrep);
	827	#endif
	828	nfsmout:
	829	if (myrep->r_flags & R_GETONEREP)
	830	return (0);
	831	continue;
	832	}
	833
	834	/*
	835	* Loop through the request list to match up the reply
	836	* Iff no match, just drop the datagram
	837	*/
	838	for (rep = nfs_reqq.tqh_first; rep != 0;
	839	rep = rep->r_chain.tqe_next) {
	840	if (rep->r_mrep == NULL && rxid == rep->r_xid) {
	841	/* Found it.. */
	842	rep->r_mrep = mrep;
	843	rep->r_md = md;
	844	rep->r_dpos = dpos;
	845	if (nfsrtton) {
	846	struct rttl *rt;
	847
	848	rt = &nfsrtt.rttl[nfsrtt.pos];
	849	rt->proc = rep->r_procnum;
	850	rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
	851	rt->sent = nmp->nm_sent;
	852	rt->cwnd = nmp->nm_cwnd;
	853	rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
	854	rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
	855	rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
	856	getmicrotime(&rt->tstamp);
	857	if (rep->r_flags & R_TIMING)
	858	rt->rtt = rep->r_rtt;
	859	else
	860	rt->rtt = 1000000;
	861	nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
	862	}
	863	/*
	864	* Update congestion window.
	865	* Do the additive increase of
	866	* one rpc/rtt.
	867	*/
	868	if (nmp->nm_cwnd <= nmp->nm_sent) {
	869	nmp->nm_cwnd +=
	870	(NFS_CWNDSCALE * NFS_CWNDSCALE +
	871	(nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
	872	if (nmp->nm_cwnd > NFS_MAXCWND)
	873	nmp->nm_cwnd = NFS_MAXCWND;
	874	}
	875	crit_enter(); /* nfs_timer interlock*/
	876	if (rep->r_flags & R_SENT) {
	877	rep->r_flags &= ~R_SENT;
	878	nmp->nm_sent -= NFS_CWNDSCALE;
	879	}
	880	crit_exit();
	881	/*
	882	* Update rtt using a gain of 0.125 on the mean
	883	* and a gain of 0.25 on the deviation.
	884	*/
	885	if (rep->r_flags & R_TIMING) {
	886	/*
	887	* Since the timer resolution of
	888	* NFS_HZ is so course, it can often
	889	* result in r_rtt == 0. Since
	890	* r_rtt == N means that the actual
	891	* rtt is between N+dt and N+2-dt ticks,
	892	* add 1.
	893	*/
	894	t1 = rep->r_rtt + 1;
	895	t1 -= (NFS_SRTT(rep) >> 3);
	896	NFS_SRTT(rep) += t1;
	897	if (t1 < 0)
	898	t1 = -t1;
	899	t1 -= (NFS_SDRTT(rep) >> 2);
	900	NFS_SDRTT(rep) += t1;
	901	}
	902	nmp->nm_timeouts = 0;
	903	break;
	904	}
	905	}
	906	/*
	907	* If not matched to a request, drop it.
	908	* If it's mine, get out.
	909	*/
	910	if (rep == 0) {
	911	nfsstats.rpcunexpected++;
	912	m_freem(mrep);
	913	} else if (rep == myrep) {
	914	if (rep->r_mrep == NULL)
	915	panic("nfsreply nil");
	916	return (0);
	917	}
	918	if (myrep->r_flags & R_GETONEREP)
	919	return (0);
	920	}
	921	}
	922
	923	/*
	924	* nfs_request - goes something like this
	925	* - fill in request struct
	926	* - links it into list
	927	* - calls nfs_send() for first transmit
	928	* - calls nfs_receive() to get reply
	929	* - break down rpc header and return with nfs reply pointed to
	930	* by mrep or error
	931	* nb: always frees up mreq mbuf list
	932	*/
	933	int
	934	nfs_request(vp, mrest, procnum, td, cred, mrp, mdp, dposp)
	935	struct vnode *vp;
	936	struct mbuf *mrest;
	937	int procnum;
	938	struct thread *td;
	939	struct ucred *cred;
	940	struct mbuf **mrp;
	941	struct mbuf **mdp;
	942	caddr_t *dposp;
	943	{
	944	struct mbuf mrep, m2;
	945	struct nfsreq *rep;
	946	u_int32_t *tl;
	947	int i;
	948	struct nfsmount *nmp;
	949	struct mbuf m, md, *mheadend;
	950	struct nfsnode *np;
	951	char nickv[RPCX_NICKVERF];
	952	time_t reqtime, waituntil;
	953	caddr_t dpos, cp2;
	954	int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
	955	int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
	956	int verf_len, verf_type;
	957	u_int32_t xid;
	958	u_quad_t frev;
	959	char auth_str, verf_str;
	960	NFSKERBKEY_T key; /* save session key */
	961
	962	/* Reject requests while attempting a forced unmount. */
	963	if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) {
	964	m_freem(mrest);
	965	return (ESTALE);
	966	}
	967	nmp = VFSTONFS(vp->v_mount);
	968	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
	969	rep->r_nmp = nmp;
	970	rep->r_vp = vp;
	971	rep->r_td = td;
	972	rep->r_procnum = procnum;
	973	i = 0;
	974	m = mrest;
	975	while (m) {
	976	i += m->m_len;
	977	m = m->m_next;
	978	}
	979	mrest_len = i;
	980
	981	/*
	982	* Get the RPC header with authorization.
	983	*/
	984	kerbauth:
	985	verf_str = auth_str = (char *)0;
	986	if (nmp->nm_flag & NFSMNT_KERB) {
	987	verf_str = nickv;
	988	verf_len = sizeof (nickv);
	989	auth_type = RPCAUTH_KERB4;
	990	bzero((caddr_t)key, sizeof (key));
	991	if (failed_auth \|\| nfs_getnickauth(nmp, cred, &auth_str,
	992	&auth_len, verf_str, verf_len)) {
	993	error = nfs_getauth(nmp, rep, cred, &auth_str,
	994	&auth_len, verf_str, &verf_len, key);
	995	if (error) {
	996	free((caddr_t)rep, M_NFSREQ);
	997	m_freem(mrest);
	998	return (error);
	999	}
	1000	}
	1001	} else {
	1002	auth_type = RPCAUTH_UNIX;
	1003	if (cred->cr_ngroups < 1)
	1004	panic("nfsreq nogrps");
	1005	auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
	1006	nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
	1007	5 * NFSX_UNSIGNED;
	1008	}
	1009	m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
	1010	auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
	1011	if (auth_str)
	1012	free(auth_str, M_TEMP);
	1013
	1014	/*
	1015	* For stream protocols, insert a Sun RPC Record Mark.
	1016	*/
	1017	if (nmp->nm_sotype == SOCK_STREAM) {
	1018	M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
	1019	if (m == NULL)
	1020	return (ENOBUFS);
	1021	mtod(m, u_int32_t ) = htonl(0x80000000 \|
	1022	(m->m_pkthdr.len - NFSX_UNSIGNED));
	1023	}
	1024	rep->r_mreq = m;
	1025	rep->r_xid = xid;
	1026	tryagain:
	1027	if (nmp->nm_flag & NFSMNT_SOFT)
	1028	rep->r_retry = nmp->nm_retry;
	1029	else
	1030	rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
	1031	rep->r_rtt = rep->r_rexmit = 0;
	1032	if (proct[procnum] > 0)
	1033	rep->r_flags = R_TIMING \| R_MASKTIMER;
	1034	else
	1035	rep->r_flags = R_MASKTIMER;
	1036	rep->r_mrep = NULL;
	1037
	1038	/*
	1039	* Do the client side RPC.
	1040	*/
	1041	nfsstats.rpcrequests++;
	1042
	1043	/*
	1044	* Chain request into list of outstanding requests. Be sure
	1045	* to put it LAST so timer finds oldest requests first. Note
	1046	* that R_MASKTIMER is set at the moment to prevent any timer
	1047	* action on this request while we are still doing processing on
	1048	* it below. splsoftclock() primarily protects nm_sent. Note
	1049	* that we may block in this code so there is no atomicity guarantee.
	1050	*/
	1051	s = splsoftclock();
	1052	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
	1053
	1054	/* Get send time for nqnfs */
	1055	reqtime = time_second;
	1056
	1057	/*
	1058	* If backing off another request or avoiding congestion, don't
	1059	* send this one now but let timer do it. If not timing a request,
	1060	* do it now.
	1061	*/
	1062	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM \|\|
	1063	(nmp->nm_flag & NFSMNT_DUMBTIMR) \|\|
	1064	nmp->nm_sent < nmp->nm_cwnd)) {
	1065	if (nmp->nm_soflags & PR_CONNREQUIRED)
	1066	error = nfs_sndlock(rep);
	1067	if (!error) {
	1068	m2 = m_copym(m, 0, M_COPYALL, M_WAIT);
	1069	error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep);
	1070	if (nmp->nm_soflags & PR_CONNREQUIRED)
	1071	nfs_sndunlock(rep);
	1072	}
	1073	if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
	1074	nmp->nm_sent += NFS_CWNDSCALE;
	1075	rep->r_flags \|= R_SENT;
	1076	}
	1077	} else {
	1078	rep->r_rtt = -1;
	1079	}
	1080
	1081	/*
	1082	* Let the timer do what it will with the request, then
	1083	* wait for the reply from our send or the timer's.
	1084	*/
	1085	rep->r_flags &= ~R_MASKTIMER;
	1086	splx(s);
	1087	if (!error \|\| error == EPIPE)
	1088	error = nfs_reply(rep);
	1089
	1090	/*
	1091	* RPC done, unlink the request.
	1092	*/
	1093	s = splsoftclock();
	1094	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
	1095
	1096	/*
	1097	* Decrement the outstanding request count.
	1098	*/
	1099	if (rep->r_flags & R_SENT) {
	1100	rep->r_flags &= ~R_SENT;
	1101	nmp->nm_sent -= NFS_CWNDSCALE;
	1102	}
	1103	splx(s);
	1104
	1105	/*
	1106	* If there was a successful reply and a tprintf msg.
	1107	* tprintf a response.
	1108	*/
	1109	if (!error && (rep->r_flags & R_TPRINTFMSG))
	1110	nfs_msg(rep->r_td, nmp->nm_mountp->mnt_stat.f_mntfromname,
	1111	"is alive again");
	1112	mrep = rep->r_mrep;
	1113	md = rep->r_md;
	1114	dpos = rep->r_dpos;
	1115	if (error) {
	1116	m_freem(rep->r_mreq);
	1117	free((caddr_t)rep, M_NFSREQ);
	1118	return (error);
	1119	}
	1120
	1121	/*
	1122	* break down the rpc header and check if ok
	1123	*/
	1124	nfsm_dissect(tl, u_int32_t , 3 NFSX_UNSIGNED);
	1125	if (*tl++ == rpc_msgdenied) {
	1126	if (*tl == rpc_mismatch)
	1127	error = EOPNOTSUPP;
	1128	else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
	1129	if (!failed_auth) {
	1130	failed_auth++;
	1131	mheadend->m_next = (struct mbuf *)0;
	1132	m_freem(mrep);
	1133	m_freem(rep->r_mreq);
	1134	goto kerbauth;
	1135	} else
	1136	error = EAUTH;
	1137	} else
	1138	error = EACCES;
	1139	m_freem(mrep);
	1140	m_freem(rep->r_mreq);
	1141	free((caddr_t)rep, M_NFSREQ);
	1142	return (error);
	1143	}
	1144
	1145	/*
	1146	* Grab any Kerberos verifier, otherwise just throw it away.
	1147	*/
	1148	verf_type = fxdr_unsigned(int, *tl++);
	1149	i = fxdr_unsigned(int32_t, *tl);
	1150	if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
	1151	error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
	1152	if (error)
	1153	goto nfsmout;
	1154	} else if (i > 0)
	1155	nfsm_adv(nfsm_rndup(i));
	1156	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
	1157	/* 0 == ok */
	1158	if (*tl == 0) {
	1159	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
	1160	if (*tl != 0) {
	1161	error = fxdr_unsigned(int, *tl);
	1162	if ((nmp->nm_flag & NFSMNT_NFSV3) &&
	1163	error == NFSERR_TRYLATER) {
	1164	m_freem(mrep);
	1165	error = 0;
	1166	waituntil = time_second + trylater_delay;
	1167	while (time_second < waituntil)
	1168	(void) tsleep((caddr_t)&lbolt,
	1169	0, "nqnfstry", 0);
	1170	trylater_delay *= nfs_backoff[trylater_cnt];
	1171	if (trylater_cnt < 7)
	1172	trylater_cnt++;
	1173	goto tryagain;
	1174	}
	1175
	1176	/*
	1177	* If the File Handle was stale, invalidate the
	1178	* lookup cache, just in case.
	1179	*/
	1180	if (error == ESTALE)
	1181	cache_purge(vp);
	1182	if (nmp->nm_flag & NFSMNT_NFSV3) {
	1183	*mrp = mrep;
	1184	*mdp = md;
	1185	*dposp = dpos;
	1186	error \|= NFSERR_RETERR;
	1187	} else
	1188	m_freem(mrep);
	1189	m_freem(rep->r_mreq);
	1190	free((caddr_t)rep, M_NFSREQ);
	1191	return (error);
	1192	}
	1193
	1194	/*
	1195	* For nqnfs, get any lease in reply
	1196	*/
	1197	if (nmp->nm_flag & NFSMNT_NQNFS) {
	1198	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
	1199	if (*tl) {
	1200	np = VTONFS(vp);
	1201	nqlflag = fxdr_unsigned(int, *tl);
	1202	nfsm_dissect(tl, u_int32_t , 4NFSX_UNSIGNED);
	1203	cachable = fxdr_unsigned(int, *tl++);
	1204	reqtime += fxdr_unsigned(int, *tl++);
	1205	if (reqtime > time_second) {
	1206	frev = fxdr_hyper(tl);
	1207	nqnfs_clientlease(nmp, np, nqlflag,
	1208	cachable, reqtime, frev);
	1209	}
	1210	}
	1211	}
	1212	*mrp = mrep;
	1213	*mdp = md;
	1214	*dposp = dpos;
	1215	m_freem(rep->r_mreq);
	1216	FREE((caddr_t)rep, M_NFSREQ);
	1217	return (0);
	1218	}
	1219	m_freem(mrep);
	1220	error = EPROTONOSUPPORT;
	1221	nfsmout:
	1222	m_freem(rep->r_mreq);
	1223	free((caddr_t)rep, M_NFSREQ);
	1224	return (error);
	1225	}
	1226
	1227	#ifndef NFS_NOSERVER
	1228	/*
	1229	* Generate the rpc reply header
	1230	* siz arg. is used to decide if adding a cluster is worthwhile
	1231	*/
	1232	int
	1233	nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
	1234	int siz;
	1235	struct nfsrv_descript *nd;
	1236	struct nfssvc_sock *slp;
	1237	int err;
	1238	int cache;
	1239	u_quad_t *frev;
	1240	struct mbuf **mrq;
	1241	struct mbuf **mbp;
	1242	caddr_t *bposp;
	1243	{
	1244	u_int32_t *tl;
	1245	struct mbuf *mreq;
	1246	caddr_t bpos;
	1247	struct mbuf mb, mb2;
	1248
	1249	MGETHDR(mreq, M_WAIT, MT_DATA);
	1250	mb = mreq;
	1251	/*
	1252	* If this is a big reply, use a cluster else
	1253	* try and leave leading space for the lower level headers.
	1254	*/
	1255	siz += RPC_REPLYSIZ;
	1256	if ((max_hdr + siz) >= MINCLSIZE) {
	1257	MCLGET(mreq, M_WAIT);
	1258	} else
	1259	mreq->m_data += max_hdr;
	1260	tl = mtod(mreq, u_int32_t *);
	1261	mreq->m_len = 6 * NFSX_UNSIGNED;
	1262	bpos = ((caddr_t)tl) + mreq->m_len;
	1263	*tl++ = txdr_unsigned(nd->nd_retxid);
	1264	*tl++ = rpc_reply;
	1265	if (err == ERPCMISMATCH \|\| (err & NFSERR_AUTHERR)) {
	1266	*tl++ = rpc_msgdenied;
	1267	if (err & NFSERR_AUTHERR) {
	1268	*tl++ = rpc_autherr;
	1269	*tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
	1270	mreq->m_len -= NFSX_UNSIGNED;
	1271	bpos -= NFSX_UNSIGNED;
	1272	} else {
	1273	*tl++ = rpc_mismatch;
	1274	*tl++ = txdr_unsigned(RPC_VER2);
	1275	*tl = txdr_unsigned(RPC_VER2);
	1276	}
	1277	} else {
	1278	*tl++ = rpc_msgaccepted;
	1279
	1280	/*
	1281	* For Kerberos authentication, we must send the nickname
	1282	* verifier back, otherwise just RPCAUTH_NULL.
	1283	*/
	1284	if (nd->nd_flag & ND_KERBFULL) {
	1285	struct nfsuid *nuidp;
	1286	struct timeval ktvin, ktvout;
	1287
	1288	for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first;
	1289	nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
	1290	if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
	1291	(!nd->nd_nam2 \|\| netaddr_match(NU_NETFAM(nuidp),
	1292	&nuidp->nu_haddr, nd->nd_nam2)))
	1293	break;
	1294	}
	1295	if (nuidp) {
	1296	ktvin.tv_sec =
	1297	txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
	1298	ktvin.tv_usec =
	1299	txdr_unsigned(nuidp->nu_timestamp.tv_usec);
	1300
	1301	/*
	1302	* Encrypt the timestamp in ecb mode using the
	1303	* session key.
	1304	*/
	1305	#ifdef NFSKERB
	1306	XXX
	1307	#endif
	1308
	1309	*tl++ = rpc_auth_kerb;
	1310	tl++ = txdr_unsigned(3 NFSX_UNSIGNED);
	1311	*tl = ktvout.tv_sec;
	1312	nfsm_build(tl, u_int32_t , 3 NFSX_UNSIGNED);
	1313	*tl++ = ktvout.tv_usec;
	1314	*tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
	1315	} else {
	1316	*tl++ = 0;
	1317	*tl++ = 0;
	1318	}
	1319	} else {
	1320	*tl++ = 0;
	1321	*tl++ = 0;
	1322	}
	1323	switch (err) {
	1324	case EPROGUNAVAIL:
	1325	*tl = txdr_unsigned(RPC_PROGUNAVAIL);
	1326	break;
	1327	case EPROGMISMATCH:
	1328	*tl = txdr_unsigned(RPC_PROGMISMATCH);
	1329	nfsm_build(tl, u_int32_t , 2 NFSX_UNSIGNED);
	1330	if (nd->nd_flag & ND_NQNFS) {
	1331	*tl++ = txdr_unsigned(3);
	1332	*tl = txdr_unsigned(3);
	1333	} else {
	1334	*tl++ = txdr_unsigned(2);
	1335	*tl = txdr_unsigned(3);
	1336	}
	1337	break;
	1338	case EPROCUNAVAIL:
	1339	*tl = txdr_unsigned(RPC_PROCUNAVAIL);
	1340	break;
	1341	case EBADRPC:
	1342	*tl = txdr_unsigned(RPC_GARBAGE);
	1343	break;
	1344	default:
	1345	*tl = 0;
	1346	if (err != NFSERR_RETVOID) {
	1347	nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
	1348	if (err)
	1349	*tl = txdr_unsigned(nfsrv_errmap(nd, err));
	1350	else
	1351	*tl = 0;
	1352	}
	1353	break;
	1354	};
	1355	}
	1356
	1357	/*
	1358	* For nqnfs, piggyback lease as requested.
	1359	*/
	1360	if ((nd->nd_flag & ND_NQNFS) && err == 0) {
	1361	if (nd->nd_flag & ND_LEASE) {
	1362	nfsm_build(tl, u_int32_t , 5 NFSX_UNSIGNED);
	1363	*tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
	1364	*tl++ = txdr_unsigned(cache);
	1365	*tl++ = txdr_unsigned(nd->nd_duration);
	1366	txdr_hyper(*frev, tl);
	1367	} else {
	1368	nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
	1369	*tl = 0;
	1370	}
	1371	}
	1372	if (mrq != NULL)
	1373	*mrq = mreq;
	1374	*mbp = mb;
	1375	*bposp = bpos;
	1376	if (err != 0 && err != NFSERR_RETVOID)
	1377	nfsstats.srvrpc_errs++;
	1378	return (0);
	1379	}
	1380
	1381
	1382	#endif /* NFS_NOSERVER */
	1383	/*
	1384	* Nfs timer routine
	1385	* Scan the nfsreq list and retranmit any requests that have timed out
	1386	* To avoid retransmission attempts on STREAM sockets (in the future) make
	1387	* sure to set the r_retry field to 0 (implies nm_retry == 0).
	1388	*/
	1389	void
	1390	nfs_timer(arg)
	1391	void arg; / never used */
	1392	{
	1393	struct nfsreq *rep;
	1394	struct mbuf *m;
	1395	struct socket *so;
	1396	struct nfsmount *nmp;
	1397	int timeo;
	1398	int s, error;
	1399	#ifndef NFS_NOSERVER
	1400	static long lasttime = 0;
	1401	struct nfssvc_sock *slp;
	1402	u_quad_t cur_usec;
	1403	#endif /* NFS_NOSERVER */
	1404	struct thread td = &thread0; / XXX for credentials, will break if sleep */
	1405
	1406	s = splnet();
	1407	for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) {
	1408	nmp = rep->r_nmp;
	1409	if (rep->r_mrep \|\| (rep->r_flags & (R_SOFTTERM\|R_MASKTIMER)))
	1410	continue;
	1411	if (nfs_sigintr(nmp, rep, rep->r_td)) {
	1412	nfs_softterm(rep);
	1413	continue;
	1414	}
	1415	if (rep->r_rtt >= 0) {
	1416	rep->r_rtt++;
	1417	if (nmp->nm_flag & NFSMNT_DUMBTIMR)
	1418	timeo = nmp->nm_timeo;
	1419	else
	1420	timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
	1421	if (nmp->nm_timeouts > 0)
	1422	timeo *= nfs_backoff[nmp->nm_timeouts - 1];
	1423	if (rep->r_rtt <= timeo)
	1424	continue;
	1425	if (nmp->nm_timeouts < 8)
	1426	nmp->nm_timeouts++;
	1427	}
	1428	/*
	1429	* Check for server not responding
	1430	*/
	1431	if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
	1432	rep->r_rexmit > nmp->nm_deadthresh) {
	1433	nfs_msg(rep->r_td,
	1434	nmp->nm_mountp->mnt_stat.f_mntfromname,
	1435	"not responding");
	1436	rep->r_flags \|= R_TPRINTFMSG;
	1437	}
	1438	if (rep->r_rexmit >= rep->r_retry) { /* too many */
	1439	nfsstats.rpctimeouts++;
	1440	nfs_softterm(rep);
	1441	continue;
	1442	}
	1443	if (nmp->nm_sotype != SOCK_DGRAM) {
	1444	if (++rep->r_rexmit > NFS_MAXREXMIT)
	1445	rep->r_rexmit = NFS_MAXREXMIT;
	1446	continue;
	1447	}
	1448	if ((so = nmp->nm_so) == NULL)
	1449	continue;
	1450
	1451	/*
	1452	* If there is enough space and the window allows..
	1453	* Resend it
	1454	* Set r_rtt to -1 in case we fail to send it now.
	1455	*/
	1456	rep->r_rtt = -1;
	1457	if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
	1458	((nmp->nm_flag & NFSMNT_DUMBTIMR) \|\|
	1459	(rep->r_flags & R_SENT) \|\|
	1460	nmp->nm_sent < nmp->nm_cwnd) &&
	1461	(m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
	1462	if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
	1463	error = so_pru_send(so, 0, m, (struct sockaddr *)0,
	1464	(struct mbuf *)0, td);
	1465	else
	1466	error = so_pru_send(so, 0, m, nmp->nm_nam,
	1467	(struct mbuf *)0, td);
	1468	if (error) {
	1469	if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
	1470	so->so_error = 0;
	1471	} else {
	1472	/*
	1473	* Iff first send, start timing
	1474	* else turn timing off, backoff timer
	1475	* and divide congestion window by 2.
	1476	*/
	1477	if (rep->r_flags & R_SENT) {
	1478	rep->r_flags &= ~R_TIMING;
	1479	if (++rep->r_rexmit > NFS_MAXREXMIT)
	1480	rep->r_rexmit = NFS_MAXREXMIT;
	1481	nmp->nm_cwnd >>= 1;
	1482	if (nmp->nm_cwnd < NFS_CWNDSCALE)
	1483	nmp->nm_cwnd = NFS_CWNDSCALE;
	1484	nfsstats.rpcretries++;
	1485	} else {
	1486	rep->r_flags \|= R_SENT;
	1487	nmp->nm_sent += NFS_CWNDSCALE;
	1488	}
	1489	rep->r_rtt = 0;
	1490	}
	1491	}
	1492	}
	1493	#ifndef NFS_NOSERVER
	1494	/*
	1495	* Call the nqnfs server timer once a second to handle leases.
	1496	*/
	1497	if (lasttime != time_second) {
	1498	lasttime = time_second;
	1499	nqnfs_serverd();
	1500	}
	1501
	1502	/*
	1503	* Scan the write gathering queues for writes that need to be
	1504	* completed now.
	1505	*/
	1506	cur_usec = nfs_curusec();
	1507	for (slp = nfssvc_sockhead.tqh_first; slp != 0;
	1508	slp = slp->ns_chain.tqe_next) {
	1509	if (slp->ns_tq.lh_first && slp->ns_tq.lh_first->nd_time<=cur_usec)
	1510	nfsrv_wakenfsd(slp);
	1511	}
	1512	#endif /* NFS_NOSERVER */
	1513	splx(s);
	1514	nfs_timer_handle = timeout(nfs_timer, (void *)0, nfs_ticks);
	1515	}
	1516
	1517	/*
	1518	* Mark all of an nfs mount's outstanding requests with R_SOFTTERM and
	1519	* wait for all requests to complete. This is used by forced unmounts
	1520	* to terminate any outstanding RPCs.
	1521	*/
	1522	int
	1523	nfs_nmcancelreqs(nmp)
	1524	struct nfsmount *nmp;
	1525	{
	1526	struct nfsreq *req;
	1527	int i, s1, s2;
	1528
	1529	s1 = splnet();
	1530	s2 = splsoftclock();
	1531	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
	1532	if (nmp != req->r_nmp \|\| req->r_mrep != NULL \|\|
	1533	(req->r_flags & R_SOFTTERM))
	1534	continue;
	1535	nfs_softterm(req);
	1536	}
	1537	splx(s2);
	1538	splx(s1);
	1539
	1540	for (i = 0; i < 30; i++) {
	1541	int s = splnet();
	1542	TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
	1543	if (nmp == req->r_nmp)
	1544	break;
	1545	}
	1546	splx(s);
	1547	if (req == NULL)
	1548	return (0);
	1549	tsleep(&lbolt, 0, "nfscancel", 0);
	1550	}
	1551	return (EBUSY);
	1552	}
	1553
	1554	/*
	1555	* Flag a request as being about to terminate (due to NFSMNT_INT/NFSMNT_SOFT).
	1556	* The nm_send count is decremented now to avoid deadlocks when the process in
	1557	* soreceive() hasn't yet managed to send its own request.
	1558	*
	1559	* This routine must be called at splsoftclock() to protect r_flags and
	1560	* nm_sent.
	1561	*/
	1562
	1563	static void
	1564	nfs_softterm(rep)
	1565	struct nfsreq *rep;
	1566	{
	1567	rep->r_flags \|= R_SOFTTERM;
	1568
	1569	if (rep->r_flags & R_SENT) {
	1570	rep->r_nmp->nm_sent -= NFS_CWNDSCALE;
	1571	rep->r_flags &= ~R_SENT;
	1572	}
	1573	}
	1574
	1575	/*
	1576	* Test for a termination condition pending on the process.
	1577	* This is used for NFSMNT_INT mounts.
	1578	*/
	1579	int
	1580	nfs_sigintr(struct nfsmount nmp, struct nfsreq rep, struct thread *td)
	1581	{
	1582	sigset_t tmpset;
	1583	struct proc *p;
	1584
	1585	if (rep && (rep->r_flags & R_SOFTTERM))
	1586	return (EINTR);
	1587	/* Terminate all requests while attempting a forced unmount. */
	1588	if (nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF)
	1589	return (EINTR);
	1590	if (!(nmp->nm_flag & NFSMNT_INT))
	1591	return (0);
	1592	/* td might be NULL YYY */
	1593	if (td == NULL \|\| (p = td->td_proc) == NULL)
	1594	return (0);
	1595
	1596	tmpset = p->p_siglist;
	1597	SIGSETNAND(tmpset, p->p_sigmask);
	1598	SIGSETNAND(tmpset, p->p_sigignore);
	1599	if (SIGNOTEMPTY(p->p_siglist) && NFSINT_SIGMASK(tmpset))
	1600	return (EINTR);
	1601
	1602	return (0);
	1603	}
	1604
	1605	/*
	1606	* Lock a socket against others.
	1607	* Necessary for STREAM sockets to ensure you get an entire rpc request/reply
	1608	* and also to avoid race conditions between the processes with nfs requests
	1609	* in progress when a reconnect is necessary.
	1610	*/
	1611	int
	1612	nfs_sndlock(struct nfsreq *rep)
	1613	{
	1614	int *statep = &rep->r_nmp->nm_state;
	1615	struct thread *td;
	1616	int slptimeo;
	1617	int slpflag;
	1618	int error;
	1619
	1620	slpflag = 0;
	1621	slptimeo = 0;
	1622	td = rep->r_td;
	1623	if (rep->r_nmp->nm_flag & NFSMNT_INT)
	1624	slpflag = PCATCH;
	1625
	1626	error = 0;
	1627	crit_enter();
	1628	while (*statep & NFSSTA_SNDLOCK) {
	1629	*statep \|= NFSSTA_WANTSND;
	1630	if (nfs_sigintr(rep->r_nmp, rep, td)) {
	1631	error = EINTR;
	1632	break;
	1633	}
	1634	tsleep((caddr_t)statep, slpflag, "nfsndlck", slptimeo);
	1635	if (slpflag == PCATCH) {
	1636	slpflag = 0;
	1637	slptimeo = 2 * hz;
	1638	}
	1639	}
	1640	/* Always fail if our request has been cancelled. */
	1641	if ((rep->r_flags & R_SOFTTERM))
	1642	error = EINTR;
	1643	if (error == 0)
	1644	*statep \|= NFSSTA_SNDLOCK;
	1645	crit_exit();
	1646	return (error);
	1647	}
	1648
	1649	/*
	1650	* Unlock the stream socket for others.
	1651	*/
	1652	void
	1653	nfs_sndunlock(rep)
	1654	struct nfsreq *rep;
	1655	{
	1656	int *statep = &rep->r_nmp->nm_state;
	1657
	1658	if ((*statep & NFSSTA_SNDLOCK) == 0)
	1659	panic("nfs sndunlock");
	1660	crit_enter();
	1661	*statep &= ~NFSSTA_SNDLOCK;
	1662	if (*statep & NFSSTA_WANTSND) {
	1663	*statep &= ~NFSSTA_WANTSND;
	1664	wakeup((caddr_t)statep);
	1665	}
	1666	crit_exit();
	1667	}
	1668
	1669	static int
	1670	nfs_rcvlock(rep)
	1671	struct nfsreq *rep;
	1672	{
	1673	int *statep = &rep->r_nmp->nm_state;
	1674	int slpflag;
	1675	int slptimeo;
	1676	int error;
	1677
	1678	/*
	1679	* Unconditionally check for completion in case another nfsiod
	1680	* get the packet while the caller was blocked, before the caller
	1681	* called us. Packet reception is handled by mainline code which
	1682	* is protected by the BGL at the moment.
	1683	*
	1684	* We do not strictly need the second check just before the
	1685	* tsleep(), but it's good defensive programming.
	1686	*/
	1687	if (rep->r_mrep != NULL)
	1688	return (EALREADY);
	1689
	1690	if (rep->r_nmp->nm_flag & NFSMNT_INT)
	1691	slpflag = PCATCH;
	1692	else
	1693	slpflag = 0;
	1694	slptimeo = 0;
	1695	error = 0;
	1696	crit_enter();
	1697	while (*statep & NFSSTA_RCVLOCK) {
	1698	if (nfs_sigintr(rep->r_nmp, rep, rep->r_td)) {
	1699	error = EINTR;
	1700	break;
	1701	}
	1702	if (rep->r_mrep != NULL) {
	1703	error = EALREADY;
	1704	break;
	1705	}
	1706	*statep \|= NFSSTA_WANTRCV;
	1707	tsleep((caddr_t)statep, slpflag, "nfsrcvlk", slptimeo);
	1708	/*
	1709	* If our reply was recieved while we were sleeping,
	1710	* then just return without taking the lock to avoid a
	1711	* situation where a single iod could 'capture' the
	1712	* recieve lock.
	1713	*/
	1714	if (rep->r_mrep != NULL) {
	1715	error = EALREADY;
	1716	break;
	1717	}
	1718	if (slpflag == PCATCH) {
	1719	slpflag = 0;
	1720	slptimeo = 2 * hz;
	1721	}
	1722	}
	1723	if (error == 0) {
	1724	*statep \|= NFSSTA_RCVLOCK;
	1725	rep->r_nmp->nm_rcvlock_td = curthread; /* DEBUGGING */
	1726	}
	1727	crit_exit();
	1728	return (error);
	1729	}
	1730
	1731	/*
	1732	* Unlock the stream socket for others.
	1733	*/
	1734	static void
	1735	nfs_rcvunlock(rep)
	1736	struct nfsreq *rep;
	1737	{
	1738	int *statep = &rep->r_nmp->nm_state;
	1739
	1740	if ((*statep & NFSSTA_RCVLOCK) == 0)
	1741	panic("nfs rcvunlock");
	1742	crit_enter();
	1743	rep->r_nmp->nm_rcvlock_td = (void )-1; / DEBUGGING */
	1744	*statep &= ~NFSSTA_RCVLOCK;
	1745	if (*statep & NFSSTA_WANTRCV) {
	1746	*statep &= ~NFSSTA_WANTRCV;
	1747	wakeup((caddr_t)statep);
	1748	}
	1749	crit_exit();
	1750	}
	1751
	1752	/*
	1753	* nfs_realign:
	1754	*
	1755	* Check for badly aligned mbuf data and realign by copying the unaligned
	1756	* portion of the data into a new mbuf chain and freeing the portions
	1757	* of the old chain that were replaced.
	1758	*
	1759	* We cannot simply realign the data within the existing mbuf chain
	1760	* because the underlying buffers may contain other rpc commands and
	1761	* we cannot afford to overwrite them.
	1762	*
	1763	* We would prefer to avoid this situation entirely. The situation does
	1764	* not occur with NFS/UDP and is supposed to only occassionally occur
	1765	* with TCP. Use vfs.nfs.realign_count and realign_test to check this.
	1766	*/
	1767	static void
	1768	nfs_realign(pm, hsiz)
	1769	struct mbuf **pm;
	1770	int hsiz;
	1771	{
	1772	struct mbuf *m;
	1773	struct mbuf *n = NULL;
	1774	int off = 0;
	1775
	1776	++nfs_realign_test;
	1777
	1778	while ((m = *pm) != NULL) {
	1779	if ((m->m_len & 0x3) \|\| (mtod(m, intptr_t) & 0x3)) {
	1780	MGET(n, M_WAIT, MT_DATA);
	1781	if (m->m_len >= MINCLSIZE) {
	1782	MCLGET(n, M_WAIT);
	1783	}
	1784	n->m_len = 0;
	1785	break;
	1786	}
	1787	pm = &m->m_next;
	1788	}
	1789
	1790	/*
	1791	* If n is non-NULL, loop on m copying data, then replace the
	1792	* portion of the chain that had to be realigned.
	1793	*/
	1794	if (n != NULL) {
	1795	++nfs_realign_count;
	1796	while (m) {
	1797	m_copyback(n, off, m->m_len, mtod(m, caddr_t));
	1798	off += m->m_len;
	1799	m = m->m_next;
	1800	}
	1801	m_freem(*pm);
	1802	*pm = n;
	1803	}
	1804	}
	1805
	1806	#ifndef NFS_NOSERVER
	1807
	1808	/*
	1809	* Parse an RPC request
	1810	* - verify it
	1811	* - fill in the cred struct.
	1812	*/
	1813	int
	1814	nfs_getreq(nd, nfsd, has_header)
	1815	struct nfsrv_descript *nd;
	1816	struct nfsd *nfsd;
	1817	int has_header;
	1818	{
	1819	int len, i;
	1820	u_int32_t *tl;
	1821	int32_t t1;
	1822	struct uio uio;
	1823	struct iovec iov;
	1824	caddr_t dpos, cp2, cp;
	1825	u_int32_t nfsvers, auth_type;
	1826	uid_t nickuid;
	1827	int error = 0, nqnfs = 0, ticklen;
	1828	struct mbuf mrep, md;
	1829	struct nfsuid *nuidp;
	1830	struct timeval tvin, tvout;
	1831	#if 0 /* until encrypted keys are implemented */
	1832	NFSKERBKEYSCHED_T keys; /* stores key schedule */
	1833	#endif
	1834
	1835	mrep = nd->nd_mrep;
	1836	md = nd->nd_md;
	1837	dpos = nd->nd_dpos;
	1838	if (has_header) {
	1839	nfsm_dissect(tl, u_int32_t , 10 NFSX_UNSIGNED);
	1840	nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++);
	1841	if (*tl++ != rpc_call) {
	1842	m_freem(mrep);
	1843	return (EBADRPC);
	1844	}
	1845	} else
	1846	nfsm_dissect(tl, u_int32_t , 8 NFSX_UNSIGNED);
	1847	nd->nd_repstat = 0;
	1848	nd->nd_flag = 0;
	1849	if (*tl++ != rpc_vers) {
	1850	nd->nd_repstat = ERPCMISMATCH;
	1851	nd->nd_procnum = NFSPROC_NOOP;
	1852	return (0);
	1853	}
	1854	if (*tl != nfs_prog) {
	1855	if (*tl == nqnfs_prog)
	1856	nqnfs++;
	1857	else {
	1858	nd->nd_repstat = EPROGUNAVAIL;
	1859	nd->nd_procnum = NFSPROC_NOOP;
	1860	return (0);
	1861	}
	1862	}
	1863	tl++;
	1864	nfsvers = fxdr_unsigned(u_int32_t, *tl++);
	1865	if (((nfsvers < NFS_VER2 \|\| nfsvers > NFS_VER3) && !nqnfs) \|\|
	1866	(nfsvers != NQNFS_VER3 && nqnfs)) {
	1867	nd->nd_repstat = EPROGMISMATCH;
	1868	nd->nd_procnum = NFSPROC_NOOP;
	1869	return (0);
	1870	}
	1871	if (nqnfs)
	1872	nd->nd_flag = (ND_NFSV3 \| ND_NQNFS);
	1873	else if (nfsvers == NFS_VER3)
	1874	nd->nd_flag = ND_NFSV3;
	1875	nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++);
	1876	if (nd->nd_procnum == NFSPROC_NULL)
	1877	return (0);
	1878	if (nd->nd_procnum >= NFS_NPROCS \|\|
	1879	(!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) \|\|
	1880	(!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
	1881	nd->nd_repstat = EPROCUNAVAIL;
	1882	nd->nd_procnum = NFSPROC_NOOP;
	1883	return (0);
	1884	}
	1885	if ((nd->nd_flag & ND_NFSV3) == 0)
	1886	nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	1887	auth_type = *tl++;
	1888	len = fxdr_unsigned(int, *tl++);
	1889	if (len < 0 \|\| len > RPCAUTH_MAXSIZ) {
	1890	m_freem(mrep);
	1891	return (EBADRPC);
	1892	}
	1893
	1894	nd->nd_flag &= ~ND_KERBAUTH;
	1895	/*
	1896	* Handle auth_unix or auth_kerb.
	1897	*/
	1898	if (auth_type == rpc_auth_unix) {
	1899	len = fxdr_unsigned(int, *++tl);
	1900	if (len < 0 \|\| len > NFS_MAXNAMLEN) {
	1901	m_freem(mrep);
	1902	return (EBADRPC);
	1903	}
	1904	nfsm_adv(nfsm_rndup(len));
	1905	nfsm_dissect(tl, u_int32_t , 3 NFSX_UNSIGNED);
	1906	bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
	1907	nd->nd_cr.cr_ref = 1;
	1908	nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
	1909	nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
	1910	len = fxdr_unsigned(int, *tl);
	1911	if (len < 0 \|\| len > RPCAUTH_UNIXGIDS) {
	1912	m_freem(mrep);
	1913	return (EBADRPC);
	1914	}
	1915	nfsm_dissect(tl, u_int32_t , (len + 2) NFSX_UNSIGNED);
	1916	for (i = 1; i <= len; i++)
	1917	if (i < NGROUPS)
	1918	nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
	1919	else
	1920	tl++;
	1921	nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
	1922	if (nd->nd_cr.cr_ngroups > 1)
	1923	nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
	1924	len = fxdr_unsigned(int, *++tl);
	1925	if (len < 0 \|\| len > RPCAUTH_MAXSIZ) {
	1926	m_freem(mrep);
	1927	return (EBADRPC);
	1928	}
	1929	if (len > 0)
	1930	nfsm_adv(nfsm_rndup(len));
	1931	} else if (auth_type == rpc_auth_kerb) {
	1932	switch (fxdr_unsigned(int, *tl++)) {
	1933	case RPCAKN_FULLNAME:
	1934	ticklen = fxdr_unsigned(int, *tl);
	1935	((u_int32_t )nfsd->nfsd_authstr) = *tl;
	1936	uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
	1937	nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
	1938	if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
	1939	m_freem(mrep);
	1940	return (EBADRPC);
	1941	}
	1942	uio.uio_offset = 0;
	1943	uio.uio_iov = &iov;
	1944	uio.uio_iovcnt = 1;
	1945	uio.uio_segflg = UIO_SYSSPACE;
	1946	iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
	1947	iov.iov_len = RPCAUTH_MAXSIZ - 4;
	1948	nfsm_mtouio(&uio, uio.uio_resid);
	1949	nfsm_dissect(tl, u_int32_t , 2 NFSX_UNSIGNED);
	1950	if (*tl++ != rpc_auth_kerb \|\|
	1951	fxdr_unsigned(int, tl) != 4 NFSX_UNSIGNED) {
	1952	printf("Bad kerb verifier\n");
	1953	nd->nd_repstat = (NFSERR_AUTHERR\|AUTH_BADVERF);
	1954	nd->nd_procnum = NFSPROC_NOOP;
	1955	return (0);
	1956	}
	1957	nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
	1958	tl = (u_int32_t *)cp;
	1959	if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
	1960	printf("Not fullname kerb verifier\n");
	1961	nd->nd_repstat = (NFSERR_AUTHERR\|AUTH_BADVERF);
	1962	nd->nd_procnum = NFSPROC_NOOP;
	1963	return (0);
	1964	}
	1965	cp += NFSX_UNSIGNED;
	1966	bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
	1967	nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
	1968	nd->nd_flag \|= ND_KERBFULL;
	1969	nfsd->nfsd_flag \|= NFSD_NEEDAUTH;
	1970	break;
	1971	case RPCAKN_NICKNAME:
	1972	if (len != 2 * NFSX_UNSIGNED) {
	1973	printf("Kerb nickname short\n");
	1974	nd->nd_repstat = (NFSERR_AUTHERR\|AUTH_BADCRED);
	1975	nd->nd_procnum = NFSPROC_NOOP;
	1976	return (0);
	1977	}
	1978	nickuid = fxdr_unsigned(uid_t, *tl);
	1979	nfsm_dissect(tl, u_int32_t , 2 NFSX_UNSIGNED);
	1980	if (*tl++ != rpc_auth_kerb \|\|
	1981	fxdr_unsigned(int, tl) != 3 NFSX_UNSIGNED) {
	1982	printf("Kerb nick verifier bad\n");
	1983	nd->nd_repstat = (NFSERR_AUTHERR\|AUTH_BADVERF);
	1984	nd->nd_procnum = NFSPROC_NOOP;
	1985	return (0);
	1986	}
	1987	nfsm_dissect(tl, u_int32_t , 3 NFSX_UNSIGNED);
	1988	tvin.tv_sec = *tl++;
	1989	tvin.tv_usec = *tl;
	1990
	1991	for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
	1992	nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
	1993	if (nuidp->nu_cr.cr_uid == nickuid &&
	1994	(!nd->nd_nam2 \|\|
	1995	netaddr_match(NU_NETFAM(nuidp),
	1996	&nuidp->nu_haddr, nd->nd_nam2)))
	1997	break;
	1998	}
	1999	if (!nuidp) {
	2000	nd->nd_repstat =
	2001	(NFSERR_AUTHERR\|AUTH_REJECTCRED);
	2002	nd->nd_procnum = NFSPROC_NOOP;
	2003	return (0);
	2004	}
	2005
	2006	/*
	2007	* Now, decrypt the timestamp using the session key
	2008	* and validate it.
	2009	*/
	2010	#ifdef NFSKERB
	2011	XXX
	2012	#endif
	2013
	2014	tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
	2015	tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
	2016	if (nuidp->nu_expire < time_second \|\|
	2017	nuidp->nu_timestamp.tv_sec > tvout.tv_sec \|\|
	2018	(nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
	2019	nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
	2020	nuidp->nu_expire = 0;
	2021	nd->nd_repstat =
	2022	(NFSERR_AUTHERR\|AUTH_REJECTVERF);
	2023	nd->nd_procnum = NFSPROC_NOOP;
	2024	return (0);
	2025	}
	2026	nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
	2027	nd->nd_flag \|= ND_KERBNICK;
	2028	};
	2029	} else {
	2030	nd->nd_repstat = (NFSERR_AUTHERR \| AUTH_REJECTCRED);
	2031	nd->nd_procnum = NFSPROC_NOOP;
	2032	return (0);
	2033	}
	2034
	2035	/*
	2036	* For nqnfs, get piggybacked lease request.
	2037	*/
	2038	if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
	2039	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
	2040	nd->nd_flag \|= fxdr_unsigned(int, *tl);
	2041	if (nd->nd_flag & ND_LEASE) {
	2042	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
	2043	nd->nd_duration = fxdr_unsigned(int32_t, *tl);
	2044	} else
	2045	nd->nd_duration = NQ_MINLEASE;
	2046	} else
	2047	nd->nd_duration = NQ_MINLEASE;
	2048	nd->nd_md = md;
	2049	nd->nd_dpos = dpos;
	2050	return (0);
	2051	nfsmout:
	2052	return (error);
	2053	}
	2054
	2055	#endif
	2056
	2057	/*
	2058	* Send a message to the originating process's terminal. The thread and/or
	2059	* process may be NULL. YYY the thread should not be NULL but there may
	2060	* still be some uio_td's that are still being passed as NULL through to
	2061	* nfsm_request().
	2062	*/
	2063	static int
	2064	nfs_msg(struct thread td, char server, char *msg)
	2065	{
	2066	tpr_t tpr;
	2067
	2068	if (td && td->td_proc)
	2069	tpr = tprintf_open(td->td_proc);
	2070	else
	2071	tpr = NULL;
	2072	tprintf(tpr, "nfs server %s: %s\n", server, msg);
	2073	tprintf_close(tpr);
	2074	return (0);
	2075	}
	2076
	2077	#ifndef NFS_NOSERVER
	2078	/*
	2079	* Socket upcall routine for the nfsd sockets.
	2080	* The caddr_t arg is a pointer to the "struct nfssvc_sock".
	2081	* Essentially do as much as possible non-blocking, else punt and it will
	2082	* be called with M_WAIT from an nfsd.
	2083	*/
	2084	void
	2085	nfsrv_rcv(so, arg, waitflag)
	2086	struct socket *so;
	2087	void *arg;
	2088	int waitflag;
	2089	{
	2090	struct nfssvc_sock slp = (struct nfssvc_sock )arg;
	2091	struct mbuf *m;
	2092	struct mbuf *mp;
	2093	struct sockaddr *nam;
	2094	struct uio auio;
	2095	int flags, error;
	2096
	2097	if ((slp->ns_flag & SLP_VALID) == 0)
	2098	return;
	2099	#ifdef notdef
	2100	/*
	2101	* Define this to test for nfsds handling this under heavy load.
	2102	*/
	2103	if (waitflag == M_DONTWAIT) {
	2104	slp->ns_flag \|= SLP_NEEDQ; goto dorecs;
	2105	}
	2106	#endif
	2107	auio.uio_td = NULL;
	2108	if (so->so_type == SOCK_STREAM) {
	2109	/*
	2110	* If there are already records on the queue, defer soreceive()
	2111	* to an nfsd so that there is feedback to the TCP layer that
	2112	* the nfs servers are heavily loaded.
	2113	*/
	2114	if (STAILQ_FIRST(&slp->ns_rec) && waitflag == M_DONTWAIT) {
	2115	slp->ns_flag \|= SLP_NEEDQ;
	2116	goto dorecs;
	2117	}
	2118
	2119	/*
	2120	* Do soreceive().
	2121	*/
	2122	auio.uio_resid = 1000000000;
	2123	flags = MSG_DONTWAIT;
	2124	error = so_pru_soreceive(so, &nam, &auio, &mp, NULL, &flags);
	2125	if (error \|\| mp == (struct mbuf *)0) {
	2126	if (error == EWOULDBLOCK)
	2127	slp->ns_flag \|= SLP_NEEDQ;
	2128	else
	2129	slp->ns_flag \|= SLP_DISCONN;
	2130	goto dorecs;
	2131	}
	2132	m = mp;
	2133	if (slp->ns_rawend) {
	2134	slp->ns_rawend->m_next = m;
	2135	slp->ns_cc += 1000000000 - auio.uio_resid;
	2136	} else {
	2137	slp->ns_raw = m;
	2138	slp->ns_cc = 1000000000 - auio.uio_resid;
	2139	}
	2140	while (m->m_next)
	2141	m = m->m_next;
	2142	slp->ns_rawend = m;
	2143
	2144	/*
	2145	* Now try and parse record(s) out of the raw stream data.
	2146	*/
	2147	error = nfsrv_getstream(slp, waitflag);
	2148	if (error) {
	2149	if (error == EPERM)
	2150	slp->ns_flag \|= SLP_DISCONN;
	2151	else
	2152	slp->ns_flag \|= SLP_NEEDQ;
	2153	}
	2154	} else {
	2155	do {
	2156	auio.uio_resid = 1000000000;
	2157	flags = MSG_DONTWAIT;
	2158	error = so_pru_soreceive(so, &nam, &auio, &mp, NULL,
	2159	&flags);
	2160	if (mp) {
	2161	struct nfsrv_rec *rec;
	2162	int mf = (waitflag & M_DONTWAIT) ?
	2163	M_NOWAIT : M_WAITOK;
	2164	rec = malloc(sizeof(struct nfsrv_rec),
	2165	M_NFSRVDESC, mf);
	2166	if (!rec) {
	2167	if (nam)
	2168	FREE(nam, M_SONAME);
	2169	m_freem(mp);
	2170	continue;
	2171	}
	2172	nfs_realign(&mp, 10 * NFSX_UNSIGNED);
	2173	rec->nr_address = nam;
	2174	rec->nr_packet = mp;
	2175	STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link);
	2176	}
	2177	if (error) {
	2178	if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
	2179	&& error != EWOULDBLOCK) {
	2180	slp->ns_flag \|= SLP_DISCONN;
	2181	goto dorecs;
	2182	}
	2183	}
	2184	} while (mp);
	2185	}
	2186
	2187	/*
	2188	* Now try and process the request records, non-blocking.
	2189	*/
	2190	dorecs:
	2191	if (waitflag == M_DONTWAIT &&
	2192	(STAILQ_FIRST(&slp->ns_rec)
	2193	\|\| (slp->ns_flag & (SLP_NEEDQ \| SLP_DISCONN))))
	2194	nfsrv_wakenfsd(slp);
	2195	}
	2196
	2197	/*
	2198	* Try and extract an RPC request from the mbuf data list received on a
	2199	* stream socket. The "waitflag" argument indicates whether or not it
	2200	* can sleep.
	2201	*/
	2202	static int
	2203	nfsrv_getstream(slp, waitflag)
	2204	struct nfssvc_sock *slp;
	2205	int waitflag;
	2206	{
	2207	struct mbuf m, *mpp;
	2208	char cp1, cp2;
	2209	int len;
	2210	struct mbuf om, m2, *recm;
	2211	u_int32_t recmark;
	2212
	2213	if (slp->ns_flag & SLP_GETSTREAM)
	2214	panic("nfs getstream");
	2215	slp->ns_flag \|= SLP_GETSTREAM;
	2216	for (;;) {
	2217	if (slp->ns_reclen == 0) {
	2218	if (slp->ns_cc < NFSX_UNSIGNED) {
	2219	slp->ns_flag &= ~SLP_GETSTREAM;
	2220	return (0);
	2221	}
	2222	m = slp->ns_raw;
	2223	if (m->m_len >= NFSX_UNSIGNED) {
	2224	bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
	2225	m->m_data += NFSX_UNSIGNED;
	2226	m->m_len -= NFSX_UNSIGNED;
	2227	} else {
	2228	cp1 = (caddr_t)&recmark;
	2229	cp2 = mtod(m, caddr_t);
	2230	while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
	2231	while (m->m_len == 0) {
	2232	m = m->m_next;
	2233	cp2 = mtod(m, caddr_t);
	2234	}
	2235	cp1++ = cp2++;
	2236	m->m_data++;
	2237	m->m_len--;
	2238	}
	2239	}
	2240	slp->ns_cc -= NFSX_UNSIGNED;
	2241	recmark = ntohl(recmark);
	2242	slp->ns_reclen = recmark & ~0x80000000;
	2243	if (recmark & 0x80000000)
	2244	slp->ns_flag \|= SLP_LASTFRAG;
	2245	else
	2246	slp->ns_flag &= ~SLP_LASTFRAG;
	2247	if (slp->ns_reclen > NFS_MAXPACKET) {
	2248	slp->ns_flag &= ~SLP_GETSTREAM;
	2249	return (EPERM);
	2250	}
	2251	}
	2252
	2253	/*
	2254	* Now get the record part.
	2255	*
	2256	* Note that slp->ns_reclen may be 0. Linux sometimes
	2257	* generates 0-length RPCs
	2258	*/
	2259	recm = NULL;
	2260	if (slp->ns_cc == slp->ns_reclen) {
	2261	recm = slp->ns_raw;
	2262	slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
	2263	slp->ns_cc = slp->ns_reclen = 0;
	2264	} else if (slp->ns_cc > slp->ns_reclen) {
	2265	len = 0;
	2266	m = slp->ns_raw;
	2267	om = (struct mbuf *)0;
	2268
	2269	while (len < slp->ns_reclen) {
	2270	if ((len + m->m_len) > slp->ns_reclen) {
	2271	m2 = m_copym(m, 0, slp->ns_reclen - len,
	2272	waitflag);
	2273	if (m2) {
	2274	if (om) {
	2275	om->m_next = m2;
	2276	recm = slp->ns_raw;
	2277	} else
	2278	recm = m2;
	2279	m->m_data += slp->ns_reclen - len;
	2280	m->m_len -= slp->ns_reclen - len;
	2281	len = slp->ns_reclen;
	2282	} else {
	2283	slp->ns_flag &= ~SLP_GETSTREAM;
	2284	return (EWOULDBLOCK);
	2285	}
	2286	} else if ((len + m->m_len) == slp->ns_reclen) {
	2287	om = m;
	2288	len += m->m_len;
	2289	m = m->m_next;
	2290	recm = slp->ns_raw;
	2291	om->m_next = (struct mbuf *)0;
	2292	} else {
	2293	om = m;
	2294	len += m->m_len;
	2295	m = m->m_next;
	2296	}
	2297	}
	2298	slp->ns_raw = m;
	2299	slp->ns_cc -= len;
	2300	slp->ns_reclen = 0;
	2301	} else {
	2302	slp->ns_flag &= ~SLP_GETSTREAM;
	2303	return (0);
	2304	}
	2305
	2306	/*
	2307	* Accumulate the fragments into a record.
	2308	*/
	2309	mpp = &slp->ns_frag;
	2310	while (*mpp)
	2311	mpp = &((*mpp)->m_next);
	2312	*mpp = recm;
	2313	if (slp->ns_flag & SLP_LASTFRAG) {
	2314	struct nfsrv_rec *rec;
	2315	int mf = (waitflag & M_DONTWAIT) ? M_NOWAIT : M_WAITOK;
	2316	rec = malloc(sizeof(struct nfsrv_rec), M_NFSRVDESC, mf);
	2317	if (!rec) {
	2318	m_freem(slp->ns_frag);
	2319	} else {
	2320	nfs_realign(&slp->ns_frag, 10 * NFSX_UNSIGNED);
	2321	rec->nr_address = (struct sockaddr *)0;
	2322	rec->nr_packet = slp->ns_frag;
	2323	STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link);
	2324	}
	2325	slp->ns_frag = (struct mbuf *)0;
	2326	}
	2327	}
	2328	}
	2329
	2330	/*
	2331	* Parse an RPC header.
	2332	*/
	2333	int
	2334	nfsrv_dorec(slp, nfsd, ndp)
	2335	struct nfssvc_sock *slp;
	2336	struct nfsd *nfsd;
	2337	struct nfsrv_descript **ndp;
	2338	{
	2339	struct nfsrv_rec *rec;
	2340	struct mbuf *m;
	2341	struct sockaddr *nam;
	2342	struct nfsrv_descript *nd;
	2343	int error;
	2344
	2345	*ndp = NULL;
	2346	if ((slp->ns_flag & SLP_VALID) == 0 \|\| !STAILQ_FIRST(&slp->ns_rec))
	2347	return (ENOBUFS);
	2348	rec = STAILQ_FIRST(&slp->ns_rec);
	2349	STAILQ_REMOVE_HEAD(&slp->ns_rec, nr_link);
	2350	nam = rec->nr_address;
	2351	m = rec->nr_packet;
	2352	free(rec, M_NFSRVDESC);
	2353	MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript),
	2354	M_NFSRVDESC, M_WAITOK);
	2355	nd->nd_md = nd->nd_mrep = m;
	2356	nd->nd_nam2 = nam;
	2357	nd->nd_dpos = mtod(m, caddr_t);
	2358	error = nfs_getreq(nd, nfsd, TRUE);
	2359	if (error) {
	2360	if (nam) {
	2361	FREE(nam, M_SONAME);
	2362	}
	2363	free((caddr_t)nd, M_NFSRVDESC);
	2364	return (error);
	2365	}
	2366	*ndp = nd;
	2367	nfsd->nfsd_nd = nd;
	2368	return (0);
	2369	}
	2370
	2371	/*
	2372	* Search for a sleeping nfsd and wake it up.
	2373	* SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
	2374	* running nfsds will go look for the work in the nfssvc_sock list.
	2375	*/
	2376	void
	2377	nfsrv_wakenfsd(slp)
	2378	struct nfssvc_sock *slp;
	2379	{
	2380	struct nfsd *nd;
	2381
	2382	if ((slp->ns_flag & SLP_VALID) == 0)
	2383	return;
	2384	for (nd = nfsd_head.tqh_first; nd != 0; nd = nd->nfsd_chain.tqe_next) {
	2385	if (nd->nfsd_flag & NFSD_WAITING) {
	2386	nd->nfsd_flag &= ~NFSD_WAITING;
	2387	if (nd->nfsd_slp)
	2388	panic("nfsd wakeup");
	2389	slp->ns_sref++;
	2390	nd->nfsd_slp = slp;
	2391	wakeup((caddr_t)nd);
	2392	return;
	2393	}
	2394	}
	2395	slp->ns_flag \|= SLP_DOREC;
	2396	nfsd_head_flag \|= NFSD_CHECKSLP;
	2397	}
	2398	#endif /* NFS_NOSERVER */