From f8565b0fd3905ebd9fb24ff83243c0b4cb3c0ea9 Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Fri, 17 Jul 2009 18:25:05 -0700
Subject: [PATCH] NFS - Rewrite the RTT code and the request flags & low level state machine

* The RTT code basically did not work with UDP and has never worked very
  well.  Use a somewhat different algorithm and make more of an assumption
  that the network is either reliable or, if it isn't, that the sysadmin is
  using a reliable protocol (such as TCP).  The new RTT code should do a
  better job, though there will still be significant stalling on a lossy
  network when using UDP (too bad!).  A standalone sketch of the new RTT
  smoothing follows the patch.

* Replace the congestion control code.  Instead of trying to defer packet
  transmission to the nfs_timer, which we no longer run at insane rates,
  we limit the number of async BIOs allowed to be queued at any given
  moment.  A sketch of the new admission check also follows the patch.

* Redo the req->r_flags flags and the low level RPC interlocks between the
  various consumers and producers.  Add additional flags that can be tested
  in assertions to validate expectations.  Use R_LOCKED for more than just
  the nfs_timer code.

* Close a memory leak.

* Hold critical sections to close races that could result in a thread
  deadlocking in soreceive while waiting for a reply that another thread
  has already processed.

* Add two sysctls to aid in testing.

  vfs.nfs.showrexmit - Causes "X" to be printed on the console whenever
                       NFS feels that it must retransmit an RPC.  Only
                       occurs with UDP mounts.

  vfs.nfs.showrtt    - Causes NFS to print out the SRTT, SDRTT, and
                       converted timeout calculation once a second while
                       NFS requests are active.

* Increase the asynchronous BIO limits.  Because we only need two helper
  threads and no longer depend on dozens of synchronous-rpc threads (the
  old nfsiods), there is no practical limit to the number of requests we
  can have active.  Reformulate how asynchronous BIOs are accounted for.
  They are now accounted for from the moment they are queued until they
  are finally biodone()'d.
---
 sys/vfs/nfs/nfs.h        |  32 ++-
 sys/vfs/nfs/nfs_bio.c    |  11 +-
 sys/vfs/nfs/nfs_iod.c    |  31 ++-
 sys/vfs/nfs/nfs_socket.c | 548 ++++++++++++++++++++++-----------------
 sys/vfs/nfs/nfs_vfsops.c |   3 +-
 sys/vfs/nfs/nfsm_subs.c  |   2 -
 sys/vfs/nfs/nfsm_subs.h  |   2 +-
 sys/vfs/nfs/nfsmount.h   |   7 +-
 8 files changed, 376 insertions(+), 260 deletions(-)

diff --git a/sys/vfs/nfs/nfs.h b/sys/vfs/nfs/nfs.h
index 622cbf44b1..a6d57ecacd 100644
--- a/sys/vfs/nfs/nfs.h
+++ b/sys/vfs/nfs/nfs.h
@@ -87,6 +87,7 @@
 #define NFS_READDIRSIZE	8192		/* Def. readdir size */
 #define NFS_DEFRAHEAD	4		/* Def. read ahead # blocks */
 #define NFS_MAXRAHEAD	32		/* Max. read ahead # blocks */
+#define NFS_MAXASYNCBIO	64		/* Max. asynchronous BIOs */
 #define NFS_MAXUIDHASH	64		/* Max. # of hashed uid entries/mp */
 #define NFS_MAXGATHERDELAY	100	/* Max. write gather delay (msec) */
 #ifndef NFS_GATHERDELAY
@@ -97,6 +98,10 @@
 #define DIRBLKSIZ	512	/* XXX we used to use ufs's DIRBLKSIZ */
 #endif
 
+#define NFS_ASYSCALE		256
+#define NFS_MINASYNC_SCALED	(4 * NFS_ASYSCALE)
+#define NFS_MAXASYNC_SCALED	(NFS_MAXASYNCBIO * NFS_ASYSCALE)
+
 /*
  * Oddballs
  */
@@ -372,6 +377,24 @@ struct nfsreq {
 
 /*
  * Flag values for r_flags
+ *
+ * R_TIMING	- Timer is active on request.  If timer goes off the
+ *		  request's retransmit counter will bump and it will
+ *		  be flagged NEEDSXMIT to retransmit.
+ *
+ * R_SENT	- Indicates that the request was sent the first time.
+ *		  This stays set unless we want to repeat a first-time
+ *		  styled transmit sequence.
+ *
+ * R_NEEDSXMIT	- The request needs a [re]transmission.
It will still + * be sitting on nm_reqq if synchronous, otherwise it + * will be moved to nm_reqtxq. + * + * R_ASYNC - Indicates an asynchronous request, which modifies the + * behavior of how it is dealt with. + * + * R_LOCKED - Prevents the request from being moved between queues + * e.g. by the timer or by the async thread. */ #define R_TIMING 0x0001 /* timing request (in mntp) */ #define R_SENT 0x0002 /* request has been sent */ @@ -379,10 +402,11 @@ struct nfsreq { #define R_INTR 0x0008 /* intr mnt, signal pending */ #define R_SOCKERR 0x0010 /* Fatal error on socket */ #define R_TPRINTFMSG 0x0020 /* Did a tprintf msg. */ -#define R_MUSTRESEND 0x0040 /* Must resend request */ -#define R_UNUSED07 0x0080 -#define R_MASKTIMER 0x0100 /* Timer should ignore this req */ -#define R_LOCKED 0x0200 /* Locked by the timer */ +#define R_NEEDSXMIT 0x0040 /* Needs [re]transmission */ +#define R_ASYNC 0x0080 /* Asynchronous mechanics */ +#define R_ONREQQ 0x0100 /* On primary request queue */ +#define R_LOCKED 0x0200 /* Locked by the someone */ +#define R_WANTED 0x0400 /* Someone wants access */ /* * A list of nfssvc_sock structures is maintained with all the sockets diff --git a/sys/vfs/nfs/nfs_bio.c b/sys/vfs/nfs/nfs_bio.c index 0cc3bf48b4..d5acf9370a 100644 --- a/sys/vfs/nfs/nfs_bio.c +++ b/sys/vfs/nfs/nfs_bio.c @@ -1205,7 +1205,8 @@ nfs_vinvalbuf(struct vnode *vp, int flags, int intrflg) int nfs_asyncok(struct nfsmount *nmp) { - return (nmp->nm_bioqlen < 64 && + return (nmp->nm_bioqlen < NFS_MAXASYNCBIO && + nmp->nm_bioqlen < nmp->nm_maxasync_scaled / NFS_ASYSCALE && nmp->nm_rxstate <= NFSSVC_PENDING && nmp->nm_txstate <= NFSSVC_PENDING); } @@ -1216,6 +1217,9 @@ nfs_asyncok(struct nfsmount *nmp) * We don't touch the bio otherwise... that is, we do not even * construct or send the initial rpc. The txthread will do it * for us. + * + * NOTE! nm_bioqlen is not decremented until the request completes, + * so it does not reflect the number of bio's on bioq. */ void nfs_asyncio(struct vnode *vp, struct bio *bio) @@ -1226,8 +1230,10 @@ nfs_asyncio(struct vnode *vp, struct bio *bio) KKASSERT(vp->v_tag == VT_NFS); BUF_KERNPROC(bp); bio->bio_driver_info = vp; + crit_enter(); TAILQ_INSERT_TAIL(&nmp->nm_bioq, bio, bio_act); - nmp->nm_bioqlen++; + atomic_add_int(&nmp->nm_bioqlen, 1); + crit_exit(); nfssvc_iod_writer_wakeup(nmp); } @@ -1638,6 +1644,7 @@ nfs_readrpc_bio_done(nfsm_info_t info) } #endif nfsmout: + kfree(info, M_NFSREQ); if (error) { bp->b_error = error; bp->b_flags |= B_ERROR; diff --git a/sys/vfs/nfs/nfs_iod.c b/sys/vfs/nfs/nfs_iod.c index 7fd9169ddf..b52494132d 100644 --- a/sys/vfs/nfs/nfs_iod.c +++ b/sys/vfs/nfs/nfs_iod.c @@ -81,6 +81,7 @@ nfssvc_iod_reader(void *arg) if (nmp->nm_rxstate == NFSSVC_INIT) nmp->nm_rxstate = NFSSVC_PENDING; + crit_enter(); for (;;) { if (nmp->nm_rxstate == NFSSVC_WAITING) { if (TAILQ_FIRST(&nmp->nm_reqq) == NULL && @@ -109,11 +110,22 @@ nfssvc_iod_reader(void *arg) * it means the request went back to an auth or retransmit * state and we let the iod_writer thread deal with it. * + * Any lock on the request is strictly temporary due to + * MP races (XXX). + * * If the request completes we run the info->done call * to finish up the I/O. 
*/ while ((req = TAILQ_FIRST(&nmp->nm_reqrxq)) != NULL) { + if (req->r_flags & R_LOCKED) { + while (req->r_flags & R_LOCKED) { + req->r_flags |= R_WANTED; + tsleep(req, 0, "nfstrac", 0); + } + continue; + } TAILQ_REMOVE(&nmp->nm_reqrxq, req, r_chain); + crit_exit(); info = req->r_info; KKASSERT(info); info->error = nfs_request(info, @@ -124,10 +136,13 @@ nfssvc_iod_reader(void *arg) TAILQ_INSERT_TAIL(&nmp->nm_reqtxq, req, r_chain); nfssvc_iod_writer_wakeup(nmp); } else { + atomic_subtract_int(&nmp->nm_bioqlen, 1); info->done(info); } + crit_enter(); } } + crit_exit(); nmp->nm_rxthread = NULL; nmp->nm_rxstate = NFSSVC_DONE; wakeup(&nmp->nm_rxthread); @@ -152,6 +167,7 @@ nfssvc_iod_writer(void *arg) if (nmp->nm_txstate == NFSSVC_INIT) nmp->nm_txstate = NFSSVC_PENDING; + crit_enter(); for (;;) { if (nmp->nm_txstate == NFSSVC_WAITING) { tsleep(&nmp->nm_txstate, 0, "nfsidl", 0); @@ -161,13 +177,12 @@ nfssvc_iod_writer(void *arg) break; nmp->nm_txstate = NFSSVC_WAITING; - while (nmp->nm_bioqlen && nmp->nm_reqqlen < 32) { - bio = TAILQ_FIRST(&nmp->nm_bioq); - KKASSERT(bio); + while ((bio = TAILQ_FIRST(&nmp->nm_bioq)) != NULL) { TAILQ_REMOVE(&nmp->nm_bioq, bio, bio_act); - nmp->nm_bioqlen--; vp = bio->bio_driver_info; + crit_exit(); nfs_startio(vp, bio, NULL); + crit_enter(); } /* @@ -180,18 +195,20 @@ nfssvc_iod_writer(void *arg) TAILQ_REMOVE(&nmp->nm_reqtxq, req, r_chain); info = req->r_info; KKASSERT(info); + crit_exit(); info->error = nfs_request(info, NFSM_STATE_AUTH, NFSM_STATE_WAITREPLY); + crit_enter(); if (info->error == EINPROGRESS) { - /* - TAILQ_INSERT_TAIL(&nmp->nm_reqrxq, req, r_chain); - */ + ; } else { + atomic_subtract_int(&nmp->nm_bioqlen, 1); info->done(info); } } } + crit_exit(); nmp->nm_txthread = NULL; nmp->nm_txstate = NFSSVC_DONE; wakeup(&nmp->nm_txthread); diff --git a/sys/vfs/nfs/nfs_socket.c b/sys/vfs/nfs/nfs_socket.c index 7166946f42..aa477befca 100644 --- a/sys/vfs/nfs/nfs_socket.c +++ b/sys/vfs/nfs/nfs_socket.c @@ -83,24 +83,13 @@ #define FALSE 0 /* - * Estimate rto for an nfs rpc sent via. an unreliable datagram. - * Use the mean and mean deviation of rtt for the appropriate type of rpc - * for the frequent rpcs and a default for the others. - * The justification for doing "other" this way is that these rpcs - * happen so infrequently that timer est. would probably be stale. - * Also, since many of these rpcs are - * non-idempotent, a conservative timeout is desired. - * getattr, lookup - A+2D - * read, write - A+4D - * other - nm_timeo + * RTT calculations are scaled by 256 (8 bits). A proper fractional + * RTT will still be calculated even with a slow NFS timer. */ -#define NFS_RTO(n, t) \ - ((t) == 0 ? (n)->nm_timeo : \ - ((t) < 3 ? \ - (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \ - ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1))) -#define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1] -#define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1] +#define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum]] +#define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum]] +#define NFS_RTT_SCALE_BITS 8 /* bits */ +#define NFS_RTT_SCALE 256 /* value */ /* * Defines which timer to use for the procnum. 
@@ -115,40 +104,27 @@ static int proct[NFS_NPROCS] = { 0, 0, 0, }; +static int nfs_backoff[8] = { 2, 3, 5, 8, 13, 21, 34, 55 }; static int nfs_realign_test; static int nfs_realign_count; static int nfs_bufpackets = 4; -static int nfs_timer_raced; +static int nfs_showrtt; +static int nfs_showrexmit; SYSCTL_DECL(_vfs_nfs); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test, 0, ""); SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count, 0, ""); SYSCTL_INT(_vfs_nfs, OID_AUTO, bufpackets, CTLFLAG_RW, &nfs_bufpackets, 0, ""); +SYSCTL_INT(_vfs_nfs, OID_AUTO, showrtt, CTLFLAG_RW, &nfs_showrtt, 0, ""); +SYSCTL_INT(_vfs_nfs, OID_AUTO, showrexmit, CTLFLAG_RW, &nfs_showrexmit, 0, ""); static int nfs_request_setup(nfsm_info_t info); static int nfs_request_auth(struct nfsreq *rep); static int nfs_request_try(struct nfsreq *rep); static int nfs_request_waitreply(struct nfsreq *rep); static int nfs_request_processreply(nfsm_info_t info, int); -static void nfs_async_return(struct nfsmount *nmp, struct nfsreq *rep); -/* - * There is a congestion window for outstanding rpcs maintained per mount - * point. The cwnd size is adjusted in roughly the way that: - * Van Jacobson, Congestion avoidance and Control, In "Proceedings of - * SIGCOMM '88". ACM, August 1988. - * describes for TCP. The cwnd size is chopped in half on a retransmit timeout - * and incremented by 1/cwnd when each rpc reply is received and a full cwnd - * of rpcs is in progress. - * (The sent count and cwnd are scaled for integer arith.) - * Variants of "slow start" were tried and were found to be too much of a - * performance hit (ave. rtt 3 times larger), - * I suspect due to the large rtt that nfs rpcs have. - */ -#define NFS_CWNDSCALE 256 -#define NFS_MAXCWND (NFS_CWNDSCALE * 32) -static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, }; int nfsrtton = 0; struct nfsrtt nfsrtt; struct callout nfs_timer_handle; @@ -159,7 +135,8 @@ static void nfs_rcvunlock (struct nfsmount *nmp); static void nfs_realign (struct mbuf **pm, int hsiz); static int nfs_receive (struct nfsmount *nmp, struct nfsreq *rep, struct sockaddr **aname, struct mbuf **mp); -static void nfs_softterm (struct nfsreq *rep); +static void nfs_softterm (struct nfsreq *rep, int islocked); +static void nfs_hardterm (struct nfsreq *rep, int islocked); static int nfs_reconnect (struct nfsmount *nmp, struct nfsreq *rep); #ifndef NFS_NOSERVER static int nfsrv_getstream (struct nfssvc_sock *, int, int *); @@ -361,11 +338,10 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep) /* Initialize other non-zero congestion variables */ nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = - nmp->nm_srtt[3] = (NFS_TIMEO << 3); + nmp->nm_srtt[3] = (NFS_TIMEO << NFS_RTT_SCALE_BITS); nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = nmp->nm_sdrtt[3] = 0; - nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ - nmp->nm_sent = 0; + nmp->nm_maxasync_scaled = NFS_MINASYNC_SCALED; nmp->nm_timeouts = 0; return (0); @@ -379,7 +355,7 @@ bad: * Called when a connection is broken on a reliable protocol. * - clean up the old socket * - nfs_connect() again - * - set R_MUSTRESEND for all outstanding requests on mount point + * - set R_NEEDSXMIT for all outstanding requests on mount point * If this fails the mount point is DEAD! * nb: Must be called with the nfs_sndlock() set on the mount point. 
*/ @@ -403,7 +379,7 @@ nfs_reconnect(struct nfsmount *nmp, struct nfsreq *rep) crit_enter(); TAILQ_FOREACH(req, &nmp->nm_reqq, r_chain) { KKASSERT(req->r_nmp == nmp); - req->r_flags |= R_MUSTRESEND; + req->r_flags |= R_NEEDSXMIT; } crit_exit(); return (0); @@ -439,7 +415,7 @@ nfs_safedisconnect(struct nfsmount *nmp) * "rep == NULL" indicates that it has been called from a server. * For the client side: * - return EINTR if the RPC is terminated, 0 otherwise - * - set R_MUSTRESEND if the send fails for any reason + * - set R_NEEDSXMIT if the send fails for any reason * - do any cleanup required by recoverable socket errors (?) * For the server side: * - return EINTR or ERESTART if interrupted by a signal @@ -459,14 +435,15 @@ nfs_send(struct socket *so, struct sockaddr *nam, struct mbuf *top, return (EINTR); } if ((so = rep->r_nmp->nm_so) == NULL) { - rep->r_flags |= R_MUSTRESEND; + rep->r_flags |= R_NEEDSXMIT; m_freem(top); return (0); } - rep->r_flags &= ~R_MUSTRESEND; + rep->r_flags &= ~R_NEEDSXMIT; soflags = rep->r_nmp->nm_soflags; - } else + } else { soflags = so->so_proto->pr_flags; + } if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) sendnam = NULL; else @@ -484,8 +461,11 @@ nfs_send(struct socket *so, struct sockaddr *nam, struct mbuf *top, */ if (error == ENOBUFS && so->so_type == SOCK_DGRAM) { error = 0; - if (rep) /* do backoff retransmit on client */ - rep->r_flags |= R_MUSTRESEND; + /* + * do backoff retransmit on client + */ + if (rep) + rep->r_flags |= R_NEEDSXMIT; } if (error) { @@ -498,9 +478,10 @@ nfs_send(struct socket *so, struct sockaddr *nam, struct mbuf *top, if (rep->r_flags & R_SOFTTERM) error = EINTR; else - rep->r_flags |= R_MUSTRESEND; - } else + rep->r_flags |= R_NEEDSXMIT; + } else { log(LOG_INFO, "nfsd send error %d\n", error); + } /* * Handle any recoverable (soft) socket errors here. (?) @@ -578,7 +559,7 @@ tryagain: } goto tryagain; } - while (rep && (rep->r_flags & R_MUSTRESEND)) { + while (rep && (rep->r_flags & R_NEEDSXMIT)) { m = m_copym(rep->r_mreq, 0, M_COPYALL, MB_WAIT); nfsstats.rpcretries++; error = nfs_send(so, rep->r_nmp->nm_nam, m, rep); @@ -767,7 +748,6 @@ nfs_reply(struct nfsmount *nmp, struct nfsreq *myrep) u_int32_t *tl; int error; struct nfsm_info info; - int t1; /* * Loop around until we get our own reply @@ -807,13 +787,18 @@ nfs_reply(struct nfsmount *nmp, struct nfsreq *myrep) /* * Get the next Rpc reply off the socket + * + * We cannot release the receive lock until we've + * filled in rep->r_mrep, otherwise a waiting + * thread may deadlock in soreceive with no incoming + * packets expected. */ error = nfs_receive(nmp, myrep, &nam, &info.mrep); - nfs_rcvunlock(nmp); if (error) { /* * Ignore routing errors on connectionless protocols?? */ + nfs_rcvunlock(nmp); if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { if (nmp->nm_so == NULL) return (error); @@ -837,6 +822,7 @@ nfs_reply(struct nfsmount *nmp, struct nfsreq *myrep) m_freem(info.mrep); info.mrep = NULL; nfsmout: + nfs_rcvunlock(nmp); continue; } @@ -852,7 +838,6 @@ nfsmout: if (rep->r_mrep == NULL && rxid == rep->r_xid) break; } - crit_exit(); /* * Fill in the rest of the reply if we found a match. 
@@ -865,9 +850,9 @@ nfsmout: rt = &nfsrtt.rttl[nfsrtt.pos]; rt->proc = rep->r_procnum; - rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]); - rt->sent = nmp->nm_sent; - rt->cwnd = nmp->nm_cwnd; + rt->rto = 0; + rt->sent = 0; + rt->cwnd = nmp->nm_maxasync_scaled; rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1]; rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1]; rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid; @@ -878,27 +863,21 @@ nfsmout: rt->rtt = 1000000; nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ; } + /* - * Update congestion window. - * Do the additive increase of - * one rpc/rtt. + * New congestion control is based only on async + * requests. */ - if (nmp->nm_cwnd <= nmp->nm_sent) { - nmp->nm_cwnd += - (NFS_CWNDSCALE * NFS_CWNDSCALE + - (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd; - if (nmp->nm_cwnd > NFS_MAXCWND) - nmp->nm_cwnd = NFS_MAXCWND; - } - crit_enter(); /* nfs_timer interlock for nm_sent */ + if (nmp->nm_maxasync_scaled < NFS_MAXASYNC_SCALED) + ++nmp->nm_maxasync_scaled; if (rep->r_flags & R_SENT) { rep->r_flags &= ~R_SENT; - nmp->nm_sent -= NFS_CWNDSCALE; } - crit_exit(); /* * Update rtt using a gain of 0.125 on the mean * and a gain of 0.25 on the deviation. + * + * NOTE SRTT/SDRTT are only good if R_TIMING is set. */ if (rep->r_flags & R_TIMING) { /* @@ -909,39 +888,36 @@ nfsmout: * rtt is between N+dt and N+2-dt ticks, * add 1. */ - t1 = rep->r_rtt + 1; - t1 -= (NFS_SRTT(rep) >> 3); - NFS_SRTT(rep) += t1; - if (t1 < 0) - t1 = -t1; - t1 -= (NFS_SDRTT(rep) >> 2); - NFS_SDRTT(rep) += t1; - } - nmp->nm_timeouts = 0; - rep->r_mrep = info.mrep; + int n; + int d; - /* - * Wakeup anyone waiting explicitly for this reply. - */ - mtx_abort_ex_link(&rep->r_nmp->nm_rxlock, &rep->r_link); +#define NFSRSB NFS_RTT_SCALE_BITS + n = ((NFS_SRTT(rep) * 7) + + (rep->r_rtt << NFSRSB)) >> 3; + d = n - NFS_SRTT(rep); + NFS_SRTT(rep) = n; - /* - * Asynchronous replies are bound-over to the - * rxthread. Note that nmp->nm_reqqlen is not - * decremented until the rxthread has finished - * with the request. - * - * async is sometimes temporarily turned off to - * avoid races. - */ - if (rep->r_info && rep->r_info->async) { - KKASSERT(rep->r_info->state == - NFSM_STATE_WAITREPLY || - rep->r_info->state == - NFSM_STATE_TRY); - nfs_async_return(nmp, rep); + /* + * Don't let the jitter calculation decay + * too quickly, but we want a fast rampup. + */ + if (d < 0) + d = -d; + d <<= NFSRSB; + if (d < NFS_SDRTT(rep)) + n = ((NFS_SDRTT(rep) * 15) + d) >> 4; + else + n = ((NFS_SDRTT(rep) * 3) + d) >> 2; + NFS_SDRTT(rep) = n; +#undef NFSRSB } + nmp->nm_timeouts = 0; + rep->r_mrep = info.mrep; + nfs_hardterm(rep, 0); } + nfs_rcvunlock(nmp); + crit_exit(); + /* * If not matched to a request, drop it. * If it's mine, get out. @@ -970,7 +946,6 @@ nfsmout: int nfs_request(struct nfsm_info *info, nfsm_state_t bstate, nfsm_state_t estate) { - struct nfsmount *nmp = info->nmp; struct nfsreq *req; while (info->state >= bstate && info->state < estate) { @@ -1012,34 +987,21 @@ nfs_request(struct nfsm_info *info, nfsm_state_t bstate, nfsm_state_t estate) * next state. * * This can trivially race the receiver if the - * request is asynchronous. Temporarily turn - * off async mode so the structure doesn't get - * ripped out from under us, and resolve the - * race. + * request is asynchronous. nfs_request_try() + * will thus set the state for us and we + * must also return immediately if we are + * running an async state machine, because + * info can become invalid due to races after + * try() returns. 
*/ - if (info->async) { - info->async = 0; - info->error = nfs_request_try(info->req); - crit_enter(); - info->async = 1; - KKASSERT(info->state == NFSM_STATE_TRY); - if (info->req->r_mrep) - nfs_async_return(nmp, info->req); - else - info->state = NFSM_STATE_WAITREPLY; - crit_exit(); + if (info->req->r_flags & R_ASYNC) { + nfs_request_try(info->req); + if (estate == NFSM_STATE_WAITREPLY) + return (EINPROGRESS); } else { - info->error = nfs_request_try(info->req); + nfs_request_try(info->req); info->state = NFSM_STATE_WAITREPLY; } - - /* - * The backend can rip the request out from under - * is at this point. If we were async the estate - * will be set to WAITREPLY. Return immediately. - */ - if (estate == NFSM_STATE_WAITREPLY) - return (EINPROGRESS); break; case NFSM_STATE_WAITREPLY: /* @@ -1128,6 +1090,8 @@ nfs_request_setup(nfsm_info_t info) req->r_td = info->td; req->r_procnum = info->procnum; req->r_mreq = NULL; + req->r_cred = info->cred; + i = 0; m = info->mreq; while (m) { @@ -1136,14 +1100,19 @@ nfs_request_setup(nfsm_info_t info) } req->r_mrest = info->mreq; req->r_mrest_len = i; - req->r_cred = info->cred; /* * The presence of a non-NULL r_info in req indicates * async completion via our helper threads. See the receiver * code. */ - req->r_info = info->async ? info : NULL; + if (info->bio) { + req->r_info = info; + req->r_flags = R_ASYNC; + } else { + req->r_info = NULL; + req->r_flags = 0; + } info->req = req; return(0); } @@ -1223,15 +1192,27 @@ nfs_request_try(struct nfsreq *rep) struct mbuf *m2; int error; + /* + * Request is not on any queue, only the owner has access to it + * so it should not be locked by anyone atm. + * + * Interlock to prevent races. While locked the only remote + * action possible is for r_mrep to be set (once we enqueue it). + */ + if (rep->r_flags == 0xdeadc0de) { + print_backtrace(); + panic("flags nbad\n"); + } + KKASSERT((rep->r_flags & (R_LOCKED | R_ONREQQ)) == 0); if (nmp->nm_flag & NFSMNT_SOFT) rep->r_retry = nmp->nm_retry; else rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ rep->r_rtt = rep->r_rexmit = 0; if (proct[rep->r_procnum] > 0) - rep->r_flags = R_TIMING | R_MASKTIMER; + rep->r_flags |= R_TIMING | R_LOCKED; else - rep->r_flags = R_MASKTIMER; + rep->r_flags |= R_LOCKED; rep->r_mrep = NULL; /* @@ -1242,92 +1223,120 @@ nfs_request_try(struct nfsreq *rep) /* * Chain request into list of outstanding requests. Be sure * to put it LAST so timer finds oldest requests first. Note - * that R_MASKTIMER is set at the moment to prevent any timer - * action on this request while we are still doing processing on - * it below. splsoftclock() primarily protects nm_sent. Note - * that we may block in this code so there is no atomicy guarentee. + * that our control of R_LOCKED prevents the request from + * getting ripped out from under us or transmitted by the + * timer code. + * + * For requests with info structures we must atomically set the + * info's state because the structure could become invalid upon + * return due to races (i.e., if async) */ crit_enter(); mtx_link_init(&rep->r_link); - TAILQ_INSERT_TAIL(&nmp->nm_reqq, rep, r_chain);/* XXX */ + TAILQ_INSERT_TAIL(&nmp->nm_reqq, rep, r_chain); + rep->r_flags |= R_ONREQQ; ++nmp->nm_reqqlen; - nfssvc_iod_reader_wakeup(nmp); + if (rep->r_flags & R_ASYNC) + rep->r_info->state = NFSM_STATE_WAITREPLY; + crit_exit(); error = 0; /* - * If backing off another request or avoiding congestion, don't - * send this one now but let timer do it. If not timing a request, - * do it now. 
- * - * Even though the timer will not mess with our request there is - * still the possibility that we will race a reply (which clears - * R_SENT), especially on localhost connections, so be very careful - * when setting R_SENT. We could set R_SENT prior to calling - * nfs_send() but why bother if the response occurs that quickly? + * Send if we can. Congestion control is not handled here any more + * becausing trying to defer the initial send based on the nfs_timer + * requires having a very fast nfs_timer, which is silly. */ - if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || - (nmp->nm_flag & NFSMNT_DUMBTIMR) || - nmp->nm_sent < nmp->nm_cwnd)) { + if (nmp->nm_so) { if (nmp->nm_soflags & PR_CONNREQUIRED) error = nfs_sndlock(nmp, rep); - if (!error) { + if (error == 0) { m2 = m_copym(rep->r_mreq, 0, M_COPYALL, MB_WAIT); error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep); if (nmp->nm_soflags & PR_CONNREQUIRED) nfs_sndunlock(nmp); - } - if (!error && (rep->r_flags & R_MUSTRESEND) == 0 && - rep->r_mrep == NULL) { - KASSERT((rep->r_flags & R_SENT) == 0, - ("R_SENT ASSERT %p", rep)); - nmp->nm_sent += NFS_CWNDSCALE; - rep->r_flags |= R_SENT; + rep->r_flags &= ~R_NEEDSXMIT; + if ((rep->r_flags & R_SENT) == 0) { + rep->r_flags |= R_SENT; + } + } else { + rep->r_flags |= R_NEEDSXMIT; } } else { + rep->r_flags |= R_NEEDSXMIT; rep->r_rtt = -1; } if (error == EPIPE) error = 0; + /* - * Let the timer do what it will with the request, then - * wait for the reply from our send or the timer's. + * Release the lock. The only remote action that may have occurred + * would have been the setting of rep->r_mrep. If this occured + * and the request was async we have to move it to the reader + * thread's queue for action. + * + * For async requests also make sure the reader is woken up so + * it gets on the socket to read responses. */ - if (error == 0) - rep->r_flags &= ~R_MASKTIMER; + crit_enter(); + if (rep->r_flags & R_ASYNC) { + if (rep->r_mrep) + nfs_hardterm(rep, 1); + rep->r_flags &= ~R_LOCKED; + nfssvc_iod_reader_wakeup(nmp); + } else { + rep->r_flags &= ~R_LOCKED; + } + if (rep->r_flags & R_WANTED) { + rep->r_flags &= ~R_WANTED; + wakeup(rep); + } crit_exit(); return (error); } +/* + * This code is only called for synchronous requests. Completed synchronous + * requests are left on reqq and we remove them before moving on to the + * processing state. + */ static int nfs_request_waitreply(struct nfsreq *rep) { struct nfsmount *nmp = rep->r_nmp; int error; + KKASSERT((rep->r_flags & R_ASYNC) == 0); + + /* + * Wait until the request is finished. + */ error = nfs_reply(nmp, rep); - crit_enter(); /* * RPC done, unlink the request, but don't rip it out from under * the callout timer. + * + * Once unlinked no other receiver or the timer will have + * visibility, so we do not have to set R_LOCKED. */ + crit_enter(); while (rep->r_flags & R_LOCKED) { - nfs_timer_raced = 1; - tsleep(&nfs_timer_raced, 0, "nfstrac", 0); + rep->r_flags |= R_WANTED; + tsleep(rep, 0, "nfstrac", 0); } + KKASSERT(rep->r_flags & R_ONREQQ); TAILQ_REMOVE(&nmp->nm_reqq, rep, r_chain); + rep->r_flags &= ~R_ONREQQ; --nmp->nm_reqqlen; + crit_exit(); /* * Decrement the outstanding request count. 
*/ if (rep->r_flags & R_SENT) { rep->r_flags &= ~R_SENT; - nmp->nm_sent -= NFS_CWNDSCALE; } - crit_exit(); - return (error); } @@ -1342,9 +1351,7 @@ nfs_request_processreply(nfsm_info_t info, int error) { struct nfsreq *req = info->req; struct nfsmount *nmp = req->r_nmp; - time_t waituntil; u_int32_t *tl; - int trylater_delay = 15, trylater_cnt = 0; int verf_type; int i; @@ -1417,19 +1424,18 @@ nfs_request_processreply(nfsm_info_t info, int error) NULLOUT(tl = nfsm_dissect(info, NFSX_UNSIGNED)); if (*tl != 0) { error = fxdr_unsigned(int, *tl); + + /* + * Does anyone even implement this? Just impose + * a 1-second delay. + */ if ((nmp->nm_flag & NFSMNT_NFSV3) && error == NFSERR_TRYLATER) { m_freem(info->mrep); info->mrep = NULL; error = 0; - waituntil = time_second + trylater_delay; - while (time_second < waituntil) - (void) tsleep((caddr_t)&lbolt, - 0, "nqnfstry", 0); - trylater_delay *= nfs_backoff[trylater_cnt]; - if (trylater_cnt < 7) - trylater_cnt++; - req->r_flags &= ~R_MASKTIMER; + + tsleep((caddr_t)&lbolt, 0, "nqnfstry", 0); return (EAGAIN); /* goto tryagain */ } @@ -1611,11 +1617,17 @@ nfs_rephead(int siz, struct nfsrv_descript *nd, struct nfssvc_sock *slp, #endif /* NFS_NOSERVER */ + /* - * Nfs timer routine + * Nfs timer routine. + * * Scan the nfsreq list and retranmit any requests that have timed out * To avoid retransmission attempts on STREAM sockets (in the future) make * sure to set the r_retry field to 0 (implies nm_retry == 0). + * + * Requests with attached responses, terminated requests, and + * locked requests are ignored. Locked requests will be picked up + * in a later timer call. */ void nfs_timer(void *arg /* never used */) @@ -1631,17 +1643,21 @@ nfs_timer(void *arg /* never used */) TAILQ_FOREACH(nmp, &nfs_mountq, nm_entry) { TAILQ_FOREACH(req, &nmp->nm_reqq, r_chain) { KKASSERT(nmp == req->r_nmp); - if (req->r_mrep || - (req->r_flags & (R_SOFTTERM|R_MASKTIMER))) { + if (req->r_mrep) + continue; + if (req->r_flags & (R_SOFTTERM | R_LOCKED)) continue; - } req->r_flags |= R_LOCKED; if (nfs_sigintr(nmp, req, req->r_td)) { - nfs_softterm(req); + nfs_softterm(req, 1); } else { nfs_timer_req(req); } req->r_flags &= ~R_LOCKED; + if (req->r_flags & R_WANTED) { + req->r_flags &= ~R_WANTED; + wakeup(req); + } } } #ifndef NFS_NOSERVER @@ -1656,15 +1672,6 @@ nfs_timer(void *arg /* never used */) nfsrv_wakenfsd(slp, 1); } #endif /* NFS_NOSERVER */ - - /* - * Due to possible blocking, a client operation may be waiting for - * us to finish processing this request so it can remove it. - */ - if (nfs_timer_raced) { - nfs_timer_raced = 0; - wakeup(&nfs_timer_raced); - } crit_exit(); callout_reset(&nfs_timer_handle, nfs_ticks, nfs_timer, NULL); } @@ -1680,53 +1687,101 @@ nfs_timer_req(struct nfsreq *req) int timeo; int error; + /* + * rtt ticks and timeout calculation. Return if the timeout + * has not been reached yet, unless the packet is flagged + * for an immediate send. + * + * The mean rtt doesn't help when we get random I/Os, we have + * to multiply by fairly large numbers. 
+ */ if (req->r_rtt >= 0) { req->r_rtt++; - if (nmp->nm_flag & NFSMNT_DUMBTIMR) - timeo = nmp->nm_timeo; - else - timeo = NFS_RTO(nmp, proct[req->r_procnum]); + if (nmp->nm_flag & NFSMNT_DUMBTIMR) { + timeo = nmp->nm_timeo << NFS_RTT_SCALE_BITS; + } else if (req->r_flags & R_TIMING) { + timeo = NFS_SRTT(req) + NFS_SDRTT(req); + } else { + timeo = nmp->nm_timeo << NFS_RTT_SCALE_BITS; + } + /* timeo is still scaled by SCALE_BITS */ + +#define NFSFS (NFS_RTT_SCALE * NFS_HZ) + if (req->r_flags & R_TIMING) { + static long last_time; + if (nfs_showrtt && last_time != time_second) { + kprintf("rpccmd %d NFS SRTT %d SDRTT %d " + "timeo %d.%03d\n", + proct[req->r_procnum], + NFS_SRTT(req), NFS_SDRTT(req), + timeo / NFSFS, + timeo % NFSFS * 1000 / NFSFS); + last_time = time_second; + } + } +#undef NFSFS + + /* + * deal with nfs_timer jitter. + */ + timeo = (timeo >> NFS_RTT_SCALE_BITS) + 1; + if (timeo < 2) + timeo = 2; + if (nmp->nm_timeouts > 0) timeo *= nfs_backoff[nmp->nm_timeouts - 1]; - if (req->r_rtt <= timeo) - return; - if (nmp->nm_timeouts < 8) + if (timeo > NFS_MAXTIMEO) + timeo = NFS_MAXTIMEO; + if (req->r_rtt <= timeo) { + if ((req->r_flags & R_NEEDSXMIT) == 0) + return; + } else if (nmp->nm_timeouts < 8) { nmp->nm_timeouts++; + } } + /* * Check for server not responding */ if ((req->r_flags & R_TPRINTFMSG) == 0 && req->r_rexmit > nmp->nm_deadthresh) { - nfs_msg(req->r_td, - nmp->nm_mountp->mnt_stat.f_mntfromname, - "not responding"); + nfs_msg(req->r_td, nmp->nm_mountp->mnt_stat.f_mntfromname, + "not responding"); req->r_flags |= R_TPRINTFMSG; } if (req->r_rexmit >= req->r_retry) { /* too many */ nfsstats.rpctimeouts++; - nfs_softterm(req); + nfs_softterm(req, 1); return; } + + /* + * Generally disable retransmission on reliable sockets, + * unless the request is flagged for immediate send. + */ if (nmp->nm_sotype != SOCK_DGRAM) { if (++req->r_rexmit > NFS_MAXREXMIT) req->r_rexmit = NFS_MAXREXMIT; - return; + if ((req->r_flags & R_NEEDSXMIT) == 0) + return; } + + /* + * Stop here if we do not have a socket! + */ if ((so = nmp->nm_so) == NULL) return; /* - * If there is enough space and the window allows.. - * Resend it + * If there is enough space and the window allows.. resend it. + * * Set r_rtt to -1 in case we fail to send it now. */ req->r_rtt = -1; if (ssb_space(&so->so_snd) >= req->r_mreq->m_pkthdr.len && - ((nmp->nm_flag & NFSMNT_DUMBTIMR) || - (req->r_flags & R_SENT) || - nmp->nm_sent < nmp->nm_cwnd) && + (req->r_flags & (R_SENT | R_NEEDSXMIT)) && (m = m_copym(req->r_mreq, 0, M_COPYALL, MB_DONTWAIT))){ + req->r_flags &= ~R_NEEDSXMIT; if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) error = so_pru_send(so, 0, m, NULL, NULL, td); else @@ -1735,6 +1790,7 @@ nfs_timer_req(struct nfsreq *req) if (error) { if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) so->so_error = 0; + req->r_flags |= R_NEEDSXMIT; } else if (req->r_mrep == NULL) { /* * Iff first send, start timing @@ -1749,16 +1805,17 @@ nfs_timer_req(struct nfsreq *req) * us entirely. 
*/ if (req->r_flags & R_SENT) { + if (nfs_showrexmit) + kprintf("X"); req->r_flags &= ~R_TIMING; if (++req->r_rexmit > NFS_MAXREXMIT) req->r_rexmit = NFS_MAXREXMIT; - nmp->nm_cwnd >>= 1; - if (nmp->nm_cwnd < NFS_CWNDSCALE) - nmp->nm_cwnd = NFS_CWNDSCALE; + nmp->nm_maxasync_scaled >>= 1; + if (nmp->nm_maxasync_scaled < NFS_MINASYNC_SCALED) + nmp->nm_maxasync_scaled = NFS_MINASYNC_SCALED; nfsstats.rpcretries++; } else { req->r_flags |= R_SENT; - nmp->nm_sent += NFS_CWNDSCALE; } req->r_rtt = 0; } @@ -1769,6 +1826,9 @@ nfs_timer_req(struct nfsreq *req) * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and * wait for all requests to complete. This is used by forced unmounts * to terminate any outstanding RPCs. + * + * Locked requests cannot be canceled but will be marked for + * soft-termination. */ int nfs_nmcancelreqs(struct nfsmount *nmp) @@ -1778,11 +1838,9 @@ nfs_nmcancelreqs(struct nfsmount *nmp) crit_enter(); TAILQ_FOREACH(req, &nmp->nm_reqq, r_chain) { - if (nmp != req->r_nmp || req->r_mrep != NULL || - (req->r_flags & R_SOFTTERM)) { + if (req->r_mrep != NULL || (req->r_flags & R_SOFTTERM)) continue; - } - nfs_softterm(req); + nfs_softterm(req, 0); } /* XXX the other two queues as well */ crit_exit(); @@ -1801,50 +1859,62 @@ nfs_nmcancelreqs(struct nfsmount *nmp) return (EBUSY); } +/* + * Soft-terminate a request, effectively marking it as failed. + * + * Must be called from within a critical section. + */ static void -nfs_async_return(struct nfsmount *nmp, struct nfsreq *rep) +nfs_softterm(struct nfsreq *rep, int islocked) { - KKASSERT(rep->r_info->state == NFSM_STATE_TRY || - rep->r_info->state == NFSM_STATE_WAITREPLY); - rep->r_info->state = NFSM_STATE_PROCESSREPLY; - TAILQ_REMOVE(&nmp->nm_reqq, rep, r_chain); - if (rep->r_flags & R_SENT) { - rep->r_flags &= ~R_SENT; - nmp->nm_sent -= NFS_CWNDSCALE; - } - --nmp->nm_reqqlen; - TAILQ_INSERT_TAIL(&nmp->nm_reqrxq, rep, r_chain); - nfssvc_iod_reader_wakeup(nmp); + rep->r_flags |= R_SOFTTERM; + nfs_hardterm(rep, islocked); } /* - * Flag a request as being about to terminate (due to NFSMNT_INT/NFSMNT_SOFT). - * The nm_send count is decremented now to avoid deadlocks when the process in - * soreceive() hasn't yet managed to send its own request. + * Hard-terminate a request, typically after getting a response. * - * This routine must be called at splsoftclock() to protect r_flags and - * nm_sent. + * The state machine can still decide to re-issue it later if necessary. + * + * Must be called from within a critical section. */ static void -nfs_softterm(struct nfsreq *rep) +nfs_hardterm(struct nfsreq *rep, int islocked) { struct nfsmount *nmp = rep->r_nmp; - rep->r_flags |= R_SOFTTERM; - + /* + * The nm_send count is decremented now to avoid deadlocks + * when the process in soreceive() hasn't yet managed to send + * its own request. + */ if (rep->r_flags & R_SENT) { - rep->r_nmp->nm_sent -= NFS_CWNDSCALE; rep->r_flags &= ~R_SENT; } /* - * Asynchronous replies are bound-over to the - * rxthread. Note that nmp->nm_reqqlen is not - * decremented until the rxthread has finished - * with the request. + * If we locked the request or nobody else has locked the request, + * and the request is async, we can move it to the reader thread's + * queue now and fix up the state. + * + * If we locked the request or nobody else has locked the request, + * we can wake up anyone blocked waiting for a response on the + * request. 
*/ - if (rep->r_info && rep->r_info->async) - nfs_async_return(nmp, rep); + if (islocked || (rep->r_flags & R_LOCKED) == 0) { + if ((rep->r_flags & (R_ONREQQ | R_ASYNC)) == + (R_ONREQQ | R_ASYNC)) { + rep->r_flags &= ~R_ONREQQ; + TAILQ_REMOVE(&nmp->nm_reqq, rep, r_chain); + --nmp->nm_reqqlen; + TAILQ_INSERT_TAIL(&nmp->nm_reqrxq, rep, r_chain); + KKASSERT(rep->r_info->state == NFSM_STATE_TRY || + rep->r_info->state == NFSM_STATE_WAITREPLY); + rep->r_info->state = NFSM_STATE_PROCESSREPLY; + nfssvc_iod_reader_wakeup(nmp); + } + mtx_abort_ex_link(&nmp->nm_rxlock, &rep->r_link); + } } /* diff --git a/sys/vfs/nfs/nfs_vfsops.c b/sys/vfs/nfs/nfs_vfsops.c index 030d1bb3a9..f2eb747e15 100644 --- a/sys/vfs/nfs/nfs_vfsops.c +++ b/sys/vfs/nfs/nfs_vfsops.c @@ -1005,10 +1005,11 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, #ifdef SMP switch(ncpus) { case 0: + case 1: rxcpu = 0; txcpu = 0; break; - case 1: + case 2: rxcpu = 0; txcpu = 1; break; diff --git a/sys/vfs/nfs/nfsm_subs.c b/sys/vfs/nfs/nfsm_subs.c index 7b98604788..ee7c13cc68 100644 --- a/sys/vfs/nfs/nfsm_subs.c +++ b/sys/vfs/nfs/nfsm_subs.c @@ -765,7 +765,6 @@ nfsm_request(nfsm_info_t info, struct vnode *vp, int procnum, info->vp = vp; info->td = td; info->cred = cred; - info->async = 0; info->bio = NULL; info->nmp = VFSTONFS(vp->v_mount); @@ -798,7 +797,6 @@ nfsm_request_bio(nfsm_info_t info, struct vnode *vp, int procnum, info->vp = vp; info->td = td; info->cred = cred; - info->async = 1; info->nmp = VFSTONFS(vp->v_mount); error = nfs_request(info, NFSM_STATE_SETUP, NFSM_STATE_WAITREPLY); diff --git a/sys/vfs/nfs/nfsm_subs.h b/sys/vfs/nfs/nfsm_subs.h index 2e7d234dae..4af48642ec 100644 --- a/sys/vfs/nfs/nfsm_subs.h +++ b/sys/vfs/nfs/nfsm_subs.h @@ -81,7 +81,7 @@ struct nfsm_info { struct ucred *cred; struct nfsreq *req; struct nfsmount *nmp; - int async; /* indicates async completion */ + int unused01; int error; /* diff --git a/sys/vfs/nfs/nfsmount.h b/sys/vfs/nfs/nfsmount.h index 859e2490ac..50e90acb85 100644 --- a/sys/vfs/nfs/nfsmount.h +++ b/sys/vfs/nfs/nfsmount.h @@ -78,10 +78,9 @@ struct nfsmount { struct sockaddr *nm_nam; /* Addr of server */ int nm_timeo; /* Init timer for NFSMNT_DUMBTIMR */ int nm_retry; /* Max retries */ - int nm_srtt[4]; /* Timers for rpcs */ - int nm_sdrtt[4]; - int nm_sent; /* Request send count */ - int nm_cwnd; /* Request send window */ + int nm_srtt[5]; /* Timers for rpcs */ + int nm_sdrtt[5]; + int nm_maxasync_scaled; /* Used to control congestion */ int nm_timeouts; /* Request timeouts */ int nm_deadthresh; /* Threshold of timeouts-->dead server*/ int nm_rsize; /* Max size of read rpc */ -- 2.41.0
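The RTT handling introduced above can be illustrated in isolation.  SRTT and
SDRTT are kept in fixed point with NFS_RTT_SCALE_BITS (8) bits of fraction,
the mean is updated with a gain of 1/8, the deviation decays slowly but ramps
up quickly, and the retransmit timeout is derived from SRTT + SDRTT converted
back to whole ticks.  The user-space sketch below is a simplified standalone
model of that arithmetic, not the driver code; the variable names, the initial
values, the sample RTTs, and the exact deviation scaling are illustrative only.

/*
 * Standalone model of the scaled RTT smoothing.  srtt/sdrtt are kept in
 * fixed point with NFS_RTT_SCALE_BITS bits of fraction; rtt_ticks is a
 * raw round-trip time measured in NFS timer ticks.
 */
#include <stdio.h>

#define NFS_RTT_SCALE_BITS	8	/* bits */
#define NFS_RTT_SCALE		256	/* value */

static int srtt = 2 << NFS_RTT_SCALE_BITS;	/* illustrative initial value */
static int sdrtt = 0;				/* smoothed deviation (scaled) */

static void
rtt_update(int rtt_ticks)
{
	int n;
	int d;

	/* gain of 1/8 on the mean, all arithmetic in scaled units */
	n = ((srtt * 7) + (rtt_ticks << NFS_RTT_SCALE_BITS)) >> 3;
	d = n - srtt;
	srtt = n;

	/* deviation decays slowly (1/16) but ramps up quickly (1/4) */
	if (d < 0)
		d = -d;
	if (d < sdrtt)
		sdrtt = ((sdrtt * 15) + d) >> 4;
	else
		sdrtt = ((sdrtt * 3) + d) >> 2;
}

int
main(void)
{
	/* made-up samples: mostly 2 ticks with one 8-tick outlier */
	int samples[] = { 2, 3, 2, 8, 2, 2, 3 };
	int i;

	for (i = 0; i < 7; i++) {
		int timeo;

		rtt_update(samples[i]);
		/* retransmit timeout: SRTT + SDRTT converted back to ticks */
		timeo = ((srtt + sdrtt) >> NFS_RTT_SCALE_BITS) + 1;
		if (timeo < 2)
			timeo = 2;
		printf("rtt=%d srtt=%d sdrtt=%d timeo=%d ticks\n",
		       samples[i], srtt, sdrtt, timeo);
	}
	return (0);
}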
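The congestion control replacement can be modeled the same way.
nm_maxasync_scaled is a fixed-point limit (scaled by NFS_ASYSCALE) on the
number of asynchronous BIOs allowed to be outstanding: it starts at
NFS_MINASYNC_SCALED, creeps up by one scaled unit per reply, is halved
whenever a previously sent request must be retransmitted, and nfs_asyncok()
admits a new async BIO only while nm_bioqlen is below both the hard
NFS_MAXASYNCBIO cap and the scaled limit.  The sketch below mirrors those
rules; struct fake_nfsmount is a hypothetical stand-in for the relevant
struct nfsmount fields, not the kernel structure.

#include <stdio.h>

#define NFS_MAXASYNCBIO		64	/* Max. asynchronous BIOs */
#define NFS_ASYSCALE		256
#define NFS_MINASYNC_SCALED	(4 * NFS_ASYSCALE)
#define NFS_MAXASYNC_SCALED	(NFS_MAXASYNCBIO * NFS_ASYSCALE)

struct fake_nfsmount {
	int nm_bioqlen;		/* async BIOs queued but not yet biodone()'d */
	int nm_maxasync_scaled;	/* scaled admission limit */
};

/* May another async BIO be queued right now?  (the nfs_asyncok() test) */
static int
asyncok(struct fake_nfsmount *nmp)
{
	return (nmp->nm_bioqlen < NFS_MAXASYNCBIO &&
		nmp->nm_bioqlen < nmp->nm_maxasync_scaled / NFS_ASYSCALE);
}

/* A reply came back: open the window additively by one scaled unit. */
static void
reply_received(struct fake_nfsmount *nmp)
{
	if (nmp->nm_maxasync_scaled < NFS_MAXASYNC_SCALED)
		++nmp->nm_maxasync_scaled;
}

/* A previously sent request had to be retransmitted: halve the window. */
static void
retransmitted(struct fake_nfsmount *nmp)
{
	nmp->nm_maxasync_scaled >>= 1;
	if (nmp->nm_maxasync_scaled < NFS_MINASYNC_SCALED)
		nmp->nm_maxasync_scaled = NFS_MINASYNC_SCALED;
}

int
main(void)
{
	struct fake_nfsmount nm = { 0, NFS_MINASYNC_SCALED };
	int i;

	/* the window opens by one BIO for every NFS_ASYSCALE replies */
	for (i = 0; i < 3 * NFS_ASYSCALE; i++)
		reply_received(&nm);
	printf("limit %d bios, asyncok=%d\n",
	       nm.nm_maxasync_scaled / NFS_ASYSCALE, asyncok(&nm));

	retransmitted(&nm);	/* clamps back down toward the floor */
	printf("after rexmit: limit %d bios\n",
	       nm.nm_maxasync_scaled / NFS_ASYSCALE);
	return (0);
}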