* Remove all the nfsiod junk.
* Add two per-mount threads, one for reading from the socket, one for writing
to the socket, in a new file nfs_iod.c.
* Implement a quick-and-dirty synchronous, single-threaded nfs_doio()
loop in the writer thread to test basic mechanics.
vfs/nfs/nfs_vfsops.c optional nfs
vfs/nfs/nfs_vnops.c optional nfs
vfs/nfs/nfs_kerb.c optional nfs
+vfs/nfs/nfs_iod.c optional nfs
vfs/nfs/bootp_subr.c optional bootp
vfs/nfs/nfs_mountrpc.c optional nfs
vfs/nfs/krpc_subr.c optional nfs
KMOD= nfs
SRCS= nfs_bio.c nfs_node.c nfs_kerb.c nfs_serv.c nfs_socket.c \
- nfs_srvcache.c nfs_subs.c nfs_syscalls.c nfs_vfsops.c \
+ nfs_srvcache.c nfs_subs.c nfs_syscalls.c nfs_vfsops.c nfs_iod.c \
nfs_vnops.c opt_inet.h opt_nfs.h opt_vmpage.h opt_bootp.h \
opt_nfsroot.h
NFS_INET?= 1 # 0/1 - requires INET to be configured in kernel
#define NFS_DEFRAHEAD 4 /* Def. read ahead # blocks */
#define NFS_MAXRAHEAD 32 /* Max. read ahead # blocks */
#define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */
-#define NFS_MAXASYNCDAEMON 64 /* Max. number async_daemons runnable */
#define NFS_MAXGATHERDELAY 100 /* Max. write gather delay (msec) */
#ifndef NFS_GATHERDELAY
#define NFS_GATHERDELAY 20 /* Default write gather delay (msec) */
int nfs_clientd(struct nfsmount *nmp, struct ucred *cred,
struct nfsd_cargs *ncd, int flag, caddr_t argp,
struct thread *td);
-
+void nfssvc_iod_reader(void *arg);
+void nfssvc_iod_writer(void *arg);
+void nfssvc_iod_stop(struct nfsmount *nmp);
+void nfssvc_iod_writer_wakeup(struct nfsmount *nmp);
#endif /* _KERNEL */
static int nfs_check_dirent(struct nfs_dirent *dp, int maxlen);
static void nfsiodone_sync(struct bio *bio);
-extern int nfs_numasync;
extern int nfs_pbuf_freecnt;
extern struct nfsstats nfsstats;
/*
* Start the read ahead(s), as required.
*/
- if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
+ if (nmp->nm_readahead > 0) {
for (nra = 0; nra < nmp->nm_readahead && nra < seqcount &&
(off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
rabn = lbn + 1 + nra;
* (You need the current block first, so that you have the
* directory offset cookie of the next block.)
*/
- if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
+ if (nmp->nm_readahead > 0 &&
(bp->b_flags & B_INVAL) == 0 &&
(np->n_direofoffset == 0 ||
loffset + NFS_DIRBLKSIZ < np->n_direofoffset) &&
{
struct buf *bp = bio->bio_buf;
struct nfsmount *nmp;
- int i;
- int gotiod;
- int slpflag = 0;
- int slptimeo = 0;
- int error;
-
- /*
- * If no async daemons then return EIO to force caller to run the rpc
- * synchronously.
- */
- if (nfs_numasync == 0)
- return (EIO);
KKASSERT(vp->v_tag == VT_NFS);
nmp = VFSTONFS(vp->v_mount);
/*
- * Commits are usually short and sweet so lets save some cpu and
- * leave the async daemons for more important rpc's (such as reads
- * and writes).
- */
- if (bp->b_cmd == BUF_CMD_WRITE && (bp->b_flags & B_NEEDCOMMIT) &&
- (nmp->nm_bioqiods > nfs_numasync / 2)) {
- return(EIO);
- }
-
-again:
- if (nmp->nm_flag & NFSMNT_INT)
- slpflag = PCATCH;
- gotiod = FALSE;
-
- /*
- * Find a free iod to process this request.
- */
- for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
- if (nfs_iodwant[i]) {
- /*
- * Found one, so wake it up and tell it which
- * mount to process.
- */
- NFS_DPF(ASYNCIO,
- ("nfs_asyncio: waking iod %d for mount %p\n",
- i, nmp));
- nfs_iodwant[i] = NULL;
- nfs_iodmount[i] = nmp;
- nmp->nm_bioqiods++;
- wakeup((caddr_t)&nfs_iodwant[i]);
- gotiod = TRUE;
- break;
- }
-
- /*
- * If none are free, we may already have an iod working on this mount
- * point. If so, it will process our request.
- */
- if (!gotiod) {
- if (nmp->nm_bioqiods > 0) {
- NFS_DPF(ASYNCIO,
- ("nfs_asyncio: %d iods are already processing mount %p\n",
- nmp->nm_bioqiods, nmp));
- gotiod = TRUE;
- }
- }
-
- /*
- * If we have an iod which can process the request, then queue
- * the buffer.
+ * If the mount's reader thread is stopping or has exited (its state is
+ * past NFSSVC_PENDING), return EIO to force the caller to run the rpc
+ * synchronously.
*/
- if (gotiod) {
- /*
- * Ensure that the queue never grows too large. We still want
- * to asynchronize so we block rather then return EIO.
- */
- while (nmp->nm_bioqlen >= 2*nfs_numasync) {
- NFS_DPF(ASYNCIO,
- ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
- nmp->nm_bioqwant = TRUE;
- error = tsleep(&nmp->nm_bioq, slpflag,
- "nfsaio", slptimeo);
- if (error) {
- if (nfs_sigintr(nmp, NULL, td))
- return (EINTR);
- if (slpflag == PCATCH) {
- slpflag = 0;
- slptimeo = 2 * hz;
- }
- }
- /*
- * We might have lost our iod while sleeping,
- * so check and loop if nescessary.
- */
- if (nmp->nm_bioqiods == 0) {
- NFS_DPF(ASYNCIO,
- ("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
- goto again;
- }
- }
- BUF_KERNPROC(bp);
+ if (nmp->nm_rxstate > NFSSVC_PENDING)
+ return (EIO);
- /*
- * The passed bio's buffer is not necessary associated with
- * the NFS vnode it is being written to. Store the NFS vnode
- * in the BIO driver info.
- */
- bio->bio_driver_info = vp;
- TAILQ_INSERT_TAIL(&nmp->nm_bioq, bio, bio_act);
- nmp->nm_bioqlen++;
- return (0);
- }
+ BUF_KERNPROC(bp);
/*
- * All the iods are busy on other mounts, so return EIO to
- * force the caller to process the i/o synchronously.
+ * The passed bio's buffer is not necessarily associated with
+ * the NFS vnode it is being written to. Store the NFS vnode
+ * in the BIO driver info.
*/
- NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
- return (EIO);
+ bio->bio_driver_info = vp;
+ TAILQ_INSERT_TAIL(&nmp->nm_bioq, bio, bio_act);
+ nmp->nm_bioqlen++;
+ nfssvc_iod_writer_wakeup(nmp);
+ return (0);
}
/*
--- /dev/null
+/*
+ * Copyright (c) 2009 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * NFSIOD operations - now built into the kernel.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/vnode.h>
+#include <sys/fcntl.h>
+#include <sys/protosw.h>
+#include <sys/resourcevar.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/socketops.h>
+#include <sys/syslog.h>
+#include <sys/thread.h>
+#include <sys/tprintf.h>
+#include <sys/sysctl.h>
+#include <sys/signalvar.h>
+#include <sys/mutex.h>
+
+#include <sys/signal2.h>
+#include <sys/mutex2.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <sys/thread2.h>
+
+#include "rpcv2.h"
+#include "nfsproto.h"
+#include "nfs.h"
+#include "xdr_subs.h"
+#include "nfsm_subs.h"
+#include "nfsmount.h"
+#include "nfsnode.h"
+#include "nfsrtt.h"
+/*
+ * Per-mount reader thread; created with lwkt_create() under the name
+ * "nfsiod_rx" and handed the nfsmount as its argument.
+ *
+ * In this revision the loop performs no socket work.  NFSSVC_INIT is
+ * promoted to NFSSVC_PENDING on entry; each pass then sets nm_rxstate
+ * to NFSSVC_WAITING, and while the state remains WAITING the thread
+ * sleeps on &nmp->nm_rxstate.  Any state other than WAITING or PENDING
+ * (nfssvc_iod_stop() sets NFSSVC_STOPPING) ends the loop.
+ *
+ * On exit the thread clears nm_rxthread, sets NFSSVC_DONE and wakes
+ * sleepers on &nmp->nm_rxthread, which is where nfssvc_iod_stop() waits.
+ */
+void
+nfssvc_iod_reader(void *arg)
+{
+ struct nfsmount *nmp = arg;
+
+ if (nmp->nm_rxstate == NFSSVC_INIT)
+ nmp->nm_rxstate = NFSSVC_PENDING;
+ for (;;) {
+ if (nmp->nm_rxstate == NFSSVC_WAITING) {
+ tsleep(&nmp->nm_rxstate, 0, "nfsidl", 0);  /* idle until woken on the state field */
+ continue;  /* re-evaluate the state from the top */
+ }
+ if (nmp->nm_rxstate != NFSSVC_PENDING)
+ break;  /* neither WAITING nor PENDING: leave the loop */
+ nmp->nm_rxstate = NFSSVC_WAITING;
+
+#if 0  /* idle sleep carried over from the removed nfssvc_iod(); never compiled */
+ error = tsleep((caddr_t)&nfs_iodwant[myiod],
+ PCATCH, "nfsidl", 0);
+#endif
+ }
+ nmp->nm_rxthread = NULL;  /* nfssvc_iod_stop() loops until this is NULL */
+ nmp->nm_rxstate = NFSSVC_DONE;
+ wakeup(&nmp->nm_rxthread);
+}
+
+/*
+ * The writer sits on the send side of the client's socket and
+ * does both the initial processing of BIOs and also transmission
+ * and retransmission of nfsreq's.
+ *
+ * State handling: NFSSVC_INIT is promoted to NFSSVC_PENDING on entry.
+ * Each pass sets nm_txstate to NFSSVC_WAITING before draining the
+ * queue, so a nfssvc_iod_writer_wakeup() that arrives during the drain
+ * flips it back to PENDING and the thread makes another pass instead
+ * of sleeping.  Any state other than WAITING or PENDING ends the loop.
+ *
+ * A pass pulls BIOs off the head of nm_bioq while nm_bioqlen is
+ * non-zero and nm_reqqlen is below 32, and runs each one through
+ * nfs_doio() with the vnode stored in bio_driver_info.
+ * NOTE(review): 32 looks like a cap on queued nfsreqs, and nothing
+ * visible here updates nm_reqqlen; confirm where it is maintained.
+ *
+ * On exit the thread clears nm_txthread, sets NFSSVC_DONE and wakes
+ * sleepers on &nmp->nm_txthread, which is where nfssvc_iod_stop() waits.
+ */
+void
+nfssvc_iod_writer(void *arg)
+{
+ struct nfsmount *nmp = arg;
+ struct bio *bio;
+ struct vnode *vp;
+
+ if (nmp->nm_txstate == NFSSVC_INIT)
+ nmp->nm_txstate = NFSSVC_PENDING;
+ for (;;) {
+ if (nmp->nm_txstate == NFSSVC_WAITING) {
+ tsleep(&nmp->nm_txstate, 0, "nfsidl", 0);  /* idle until woken on the state field */
+ continue;  /* re-evaluate the state from the top */
+ }
+ if (nmp->nm_txstate != NFSSVC_PENDING)
+ break;  /* neither WAITING nor PENDING: leave the loop */
+ nmp->nm_txstate = NFSSVC_WAITING;  /* set before draining; see header comment */
+
+ while (nmp->nm_bioqlen && nmp->nm_reqqlen < 32) {
+ bio = TAILQ_FIRST(&nmp->nm_bioq);
+ KKASSERT(bio);  /* a non-zero nm_bioqlen must mean a non-empty queue */
+ TAILQ_REMOVE(&nmp->nm_bioq, bio, bio_act);
+ nmp->nm_bioqlen--;
+ vp = bio->bio_driver_info;  /* vnode stored by nfs_asyncio() when it queued the bio */
+ nfs_doio(vp, bio, NULL);
+ }
+ }
+ nmp->nm_txthread = NULL;  /* nfssvc_iod_stop() loops until this is NULL */
+ nmp->nm_txstate = NFSSVC_DONE;
+ wakeup(&nmp->nm_txthread);
+}
+/*
+ * Shut down both per-mount I/O threads, writer first, then reader.
+ *
+ * For each thread: set its state to NFSSVC_STOPPING, wake it in case it
+ * is sleeping on the state field, then sleep on the thread pointer until
+ * the thread has cleared it.  Returns only once both nm_txthread and
+ * nm_rxthread are NULL.
+ */
+void
+nfssvc_iod_stop(struct nfsmount *nmp)
+{
+ nmp->nm_txstate = NFSSVC_STOPPING;
+ wakeup(&nmp->nm_txstate);  /* the writer sleeps on this address when idle */
+ while (nmp->nm_txthread)
+ tsleep(&nmp->nm_txthread, 0, "nfssttx", 0);  /* the writer wakes this address on exit */
+
+ nmp->nm_rxstate = NFSSVC_STOPPING;
+ wakeup(&nmp->nm_rxstate);  /* the reader sleeps on this address when idle */
+ while (nmp->nm_rxthread)
+ tsleep(&nmp->nm_rxthread, 0, "nfsstrx", 0);  /* the reader wakes this address on exit */
+}
+/*
+ * Tell the writer thread that work has been queued (nfs_asyncio() calls
+ * this after appending a bio to nm_bioq).  Only acts when the writer is
+ * in NFSSVC_WAITING: the state becomes NFSSVC_PENDING and any sleeper on
+ * &nmp->nm_txstate is woken.  In every other state this is a no-op.
+ */
+void
+nfssvc_iod_writer_wakeup(struct nfsmount *nmp)
+{
+ if (nmp->nm_txstate == NFSSVC_WAITING) {
+ nmp->nm_txstate = NFSSVC_PENDING;  /* makes the writer run another pass */
+ wakeup(&nmp->nm_txstate);
+ }
+}
TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru);
kfree((caddr_t)nuidp, M_NFSUID);
}
+ nfssvc_iod_stop(nmp);
nfs_free_mount(nmp);
if (error == EWOULDBLOCK)
error = 0;
int
nfs_init(struct vfsconf *vfsp)
{
- int i;
-
callout_init(&nfs_timer_handle);
nfsmount_zone = zinit("NFSMOUNT", sizeof(struct nfsmount), 0, 0, 1);
nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000;
if (nfs_ticks < 1)
nfs_ticks = 1;
- /* Ensure async daemons disabled */
- for (i = 0; i < NFS_MAXASYNCDAEMON; i++) {
- nfs_iodwant[i] = NULL;
- nfs_iodmount[i] = NULL;
- }
nfs_nhinit(); /* Init the nfsnode table */
#ifndef NFS_NOSERVER
nfsrv_init(0); /* Init server data structures */
#ifndef NFS_NOSERVER
static void nfsrv_zapsock (struct nfssvc_sock *slp);
#endif
-static int nfssvc_iod (struct thread *);
#define TRUE 1
#define FALSE 0
-static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
-
SYSCTL_DECL(_vfs_nfs);
#ifndef NFS_NOSERVER
(void) tsleep((caddr_t)&nfssvc_sockhead, 0, "nfsd init", 0);
}
if (uap->flag & NFSSVC_BIOD)
- error = nfssvc_iod(td);
+ error = ENXIO; /* no longer need nfsiod's */
#ifdef NFS_NOSERVER
else
error = ENXIO;
SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, "");
/*
- * Asynchronous I/O daemons for client nfs.
- * They do read-ahead and write-behind operations on the block I/O cache.
- * Never returns unless it fails or gets killed.
- */
-static int
-nfssvc_iod(struct thread *td)
-{
- struct bio *bio;
- int i, myiod;
- struct nfsmount *nmp;
- int error = 0;
-
- /*
- * Assign my position or return error if too many already running
- */
- myiod = -1;
- for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
- if (nfs_asyncdaemon[i] == 0) {
- nfs_asyncdaemon[i]++;
- myiod = i;
- break;
- }
- if (myiod == -1)
- return (EBUSY);
- nfs_numasync++;
- /*
- * Just loop around doin our stuff until SIGKILL
- */
- for (;;) {
- while (((nmp = nfs_iodmount[myiod]) == NULL
- || TAILQ_EMPTY(&nmp->nm_bioq))
- && error == 0) {
- if (nmp)
- nmp->nm_bioqiods--;
- nfs_iodwant[myiod] = td;
- nfs_iodmount[myiod] = NULL;
- error = tsleep((caddr_t)&nfs_iodwant[myiod],
- PCATCH, "nfsidl", 0);
- }
- if (error) {
- nfs_asyncdaemon[myiod] = 0;
- if (nmp)
- nmp->nm_bioqiods--;
- nfs_iodwant[myiod] = NULL;
- nfs_iodmount[myiod] = NULL;
- nfs_numasync--;
- return (error);
- }
- while ((bio = TAILQ_FIRST(&nmp->nm_bioq)) != NULL) {
- /*
- * Take one off the front of the list. The BIO's
- * block number is normalized for DEV_BSIZE.
- */
- TAILQ_REMOVE(&nmp->nm_bioq, bio, bio_act);
- nmp->nm_bioqlen--;
- if (nmp->nm_bioqwant && nmp->nm_bioqlen <= nfs_numasync) {
- nmp->nm_bioqwant = FALSE;
- wakeup(&nmp->nm_bioq);
- }
- nfs_doio((struct vnode *)bio->bio_driver_info, bio, NULL);
-
- /*
- * If there are more than one iod on this mount, then defect
- * so that the iods can be shared out fairly between the mounts
- */
- if (nfs_defect && nmp->nm_bioqiods > 1) {
- NFS_DPF(ASYNCIO,
- ("nfssvc_iod: iod %d defecting from mount %p\n",
- myiod, nmp));
- nfs_iodmount[myiod] = NULL;
- nmp->nm_bioqiods--;
- break;
- }
- }
- }
-}
-
-
-/*
* Get an authorization string for the uid by having the mount_nfs sitting
* on this mount point porpous out of the kernel and do it.
*/
*/
vn_unlock(*vpp);
+ /*
+ * Start the reader and writer threads.
+ */
+ lwkt_create(nfssvc_iod_reader, nmp, &nmp->nm_rxthread,
+ NULL, 0, -1, "nfsiod_rx");
+ lwkt_create(nfssvc_iod_writer, nmp, &nmp->nm_txthread,
+ NULL, 0, -1, "nfsiod_tx");
+
return (0);
bad:
nfs_disconnect(nmp);
nfs_disconnect(nmp);
FREE(nmp->nm_nam, M_SONAME);
- if ((nmp->nm_flag & NFSMNT_KERB) == 0)
+ if ((nmp->nm_flag & NFSMNT_KERB) == 0) {
+ nfssvc_iod_stop(nmp);
nfs_free_mount(nmp);
+ }
return (0);
}
extern u_int32_t nfs_xdrneg1;
extern struct nfsstats nfsstats;
extern nfstype nfsv3_type[9];
-struct thread *nfs_iodwant[NFS_MAXASYNCDAEMON];
-struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
-int nfs_numasync = 0;
SYSCTL_DECL(_vfs_nfs);
#include <sys/mutex.h>
+enum nfssvc_state {  /* state of a per-mount I/O thread; order matters, nfs_asyncio() tests "> NFSSVC_PENDING" */
+ NFSSVC_INIT,  /* first enumerator; a thread promotes INIT to PENDING on entry */
+ NFSSVC_WAITING,  /* no new work flagged; the thread sleeps when it sees this */
+ NFSSVC_PENDING,  /* work flagged; the thread runs another pass */
+ NFSSVC_STOPPING,  /* set by nfssvc_iod_stop(); the thread leaves its loop */
+ NFSSVC_DONE  /* set by the thread just before it returns */
+};
+
/*
* Mount structure.
* One allocated on every NFS mount.
TAILQ_ENTRY(nfsmount) nm_entry; /* entry in nfsmountq */
struct mtx nm_rxlock; /* receive socket lock */
struct mtx nm_txlock; /* send socket lock */
+ thread_t nm_rxthread;
+ thread_t nm_txthread;
+ enum nfssvc_state nm_rxstate;
+ enum nfssvc_state nm_txstate;
struct mount *nm_mountp; /* Vfs structure for this filesystem */
int nm_numgrps; /* Max. size of groupslist */
u_char nm_fh[NFSX_V3FHMAX]; /* File handle of root dir */
LIST_HEAD(, nfsuid) nm_uidhashtbl[NFS_MUIDHASHSIZ];
TAILQ_HEAD(, bio) nm_bioq; /* async io buffer queue */
TAILQ_HEAD(, nfsreq) nm_reqq; /* nfsreq queue */
- short nm_bioqlen; /* number of buffers in queue */
- short nm_bioqwant; /* process wants to add to the queue */
- int nm_bioqiods; /* number of iods processing queue */
+ int nm_bioqlen; /* number of buffers in queue */
+ int nm_reqqlen; /* number of nfsreqs in queue */
u_int64_t nm_maxfilesize; /* maximum file size */
struct ucred *nm_cred; /* 'root' credential */
};
* Queue head for nfsiod's
*/
extern TAILQ_HEAD(nfs_bufq, buf) nfs_bufq;
-extern struct thread *nfs_iodwant[NFS_MAXASYNCDAEMON];
-extern struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
#if defined(_KERNEL)