From b9a7a2bd9ad2f9f23579d9721b65ab0daecd7486 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Fri, 9 Oct 2009 17:56:34 -0700 Subject: [PATCH] NFS - Fix numerous issues with NFS root mounts, primarily for vkernels vkernels do not inherit BOOTP data from PXE and because of this a number of code paths are run which are not run in non-vkernel NFS mounts. * Fix a kernel memory check that was failing for vkernels and causing a panic. * Change the default BOOTP socket type from UDP to TCP. * Adjust the default vfs.nfs.maxasyncbio value based on available mbuf clusters. The value will be adjusted down for systems with small memory footprints. Otherwise NFS can trivially run the system out of mbufs. * Fix bugs in the NFS BIO queueing code related to values of vfs.nfs.maxasyncbio lower than the maximum. * Remove the BOOTP_NFSV3 option when probing for the type of NFS mount (V2 or V3). The BOOTP_NFSV3 kernel option is no longer required for root mounts to default to NFSv3. This also works around a bug related to large write()s with NFSv2 which has not yet been found. 
Reported-by: Rumko --- sys/vfs/nfs/bootp_subr.c | 8 ++++---- sys/vfs/nfs/nfs_iod.c | 2 +- sys/vfs/nfs/nfs_mountrpc.c | 14 +++++--------- sys/vfs/nfs/nfs_socket.c | 4 ++-- sys/vfs/nfs/nfs_subs.c | 8 ++++++++ sys/vfs/nfs/nfsmountrpc.h | 2 +- sys/vm/pmap.h | 11 ++++++++++- 7 files changed, 31 insertions(+), 18 deletions(-) diff --git a/sys/vfs/nfs/bootp_subr.c b/sys/vfs/nfs/bootp_subr.c index 2e1b9c7c04..12e088961d 100644 --- a/sys/vfs/nfs/bootp_subr.c +++ b/sys/vfs/nfs/bootp_subr.c @@ -1431,7 +1431,7 @@ bootpc_decode_reply(struct nfsv3_diskless *nd, struct bootpc_ifcontext *ifctx, ifctx->replylen, TAG_ROOTOPTS); if (p != NULL) { - mountopts(&nd->root_args, p); + nfs_mountopts(&nd->root_args, p); kprintf("rootopts %s ", p); } } else @@ -1454,7 +1454,7 @@ bootpc_decode_reply(struct nfsv3_diskless *nd, struct bootpc_ifcontext *ifctx, TAG_SWAPOPTS); if (p != NULL) { /* swap mount options */ - mountopts(&nd->swap_args, p); + nfs_mountopts(&nd->swap_args, p); kprintf("swapopts %s ", p); } @@ -1611,9 +1611,9 @@ bootpc_init(void) #endif } - mountopts(&nd->root_args, NULL); + nfs_mountopts(&nd->root_args, NULL); - mountopts(&nd->swap_args, NULL); + nfs_mountopts(&nd->swap_args, NULL); for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) if (bootpc_ifctx_isresolved(ifctx) != 0) diff --git a/sys/vfs/nfs/nfs_iod.c b/sys/vfs/nfs/nfs_iod.c index c4ddd502e8..b1547b4502 100644 --- a/sys/vfs/nfs/nfs_iod.c +++ b/sys/vfs/nfs/nfs_iod.c @@ -182,7 +182,7 @@ nfssvc_iod_writer(void *arg) * did everything the kernel wanted us to do. 
*/ while ((bio = TAILQ_FIRST(&nmp->nm_bioq)) != NULL) { - if (nmp->nm_reqqlen >= NFS_MAXASYNCBIO) + if (nmp->nm_reqqlen > nfs_maxasyncbio) break; TAILQ_REMOVE(&nmp->nm_bioq, bio, bio_act); vp = bio->bio_driver_info; diff --git a/sys/vfs/nfs/nfs_mountrpc.c b/sys/vfs/nfs/nfs_mountrpc.c index 233cbc93e9..88843d4007 100644 --- a/sys/vfs/nfs/nfs_mountrpc.c +++ b/sys/vfs/nfs/nfs_mountrpc.c @@ -92,7 +92,7 @@ static int xdr_opaque_decode(struct mbuf **ptr, u_char *buf, int len); static int xdr_int_decode(struct mbuf **ptr, int *iptr); void -mountopts(struct nfs_args *args, char *p) +nfs_mountopts(struct nfs_args *args, char *p) { char *tmp; @@ -100,7 +100,7 @@ mountopts(struct nfs_args *args, char *p) args->rsize = 8192; args->wsize = 8192; args->flags = NFSMNT_RSIZE | NFSMNT_WSIZE | NFSMNT_RESVPORT; - args->sotype = SOCK_DGRAM; + args->sotype = SOCK_STREAM; if (p == NULL) return; if ((tmp = (char *)substr(p, "rsize="))) @@ -113,8 +113,8 @@ mountopts(struct nfs_args *args, char *p) args->flags |= NFSMNT_SOFT; if ((tmp = (char *)substr(p, "noconn"))) args->flags |= NFSMNT_NOCONN; - if ((tmp = (char *)substr(p, "tcp"))) - args->sotype = SOCK_STREAM; + if ((tmp = (char *)substr(p, "udp"))) + args->sotype = SOCK_DGRAM; } /* @@ -136,7 +136,6 @@ md_mount(struct sockaddr_in *mdsin, /* mountd server address */ int authcount; int authver; -#ifdef BOOTP_NFSV3 /* First try NFS v3 */ /* Get port number for MOUNTD. */ error = krpc_portmap(mdsin, RPCPROG_MNT, RPCMNT_VER3, @@ -151,7 +150,6 @@ md_mount(struct sockaddr_in *mdsin, /* mountd server address */ if (error == 0) { args->flags |= NFSMNT_NFSV3; } else { -#endif /* Fallback to NFS v2 */ /* Get port number for MOUNTD. 
*/ @@ -167,10 +165,8 @@ md_mount(struct sockaddr_in *mdsin, /* mountd server address */ RPCMNT_MOUNT, &m, NULL, td); if (error != 0) return error; /* message already freed */ - -#ifdef BOOTP_NFSV3 + args->flags &= ~NFSMNT_NFSV3; } -#endif if (xdr_int_decode(&m, &error) != 0 || error != 0) goto bad; diff --git a/sys/vfs/nfs/nfs_socket.c b/sys/vfs/nfs/nfs_socket.c index 2b71044469..294f627419 100644 --- a/sys/vfs/nfs/nfs_socket.c +++ b/sys/vfs/nfs/nfs_socket.c @@ -1376,7 +1376,7 @@ nfs_request_waitreply(struct nfsreq *rep) rep->r_flags &= ~R_ONREQQ; --nmp->nm_reqqlen; if (TAILQ_FIRST(&nmp->nm_bioq) && - nmp->nm_reqqlen == NFS_MAXASYNCBIO * 2 / 3) { + nmp->nm_reqqlen <= nfs_maxasyncbio * 2 / 3) { nfssvc_iod_writer_wakeup(nmp); } crit_exit(); @@ -1972,7 +1972,7 @@ nfs_hardterm(struct nfsreq *rep, int islocked) rep->r_info->state = NFSM_STATE_PROCESSREPLY; nfssvc_iod_reader_wakeup(nmp); if (TAILQ_FIRST(&nmp->nm_bioq) && - nmp->nm_reqqlen == NFS_MAXASYNCBIO * 2 / 3) { + nmp->nm_reqqlen <= nfs_maxasyncbio * 2 / 3) { nfssvc_iod_writer_wakeup(nmp); } } diff --git a/sys/vfs/nfs/nfs_subs.c b/sys/vfs/nfs/nfs_subs.c index 10cfa0d3ba..4a194e7c74 100644 --- a/sys/vfs/nfs/nfs_subs.c +++ b/sys/vfs/nfs/nfs_subs.c @@ -591,6 +591,14 @@ nfs_init(struct vfsconf *vfsp) nfsrv_initcache(); /* Init the server request cache */ #endif + /* + * Mainly for vkernel operation. 
If memory is severely limited + */ + if (nfs_maxasyncbio > nmbclusters * MCLBYTES / NFS_MAXDATA / 3) + nfs_maxasyncbio = nmbclusters * MCLBYTES / NFS_MAXDATA / 3; + if (nfs_maxasyncbio < 4) + nfs_maxasyncbio = 4; + /* * Initialize reply list and start timer */ diff --git a/sys/vfs/nfs/nfsmountrpc.h b/sys/vfs/nfs/nfsmountrpc.h index cc64cc5dfa..7edf06b91c 100644 --- a/sys/vfs/nfs/nfsmountrpc.h +++ b/sys/vfs/nfs/nfsmountrpc.h @@ -45,6 +45,6 @@ int md_lookup_swap(struct sockaddr_in *mdsin,char *path, u_char *fhp, int *fhsizep, struct nfs_args *args, struct thread *td); -void mountopts(struct nfs_args *args, char *p); +void nfs_mountopts(struct nfs_args *args, char *p); int setfs(struct sockaddr_in *addr, char *path, char *p); diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h index 0fdffb924e..813233e270 100644 --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -120,12 +120,21 @@ extern vm_offset_t virtual_start; extern vm_offset_t virtual_end; extern vm_paddr_t phys_avail[]; +/* + * Return true if the passed address is in the kernel address space. + * This is mainly a check that the address is NOT in the user address space. + * + * For a vkernels all addresses are in the kernel address space. + */ static inline int kva_p(const void *addr) { - /* XXX: mapped? */ +#ifdef _KERNEL_VIRTUAL + return (addr != NULL); +#else return ((unsigned long)KvaStart <= (unsigned long)addr) && ((unsigned long)addr < (unsigned long)KvaEnd); +#endif } void pmap_change_wiring (pmap_t, vm_offset_t, boolean_t); -- 2.41.0