gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 1989, 1993
	3	* The Regents of the University of California. All rights reserved.
	4	* (c) UNIX System Laboratories, Inc.
	5	* All or some portions of this file are derived from material licensed
	6	* to the University of California by American Telephone and Telegraph
	7	* Co. or Unix System Laboratories, Inc. and are reproduced herein with
	8	* the permission of UNIX System Laboratories, Inc.
	9	*
	10	* Redistribution and use in source and binary forms, with or without
	11	* modification, are permitted provided that the following conditions
	12	* are met:
	13	* 1. Redistributions of source code must retain the above copyright
	14	* notice, this list of conditions and the following disclaimer.
	15	* 2. Redistributions in binary form must reproduce the above copyright
	16	* notice, this list of conditions and the following disclaimer in the
	17	* documentation and/or other materials provided with the distribution.
	18	* 3. All advertising materials mentioning features or use of this software
	19	* must display the following acknowledgement:
	20	* This product includes software developed by the University of
	21	* California, Berkeley and its contributors.
	22	* 4. Neither the name of the University nor the names of its contributors
	23	* may be used to endorse or promote products derived from this software
	24	* without specific prior written permission.
	25	*
	26	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	27	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	28	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	29	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	30	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	31	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	32	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	33	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	34	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	35	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	36	* SUCH DAMAGE.
	37	*
	38	* @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
	39	* $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
	40	* $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.128 2008/06/01 19:27:35 dillon Exp $
	41	*/
	42
	43	#include <sys/param.h>
	44	#include <sys/systm.h>
	45	#include <sys/buf.h>
	46	#include <sys/conf.h>
	47	#include <sys/sysent.h>
	48	#include <sys/malloc.h>
	49	#include <sys/mount.h>
	50	#include <sys/mountctl.h>
	51	#include <sys/sysproto.h>
	52	#include <sys/filedesc.h>
	53	#include <sys/kernel.h>
	54	#include <sys/fcntl.h>
	55	#include <sys/file.h>
	56	#include <sys/linker.h>
	57	#include <sys/stat.h>
	58	#include <sys/unistd.h>
	59	#include <sys/vnode.h>
	60	#include <sys/proc.h>
	61	#include <sys/namei.h>
	62	#include <sys/nlookup.h>
	63	#include <sys/dirent.h>
	64	#include <sys/extattr.h>
	65	#include <sys/spinlock.h>
	66	#include <sys/kern_syscall.h>
	67	#include <sys/objcache.h>
	68	#include <sys/sysctl.h>
	69
	70	#include <sys/buf2.h>
	71	#include <sys/file2.h>
	72	#include <sys/spinlock2.h>
	73
	74	#include <vm/vm.h>
	75	#include <vm/vm_object.h>
	76	#include <vm/vm_page.h>
	77
	78	#include <machine/limits.h>
	79	#include <machine/stdarg.h>
	80
	81	#include <vfs/union/union.h>
	82
	83	static void mount_warning(struct mount mp, const char ctl, ...);
	84	static int mount_path(struct proc p, struct mount mp, char rb, char fb);
	85	static int checkvp_chdir (struct vnode vn, struct thread td);
	86	static void checkdirs (struct nchandle old_nch, struct nchandle new_nch);
	87	static int chroot_refuse_vdir_fds (struct filedesc *fdp);
	88	static int chroot_visible_mnt(struct mount mp, struct proc p);
	89	static int getutimes (const struct timeval , struct timespec );
	90	static int setfown (struct vnode *, uid_t, gid_t);
	91	static int setfmode (struct vnode *, int);
	92	static int setfflags (struct vnode *, int);
	93	static int setutimes (struct vnode , const struct timespec , int);
	94	static int usermount = 0; /* if 1, non-root can mount fs. */
	95
	96	int (union_dircheckp) (struct thread , struct vnode *, struct file );
	97
	98	SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
	99
	100	/*
	101	* Virtual File System System Calls
	102	*/
	103
	104	/*
	105	* Mount a file system.
	106	*/
	107	/*
	108	* mount_args(char type, char path, int flags, caddr_t data)
	109	*/
	110	/* ARGSUSED */
	111	int
	112	sys_mount(struct mount_args *uap)
	113	{
	114	struct thread *td = curthread;
	115	struct proc *p = td->td_proc;
	116	struct vnode *vp;
	117	struct nchandle nch;
	118	struct mount *mp;
	119	struct vfsconf *vfsp;
	120	int error, flag = 0, flag2 = 0;
	121	int hasmount;
	122	struct vattr va;
	123	struct nlookupdata nd;
	124	char fstypename[MFSNAMELEN];
	125	struct ucred *cred = p->p_ucred;
	126
	127	KKASSERT(p);
	128	if (cred->cr_prison != NULL)
	129	return (EPERM);
	130	if (usermount == 0 && (error = suser(td)))
	131	return (error);
	132	/*
	133	* Do not allow NFS export by non-root users.
	134	*/
	135	if (uap->flags & MNT_EXPORTED) {
	136	error = suser(td);
	137	if (error)
	138	return (error);
	139	}
	140	/*
	141	* Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	142	*/
	143	if (suser(td))
	144	uap->flags \|= MNT_NOSUID \| MNT_NODEV;
	145
	146	/*
	147	* Lookup the requested path and extract the nch and vnode.
	148	*/
	149	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	150	if (error == 0) {
	151	if ((error = nlookup(&nd)) == 0) {
	152	if (nd.nl_nch.ncp->nc_vp == NULL)
	153	error = ENOENT;
	154	}
	155	}
	156	if (error) {
	157	nlookup_done(&nd);
	158	return (error);
	159	}
	160
	161	/*
	162	* Extract the locked+refd ncp and cleanup the nd structure
	163	*/
	164	nch = nd.nl_nch;
	165	cache_zero(&nd.nl_nch);
	166	nlookup_done(&nd);
	167
	168	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && cache_findmount(&nch))
	169	hasmount = 1;
	170	else
	171	hasmount = 0;
	172
	173
	174	/*
	175	* now we have the locked ref'd nch and unreferenced vnode.
	176	*/
	177	vp = nch.ncp->nc_vp;
	178	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
	179	cache_put(&nch);
	180	return (error);
	181	}
	182	cache_unlock(&nch);
	183
	184	/*
	185	* Now we have an unlocked ref'd nch and a locked ref'd vp
	186	*/
	187	if (uap->flags & MNT_UPDATE) {
	188	if ((vp->v_flag & VROOT) == 0) {
	189	cache_drop(&nch);
	190	vput(vp);
	191	return (EINVAL);
	192	}
	193	mp = vp->v_mount;
	194	flag = mp->mnt_flag;
	195	flag2 = mp->mnt_kern_flag;
	196	/*
	197	* We only allow the filesystem to be reloaded if it
	198	* is currently mounted read-only.
	199	*/
	200	if ((uap->flags & MNT_RELOAD) &&
	201	((mp->mnt_flag & MNT_RDONLY) == 0)) {
	202	cache_drop(&nch);
	203	vput(vp);
	204	return (EOPNOTSUPP); /* Needs translation */
	205	}
	206	/*
	207	* Only root, or the user that did the original mount is
	208	* permitted to update it.
	209	*/
	210	if (mp->mnt_stat.f_owner != cred->cr_uid &&
	211	(error = suser(td))) {
	212	cache_drop(&nch);
	213	vput(vp);
	214	return (error);
	215	}
	216	if (vfs_busy(mp, LK_NOWAIT)) {
	217	cache_drop(&nch);
	218	vput(vp);
	219	return (EBUSY);
	220	}
	221	if ((vp->v_flag & VMOUNT) != 0 \|\| hasmount) {
	222	cache_drop(&nch);
	223	vfs_unbusy(mp);
	224	vput(vp);
	225	return (EBUSY);
	226	}
	227	vp->v_flag \|= VMOUNT;
	228	mp->mnt_flag \|=
	229	uap->flags & (MNT_RELOAD \| MNT_FORCE \| MNT_UPDATE);
	230	vn_unlock(vp);
	231	goto update;
	232	}
	233	/*
	234	* If the user is not root, ensure that they own the directory
	235	* onto which we are attempting to mount.
	236	*/
	237	if ((error = VOP_GETATTR(vp, &va)) \|\|
	238	(va.va_uid != cred->cr_uid && (error = suser(td)))) {
	239	cache_drop(&nch);
	240	vput(vp);
	241	return (error);
	242	}
	243	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
	244	cache_drop(&nch);
	245	vput(vp);
	246	return (error);
	247	}
	248	if (vp->v_type != VDIR) {
	249	cache_drop(&nch);
	250	vput(vp);
	251	return (ENOTDIR);
	252	}
	253	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
	254	cache_drop(&nch);
	255	vput(vp);
	256	return (EPERM);
	257	}
	258	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
	259	cache_drop(&nch);
	260	vput(vp);
	261	return (error);
	262	}
	263	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
	264	if (!strcmp(vfsp->vfc_name, fstypename))
	265	break;
	266	}
	267	if (vfsp == NULL) {
	268	linker_file_t lf;
	269
	270	/* Only load modules for root (very important!) */
	271	if ((error = suser(td)) != 0) {
	272	cache_drop(&nch);
	273	vput(vp);
	274	return error;
	275	}
	276	error = linker_load_file(fstypename, &lf);
	277	if (error \|\| lf == NULL) {
	278	cache_drop(&nch);
	279	vput(vp);
	280	if (lf == NULL)
	281	error = ENODEV;
	282	return error;
	283	}
	284	lf->userrefs++;
	285	/* lookup again, see if the VFS was loaded */
	286	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
	287	if (!strcmp(vfsp->vfc_name, fstypename))
	288	break;
	289	}
	290	if (vfsp == NULL) {
	291	lf->userrefs--;
	292	linker_file_unload(lf);
	293	cache_drop(&nch);
	294	vput(vp);
	295	return (ENODEV);
	296	}
	297	}
	298	if ((vp->v_flag & VMOUNT) != 0 \|\| hasmount) {
	299	cache_drop(&nch);
	300	vput(vp);
	301	return (EBUSY);
	302	}
	303	vp->v_flag \|= VMOUNT;
	304
	305	/*
	306	* Allocate and initialize the filesystem.
	307	*/
	308	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO\|M_WAITOK);
	309	TAILQ_INIT(&mp->mnt_nvnodelist);
	310	TAILQ_INIT(&mp->mnt_reservedvnlist);
	311	TAILQ_INIT(&mp->mnt_jlist);
	312	mp->mnt_nvnodelistsize = 0;
	313	lockinit(&mp->mnt_lock, "vfslock", 0, 0);
	314	vfs_busy(mp, LK_NOWAIT);
	315	mp->mnt_op = vfsp->vfc_vfsops;
	316	mp->mnt_vfc = vfsp;
	317	vfsp->vfc_refcount++;
	318	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	319	mp->mnt_flag \|= vfsp->vfc_flags & MNT_VISFLAGMASK;
	320	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	321	mp->mnt_stat.f_owner = cred->cr_uid;
	322	mp->mnt_iosize_max = DFLTPHYS;
	323	vn_unlock(vp);
	324	update:
	325	/*
	326	* Set the mount level flags.
	327	*/
	328	if (uap->flags & MNT_RDONLY)
	329	mp->mnt_flag \|= MNT_RDONLY;
	330	else if (mp->mnt_flag & MNT_RDONLY)
	331	mp->mnt_kern_flag \|= MNTK_WANTRDWR;
	332	mp->mnt_flag &=~ (MNT_NOSUID \| MNT_NOEXEC \| MNT_NODEV \|
	333	MNT_SYNCHRONOUS \| MNT_UNION \| MNT_ASYNC \| MNT_NOATIME \|
	334	MNT_NOSYMFOLLOW \| MNT_IGNORE \|
	335	MNT_NOCLUSTERR \| MNT_NOCLUSTERW \| MNT_SUIDDIR);
	336	mp->mnt_flag \|= uap->flags & (MNT_NOSUID \| MNT_NOEXEC \|
	337	MNT_NODEV \| MNT_SYNCHRONOUS \| MNT_UNION \| MNT_ASYNC \| MNT_FORCE \|
	338	MNT_NOSYMFOLLOW \| MNT_IGNORE \|
	339	MNT_NOATIME \| MNT_NOCLUSTERR \| MNT_NOCLUSTERW \| MNT_SUIDDIR);
	340	/*
	341	* Mount the filesystem.
	342	* XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	343	* get.
	344	*/
	345	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	346	if (mp->mnt_flag & MNT_UPDATE) {
	347	if (mp->mnt_kern_flag & MNTK_WANTRDWR)
	348	mp->mnt_flag &= ~MNT_RDONLY;
	349	mp->mnt_flag &=~ (MNT_UPDATE \| MNT_RELOAD \| MNT_FORCE);
	350	mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
	351	if (error) {
	352	mp->mnt_flag = flag;
	353	mp->mnt_kern_flag = flag2;
	354	}
	355	vfs_unbusy(mp);
	356	vp->v_flag &= ~VMOUNT;
	357	vrele(vp);
	358	cache_drop(&nch);
	359	return (error);
	360	}
	361	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	362	/*
	363	* Put the new filesystem on the mount list after root. The mount
	364	* point gets its own mnt_ncmountpt (unless the VFS already set one
	365	* up) which represents the root of the mount. The lookup code
	366	* detects the mount point going forward and checks the root of
	367	* the mount going backwards.
	368	*
	369	* It is not necessary to invalidate or purge the vnode underneath
	370	* because elements under the mount will be given their own glue
	371	* namecache record.
	372	*/
	373	if (!error) {
	374	if (mp->mnt_ncmountpt.ncp == NULL) {
	375	/*
	376	* allocate, then unlock, but leave the ref intact
	377	*/
	378	cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
	379	cache_unlock(&mp->mnt_ncmountpt);
	380	}
	381	mp->mnt_ncmounton = nch; /* inherits ref */
	382	nch.ncp->nc_flag \|= NCF_ISMOUNTPT;
	383
	384	/* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */
	385	vp->v_flag &= ~VMOUNT;
	386	mountlist_insert(mp, MNTINS_LAST);
	387	checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
	388	vn_unlock(vp);
	389	error = vfs_allocate_syncvnode(mp);
	390	vfs_unbusy(mp);
	391	error = VFS_START(mp, 0);
	392	vrele(vp);
	393	} else {
	394	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	395	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	396	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	397	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	398	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
	399	vp->v_flag &= ~VMOUNT;
	400	mp->mnt_vfc->vfc_refcount--;
	401	vfs_unbusy(mp);
	402	kfree(mp, M_MOUNT);
	403	cache_drop(&nch);
	404	vput(vp);
	405	}
	406	return (error);
	407	}
	408
	409	/*
	410	* Scan all active processes to see if any of them have a current
	411	* or root directory onto which the new filesystem has just been
	412	* mounted. If so, replace them with the new mount point.
	413	*
	414	* The passed ncp is ref'd and locked (from the mount code) and
	415	* must be associated with the vnode representing the root of the
	416	* mount point.
	417	*/
	418	struct checkdirs_info {
	419	struct nchandle old_nch;
	420	struct nchandle new_nch;
	421	struct vnode *old_vp;
	422	struct vnode *new_vp;
	423	};
	424
	425	static int checkdirs_callback(struct proc p, void data);
	426
	427	static void
	428	checkdirs(struct nchandle old_nch, struct nchandle new_nch)
	429	{
	430	struct checkdirs_info info;
	431	struct vnode *olddp;
	432	struct vnode *newdp;
	433	struct mount *mp;
	434
	435	/*
	436	* If the old mount point's vnode has a usecount of 1, it is not
	437	* being held as a descriptor anywhere.
	438	*/
	439	olddp = old_nch->ncp->nc_vp;
	440	if (olddp == NULL \|\| olddp->v_sysref.refcnt == 1)
	441	return;
	442
	443	/*
	444	* Force the root vnode of the new mount point to be resolved
	445	* so we can update any matching processes.
	446	*/
	447	mp = new_nch->mount;
	448	if (VFS_ROOT(mp, &newdp))
	449	panic("mount: lost mount");
	450	cache_setunresolved(new_nch);
	451	cache_setvp(new_nch, newdp);
	452
	453	/*
	454	* Special handling of the root node
	455	*/
	456	if (rootvnode == olddp) {
	457	vref(newdp);
	458	vfs_cache_setroot(newdp, cache_hold(new_nch));
	459	}
	460
	461	/*
	462	* Pass newdp separately so the callback does not have to access
	463	* it via new_nch->ncp->nc_vp.
	464	*/
	465	info.old_nch = *old_nch;
	466	info.new_nch = *new_nch;
	467	info.new_vp = newdp;
	468	allproc_scan(checkdirs_callback, &info);
	469	vput(newdp);
	470	}
	471
	472	/*
	473	* NOTE: callback is not MP safe because the scanned process's filedesc
	474	* structure can be ripped out from under us, amoung other things.
	475	*/
	476	static int
	477	checkdirs_callback(struct proc p, void data)
	478	{
	479	struct checkdirs_info *info = data;
	480	struct filedesc *fdp;
	481	struct nchandle ncdrop1;
	482	struct nchandle ncdrop2;
	483	struct vnode *vprele1;
	484	struct vnode *vprele2;
	485
	486	if ((fdp = p->p_fd) != NULL) {
	487	cache_zero(&ncdrop1);
	488	cache_zero(&ncdrop2);
	489	vprele1 = NULL;
	490	vprele2 = NULL;
	491
	492	/*
	493	* MPUNSAFE - XXX fdp can be pulled out from under a
	494	* foreign process.
	495	*
	496	* A shared filedesc is ok, we don't have to copy it
	497	* because we are making this change globally.
	498	*/
	499	spin_lock_wr(&fdp->fd_spin);
	500	if (fdp->fd_ncdir.mount == info->old_nch.mount &&
	501	fdp->fd_ncdir.ncp == info->old_nch.ncp) {
	502	vprele1 = fdp->fd_cdir;
	503	vref(info->new_vp);
	504	fdp->fd_cdir = info->new_vp;
	505	ncdrop1 = fdp->fd_ncdir;
	506	cache_copy(&info->new_nch, &fdp->fd_ncdir);
	507	}
	508	if (fdp->fd_nrdir.mount == info->old_nch.mount &&
	509	fdp->fd_nrdir.ncp == info->old_nch.ncp) {
	510	vprele2 = fdp->fd_rdir;
	511	vref(info->new_vp);
	512	fdp->fd_rdir = info->new_vp;
	513	ncdrop2 = fdp->fd_nrdir;
	514	cache_copy(&info->new_nch, &fdp->fd_nrdir);
	515	}
	516	spin_unlock_wr(&fdp->fd_spin);
	517	if (ncdrop1.ncp)
	518	cache_drop(&ncdrop1);
	519	if (ncdrop2.ncp)
	520	cache_drop(&ncdrop2);
	521	if (vprele1)
	522	vrele(vprele1);
	523	if (vprele2)
	524	vrele(vprele2);
	525	}
	526	return(0);
	527	}
	528
	529	/*
	530	* Unmount a file system.
	531	*
	532	* Note: unmount takes a path to the vnode mounted on as argument,
	533	* not special file (as before).
	534	*/
	535	/*
	536	* umount_args(char *path, int flags)
	537	*/
	538	/* ARGSUSED */
	539	int
	540	sys_unmount(struct unmount_args *uap)
	541	{
	542	struct thread *td = curthread;
	543	struct proc *p = td->td_proc;
	544	struct mount *mp = NULL;
	545	int error;
	546	struct nlookupdata nd;
	547
	548	KKASSERT(p);
	549	if (p->p_ucred->cr_prison != NULL)
	550	return (EPERM);
	551	if (usermount == 0 && (error = suser(td)))
	552	return (error);
	553
	554	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	555	if (error == 0)
	556	error = nlookup(&nd);
	557	if (error)
	558	goto out;
	559
	560	mp = nd.nl_nch.mount;
	561
	562	/*
	563	* Only root, or the user that did the original mount is
	564	* permitted to unmount this filesystem.
	565	*/
	566	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
	567	(error = suser(td)))
	568	goto out;
	569
	570	/*
	571	* Don't allow unmounting the root file system.
	572	*/
	573	if (mp->mnt_flag & MNT_ROOTFS) {
	574	error = EINVAL;
	575	goto out;
	576	}
	577
	578	/*
	579	* Must be the root of the filesystem
	580	*/
	581	if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
	582	error = EINVAL;
	583	goto out;
	584	}
	585
	586	out:
	587	nlookup_done(&nd);
	588	if (error)
	589	return (error);
	590	return (dounmount(mp, uap->flags));
	591	}
	592
	593	/*
	594	* Do the actual file system unmount.
	595	*/
	596	static int
	597	dounmount_interlock(struct mount *mp)
	598	{
	599	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
	600	return (EBUSY);
	601	mp->mnt_kern_flag \|= MNTK_UNMOUNT;
	602	return(0);
	603	}
	604
	605	int
	606	dounmount(struct mount *mp, int flags)
	607	{
	608	struct namecache *ncp;
	609	struct nchandle nch;
	610	struct vnode *vp;
	611	int error;
	612	int async_flag;
	613	int lflags;
	614	int freeok = 1;
	615
	616	/*
	617	* Exclusive access for unmounting purposes
	618	*/
	619	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
	620	return (error);
	621
	622	/*
	623	* Allow filesystems to detect that a forced unmount is in progress.
	624	*/
	625	if (flags & MNT_FORCE)
	626	mp->mnt_kern_flag \|= MNTK_UNMOUNTF;
	627	lflags = LK_EXCLUSIVE \| ((flags & MNT_FORCE) ? 0 : LK_NOWAIT);
	628	error = lockmgr(&mp->mnt_lock, lflags);
	629	if (error) {
	630	mp->mnt_kern_flag &= ~(MNTK_UNMOUNT \| MNTK_UNMOUNTF);
	631	if (mp->mnt_kern_flag & MNTK_MWAIT)
	632	wakeup(mp);
	633	return (error);
	634	}
	635
	636	if (mp->mnt_flag & MNT_EXPUBLIC)
	637	vfs_setpublicfs(NULL, NULL, NULL);
	638
	639	vfs_msync(mp, MNT_WAIT);
	640	async_flag = mp->mnt_flag & MNT_ASYNC;
	641	mp->mnt_flag &=~ MNT_ASYNC;
	642
	643	/*
	644	* If this filesystem isn't aliasing other filesystems,
	645	* try to invalidate any remaining namecache entries and
	646	* check the count afterwords.
	647	*/
	648	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
	649	cache_lock(&mp->mnt_ncmountpt);
	650	cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY\|CINV_CHILDREN);
	651	cache_unlock(&mp->mnt_ncmountpt);
	652
	653	if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
	654	(ncp->nc_refs != 1 \|\| TAILQ_FIRST(&ncp->nc_list))) {
	655
	656	if ((flags & MNT_FORCE) == 0) {
	657	error = EBUSY;
	658	mount_warning(mp, "Cannot unmount: "
	659	"%d namecache "
	660	"references still "
	661	"present",
	662	ncp->nc_refs - 1);
	663	} else {
	664	mount_warning(mp, "Forced unmount: "
	665	"%d namecache "
	666	"references still "
	667	"present",
	668	ncp->nc_refs - 1);
	669	freeok = 0;
	670	}
	671	}
	672	}
	673
	674	/*
	675	* nchandle records ref the mount structure. Expect a count of 1
	676	* (our mount->mnt_ncmountpt).
	677	*/
	678	if (mp->mnt_refs != 1) {
	679	if ((flags & MNT_FORCE) == 0) {
	680	mount_warning(mp, "Cannot unmount: "
	681	"%d process references still "
	682	"present", mp->mnt_refs);
	683	error = EBUSY;
	684	} else {
	685	mount_warning(mp, "Forced unmount: "
	686	"%d process references still "
	687	"present", mp->mnt_refs);
	688	freeok = 0;
	689	}
	690	}
	691
	692	/*
	693	* Decomission our special mnt_syncer vnode. This also stops
	694	* the vnlru code. If we are unable to unmount we recommission
	695	* the vnode.
	696	*/
	697	if (error == 0) {
	698	if ((vp = mp->mnt_syncer) != NULL) {
	699	mp->mnt_syncer = NULL;
	700	vrele(vp);
	701	}
	702	if (((mp->mnt_flag & MNT_RDONLY) \|\|
	703	(error = VFS_SYNC(mp, MNT_WAIT)) == 0) \|\|
	704	(flags & MNT_FORCE)) {
	705	error = VFS_UNMOUNT(mp, flags);
	706	}
	707	}
	708	if (error) {
	709	if (mp->mnt_syncer == NULL)
	710	vfs_allocate_syncvnode(mp);
	711	mp->mnt_kern_flag &= ~(MNTK_UNMOUNT \| MNTK_UNMOUNTF);
	712	mp->mnt_flag \|= async_flag;
	713	lockmgr(&mp->mnt_lock, LK_RELEASE);
	714	if (mp->mnt_kern_flag & MNTK_MWAIT)
	715	wakeup(mp);
	716	return (error);
	717	}
	718	/*
	719	* Clean up any journals still associated with the mount after
	720	* filesystem activity has ceased.
	721	*/
	722	journal_remove_all_journals(mp,
	723	((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));
	724
	725	mountlist_remove(mp);
	726
	727	/*
	728	* Remove any installed vnode ops here so the individual VFSs don't
	729	* have to.
	730	*/
	731	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	732	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	733	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	734	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	735	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
	736
	737	if (mp->mnt_ncmountpt.ncp != NULL) {
	738	nch = mp->mnt_ncmountpt;
	739	cache_zero(&mp->mnt_ncmountpt);
	740	cache_clrmountpt(&nch);
	741	cache_drop(&nch);
	742	}
	743	if (mp->mnt_ncmounton.ncp != NULL) {
	744	nch = mp->mnt_ncmounton;
	745	cache_zero(&mp->mnt_ncmounton);
	746	cache_clrmountpt(&nch);
	747	cache_drop(&nch);
	748	}
	749
	750	mp->mnt_vfc->vfc_refcount--;
	751	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
	752	panic("unmount: dangling vnode");
	753	lockmgr(&mp->mnt_lock, LK_RELEASE);
	754	if (mp->mnt_kern_flag & MNTK_MWAIT)
	755	wakeup(mp);
	756	if (freeok)
	757	kfree(mp, M_MOUNT);
	758	return (0);
	759	}
	760
	761	static
	762	void
	763	mount_warning(struct mount mp, const char ctl, ...)
	764	{
	765	char *ptr;
	766	char *buf;
	767	__va_list va;
	768
	769	__va_start(va, ctl);
	770	if (cache_fullpath(NULL, &mp->mnt_ncmounton, &ptr, &buf) == 0) {
	771	kprintf("unmount(%s): ", ptr);
	772	kvprintf(ctl, va);
	773	kprintf("\n");
	774	kfree(buf, M_TEMP);
	775	} else {
	776	kprintf("unmount(%p", mp);
	777	if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
	778	kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
	779	kprintf("): ");
	780	kvprintf(ctl, va);
	781	kprintf("\n");
	782	}
	783	__va_end(va);
	784	}
	785
	786	/*
	787	* Shim cache_fullpath() to handle the case where a process is chrooted into
	788	* a subdirectory of a mount. In this case if the root mount matches the
	789	* process root directory's mount we have to specify the process's root
	790	* directory instead of the mount point, because the mount point might
	791	* be above the root directory.
	792	*/
	793	static
	794	int
	795	mount_path(struct proc p, struct mount mp, char rb, char fb)
	796	{
	797	struct nchandle *nch;
	798
	799	if (p && p->p_fd->fd_nrdir.mount == mp)
	800	nch = &p->p_fd->fd_nrdir;
	801	else
	802	nch = &mp->mnt_ncmountpt;
	803	return(cache_fullpath(p, nch, rb, fb));
	804	}
	805
	806	/*
	807	* Sync each mounted filesystem.
	808	*/
	809
	810	#ifdef DEBUG
	811	static int syncprt = 0;
	812	SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
	813	#endif /* DEBUG */
	814
	815	static int sync_callback(struct mount mp, void data);
	816
	817	/* ARGSUSED */
	818	int
	819	sys_sync(struct sync_args *uap)
	820	{
	821	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
	822	#ifdef DEBUG
	823	/*
	824	* print out buffer pool stat information on each sync() call.
	825	*/
	826	if (syncprt)
	827	vfs_bufstats();
	828	#endif /* DEBUG */
	829	return (0);
	830	}
	831
	832	static
	833	int
	834	sync_callback(struct mount mp, void data __unused)
	835	{
	836	int asyncflag;
	837
	838	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
	839	asyncflag = mp->mnt_flag & MNT_ASYNC;
	840	mp->mnt_flag &= ~MNT_ASYNC;
	841	vfs_msync(mp, MNT_NOWAIT);
	842	VFS_SYNC(mp, MNT_NOWAIT);
	843	mp->mnt_flag \|= asyncflag;
	844	}
	845	return(0);
	846	}
	847
	848	/* XXX PRISON: could be per prison flag; while this is zero sys_quotactl() returns EPERM to jailed callers. The sysctl that would expose it is compiled out below. */
	849	static int prison_quotas;
	850	#if 0
	851	SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
	852	#endif
	853
	854	/*
	855	* quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
	856	*
	857	* Change filesystem quotas.
	858	*/
	859	/* ARGSUSED */
	860	int
	861	sys_quotactl(struct quotactl_args *uap)
	862	{
	863	struct nlookupdata nd;
	864	struct thread *td;
	865	struct proc *p;
	866	struct mount *mp;
	867	int error;
	868
	869	td = curthread;
	870	p = td->td_proc;
	871	if (p->p_ucred->cr_prison && !prison_quotas)
	872	return (EPERM);
	873
	874	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	875	if (error == 0)
	876	error = nlookup(&nd);
	877	if (error == 0) {
	878	mp = nd.nl_nch.mount;
	879	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
	880	uap->arg, nd.nl_cred);
	881	}
	882	nlookup_done(&nd);
	883	return (error);
	884	}
	885
	886	/*
	887	* mountctl(char path, int op, int fd, const void ctl, int ctllen,
	888	* void *buf, int buflen)
	889	*
	890	* This function operates on a mount point and executes the specified
	891	* operation using the specified control data, and possibly returns data.
	892	*
	893	* The actual number of bytes stored in the result buffer is returned, 0
	894	* if none, otherwise an error is returned.
	895	*/
	896	/* ARGSUSED */
	897	int
	898	sys_mountctl(struct mountctl_args *uap)
	899	{
	900	struct thread *td = curthread;
	901	struct proc *p = td->td_proc;
	902	struct file *fp;
	903	void *ctl = NULL;
	904	void *buf = NULL;
	905	char *path = NULL;
	906	int error;
	907
	908	/*
	909	* Sanity and permissions checks. We must be root.
	910	*/
	911	KKASSERT(p);
	912	if (p->p_ucred->cr_prison != NULL)
	913	return (EPERM);
	914	if ((error = suser(td)) != 0)
	915	return (error);
	916
	917	/*
	918	* Argument length checks
	919	*/
	920	if (uap->ctllen < 0 \|\| uap->ctllen > 1024)
	921	return (EINVAL);
	922	if (uap->buflen < 0 \|\| uap->buflen > 16 * 1024)
	923	return (EINVAL);
	924	if (uap->path == NULL)
	925	return (EINVAL);
	926
	927	/*
	928	* Allocate the necessary buffers and copyin data
	929	*/
	930	path = objcache_get(namei_oc, M_WAITOK);
	931	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	932	if (error)
	933	goto done;
	934
	935	if (uap->ctllen) {
	936	ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK\|M_ZERO);
	937	error = copyin(uap->ctl, ctl, uap->ctllen);
	938	if (error)
	939	goto done;
	940	}
	941	if (uap->buflen)
	942	buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK\|M_ZERO);
	943
	944	/*
	945	* Validate the descriptor
	946	*/
	947	if (uap->fd >= 0) {
	948	fp = holdfp(p->p_fd, uap->fd, -1);
	949	if (fp == NULL) {
	950	error = EBADF;
	951	goto done;
	952	}
	953	} else {
	954	fp = NULL;
	955	}
	956
	957	/*
	958	* Execute the internal kernel function and clean up.
	959	*/
	960	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result);
	961	if (fp)
	962	fdrop(fp);
	963	if (error == 0 && uap->sysmsg_result > 0)
	964	error = copyout(buf, uap->buf, uap->sysmsg_result);
	965	done:
	966	if (path)
	967	objcache_put(namei_oc, path);
	968	if (ctl)
	969	kfree(ctl, M_TEMP);
	970	if (buf)
	971	kfree(buf, M_TEMP);
	972	return (error);
	973	}
	974
	975	/*
	976	* Execute a mount control operation by resolving the path to a mount point
	977	* and calling vop_mountctl().
	978	*/
	979	int
	980	kern_mountctl(const char path, int op, struct file fp,
	981	const void *ctl, int ctllen,
	982	void buf, int buflen, int res)
	983	{
	984	struct vnode *vp;
	985	struct mount *mp;
	986	struct nlookupdata nd;
	987	int error;
	988
	989	*res = 0;
	990	vp = NULL;
	991	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	992	if (error == 0)
	993	error = nlookup(&nd);
	994	if (error == 0)
	995	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	996	nlookup_done(&nd);
	997	if (error)
	998	return (error);
	999
	1000	mp = vp->v_mount;
	1001
	1002	/*
	1003	* Must be the root of the filesystem
	1004	*/
	1005	if ((vp->v_flag & VROOT) == 0) {
	1006	vput(vp);
	1007	return (EINVAL);
	1008	}
	1009	error = vop_mountctl(mp->mnt_vn_use_ops, op, fp, ctl, ctllen,
	1010	buf, buflen, res);
	1011	vput(vp);
	1012	return (error);
	1013	}
	1014
	1015	int
	1016	kern_statfs(struct nlookupdata nd, struct statfs buf)
	1017	{
	1018	struct thread *td = curthread;
	1019	struct proc *p = td->td_proc;
	1020	struct mount *mp;
	1021	struct statfs *sp;
	1022	char fullpath, freepath;
	1023	int error;
	1024
	1025	if ((error = nlookup(nd)) != 0)
	1026	return (error);
	1027	mp = nd->nl_nch.mount;
	1028	sp = &mp->mnt_stat;
	1029	if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0)
	1030	return (error);
	1031
	1032	error = mount_path(p, mp, &fullpath, &freepath);
	1033	if (error)
	1034	return(error);
	1035	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	1036	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	1037	kfree(freepath, M_TEMP);
	1038
	1039	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	1040	bcopy(sp, buf, sizeof(*buf));
	1041	/* Only root should have access to the fsid's. */
	1042	if (suser(td))
	1043	buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	1044	return (0);
	1045	}
	1046
	1047	/*
	1048	* statfs_args(char path, struct statfs buf)
	1049	*
	1050	* Get filesystem statistics.
	1051	*/
	1052	int
	1053	sys_statfs(struct statfs_args *uap)
	1054	{
	1055	struct nlookupdata nd;
	1056	struct statfs buf;
	1057	int error;
	1058
	1059	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1060	if (error == 0)
	1061	error = kern_statfs(&nd, &buf);
	1062	nlookup_done(&nd);
	1063	if (error == 0)
	1064	error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	1065	return (error);
	1066	}
	1067
	1068	int
	1069	kern_fstatfs(int fd, struct statfs *buf)
	1070	{
	1071	struct thread *td = curthread;
	1072	struct proc *p = td->td_proc;
	1073	struct file *fp;
	1074	struct mount *mp;
	1075	struct statfs *sp;
	1076	char fullpath, freepath;
	1077	int error;
	1078
	1079	KKASSERT(p);
	1080	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	1081	return (error);
	1082	mp = ((struct vnode *)fp->f_data)->v_mount;
	1083	if (mp == NULL) {
	1084	error = EBADF;
	1085	goto done;
	1086	}
	1087	if (fp->f_cred == NULL) {
	1088	error = EINVAL;
	1089	goto done;
	1090	}
	1091	sp = &mp->mnt_stat;
	1092	if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0)
	1093	goto done;
	1094
	1095	if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
	1096	goto done;
	1097	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	1098	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	1099	kfree(freepath, M_TEMP);
	1100
	1101	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	1102	bcopy(sp, buf, sizeof(*buf));
	1103
	1104	/* Only root should have access to the fsid's. */
	1105	if (suser(td))
	1106	buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	1107	error = 0;
	1108	done:
	1109	fdrop(fp);
	1110	return (error);
	1111	}
	1112
	1113	/*
	1114	* fstatfs_args(int fd, struct statfs *buf)
	1115	*
	1116	* Get filesystem statistics.
	1117	*/
	1118	int
	1119	sys_fstatfs(struct fstatfs_args *uap)
	1120	{
	1121	struct statfs buf;
	1122	int error;
	1123
	1124	error = kern_fstatfs(uap->fd, &buf);
	1125
	1126	if (error == 0)
	1127	error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	1128	return (error);
	1129	}
	1130
	1131	int
	1132	kern_statvfs(struct nlookupdata nd, struct statvfs buf)
	1133	{
	1134	struct mount *mp;
	1135	struct statvfs *sp;
	1136	int error;
	1137
	1138	if ((error = nlookup(nd)) != 0)
	1139	return (error);
	1140	mp = nd->nl_nch.mount;
	1141	sp = &mp->mnt_vstat;
	1142	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
	1143	return (error);
	1144
	1145	sp->f_flag = 0;
	1146	if (mp->mnt_flag & MNT_RDONLY)
	1147	sp->f_flag \|= ST_RDONLY;
	1148	if (mp->mnt_flag & MNT_NOSUID)
	1149	sp->f_flag \|= ST_NOSUID;
	1150	bcopy(sp, buf, sizeof(*buf));
	1151	return (0);
	1152	}
	1153
	1154	/*
	1155	* statfs_args(char path, struct statfs buf)
	1156	*
	1157	* Get filesystem statistics.
	1158	*/
	1159	int
	1160	sys_statvfs(struct statvfs_args *uap)
	1161	{
	1162	struct nlookupdata nd;
	1163	struct statvfs buf;
	1164	int error;
	1165
	1166	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1167	if (error == 0)
	1168	error = kern_statvfs(&nd, &buf);
	1169	nlookup_done(&nd);
	1170	if (error == 0)
	1171	error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	1172	return (error);
	1173	}
	1174
	1175	int
	1176	kern_fstatvfs(int fd, struct statvfs *buf)
	1177	{
	1178	struct thread *td = curthread;
	1179	struct proc *p = td->td_proc;
	1180	struct file *fp;
	1181	struct mount *mp;
	1182	struct statvfs *sp;
	1183	int error;
	1184
	1185	KKASSERT(p);
	1186	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	1187	return (error);
	1188	mp = ((struct vnode *)fp->f_data)->v_mount;
	1189	if (mp == NULL) {
	1190	error = EBADF;
	1191	goto done;
	1192	}
	1193	if (fp->f_cred == NULL) {
	1194	error = EINVAL;
	1195	goto done;
	1196	}
	1197	sp = &mp->mnt_vstat;
	1198	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
	1199	goto done;
	1200
	1201	sp->f_flag = 0;
	1202	if (mp->mnt_flag & MNT_RDONLY)
	1203	sp->f_flag \|= ST_RDONLY;
	1204	if (mp->mnt_flag & MNT_NOSUID)
	1205	sp->f_flag \|= ST_NOSUID;
	1206
	1207	bcopy(sp, buf, sizeof(*buf));
	1208	error = 0;
	1209	done:
	1210	fdrop(fp);
	1211	return (error);
	1212	}
	1213
	1214	/*
	1215	* fstatfs_args(int fd, struct statfs *buf)
	1216	*
	1217	* Get filesystem statistics.
	1218	*/
	1219	int
	1220	sys_fstatvfs(struct fstatvfs_args *uap)
	1221	{
	1222	struct statvfs buf;
	1223	int error;
	1224
	1225	error = kern_fstatvfs(uap->fd, &buf);
	1226
	1227	if (error == 0)
	1228	error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	1229	return (error);
	1230	}
	1231
	1232	/*
	1233	* getfsstat_args(struct statfs *buf, long bufsize, int flags)
	1234	*
	1235	* Get statistics on all filesystems.
	1236	*/
	1237
	/*
	 * State shared between sys_getfsstat() and getfsstat_callback() while
	 * the mount list is scanned.
	 */
	struct getfsstat_info {
		struct statfs *sfsp;	/* next user slot for copyout; NULL = count only */
		long count;		/* entries accounted for so far */
		long maxcount;		/* entries that fit in the user buffer */
		int error;		/* error recorded by the callback, 0 if none */
		int flags;		/* MNT_* flags passed by the caller */
		struct proc *p;		/* calling process */
	};

	static int getfsstat_callback(struct mount *, void *);
	1248
	1249	/* ARGSUSED */
	1250	int
	1251	sys_getfsstat(struct getfsstat_args *uap)
	1252	{
	1253	struct thread *td = curthread;
	1254	struct proc *p = td->td_proc;
	1255	struct getfsstat_info info;
	1256
	1257	bzero(&info, sizeof(info));
	1258
	1259	info.maxcount = uap->bufsize / sizeof(struct statfs);
	1260	info.sfsp = uap->buf;
	1261	info.count = 0;
	1262	info.flags = uap->flags;
	1263	info.p = p;
	1264
	1265	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	1266	if (info.sfsp && info.count > info.maxcount)
	1267	uap->sysmsg_result = info.maxcount;
	1268	else
	1269	uap->sysmsg_result = info.count;
	1270	return (info.error);
	1271	}
	1272
	1273	static int
	1274	getfsstat_callback(struct mount mp, void data)
	1275	{
	1276	struct getfsstat_info *info = data;
	1277	struct statfs *sp;
	1278	char *freepath;
	1279	char *fullpath;
	1280	int error;
	1281
	1282	if (info->sfsp && info->count < info->maxcount) {
	1283	if (info->p && !chroot_visible_mnt(mp, info->p))
	1284	return(0);
	1285	sp = &mp->mnt_stat;
	1286
	1287	/*
	1288	* If MNT_NOWAIT or MNT_LAZY is specified, do not
	1289	* refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
	1290	* overrides MNT_WAIT.
	1291	*/
	1292	if (((info->flags & (MNT_LAZY\|MNT_NOWAIT)) == 0 \|\|
	1293	(info->flags & MNT_WAIT)) &&
	1294	(error = VFS_STATFS(mp, sp, info->p->p_ucred))) {
	1295	return(0);
	1296	}
	1297	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	1298
	1299	error = mount_path(info->p, mp, &fullpath, &freepath);
	1300	if (error) {
	1301	info->error = error;
	1302	return(-1);
	1303	}
	1304	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	1305	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	1306	kfree(freepath, M_TEMP);
	1307
	1308	error = copyout(sp, info->sfsp, sizeof(*sp));
	1309	if (error) {
	1310	info->error = error;
	1311	return (-1);
	1312	}
	1313	++info->sfsp;
	1314	}
	1315	info->count++;
	1316	return(0);
	1317	}
	1318
	1319	/*
	1320	* fchdir_args(int fd)
	1321	*
	1322	* Change current working directory to a given file descriptor.
	1323	*/
	1324	/* ARGSUSED */
	1325	int
	1326	sys_fchdir(struct fchdir_args *uap)
	1327	{
	1328	struct thread *td = curthread;
	1329	struct proc *p = td->td_proc;
	1330	struct filedesc *fdp = p->p_fd;
	1331	struct vnode vp, ovp;
	1332	struct mount *mp;
	1333	struct file *fp;
	1334	struct nchandle nch, onch, tnch;
	1335	int error;
	1336
	1337	if ((error = holdvnode(fdp, uap->fd, &fp)) != 0)
	1338	return (error);
	1339	vp = (struct vnode *)fp->f_data;
	1340	vref(vp);
	1341	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	1342	if (vp->v_type != VDIR \|\| fp->f_nchandle.ncp == NULL)
	1343	error = ENOTDIR;
	1344	else
	1345	error = VOP_ACCESS(vp, VEXEC, p->p_ucred);
	1346	if (error) {
	1347	vput(vp);
	1348	fdrop(fp);
	1349	return (error);
	1350	}
	1351	cache_copy(&fp->f_nchandle, &nch);
	1352
	1353	/*
	1354	* If the ncp has become a mount point, traverse through
	1355	* the mount point.
	1356	*/
	1357
	1358	while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	1359	(mp = cache_findmount(&nch)) != NULL
	1360	) {
	1361	error = nlookup_mp(mp, &tnch);
	1362	if (error == 0) {
	1363	cache_unlock(&tnch); /* leave ref intact */
	1364	vput(vp);
	1365	vp = tnch.ncp->nc_vp;
	1366	error = vget(vp, LK_SHARED);
	1367	KKASSERT(error == 0);
	1368	cache_drop(&nch);
	1369	nch = tnch;
	1370	}
	1371	}
	1372	if (error == 0) {
	1373	ovp = fdp->fd_cdir;
	1374	onch = fdp->fd_ncdir;
	1375	vn_unlock(vp); /* leave ref intact */
	1376	fdp->fd_cdir = vp;
	1377	fdp->fd_ncdir = nch;
	1378	cache_drop(&onch);
	1379	vrele(ovp);
	1380	} else {
	1381	cache_drop(&nch);
	1382	vput(vp);
	1383	}
	1384	fdrop(fp);
	1385	return (error);
	1386	}
	1387
	1388	int
	1389	kern_chdir(struct nlookupdata *nd)
	1390	{
	1391	struct thread *td = curthread;
	1392	struct proc *p = td->td_proc;
	1393	struct filedesc *fdp = p->p_fd;
	1394	struct vnode vp, ovp;
	1395	struct nchandle onch;
	1396	int error;
	1397
	1398	if ((error = nlookup(nd)) != 0)
	1399	return (error);
	1400	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
	1401	return (ENOENT);
	1402	if ((error = vget(vp, LK_SHARED)) != 0)
	1403	return (error);
	1404
	1405	error = checkvp_chdir(vp, td);
	1406	vn_unlock(vp);
	1407	if (error == 0) {
	1408	ovp = fdp->fd_cdir;
	1409	onch = fdp->fd_ncdir;
	1410	cache_unlock(&nd->nl_nch); /* leave reference intact */
	1411	fdp->fd_ncdir = nd->nl_nch;
	1412	fdp->fd_cdir = vp;
	1413	cache_drop(&onch);
	1414	vrele(ovp);
	1415	cache_zero(&nd->nl_nch);
	1416	} else {
	1417	vrele(vp);
	1418	}
	1419	return (error);
	1420	}
	1421
	1422	/*
	1423	* chdir_args(char *path)
	1424	*
	1425	* Change current working directory (``.'').
	1426	*/
	1427	int
	1428	sys_chdir(struct chdir_args *uap)
	1429	{
	1430	struct nlookupdata nd;
	1431	int error;
	1432
	1433	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1434	if (error == 0)
	1435	error = kern_chdir(&nd);
	1436	nlookup_done(&nd);
	1437	return (error);
	1438	}
	1439
	1440	/*
	1441	* Helper function for raised chroot(2) security function: Refuse if
	1442	* any filedescriptors are open directories.
	1443	*/
	1444	static int
	1445	chroot_refuse_vdir_fds(struct filedesc *fdp)
	1446	{
	1447	struct vnode *vp;
	1448	struct file *fp;
	1449	int error;
	1450	int fd;
	1451
	1452	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
	1453	if ((error = holdvnode(fdp, fd, &fp)) != 0)
	1454	continue;
	1455	vp = (struct vnode *)fp->f_data;
	1456	if (vp->v_type != VDIR) {
	1457	fdrop(fp);
	1458	continue;
	1459	}
	1460	fdrop(fp);
	1461	return(EPERM);
	1462	}
	1463	return (0);
	1464	}
	1465
	1466	/*
	1467	* This sysctl determines if we will allow a process to chroot(2) if it
	1468	* has a directory open:
	1469	* 0: disallowed for all processes.
	1470	* 1: allowed for processes that were not already chroot(2)'ed.
	1471	* 2: allowed for all processes.
	1472	*/
	1473
	1474	static int chroot_allow_open_directories = 1;
	1475
	1476	SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
	1477	&chroot_allow_open_directories, 0, "");
	1478
	1479	/*
	1480	* chroot to the specified namecache entry. We obtain the vp from the
	1481	* namecache data. The passed ncp must be locked and referenced and will
	1482	* remain locked and referenced on return.
	1483	*/
	1484	int
	1485	kern_chroot(struct nchandle *nch)
	1486	{
	1487	struct thread *td = curthread;
	1488	struct proc *p = td->td_proc;
	1489	struct filedesc *fdp = p->p_fd;
	1490	struct vnode *vp;
	1491	int error;
	1492
	1493	/*
	1494	* Only root can chroot
	1495	*/
	1496	if ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0)
	1497	return (error);
	1498
	1499	/*
	1500	* Disallow open directory descriptors (fchdir() breakouts).
	1501	*/
	1502	if (chroot_allow_open_directories == 0 \|\|
	1503	(chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
	1504	if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
	1505	return (error);
	1506	}
	1507	if ((vp = nch->ncp->nc_vp) == NULL)
	1508	return (ENOENT);
	1509
	1510	if ((error = vget(vp, LK_SHARED)) != 0)
	1511	return (error);
	1512
	1513	/*
	1514	* Check the validity of vp as a directory to change to and
	1515	* associate it with rdir/jdir.
	1516	*/
	1517	error = checkvp_chdir(vp, td);
	1518	vn_unlock(vp); /* leave reference intact */
	1519	if (error == 0) {
	1520	vrele(fdp->fd_rdir);
	1521	fdp->fd_rdir = vp; /* reference inherited by fd_rdir */
	1522	cache_drop(&fdp->fd_nrdir);
	1523	cache_copy(nch, &fdp->fd_nrdir);
	1524	if (fdp->fd_jdir == NULL) {
	1525	fdp->fd_jdir = vp;
	1526	vref(fdp->fd_jdir);
	1527	cache_copy(nch, &fdp->fd_njdir);
	1528	}
	1529	} else {
	1530	vrele(vp);
	1531	}
	1532	return (error);
	1533	}
	1534
	1535	/*
	1536	* chroot_args(char *path)
	1537	*
	1538	* Change notion of root (``/'') directory.
	1539	*/
	1540	/* ARGSUSED */
	1541	int
	1542	sys_chroot(struct chroot_args *uap)
	1543	{
	1544	struct thread *td = curthread;
	1545	struct nlookupdata nd;
	1546	int error;
	1547
	1548	KKASSERT(td->td_proc);
	1549	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1550	if (error) {
	1551	nlookup_done(&nd);
	1552	return(error);
	1553	}
	1554	error = nlookup(&nd);
	1555	if (error == 0)
	1556	error = kern_chroot(&nd.nl_nch);
	1557	nlookup_done(&nd);
	1558	return(error);
	1559	}
	1560
	1561	/*
	1562	* Common routine for chroot and chdir. Given a locked, referenced vnode,
	1563	* determine whether it is legal to chdir to the vnode. The vnode's state
	1564	* is not changed by this call.
	1565	*/
	1566	int
	1567	checkvp_chdir(struct vnode vp, struct thread td)
	1568	{
	1569	int error;
	1570
	1571	if (vp->v_type != VDIR)
	1572	error = ENOTDIR;
	1573	else
	1574	error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred);
	1575	return (error);
	1576	}
	1577
	1578	int
	1579	kern_open(struct nlookupdata nd, int oflags, int mode, int res)
	1580	{
	1581	struct thread *td = curthread;
	1582	struct proc *p = td->td_proc;
	1583	struct lwp *lp = td->td_lwp;
	1584	struct filedesc *fdp = p->p_fd;
	1585	int cmode, flags;
	1586	struct file *nfp;
	1587	struct file *fp;
	1588	struct vnode *vp;
	1589	int type, indx, error;
	1590	struct flock lf;
	1591
	1592	if ((oflags & O_ACCMODE) == O_ACCMODE)
	1593	return (EINVAL);
	1594	flags = FFLAGS(oflags);
	1595	error = falloc(p, &nfp, NULL);
	1596	if (error)
	1597	return (error);
	1598	fp = nfp;
	1599	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	1600
	1601	/*
	1602	* XXX p_dupfd is a real mess. It allows a device to return a
	1603	* file descriptor to be duplicated rather then doing the open
	1604	* itself.
	1605	*/
	1606	lp->lwp_dupfd = -1;
	1607
	1608	/*
	1609	* Call vn_open() to do the lookup and assign the vnode to the
	1610	* file pointer. vn_open() does not change the ref count on fp
	1611	* and the vnode, on success, will be inherited by the file pointer
	1612	* and unlocked.
	1613	*/
	1614	nd->nl_flags \|= NLC_LOCKVP;
	1615	error = vn_open(nd, fp, flags, cmode);
	1616	nlookup_done(nd);
	1617	if (error) {
	1618	/*
	1619	* handle special fdopen() case. bleh. dupfdopen() is
	1620	* responsible for dropping the old contents of ofiles[indx]
	1621	* if it succeeds.
	1622	*
	1623	* Note that fsetfd() will add a ref to fp which represents
	1624	* the fd_files[] assignment. We must still drop our
	1625	* reference.
	1626	*/
	1627	if ((error == ENODEV \|\| error == ENXIO) && lp->lwp_dupfd >= 0) {
	1628	if (fdalloc(p, 0, &indx) == 0) {
	1629	error = dupfdopen(p, indx, lp->lwp_dupfd, flags, error);
	1630	if (error == 0) {
	1631	*res = indx;
	1632	fdrop(fp); /* our ref */
	1633	return (0);
	1634	}
	1635	fsetfd(p, NULL, indx);
	1636	}
	1637	}
	1638	fdrop(fp); /* our ref */
	1639	if (error == ERESTART)
	1640	error = EINTR;
	1641	return (error);
	1642	}
	1643
	1644	/*
	1645	* ref the vnode for ourselves so it can't be ripped out from under
	1646	* is. XXX need an ND flag to request that the vnode be returned
	1647	* anyway.
	1648	*
	1649	* Reserve a file descriptor but do not assign it until the open
	1650	* succeeds.
	1651	*/
	1652	vp = (struct vnode *)fp->f_data;
	1653	vref(vp);
	1654	if ((error = fdalloc(p, 0, &indx)) != 0) {
	1655	fdrop(fp);
	1656	vrele(vp);
	1657	return (error);
	1658	}
	1659
	1660	/*
	1661	* If no error occurs the vp will have been assigned to the file
	1662	* pointer.
	1663	*/
	1664	lp->lwp_dupfd = 0;
	1665
	1666	if (flags & (O_EXLOCK \| O_SHLOCK)) {
	1667	lf.l_whence = SEEK_SET;
	1668	lf.l_start = 0;
	1669	lf.l_len = 0;
	1670	if (flags & O_EXLOCK)
	1671	lf.l_type = F_WRLCK;
	1672	else
	1673	lf.l_type = F_RDLCK;
	1674	if (flags & FNONBLOCK)
	1675	type = 0;
	1676	else
	1677	type = F_WAIT;
	1678
	1679	if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
	1680	/*
	1681	* lock request failed. Clean up the reserved
	1682	* descriptor.
	1683	*/
	1684	vrele(vp);
	1685	fsetfd(p, NULL, indx);
	1686	fdrop(fp);
	1687	return (error);
	1688	}
	1689	fp->f_flag \|= FHASLOCK;
	1690	}
	1691	#if 0
	1692	/*
	1693	* Assert that all regular file vnodes were created with a object.
	1694	*/
	1695	KASSERT(vp->v_type != VREG \|\| vp->v_object != NULL,
	1696	("open: regular file has no backing object after vn_open"));
	1697	#endif
	1698
	1699	vrele(vp);
	1700
	1701	/*
	1702	* release our private reference, leaving the one associated with the
	1703	* descriptor table intact.
	1704	*/
	1705	fsetfd(p, fp, indx);
	1706	fdrop(fp);
	1707	*res = indx;
	1708	return (0);
	1709	}
	1710
	1711	/*
	1712	* open_args(char *path, int flags, int mode)
	1713	*
	1714	* Check permissions, allocate an open file structure,
	1715	* and call the device open routine if any.
	1716	*/
	1717	int
	1718	sys_open(struct open_args *uap)
	1719	{
	1720	struct nlookupdata nd;
	1721	int error;
	1722
	1723	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1724	if (error == 0) {
	1725	error = kern_open(&nd, uap->flags,
	1726	uap->mode, &uap->sysmsg_result);
	1727	}
	1728	nlookup_done(&nd);
	1729	return (error);
	1730	}
	1731
	1732	int
	1733	kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
	1734	{
	1735	struct thread *td = curthread;
	1736	struct proc *p = td->td_proc;
	1737	struct vnode *vp;
	1738	struct vattr vattr;
	1739	int error;
	1740	int whiteout = 0;
	1741
	1742	KKASSERT(p);
	1743
	1744	switch (mode & S_IFMT) {
	1745	case S_IFCHR:
	1746	case S_IFBLK:
	1747	error = suser(td);
	1748	break;
	1749	default:
	1750	error = suser_cred(p->p_ucred, PRISON_ROOT);
	1751	break;
	1752	}
	1753	if (error)
	1754	return (error);
	1755
	1756	bwillwrite();
	1757	nd->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	1758	if ((error = nlookup(nd)) != 0)
	1759	return (error);
	1760	if (nd->nl_nch.ncp->nc_vp)
	1761	return (EEXIST);
	1762	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	1763	return (error);
	1764
	1765	VATTR_NULL(&vattr);
	1766	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	1767	vattr.va_rmajor = rmajor;
	1768	vattr.va_rminor = rminor;
	1769	whiteout = 0;
	1770
	1771	switch (mode & S_IFMT) {
	1772	case S_IFMT: /* used by badsect to flag bad sectors */
	1773	vattr.va_type = VBAD;
	1774	break;
	1775	case S_IFCHR:
	1776	vattr.va_type = VCHR;
	1777	break;
	1778	case S_IFBLK:
	1779	vattr.va_type = VBLK;
	1780	break;
	1781	case S_IFWHT:
	1782	whiteout = 1;
	1783	break;
	1784	default:
	1785	error = EINVAL;
	1786	break;
	1787	}
	1788	if (error == 0) {
	1789	if (whiteout) {
	1790	error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
	1791	nd->nl_cred, NAMEI_CREATE);
	1792	} else {
	1793	vp = NULL;
	1794	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
	1795	&vp, nd->nl_cred, &vattr);
	1796	if (error == 0)
	1797	vput(vp);
	1798	}
	1799	}
	1800	return (error);
	1801	}
	1802
	1803	/*
	1804	* mknod_args(char *path, int mode, int dev)
	1805	*
	1806	* Create a special file.
	1807	*/
	1808	int
	1809	sys_mknod(struct mknod_args *uap)
	1810	{
	1811	struct nlookupdata nd;
	1812	int error;
	1813
	1814	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	1815	if (error == 0) {
	1816	error = kern_mknod(&nd, uap->mode,
	1817	umajor(uap->dev), uminor(uap->dev));
	1818	}
	1819	nlookup_done(&nd);
	1820	return (error);
	1821	}
	1822
	1823	int
	1824	kern_mkfifo(struct nlookupdata *nd, int mode)
	1825	{
	1826	struct thread *td = curthread;
	1827	struct proc *p = td->td_proc;
	1828	struct vattr vattr;
	1829	struct vnode *vp;
	1830	int error;
	1831
	1832	bwillwrite();
	1833
	1834	nd->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	1835	if ((error = nlookup(nd)) != 0)
	1836	return (error);
	1837	if (nd->nl_nch.ncp->nc_vp)
	1838	return (EEXIST);
	1839	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	1840	return (error);
	1841
	1842	VATTR_NULL(&vattr);
	1843	vattr.va_type = VFIFO;
	1844	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	1845	vp = NULL;
	1846	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
	1847	if (error == 0)
	1848	vput(vp);
	1849	return (error);
	1850	}
	1851
	1852	/*
	1853	* mkfifo_args(char *path, int mode)
	1854	*
	1855	* Create a named pipe.
	1856	*/
	1857	int
	1858	sys_mkfifo(struct mkfifo_args *uap)
	1859	{
	1860	struct nlookupdata nd;
	1861	int error;
	1862
	1863	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	1864	if (error == 0)
	1865	error = kern_mkfifo(&nd, uap->mode);
	1866	nlookup_done(&nd);
	1867	return (error);
	1868	}
	1869
	1870	static int hardlink_check_uid = 0;
	1871	SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
	1872	&hardlink_check_uid, 0,
	1873	"Unprivileged processes cannot create hard links to files owned by other "
	1874	"users");
	1875	static int hardlink_check_gid = 0;
	1876	SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
	1877	&hardlink_check_gid, 0,
	1878	"Unprivileged processes cannot create hard links to files owned by other "
	1879	"groups");
	1880
	1881	static int
	1882	can_hardlink(struct vnode vp, struct thread td, struct ucred *cred)
	1883	{
	1884	struct vattr va;
	1885	int error;
	1886
	1887	/*
	1888	* Shortcut if disabled
	1889	*/
	1890	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
	1891	return (0);
	1892
	1893	/*
	1894	* root cred can always hardlink
	1895	*/
	1896	if (suser_cred(cred, PRISON_ROOT) == 0)
	1897	return (0);
	1898
	1899	/*
	1900	* Otherwise only if the originating file is owned by the
	1901	* same user or group. Note that any group is allowed if
	1902	* the file is owned by the caller.
	1903	*/
	1904	error = VOP_GETATTR(vp, &va);
	1905	if (error != 0)
	1906	return (error);
	1907
	1908	if (hardlink_check_uid) {
	1909	if (cred->cr_uid != va.va_uid)
	1910	return (EPERM);
	1911	}
	1912
	1913	if (hardlink_check_gid) {
	1914	if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
	1915	return (EPERM);
	1916	}
	1917
	1918	return (0);
	1919	}
	1920
	1921	int
	1922	kern_link(struct nlookupdata nd, struct nlookupdata linknd)
	1923	{
	1924	struct thread *td = curthread;
	1925	struct vnode *vp;
	1926	int error;
	1927
	1928	/*
	1929	* Lookup the source and obtained a locked vnode.
	1930	*
	1931	* XXX relookup on vget failure / race ?
	1932	*/
	1933	bwillwrite();
	1934	if ((error = nlookup(nd)) != 0)
	1935	return (error);
	1936	vp = nd->nl_nch.ncp->nc_vp;
	1937	KKASSERT(vp != NULL);
	1938	if (vp->v_type == VDIR)
	1939	return (EPERM); /* POSIX */
	1940	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	1941	return (error);
	1942	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
	1943	return (error);
	1944
	1945	/*
	1946	* Unlock the source so we can lookup the target without deadlocking
	1947	* (XXX vp is locked already, possible other deadlock?). The target
	1948	* must not exist.
	1949	*/
	1950	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	1951	nd->nl_flags &= ~NLC_NCPISLOCKED;
	1952	cache_unlock(&nd->nl_nch);
	1953
	1954	linknd->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	1955	if ((error = nlookup(linknd)) != 0) {
	1956	vput(vp);
	1957	return (error);
	1958	}
	1959	if (linknd->nl_nch.ncp->nc_vp) {
	1960	vput(vp);
	1961	return (EEXIST);
	1962	}
	1963
	1964	/*
	1965	* Finally run the new API VOP.
	1966	*/
	1967	error = can_hardlink(vp, td, td->td_proc->p_ucred);
	1968	if (error == 0) {
	1969	error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
	1970	vp, linknd->nl_cred);
	1971	}
	1972	vput(vp);
	1973	return (error);
	1974	}
	1975
	1976	/*
	1977	* link_args(char path, char link)
	1978	*
	1979	* Make a hard file link.
	1980	*/
	1981	int
	1982	sys_link(struct link_args *uap)
	1983	{
	1984	struct nlookupdata nd, linknd;
	1985	int error;
	1986
	1987	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1988	if (error == 0) {
	1989	error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
	1990	if (error == 0)
	1991	error = kern_link(&nd, &linknd);
	1992	nlookup_done(&linknd);
	1993	}
	1994	nlookup_done(&nd);
	1995	return (error);
	1996	}
	1997
	1998	int
	1999	kern_symlink(struct nlookupdata nd, char path, int mode)
	2000	{
	2001	struct vattr vattr;
	2002	struct vnode *vp;
	2003	struct vnode *dvp;
	2004	int error;
	2005
	2006	bwillwrite();
	2007	nd->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	2008	if ((error = nlookup(nd)) != 0)
	2009	return (error);
	2010	if (nd->nl_nch.ncp->nc_vp)
	2011	return (EEXIST);
	2012	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	2013	return (error);
	2014	dvp = nd->nl_dvp;
	2015	VATTR_NULL(&vattr);
	2016	vattr.va_mode = mode;
	2017	error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
	2018	if (error == 0)
	2019	vput(vp);
	2020	return (error);
	2021	}
	2022
	2023	/*
	2024	* symlink(char path, char link)
	2025	*
	2026	* Make a symbolic link.
	2027	*/
	2028	int
	2029	sys_symlink(struct symlink_args *uap)
	2030	{
	2031	struct thread *td = curthread;
	2032	struct nlookupdata nd;
	2033	char *path;
	2034	int error;
	2035	int mode;
	2036
	2037	path = objcache_get(namei_oc, M_WAITOK);
	2038	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	2039	if (error == 0) {
	2040	error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
	2041	if (error == 0) {
	2042	mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
	2043	error = kern_symlink(&nd, path, mode);
	2044	}
	2045	nlookup_done(&nd);
	2046	}
	2047	objcache_put(namei_oc, path);
	2048	return (error);
	2049	}
	2050
	2051	/*
	2052	* undelete_args(char *path)
	2053	*
	2054	* Delete a whiteout from the filesystem.
	2055	*/
	2056	/* ARGSUSED */
	2057	int
	2058	sys_undelete(struct undelete_args *uap)
	2059	{
	2060	struct nlookupdata nd;
	2061	int error;
	2062
	2063	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2064	bwillwrite();
	2065	nd.nl_flags \|= NLC_DELETE \| NLC_REFDVP;
	2066	if (error == 0)
	2067	error = nlookup(&nd);
	2068	if (error == 0)
	2069	error = ncp_writechk(&nd.nl_nch);
	2070	if (error == 0) {
	2071	error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred,
	2072	NAMEI_DELETE);
	2073	}
	2074	nlookup_done(&nd);
	2075	return (error);
	2076	}
	2077
	2078	int
	2079	kern_unlink(struct nlookupdata *nd)
	2080	{
	2081	int error;
	2082
	2083	bwillwrite();
	2084	nd->nl_flags \|= NLC_DELETE \| NLC_REFDVP;
	2085	if ((error = nlookup(nd)) != 0)
	2086	return (error);
	2087	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	2088	return (error);
	2089	error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	2090	return (error);
	2091	}
	2092
	2093	/*
	2094	* unlink_args(char *path)
	2095	*
	2096	* Delete a name from the filesystem.
	2097	*/
	2098	int
	2099	sys_unlink(struct unlink_args *uap)
	2100	{
	2101	struct nlookupdata nd;
	2102	int error;
	2103
	2104	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2105	if (error == 0)
	2106	error = kern_unlink(&nd);
	2107	nlookup_done(&nd);
	2108	return (error);
	2109	}
	2110
	2111	int
	2112	kern_lseek(int fd, off_t offset, int whence, off_t *res)
	2113	{
	2114	struct thread *td = curthread;
	2115	struct proc *p = td->td_proc;
	2116	struct file *fp;
	2117	struct vattr vattr;
	2118	int error;
	2119
	2120	fp = holdfp(p->p_fd, fd, -1);
	2121	if (fp == NULL)
	2122	return (EBADF);
	2123	if (fp->f_type != DTYPE_VNODE) {
	2124	error = ESPIPE;
	2125	goto done;
	2126	}
	2127
	2128	switch (whence) {
	2129	case L_INCR:
	2130	fp->f_offset += offset;
	2131	error = 0;
	2132	break;
	2133	case L_XTND:
	2134	error = VOP_GETATTR((struct vnode *)fp->f_data, &vattr);
	2135	if (error == 0)
	2136	fp->f_offset = offset + vattr.va_size;
	2137	break;
	2138	case L_SET:
	2139	fp->f_offset = offset;
	2140	error = 0;
	2141	break;
	2142	default:
	2143	error = EINVAL;
	2144	break;
	2145	}
	2146	*res = fp->f_offset;
	2147	done:
	2148	fdrop(fp);
	2149	return (error);
	2150	}
	2151
	2152	/*
	2153	* lseek_args(int fd, int pad, off_t offset, int whence)
	2154	*
	2155	* Reposition read/write file offset.
	2156	*/
	2157	int
	2158	sys_lseek(struct lseek_args *uap)
	2159	{
	2160	int error;
	2161
	2162	error = kern_lseek(uap->fd, uap->offset, uap->whence,
	2163	&uap->sysmsg_offset);
	2164
	2165	return (error);
	2166	}
	2167
	2168	int
	2169	kern_access(struct nlookupdata *nd, int aflags)
	2170	{
	2171	struct vnode *vp;
	2172	int error, flags;
	2173
	2174	if ((error = nlookup(nd)) != 0)
	2175	return (error);
	2176	retry:
	2177	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp);
	2178	if (error)
	2179	return (error);
	2180
	2181	/* Flags == 0 means only check for existence. */
	2182	if (aflags) {
	2183	flags = 0;
	2184	if (aflags & R_OK)
	2185	flags \|= VREAD;
	2186	if (aflags & W_OK)
	2187	flags \|= VWRITE;
	2188	if (aflags & X_OK)
	2189	flags \|= VEXEC;
	2190	if ((flags & VWRITE) == 0 \|\|
	2191	(error = vn_writechk(vp, &nd->nl_nch)) == 0)
	2192	error = VOP_ACCESS(vp, flags, nd->nl_cred);
	2193
	2194	/*
	2195	* If the file handle is stale we have to re-resolve the
	2196	* entry. This is a hack at the moment.
	2197	*/
	2198	if (error == ESTALE) {
	2199	vput(vp);
	2200	cache_setunresolved(&nd->nl_nch);
	2201	error = cache_resolve(&nd->nl_nch, nd->nl_cred);
	2202	if (error == 0) {
	2203	vp = NULL;
	2204	goto retry;
	2205	}
	2206	return(error);
	2207	}
	2208	}
	2209	vput(vp);
	2210	return (error);
	2211	}
	2212
	2213	/*
	2214	* access_args(char *path, int flags)
	2215	*
	2216	* Check access permissions.
	2217	*/
	2218	int
	2219	sys_access(struct access_args *uap)
	2220	{
	2221	struct nlookupdata nd;
	2222	int error;
	2223
	2224	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2225	if (error == 0)
	2226	error = kern_access(&nd, uap->flags);
	2227	nlookup_done(&nd);
	2228	return (error);
	2229	}
	2230
	2231	int
	2232	kern_stat(struct nlookupdata nd, struct stat st)
	2233	{
	2234	int error;
	2235	struct vnode *vp;
	2236	thread_t td;
	2237
	2238	if ((error = nlookup(nd)) != 0)
	2239	return (error);
	2240	again:
	2241	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
	2242	return (ENOENT);
	2243
	2244	td = curthread;
	2245	if ((error = vget(vp, LK_SHARED)) != 0)
	2246	return (error);
	2247	error = vn_stat(vp, st, nd->nl_cred);
	2248
	2249	/*
	2250	* If the file handle is stale we have to re-resolve the entry. This
	2251	* is a hack at the moment.
	2252	*/
	2253	if (error == ESTALE) {
	2254	vput(vp);
	2255	cache_setunresolved(&nd->nl_nch);
	2256	error = cache_resolve(&nd->nl_nch, nd->nl_cred);
	2257	if (error == 0)
	2258	goto again;
	2259	} else {
	2260	vput(vp);
	2261	}
	2262	return (error);
	2263	}
	2264
	2265	/*
	2266	* stat_args(char path, struct stat ub)
	2267	*
	2268	* Get file status; this version follows links.
	2269	*/
	2270	int
	2271	sys_stat(struct stat_args *uap)
	2272	{
	2273	struct nlookupdata nd;
	2274	struct stat st;
	2275	int error;
	2276
	2277	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2278	if (error == 0) {
	2279	error = kern_stat(&nd, &st);
	2280	if (error == 0)
	2281	error = copyout(&st, uap->ub, sizeof(*uap->ub));
	2282	}
	2283	nlookup_done(&nd);
	2284	return (error);
	2285	}
	2286
	2287	/*
	2288	* lstat_args(char path, struct stat ub)
	2289	*
	2290	* Get file status; this version does not follow links.
	2291	*/
	2292	int
	2293	sys_lstat(struct lstat_args *uap)
	2294	{
	2295	struct nlookupdata nd;
	2296	struct stat st;
	2297	int error;
	2298
	2299	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2300	if (error == 0) {
	2301	error = kern_stat(&nd, &st);
	2302	if (error == 0)
	2303	error = copyout(&st, uap->ub, sizeof(*uap->ub));
	2304	}
	2305	nlookup_done(&nd);
	2306	return (error);
	2307	}
	2308
	2309	/*
	2310	* pathconf_Args(char *path, int name)
	2311	*
	2312	* Get configurable pathname variables.
	2313	*/
	2314	/* ARGSUSED */
	2315	int
	2316	sys_pathconf(struct pathconf_args *uap)
	2317	{
	2318	struct nlookupdata nd;
	2319	struct vnode *vp;
	2320	int error;
	2321
	2322	vp = NULL;
	2323	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2324	if (error == 0)
	2325	error = nlookup(&nd);
	2326	if (error == 0)
	2327	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	2328	nlookup_done(&nd);
	2329	if (error == 0) {
	2330	error = VOP_PATHCONF(vp, uap->name, uap->sysmsg_fds);
	2331	vput(vp);
	2332	}
	2333	return (error);
	2334	}
	2335
	2336	/*
	2337	* XXX: daver
	2338	* kern_readlink isn't properly split yet. There is a copyin burried
	2339	* in VOP_READLINK().
	2340	*/
	2341	int
	2342	kern_readlink(struct nlookupdata nd, char buf, int count, int *res)
	2343	{
	2344	struct thread *td = curthread;
	2345	struct proc *p = td->td_proc;
	2346	struct vnode *vp;
	2347	struct iovec aiov;
	2348	struct uio auio;
	2349	int error;
	2350
	2351	if ((error = nlookup(nd)) != 0)
	2352	return (error);
	2353	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp);
	2354	if (error)
	2355	return (error);
	2356	if (vp->v_type != VLNK) {
	2357	error = EINVAL;
	2358	} else {
	2359	aiov.iov_base = buf;
	2360	aiov.iov_len = count;
	2361	auio.uio_iov = &aiov;
	2362	auio.uio_iovcnt = 1;
	2363	auio.uio_offset = 0;
	2364	auio.uio_rw = UIO_READ;
	2365	auio.uio_segflg = UIO_USERSPACE;
	2366	auio.uio_td = td;
	2367	auio.uio_resid = count;
	2368	error = VOP_READLINK(vp, &auio, p->p_ucred);
	2369	}
	2370	vput(vp);
	2371	*res = count - auio.uio_resid;
	2372	return (error);
	2373	}
	2374
	2375	/*
	2376	* readlink_args(char path, char buf, int count)
	2377	*
	2378	* Return target name of a symbolic link.
	2379	*/
	2380	int
	2381	sys_readlink(struct readlink_args *uap)
	2382	{
	2383	struct nlookupdata nd;
	2384	int error;
	2385
	2386	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2387	if (error == 0) {
	2388	error = kern_readlink(&nd, uap->buf, uap->count,
	2389	&uap->sysmsg_result);
	2390	}
	2391	nlookup_done(&nd);
	2392	return (error);
	2393	}
	2394
	2395	static int
	2396	setfflags(struct vnode *vp, int flags)
	2397	{
	2398	struct thread *td = curthread;
	2399	struct proc *p = td->td_proc;
	2400	int error;
	2401	struct vattr vattr;
	2402
	2403	/*
	2404	* Prevent non-root users from setting flags on devices. When
	2405	* a device is reused, users can retain ownership of the device
	2406	* if they are allowed to set flags and programs assume that
	2407	* chown can't fail when done as root.
	2408	*/
	2409	if ((vp->v_type == VCHR \|\| vp->v_type == VBLK) &&
	2410	((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0))
	2411	return (error);
	2412
	2413	/*
	2414	* note: vget is required for any operation that might mod the vnode
	2415	* so VINACTIVE is properly cleared.
	2416	*/
	2417	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
	2418	VATTR_NULL(&vattr);
	2419	vattr.va_flags = flags;
	2420	error = VOP_SETATTR(vp, &vattr, p->p_ucred);
	2421	vput(vp);
	2422	}
	2423	return (error);
	2424	}
	2425
	2426	/*
	2427	* chflags(char *path, int flags)
	2428	*
	2429	* Change flags of a file given a path name.
	2430	*/
	2431	/* ARGSUSED */
	2432	int
	2433	sys_chflags(struct chflags_args *uap)
	2434	{
	2435	struct nlookupdata nd;
	2436	struct vnode *vp;
	2437	int error;
	2438
	2439	vp = NULL;
	2440	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2441	/* XXX Add NLC flag indicating modifying operation? */
	2442	if (error == 0)
	2443	error = nlookup(&nd);
	2444	if (error == 0)
	2445	error = ncp_writechk(&nd.nl_nch);
	2446	if (error == 0)
	2447	error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	2448	nlookup_done(&nd);
	2449	if (error == 0) {
	2450	error = setfflags(vp, uap->flags);
	2451	vrele(vp);
	2452	}
	2453	return (error);
	2454	}
	2455
	2456	/*
	2457	* fchflags_args(int fd, int flags)
	2458	*
	2459	* Change flags of a file given a file descriptor.
	2460	*/
	2461	/* ARGSUSED */
	2462	int
	2463	sys_fchflags(struct fchflags_args *uap)
	2464	{
	2465	struct thread *td = curthread;
	2466	struct proc *p = td->td_proc;
	2467	struct file *fp;
	2468	int error;
	2469
	2470	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
	2471	return (error);
	2472	if (fp->f_nchandle.ncp)
	2473	error = ncp_writechk(&fp->f_nchandle);
	2474	if (error == 0)
	2475	error = setfflags((struct vnode *) fp->f_data, uap->flags);
	2476	fdrop(fp);
	2477	return (error);
	2478	}
	2479
	2480	static int
	2481	setfmode(struct vnode *vp, int mode)
	2482	{
	2483	struct thread *td = curthread;
	2484	struct proc *p = td->td_proc;
	2485	int error;
	2486	struct vattr vattr;
	2487
	2488	/*
	2489	* note: vget is required for any operation that might mod the vnode
	2490	* so VINACTIVE is properly cleared.
	2491	*/
	2492	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
	2493	VATTR_NULL(&vattr);
	2494	vattr.va_mode = mode & ALLPERMS;
	2495	error = VOP_SETATTR(vp, &vattr, p->p_ucred);
	2496	vput(vp);
	2497	}
	2498	return error;
	2499	}
	2500
	2501	int
	2502	kern_chmod(struct nlookupdata *nd, int mode)
	2503	{
	2504	struct vnode *vp;
	2505	int error;
	2506
	2507	/* XXX Add NLC flag indicating modifying operation? */
	2508	if ((error = nlookup(nd)) != 0)
	2509	return (error);
	2510	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
	2511	return (error);
	2512	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
	2513	error = setfmode(vp, mode);
	2514	vrele(vp);
	2515	return (error);
	2516	}
	2517
	2518	/*
	2519	* chmod_args(char *path, int mode)
	2520	*
	2521	* Change mode of a file given path name.
	2522	*/
	2523	/* ARGSUSED */
	2524	int
	2525	sys_chmod(struct chmod_args *uap)
	2526	{
	2527	struct nlookupdata nd;
	2528	int error;
	2529
	2530	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2531	if (error == 0)
	2532	error = kern_chmod(&nd, uap->mode);
	2533	nlookup_done(&nd);
	2534	return (error);
	2535	}
	2536
	2537	/*
	2538	* lchmod_args(char *path, int mode)
	2539	*
	2540	* Change mode of a file given path name (don't follow links.)
	2541	*/
	2542	/* ARGSUSED */
	2543	int
	2544	sys_lchmod(struct lchmod_args *uap)
	2545	{
	2546	struct nlookupdata nd;
	2547	int error;
	2548
	2549	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2550	if (error == 0)
	2551	error = kern_chmod(&nd, uap->mode);
	2552	nlookup_done(&nd);
	2553	return (error);
	2554	}
	2555
	2556	/*
	2557	* fchmod_args(int fd, int mode)
	2558	*
	2559	* Change mode of a file given a file descriptor.
	2560	*/
	2561	/* ARGSUSED */
	2562	int
	2563	sys_fchmod(struct fchmod_args *uap)
	2564	{
	2565	struct thread *td = curthread;
	2566	struct proc *p = td->td_proc;
	2567	struct file *fp;
	2568	int error;
	2569
	2570	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
	2571	return (error);
	2572	if (fp->f_nchandle.ncp)
	2573	error = ncp_writechk(&fp->f_nchandle);
	2574	if (error == 0)
	2575	error = setfmode((struct vnode *)fp->f_data, uap->mode);
	2576	fdrop(fp);
	2577	return (error);
	2578	}
	2579
	2580	static int
	2581	setfown(struct vnode *vp, uid_t uid, gid_t gid)
	2582	{
	2583	struct thread *td = curthread;
	2584	struct proc *p = td->td_proc;
	2585	int error;
	2586	struct vattr vattr;
	2587
	2588	/*
	2589	* note: vget is required for any operation that might mod the vnode
	2590	* so VINACTIVE is properly cleared.
	2591	*/
	2592	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
	2593	VATTR_NULL(&vattr);
	2594	vattr.va_uid = uid;
	2595	vattr.va_gid = gid;
	2596	error = VOP_SETATTR(vp, &vattr, p->p_ucred);
	2597	vput(vp);
	2598	}
	2599	return error;
	2600	}
	2601
	2602	int
	2603	kern_chown(struct nlookupdata *nd, int uid, int gid)
	2604	{
	2605	struct vnode *vp;
	2606	int error;
	2607
	2608	/* XXX Add NLC flag indicating modifying operation? */
	2609	if ((error = nlookup(nd)) != 0)
	2610	return (error);
	2611	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
	2612	return (error);
	2613	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
	2614	error = setfown(vp, uid, gid);
	2615	vrele(vp);
	2616	return (error);
	2617	}
	2618
	2619	/*
	2620	* chown(char *path, int uid, int gid)
	2621	*
	2622	* Set ownership given a path name.
	2623	*/
	2624	int
	2625	sys_chown(struct chown_args *uap)
	2626	{
	2627	struct nlookupdata nd;
	2628	int error;
	2629
	2630	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2631	if (error == 0)
	2632	error = kern_chown(&nd, uap->uid, uap->gid);
	2633	nlookup_done(&nd);
	2634	return (error);
	2635	}
	2636
	2637	/*
	2638	* lchown_args(char *path, int uid, int gid)
	2639	*
	2640	* Set ownership given a path name, do not cross symlinks.
	2641	*/
	2642	int
	2643	sys_lchown(struct lchown_args *uap)
	2644	{
	2645	struct nlookupdata nd;
	2646	int error;
	2647
	2648	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2649	if (error == 0)
	2650	error = kern_chown(&nd, uap->uid, uap->gid);
	2651	nlookup_done(&nd);
	2652	return (error);
	2653	}
	2654
	2655	/*
	2656	* fchown_args(int fd, int uid, int gid)
	2657	*
	2658	* Set ownership given a file descriptor.
	2659	*/
	2660	/* ARGSUSED */
	2661	int
	2662	sys_fchown(struct fchown_args *uap)
	2663	{
	2664	struct thread *td = curthread;
	2665	struct proc *p = td->td_proc;
	2666	struct file *fp;
	2667	int error;
	2668
	2669	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
	2670	return (error);
	2671	if (fp->f_nchandle.ncp)
	2672	error = ncp_writechk(&fp->f_nchandle);
	2673	if (error == 0)
	2674	error = setfown((struct vnode *)fp->f_data, uap->uid, uap->gid);
	2675	fdrop(fp);
	2676	return (error);
	2677	}
	2678
	2679	static int
	2680	getutimes(const struct timeval tvp, struct timespec tsp)
	2681	{
	2682	struct timeval tv[2];
	2683
	2684	if (tvp == NULL) {
	2685	microtime(&tv[0]);
	2686	TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
	2687	tsp[1] = tsp[0];
	2688	} else {
	2689	TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
	2690	TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
	2691	}
	2692	return 0;
	2693	}
	2694
	2695	static int
	2696	setutimes(struct vnode vp, const struct timespec ts, int nullflag)
	2697	{
	2698	struct thread *td = curthread;
	2699	struct proc *p = td->td_proc;
	2700	int error;
	2701	struct vattr vattr;
	2702
	2703	/*
	2704	* note: vget is required for any operation that might mod the vnode
	2705	* so VINACTIVE is properly cleared.
	2706	*/
	2707	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
	2708	VATTR_NULL(&vattr);
	2709	vattr.va_atime = ts[0];
	2710	vattr.va_mtime = ts[1];
	2711	if (nullflag)
	2712	vattr.va_vaflags \|= VA_UTIMES_NULL;
	2713	error = VOP_SETATTR(vp, &vattr, p->p_ucred);
	2714	vput(vp);
	2715	}
	2716	return error;
	2717	}
	2718
	2719	int
	2720	kern_utimes(struct nlookupdata nd, struct timeval tptr)
	2721	{
	2722	struct timespec ts[2];
	2723	struct vnode *vp;
	2724	int error;
	2725
	2726	if ((error = getutimes(tptr, ts)) != 0)
	2727	return (error);
	2728	/* XXX Add NLC flag indicating modifying operation? */
	2729	if ((error = nlookup(nd)) != 0)
	2730	return (error);
	2731	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	2732	return (error);
	2733	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
	2734	return (error);
	2735	error = setutimes(vp, ts, tptr == NULL);
	2736	vrele(vp);
	2737	return (error);
	2738	}
	2739
	2740	/*
	2741	* utimes_args(char path, struct timeval tptr)
	2742	*
	2743	* Set the access and modification times of a file.
	2744	*/
	2745	int
	2746	sys_utimes(struct utimes_args *uap)
	2747	{
	2748	struct timeval tv[2];
	2749	struct nlookupdata nd;
	2750	int error;
	2751
	2752	if (uap->tptr) {
	2753	error = copyin(uap->tptr, tv, sizeof(tv));
	2754	if (error)
	2755	return (error);
	2756	}
	2757	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2758	if (error == 0)
	2759	error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	2760	nlookup_done(&nd);
	2761	return (error);
	2762	}
	2763
	2764	/*
	2765	* lutimes_args(char path, struct timeval tptr)
	2766	*
	2767	* Set the access and modification times of a file.
	2768	*/
	2769	int
	2770	sys_lutimes(struct lutimes_args *uap)
	2771	{
	2772	struct timeval tv[2];
	2773	struct nlookupdata nd;
	2774	int error;
	2775
	2776	if (uap->tptr) {
	2777	error = copyin(uap->tptr, tv, sizeof(tv));
	2778	if (error)
	2779	return (error);
	2780	}
	2781	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2782	if (error == 0)
	2783	error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	2784	nlookup_done(&nd);
	2785	return (error);
	2786	}
	2787
	2788	int
	2789	kern_futimes(int fd, struct timeval *tptr)
	2790	{
	2791	struct thread *td = curthread;
	2792	struct proc *p = td->td_proc;
	2793	struct timespec ts[2];
	2794	struct file *fp;
	2795	int error;
	2796
	2797	error = getutimes(tptr, ts);
	2798	if (error)
	2799	return (error);
	2800	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	2801	return (error);
	2802	if (fp->f_nchandle.ncp)
	2803	error = ncp_writechk(&fp->f_nchandle);
	2804	if (error == 0)
	2805	error = setutimes((struct vnode *)fp->f_data, ts, tptr == NULL);
	2806	fdrop(fp);
	2807	return (error);
	2808	}
	2809
	2810	/*
	2811	* futimes_args(int fd, struct timeval *tptr)
	2812	*
	2813	* Set the access and modification times of a file.
	2814	*/
	2815	int
	2816	sys_futimes(struct futimes_args *uap)
	2817	{
	2818	struct timeval tv[2];
	2819	int error;
	2820
	2821	if (uap->tptr) {
	2822	error = copyin(uap->tptr, tv, sizeof(tv));
	2823	if (error)
	2824	return (error);
	2825	}
	2826
	2827	error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);
	2828
	2829	return (error);
	2830	}
	2831
	2832	int
	2833	kern_truncate(struct nlookupdata *nd, off_t length)
	2834	{
	2835	struct vnode *vp;
	2836	struct vattr vattr;
	2837	int error;
	2838
	2839	if (length < 0)
	2840	return(EINVAL);
	2841	/* XXX Add NLC flag indicating modifying operation? */
	2842	if ((error = nlookup(nd)) != 0)
	2843	return (error);
	2844	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	2845	return (error);
	2846	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
	2847	return (error);
	2848	if ((error = vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY)) != 0) {
	2849	vrele(vp);
	2850	return (error);
	2851	}
	2852	if (vp->v_type == VDIR) {
	2853	error = EISDIR;
	2854	} else if ((error = vn_writechk(vp, &nd->nl_nch)) == 0 &&
	2855	(error = VOP_ACCESS(vp, VWRITE, nd->nl_cred)) == 0) {
	2856	VATTR_NULL(&vattr);
	2857	vattr.va_size = length;
	2858	error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
	2859	}
	2860	vput(vp);
	2861	return (error);
	2862	}
	2863
	2864	/*
	2865	* truncate(char *path, int pad, off_t length)
	2866	*
	2867	* Truncate a file given its path name.
	2868	*/
	2869	int
	2870	sys_truncate(struct truncate_args *uap)
	2871	{
	2872	struct nlookupdata nd;
	2873	int error;
	2874
	2875	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2876	if (error == 0)
	2877	error = kern_truncate(&nd, uap->length);
	2878	nlookup_done(&nd);
	2879	return error;
	2880	}
	2881
	2882	int
	2883	kern_ftruncate(int fd, off_t length)
	2884	{
	2885	struct thread *td = curthread;
	2886	struct proc *p = td->td_proc;
	2887	struct vattr vattr;
	2888	struct vnode *vp;
	2889	struct file *fp;
	2890	int error;
	2891
	2892	if (length < 0)
	2893	return(EINVAL);
	2894	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	2895	return (error);
	2896	if (fp->f_nchandle.ncp) {
	2897	error = ncp_writechk(&fp->f_nchandle);
	2898	if (error)
	2899	goto done;
	2900	}
	2901	if ((fp->f_flag & FWRITE) == 0) {
	2902	error = EINVAL;
	2903	goto done;
	2904	}
	2905	vp = (struct vnode *)fp->f_data;
	2906	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	2907	if (vp->v_type == VDIR) {
	2908	error = EISDIR;
	2909	} else if ((error = vn_writechk(vp, NULL)) == 0) {
	2910	VATTR_NULL(&vattr);
	2911	vattr.va_size = length;
	2912	error = VOP_SETATTR(vp, &vattr, fp->f_cred);
	2913	}
	2914	vn_unlock(vp);
	2915	done:
	2916	fdrop(fp);
	2917	return (error);
	2918	}
	2919
	2920	/*
	2921	* ftruncate_args(int fd, int pad, off_t length)
	2922	*
	2923	* Truncate a file given a file descriptor.
	2924	*/
	2925	int
	2926	sys_ftruncate(struct ftruncate_args *uap)
	2927	{
	2928	int error;
	2929
	2930	error = kern_ftruncate(uap->fd, uap->length);
	2931
	2932	return (error);
	2933	}
	2934
	2935	/*
	2936	* fsync(int fd)
	2937	*
	2938	* Sync an open file.
	2939	*/
	2940	/* ARGSUSED */
	2941	int
	2942	sys_fsync(struct fsync_args *uap)
	2943	{
	2944	struct thread *td = curthread;
	2945	struct proc *p = td->td_proc;
	2946	struct vnode *vp;
	2947	struct file *fp;
	2948	vm_object_t obj;
	2949	int error;
	2950
	2951	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
	2952	return (error);
	2953	vp = (struct vnode *)fp->f_data;
	2954	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	2955	if ((obj = vp->v_object) != NULL)
	2956	vm_object_page_clean(obj, 0, 0, 0);
	2957	if ((error = VOP_FSYNC(vp, MNT_WAIT)) == 0 && vp->v_mount)
	2958	error = buf_fsync(vp);
	2959	vn_unlock(vp);
	2960	fdrop(fp);
	2961	return (error);
	2962	}
	2963
	2964	int
	2965	kern_rename(struct nlookupdata fromnd, struct nlookupdata tond)
	2966	{
	2967	struct nchandle fnchd;
	2968	struct nchandle tnchd;
	2969	struct namecache *ncp;
	2970	struct vnode *fdvp;
	2971	struct vnode *tdvp;
	2972	struct mount *mp;
	2973	int error;
	2974
	2975	bwillwrite();
	2976	fromnd->nl_flags \|= NLC_REFDVP;
	2977	if ((error = nlookup(fromnd)) != 0)
	2978	return (error);
	2979	if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
	2980	return (ENOENT);
	2981	fnchd.mount = fromnd->nl_nch.mount;
	2982	cache_hold(&fnchd);
	2983
	2984	/*
	2985	* unlock the source nch so we can lookup the target nch without
	2986	* deadlocking. The target may or may not exist so we do not check
	2987	* for a target vp like kern_mkdir() and other creation functions do.
	2988	*
	2989	* The source and target directories are ref'd and rechecked after
	2990	* everything is relocked to determine if the source or target file
	2991	* has been renamed.
	2992	*/
	2993	KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
	2994	fromnd->nl_flags &= ~NLC_NCPISLOCKED;
	2995	cache_unlock(&fromnd->nl_nch);
	2996
	2997	tond->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	2998	if ((error = nlookup(tond)) != 0) {
	2999	cache_drop(&fnchd);
	3000	return (error);
	3001	}
	3002	if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
	3003	cache_drop(&fnchd);
	3004	return (ENOENT);
	3005	}
	3006	tnchd.mount = tond->nl_nch.mount;
	3007	cache_hold(&tnchd);
	3008
	3009	/*
	3010	* If the source and target are the same there is nothing to do
	3011	*/
	3012	if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
	3013	cache_drop(&fnchd);
	3014	cache_drop(&tnchd);
	3015	return (0);
	3016	}
	3017
	3018	/*
	3019	* Mount points cannot be renamed or overwritten
	3020	*/
	3021	if ((fromnd->nl_nch.ncp->nc_flag \| tond->nl_nch.ncp->nc_flag) &
	3022	NCF_ISMOUNTPT
	3023	) {
	3024	cache_drop(&fnchd);
	3025	cache_drop(&tnchd);
	3026	return (EINVAL);
	3027	}
	3028
	3029	/*
	3030	* relock the source ncp. NOTE AFTER RELOCKING: the source ncp
	3031	* may have become invalid while it was unlocked, nc_vp and nc_mount
	3032	* could be NULL.
	3033	*/
	3034	if (cache_lock_nonblock(&fromnd->nl_nch) == 0) {
	3035	cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
	3036	} else if (fromnd->nl_nch.ncp > tond->nl_nch.ncp) {
	3037	cache_lock(&fromnd->nl_nch);
	3038	cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
	3039	} else {
	3040	cache_unlock(&tond->nl_nch);
	3041	cache_lock(&fromnd->nl_nch);
	3042	cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
	3043	cache_lock(&tond->nl_nch);
	3044	cache_resolve(&tond->nl_nch, tond->nl_cred);
	3045	}
	3046	fromnd->nl_flags \|= NLC_NCPISLOCKED;
	3047
	3048	/*
	3049	* make sure the parent directories linkages are the same
	3050	*/
	3051	if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent \|\|
	3052	tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
	3053	cache_drop(&fnchd);
	3054	cache_drop(&tnchd);
	3055	return (ENOENT);
	3056	}
	3057
	3058	/*
	3059	* Both the source and target must be within the same filesystem and
	3060	* in the same filesystem as their parent directories within the
	3061	* namecache topology.
	3062	*
	3063	* NOTE: fromnd's nc_mount or nc_vp could be NULL.
	3064	*/
	3065	mp = fnchd.mount;
	3066	if (mp != tnchd.mount \|\| mp != fromnd->nl_nch.mount \|\|
	3067	mp != tond->nl_nch.mount) {
	3068	cache_drop(&fnchd);
	3069	cache_drop(&tnchd);
	3070	return (EXDEV);
	3071	}
	3072
	3073	/*
	3074	* Make sure the mount point is writable
	3075	*/
	3076	if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
	3077	cache_drop(&fnchd);
	3078	cache_drop(&tnchd);
	3079	return (error);
	3080	}
	3081
	3082	/*
	3083	* If the target exists and either the source or target is a directory,
	3084	* then both must be directories.
	3085	*
	3086	* Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h
	3087	* have become NULL.
	3088	*/
	3089	if (tond->nl_nch.ncp->nc_vp) {
	3090	if (fromnd->nl_nch.ncp->nc_vp == NULL) {
	3091	error = ENOENT;
	3092	} else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
	3093	if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
	3094	error = ENOTDIR;
	3095	} else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
	3096	error = EISDIR;
	3097	}
	3098	}
	3099
	3100	/*
	3101	* You cannot rename a source into itself or a subdirectory of itself.
	3102	* We check this by travsersing the target directory upwards looking
	3103	* for a match against the source.
	3104	*/
	3105	if (error == 0) {
	3106	for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
	3107	if (fromnd->nl_nch.ncp == ncp) {
	3108	error = EINVAL;
	3109	break;
	3110	}
	3111	}
	3112	}
	3113
	3114	cache_drop(&fnchd);
	3115	cache_drop(&tnchd);
	3116
	3117	/*
	3118	* Even though the namespaces are different, they may still represent
	3119	* hardlinks to the same file. The filesystem might have a hard time
	3120	* with this so we issue a NREMOVE of the source instead of a NRENAME
	3121	* when we detect the situation.
	3122	*/
	3123	if (error == 0) {
	3124	fdvp = fromnd->nl_dvp;
	3125	tdvp = tond->nl_dvp;
	3126	if (fdvp == NULL \|\| tdvp == NULL) {
	3127	error = EPERM;
	3128	} else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
	3129	error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
	3130	fromnd->nl_cred);
	3131	} else {
	3132	error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
	3133	fdvp, tdvp, tond->nl_cred);
	3134	}
	3135	}
	3136	return (error);
	3137	}
	3138
	3139	/*
	3140	* rename_args(char from, char to)
	3141	*
	3142	* Rename files. Source and destination must either both be directories,
	3143	* or both not be directories. If target is a directory, it must be empty.
	3144	*/
	3145	int
	3146	sys_rename(struct rename_args *uap)
	3147	{
	3148	struct nlookupdata fromnd, tond;
	3149	int error;
	3150
	3151	error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
	3152	if (error == 0) {
	3153	error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
	3154	if (error == 0)
	3155	error = kern_rename(&fromnd, &tond);
	3156	nlookup_done(&tond);
	3157	}
	3158	nlookup_done(&fromnd);
	3159	return (error);
	3160	}
	3161
	3162	int
	3163	kern_mkdir(struct nlookupdata *nd, int mode)
	3164	{
	3165	struct thread *td = curthread;
	3166	struct proc *p = td->td_proc;
	3167	struct vnode *vp;
	3168	struct vattr vattr;
	3169	int error;
	3170
	3171	bwillwrite();
	3172	nd->nl_flags \|= NLC_WILLBEDIR \| NLC_CREATE \| NLC_REFDVP;
	3173	if ((error = nlookup(nd)) != 0)
	3174	return (error);
	3175
	3176	if (nd->nl_nch.ncp->nc_vp)
	3177	return (EEXIST);
	3178	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	3179	return (error);
	3180	VATTR_NULL(&vattr);
	3181	vattr.va_type = VDIR;
	3182	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
	3183
	3184	vp = NULL;
	3185	error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, p->p_ucred, &vattr);
	3186	if (error == 0)
	3187	vput(vp);
	3188	return (error);
	3189	}
	3190
	3191	/*
	3192	* mkdir_args(char *path, int mode)
	3193	*
	3194	* Make a directory file.
	3195	*/
	3196	/* ARGSUSED */
	3197	int
	3198	sys_mkdir(struct mkdir_args *uap)
	3199	{
	3200	struct nlookupdata nd;
	3201	int error;
	3202
	3203	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	3204	if (error == 0)
	3205	error = kern_mkdir(&nd, uap->mode);
	3206	nlookup_done(&nd);
	3207	return (error);
	3208	}
	3209
	3210	int
	3211	kern_rmdir(struct nlookupdata *nd)
	3212	{
	3213	int error;
	3214
	3215	bwillwrite();
	3216	nd->nl_flags \|= NLC_DELETE \| NLC_REFDVP;
	3217	if ((error = nlookup(nd)) != 0)
	3218	return (error);
	3219
	3220	/*
	3221	* Do not allow directories representing mount points to be
	3222	* deleted, even if empty. Check write perms on mount point
	3223	* in case the vnode is aliased (aka nullfs).
	3224	*/
	3225	if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT))
	3226	return (EINVAL);
	3227	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	3228	return (error);
	3229	error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	3230	return (error);
	3231	}
	3232
	3233	/*
	3234	* rmdir_args(char *path)
	3235	*
	3236	* Remove a directory file.
	3237	*/
	3238	/* ARGSUSED */
	3239	int
	3240	sys_rmdir(struct rmdir_args *uap)
	3241	{
	3242	struct nlookupdata nd;
	3243	int error;
	3244
	3245	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	3246	if (error == 0)
	3247	error = kern_rmdir(&nd);
	3248	nlookup_done(&nd);
	3249	return (error);
	3250	}
	3251
	3252	int
	3253	kern_getdirentries(int fd, char buf, u_int count, long basep, int *res,
	3254	enum uio_seg direction)
	3255	{
	3256	struct thread *td = curthread;
	3257	struct proc *p = td->td_proc;
	3258	struct vnode *vp;
	3259	struct file *fp;
	3260	struct uio auio;
	3261	struct iovec aiov;
	3262	off_t loff;
	3263	int error, eofflag;
	3264
	3265	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	3266	return (error);
	3267	if ((fp->f_flag & FREAD) == 0) {
	3268	error = EBADF;
	3269	goto done;
	3270	}
	3271	vp = (struct vnode *)fp->f_data;
	3272	unionread:
	3273	if (vp->v_type != VDIR) {
	3274	error = EINVAL;
	3275	goto done;
	3276	}
	3277	aiov.iov_base = buf;
	3278	aiov.iov_len = count;
	3279	auio.uio_iov = &aiov;
	3280	auio.uio_iovcnt = 1;
	3281	auio.uio_rw = UIO_READ;
	3282	auio.uio_segflg = direction;
	3283	auio.uio_td = td;
	3284	auio.uio_resid = count;
	3285	loff = auio.uio_offset = fp->f_offset;
	3286	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
	3287	fp->f_offset = auio.uio_offset;
	3288	if (error)
	3289	goto done;
	3290	if (count == auio.uio_resid) {
	3291	if (union_dircheckp) {
	3292	error = union_dircheckp(td, &vp, fp);
	3293	if (error == -1)
	3294	goto unionread;
	3295	if (error)
	3296	goto done;
	3297	}
	3298	#if 0
	3299	if ((vp->v_flag & VROOT) &&
	3300	(vp->v_mount->mnt_flag & MNT_UNION)) {
	3301	struct vnode *tvp = vp;
	3302	vp = vp->v_mount->mnt_vnodecovered;
	3303	vref(vp);
	3304	fp->f_data = vp;
	3305	fp->f_offset = 0;
	3306	vrele(tvp);
	3307	goto unionread;
	3308	}
	3309	#endif
	3310	}
	3311
	3312	/*
	3313	* WARNING! *basep may not be wide enough to accomodate the
	3314	* seek offset. XXX should we hack this to return the upper 32 bits
	3315	* for offsets greater then 4G?
	3316	*/
	3317	if (basep) {
	3318	*basep = (long)loff;
	3319	}
	3320	*res = count - auio.uio_resid;
	3321	done:
	3322	fdrop(fp);
	3323	return (error);
	3324	}
	3325
	3326	/*
	3327	* getdirentries_args(int fd, char buf, u_int conut, long basep)
	3328	*
	3329	* Read a block of directory entries in a file system independent format.
	3330	*/
	3331	int
	3332	sys_getdirentries(struct getdirentries_args *uap)
	3333	{
	3334	long base;
	3335	int error;
	3336
	3337	error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
	3338	&uap->sysmsg_result, UIO_USERSPACE);
	3339
	3340	if (error == 0 && uap->basep)
	3341	error = copyout(&base, uap->basep, sizeof(*uap->basep));
	3342	return (error);
	3343	}
	3344
	3345	/*
	3346	* getdents_args(int fd, char *buf, size_t count)
	3347	*/
	3348	int
	3349	sys_getdents(struct getdents_args *uap)
	3350	{
	3351	int error;
	3352
	3353	error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
	3354	&uap->sysmsg_result, UIO_USERSPACE);
	3355
	3356	return (error);
	3357	}
	3358
	3359	/*
	3360	* umask(int newmask)
	3361	*
	3362	* Set the mode mask for creation of filesystem nodes.
	3363	*
	3364	* MP SAFE
	3365	*/
	3366	int
	3367	sys_umask(struct umask_args *uap)
	3368	{
	3369	struct thread *td = curthread;
	3370	struct proc *p = td->td_proc;
	3371	struct filedesc *fdp;
	3372
	3373	fdp = p->p_fd;
	3374	uap->sysmsg_result = fdp->fd_cmask;
	3375	fdp->fd_cmask = uap->newmask & ALLPERMS;
	3376	return (0);
	3377	}
	3378
	3379	/*
	3380	* revoke(char *path)
	3381	*
	3382	* Void all references to file by ripping underlying filesystem
	3383	* away from vnode.
	3384	*/
	3385	/* ARGSUSED */
	3386	int
	3387	sys_revoke(struct revoke_args *uap)
	3388	{
	3389	struct nlookupdata nd;
	3390	struct vattr vattr;
	3391	struct vnode *vp;
	3392	struct ucred *cred;
	3393	int error;
	3394
	3395	vp = NULL;
	3396	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	3397	if (error == 0)
	3398	error = nlookup(&nd);
	3399	if (error == 0)
	3400	error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	3401	cred = crhold(nd.nl_cred);
	3402	nlookup_done(&nd);
	3403	if (error == 0) {
	3404	if (vp->v_type != VCHR && vp->v_type != VBLK)
	3405	error = EINVAL;
	3406	if (error == 0)
	3407	error = VOP_GETATTR(vp, &vattr);
	3408	if (error == 0 && cred->cr_uid != vattr.va_uid)
	3409	error = suser_cred(cred, PRISON_ROOT);
	3410	if (error == 0 && count_udev(vp->v_umajor, vp->v_uminor) > 0) {
	3411	error = 0;
	3412	vx_lock(vp);
	3413	VOP_REVOKE(vp, REVOKEALL);
	3414	vx_unlock(vp);
	3415	}
	3416	vrele(vp);
	3417	}
	3418	if (cred)
	3419	crfree(cred);
	3420	return (error);
	3421	}
	3422
	3423	/*
	3424	* getfh_args(char fname, fhandle_t fhp)
	3425	*
	3426	* Get (NFS) file handle
	3427	*/
	3428	int
	3429	sys_getfh(struct getfh_args *uap)
	3430	{
	3431	struct thread *td = curthread;
	3432	struct nlookupdata nd;
	3433	fhandle_t fh;
	3434	struct vnode *vp;
	3435	int error;
	3436
	3437	/*
	3438	* Must be super user
	3439	*/
	3440	if ((error = suser(td)) != 0)
	3441	return (error);
	3442
	3443	vp = NULL;
	3444	error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW);
	3445	if (error == 0)
	3446	error = nlookup(&nd);
	3447	if (error == 0)
	3448	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	3449	nlookup_done(&nd);
	3450	if (error == 0) {
	3451	bzero(&fh, sizeof(fh));
	3452	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
	3453	error = VFS_VPTOFH(vp, &fh.fh_fid);
	3454	vput(vp);
	3455	if (error == 0)
	3456	error = copyout(&fh, uap->fhp, sizeof(fh));
	3457	}
	3458	return (error);
	3459	}
	3460
	3461	/*
	3462	* fhopen_args(const struct fhandle *u_fhp, int flags)
	3463	*
	3464	* syscall for the rpc.lockd to use to translate a NFS file handle into
	3465	* an open descriptor.
	3466	*
	3467	* warning: do not remove the suser() call or this becomes one giant
	3468	* security hole.
	3469	*/
	3470	int
	3471	sys_fhopen(struct fhopen_args *uap)
	3472	{
	3473	struct thread *td = curthread;
	3474	struct proc *p = td->td_proc;
	3475	struct mount *mp;
	3476	struct vnode *vp;
	3477	struct fhandle fhp;
	3478	struct vattr vat;
	3479	struct vattr *vap = &vat;
	3480	struct flock lf;
	3481	int fmode, mode, error, type;
	3482	struct file *nfp;
	3483	struct file *fp;
	3484	int indx;
	3485
	3486	/*
	3487	* Must be super user
	3488	*/
	3489	error = suser(td);
	3490	if (error)
	3491	return (error);
	3492
	3493	fmode = FFLAGS(uap->flags);
	3494	/* why not allow a non-read/write open for our lockd? */
	3495	if (((fmode & (FREAD \| FWRITE)) == 0) \|\| (fmode & O_CREAT))
	3496	return (EINVAL);
	3497	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	3498	if (error)
	3499	return(error);
	3500	/* find the mount point */
	3501	mp = vfs_getvfs(&fhp.fh_fsid);
	3502	if (mp == NULL)
	3503	return (ESTALE);
	3504	/* now give me my vnode, it gets returned to me locked */
	3505	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
	3506	if (error)
	3507	return (error);
	3508	/*
	3509	* from now on we have to make sure not
	3510	* to forget about the vnode
	3511	* any error that causes an abort must vput(vp)
	3512	* just set error = err and 'goto bad;'.
	3513	*/
	3514
	3515	/*
	3516	* from vn_open
	3517	*/
	3518	if (vp->v_type == VLNK) {
	3519	error = EMLINK;
	3520	goto bad;
	3521	}
	3522	if (vp->v_type == VSOCK) {
	3523	error = EOPNOTSUPP;
	3524	goto bad;
	3525	}
	3526	mode = 0;
	3527	if (fmode & (FWRITE \| O_TRUNC)) {
	3528	if (vp->v_type == VDIR) {
	3529	error = EISDIR;
	3530	goto bad;
	3531	}
	3532	error = vn_writechk(vp, NULL);
	3533	if (error)
	3534	goto bad;
	3535	mode \|= VWRITE;
	3536	}
	3537	if (fmode & FREAD)
	3538	mode \|= VREAD;
	3539	if (mode) {
	3540	error = VOP_ACCESS(vp, mode, p->p_ucred);
	3541	if (error)
	3542	goto bad;
	3543	}
	3544	if (fmode & O_TRUNC) {
	3545	vn_unlock(vp); /* XXX */
	3546	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY); /* XXX */
	3547	VATTR_NULL(vap);
	3548	vap->va_size = 0;
	3549	error = VOP_SETATTR(vp, vap, p->p_ucred);
	3550	if (error)
	3551	goto bad;
	3552	}
	3553
	3554	/*
	3555	* VOP_OPEN needs the file pointer so it can potentially override
	3556	* it.
	3557	*
	3558	* WARNING! no f_nchandle will be associated when fhopen()ing a
	3559	* directory. XXX
	3560	*/
	3561	if ((error = falloc(p, &nfp, &indx)) != 0)
	3562	goto bad;
	3563	fp = nfp;
	3564
	3565	error = VOP_OPEN(vp, fmode, p->p_ucred, fp);
	3566	if (error) {
	3567	/*
	3568	* setting f_ops this way prevents VOP_CLOSE from being
	3569	* called or fdrop() releasing the vp from v_data. Since
	3570	* the VOP_OPEN failed we don't want to VOP_CLOSE.
	3571	*/
	3572	fp->f_ops = &badfileops;
	3573	fp->f_data = NULL;
	3574	goto bad_drop;
	3575	}
	3576
	3577	/*
	3578	* The fp is given its own reference, we still have our ref and lock.
	3579	*
	3580	* Assert that all regular files must be created with a VM object.
	3581	*/
	3582	if (vp->v_type == VREG && vp->v_object == NULL) {
	3583	kprintf("fhopen: regular file did not have VM object: %p\n", vp);
	3584	goto bad_drop;
	3585	}
	3586
	3587	/*
	3588	* The open was successful. Handle any locking requirements.
	3589	*/
	3590	if (fmode & (O_EXLOCK \| O_SHLOCK)) {
	3591	lf.l_whence = SEEK_SET;
	3592	lf.l_start = 0;
	3593	lf.l_len = 0;
	3594	if (fmode & O_EXLOCK)
	3595	lf.l_type = F_WRLCK;
	3596	else
	3597	lf.l_type = F_RDLCK;
	3598	if (fmode & FNONBLOCK)
	3599	type = 0;
	3600	else
	3601	type = F_WAIT;
	3602	vn_unlock(vp);
	3603	if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
	3604	/*
	3605	* release our private reference.
	3606	*/
	3607	fsetfd(p, NULL, indx);
	3608	fdrop(fp);
	3609	vrele(vp);
	3610	return (error);
	3611	}
	3612	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	3613	fp->f_flag \|= FHASLOCK;
	3614	}
	3615
	3616	/*
	3617	* Clean up. Associate the file pointer with the previously
	3618	* reserved descriptor and return it.
	3619	*/
	3620	vput(vp);
	3621	fsetfd(p, fp, indx);
	3622	fdrop(fp);
	3623	uap->sysmsg_result = indx;
	3624	return (0);
	3625
	3626	bad_drop:
	3627	fsetfd(p, NULL, indx);
	3628	fdrop(fp);
	3629	bad:
	3630	vput(vp);
	3631	return (error);
	3632	}
	3633
	3634	/*
	3635	* fhstat_args(struct fhandle u_fhp, struct stat sb)
	3636	*/
	3637	int
	3638	sys_fhstat(struct fhstat_args *uap)
	3639	{
	3640	struct thread *td = curthread;
	3641	struct stat sb;
	3642	fhandle_t fh;
	3643	struct mount *mp;
	3644	struct vnode *vp;
	3645	int error;
	3646
	3647	/*
	3648	* Must be super user
	3649	*/
	3650	error = suser(td);
	3651	if (error)
	3652	return (error);
	3653
	3654	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
	3655	if (error)
	3656	return (error);
	3657
	3658	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
	3659	return (ESTALE);
	3660	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
	3661	return (error);
	3662	error = vn_stat(vp, &sb, td->td_proc->p_ucred);
	3663	vput(vp);
	3664	if (error)
	3665	return (error);
	3666	error = copyout(&sb, uap->sb, sizeof(sb));
	3667	return (error);
	3668	}
	3669
	3670	/*
	3671	* fhstatfs_args(struct fhandle u_fhp, struct statfs buf)
	3672	*/
	3673	int
	3674	sys_fhstatfs(struct fhstatfs_args *uap)
	3675	{
	3676	struct thread *td = curthread;
	3677	struct proc *p = td->td_proc;
	3678	struct statfs *sp;
	3679	struct mount *mp;
	3680	struct vnode *vp;
	3681	struct statfs sb;
	3682	char fullpath, freepath;
	3683	fhandle_t fh;
	3684	int error;
	3685
	3686	/*
	3687	* Must be super user
	3688	*/
	3689	if ((error = suser(td)))
	3690	return (error);
	3691
	3692	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
	3693	return (error);
	3694
	3695	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
	3696	return (ESTALE);
	3697
	3698	if (p != NULL && !chroot_visible_mnt(mp, p))
	3699	return (ESTALE);
	3700
	3701	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
	3702	return (error);
	3703	mp = vp->v_mount;
	3704	sp = &mp->mnt_stat;
	3705	vput(vp);
	3706	if ((error = VFS_STATFS(mp, sp, p->p_ucred)) != 0)
	3707	return (error);
	3708
	3709	error = mount_path(p, mp, &fullpath, &freepath);
	3710	if (error)
	3711	return(error);
	3712	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	3713	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	3714	kfree(freepath, M_TEMP);
	3715
	3716	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	3717	if (suser(td)) {
	3718	bcopy(sp, &sb, sizeof(sb));
	3719	sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
	3720	sp = &sb;
	3721	}
	3722	return (copyout(sp, uap->buf, sizeof(*sp)));
	3723	}
	3724
	3725	/*
	3726	* fhstatvfs_args(struct fhandle u_fhp, struct statvfs buf)
	3727	*/
	3728	int
	3729	sys_fhstatvfs(struct fhstatvfs_args *uap)
	3730	{
	3731	struct thread *td = curthread;
	3732	struct proc *p = td->td_proc;
	3733	struct statvfs *sp;
	3734	struct mount *mp;
	3735	struct vnode *vp;
	3736	fhandle_t fh;
	3737	int error;
	3738
	3739	/*
	3740	* Must be super user
	3741	*/
	3742	if ((error = suser(td)))
	3743	return (error);
	3744
	3745	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
	3746	return (error);
	3747
	3748	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
	3749	return (ESTALE);
	3750
	3751	if (p != NULL && !chroot_visible_mnt(mp, p))
	3752	return (ESTALE);
	3753
	3754	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
	3755	return (error);
	3756	mp = vp->v_mount;
	3757	sp = &mp->mnt_vstat;
	3758	vput(vp);
	3759	if ((error = VFS_STATVFS(mp, sp, p->p_ucred)) != 0)
	3760	return (error);
	3761
	3762	sp->f_flag = 0;
	3763	if (mp->mnt_flag & MNT_RDONLY)
	3764	sp->f_flag \|= ST_RDONLY;
	3765	if (mp->mnt_flag & MNT_NOSUID)
	3766	sp->f_flag \|= ST_NOSUID;
	3767
	3768	return (copyout(sp, uap->buf, sizeof(*sp)));
	3769	}
	3770
	3771
	3772	/*
	3773	* Syscall to push extended attribute configuration information into the
	3774	* VFS. Accepts a path, which it converts to a mountpoint, as well as
	3775	* a command (int cmd), and attribute name and misc data. For now, the
	3776	* attribute name is left in userspace for consumption by the VFS_op.
	3777	* It will probably be changed to be copied into sysspace by the
	3778	* syscall in the future, once issues with various consumers of the
	3779	* attribute code have raised their hands.
	3780	*
	3781	* Currently this is used only by UFS Extended Attributes.
	3782	*/
	3783	int
	3784	sys_extattrctl(struct extattrctl_args *uap)
	3785	{
	3786	struct nlookupdata nd;
	3787	struct mount *mp;
	3788	struct vnode *vp;
	3789	int error;
	3790
	3791	vp = NULL;
	3792	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	3793	if (error == 0)
	3794	error = nlookup(&nd);
	3795	if (error == 0) {
	3796	mp = nd.nl_nch.mount;
	3797	error = VFS_EXTATTRCTL(mp, uap->cmd,
	3798	uap->attrname, uap->arg,
	3799	nd.nl_cred);
	3800	}
	3801	nlookup_done(&nd);
	3802	return (error);
	3803	}
	3804
	3805	/*
	3806	* Syscall to set a named extended attribute on a file or directory.
	3807	* Accepts attribute name, and a uio structure pointing to the data to set.
	3808	* The uio is consumed in the style of writev(). The real work happens
	3809	* in VOP_SETEXTATTR().
	3810	*/
	3811	int
	3812	sys_extattr_set_file(struct extattr_set_file_args *uap)
	3813	{
	3814	char attrname[EXTATTR_MAXNAMELEN];
	3815	struct iovec aiov[UIO_SMALLIOV];
	3816	struct iovec *needfree;
	3817	struct nlookupdata nd;
	3818	struct iovec *iov;
	3819	struct vnode *vp;
	3820	struct uio auio;
	3821	u_int iovlen;
	3822	u_int cnt;
	3823	int error;
	3824	int i;
	3825
	3826	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	3827	if (error)
	3828	return (error);
	3829
	3830	vp = NULL;
	3831	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	3832	if (error == 0)
	3833	error = nlookup(&nd);
	3834	if (error == 0)
	3835	error = ncp_writechk(&nd.nl_nch);
	3836	if (error == 0)
	3837	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	3838	if (error) {
	3839	nlookup_done(&nd);
	3840	return (error);
	3841	}
	3842
	3843	needfree = NULL;
	3844	iovlen = uap->iovcnt * sizeof(struct iovec);
	3845	if (uap->iovcnt > UIO_SMALLIOV) {
	3846	if (uap->iovcnt > UIO_MAXIOV) {
	3847	error = EINVAL;
	3848	goto done;
	3849	}
	3850	MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
	3851	needfree = iov;
	3852	} else {
	3853	iov = aiov;
	3854	}
	3855	auio.uio_iov = iov;
	3856	auio.uio_iovcnt = uap->iovcnt;
	3857	auio.uio_rw = UIO_WRITE;
	3858	auio.uio_segflg = UIO_USERSPACE;
	3859	auio.uio_td = nd.nl_td;
	3860	auio.uio_offset = 0;
	3861	if ((error = copyin(uap->iovp, iov, iovlen)))
	3862	goto done;
	3863	auio.uio_resid = 0;
	3864	for (i = 0; i < uap->iovcnt; i++) {
	3865	if (iov->iov_len > INT_MAX - auio.uio_resid) {
	3866	error = EINVAL;
	3867	goto done;
	3868	}
	3869	auio.uio_resid += iov->iov_len;
	3870	iov++;
	3871	}
	3872	cnt = auio.uio_resid;
	3873	error = VOP_SETEXTATTR(vp, attrname, &auio, nd.nl_cred);
	3874	cnt -= auio.uio_resid;
	3875	uap->sysmsg_result = cnt;
	3876	done:
	3877	vput(vp);
	3878	nlookup_done(&nd);
	3879	if (needfree)
	3880	FREE(needfree, M_IOV);
	3881	return (error);
	3882	}
	3883
	3884	/*
	3885	* Syscall to get a named extended attribute on a file or directory.
	3886	* Accepts attribute name, and a uio structure pointing to a buffer for the
	3887	* data. The uio is consumed in the style of readv(). The real work
	3888	* happens in VOP_GETEXTATTR();
	3889	*/
	3890	int
	3891	sys_extattr_get_file(struct extattr_get_file_args *uap)
	3892	{
	3893	char attrname[EXTATTR_MAXNAMELEN];
	3894	struct iovec aiov[UIO_SMALLIOV];
	3895	struct iovec *needfree;
	3896	struct nlookupdata nd;
	3897	struct iovec *iov;
	3898	struct vnode *vp;
	3899	struct uio auio;
	3900	u_int iovlen;
	3901	u_int cnt;
	3902	int error;
	3903	int i;
	3904
	3905	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	3906	if (error)
	3907	return (error);
	3908
	3909	vp = NULL;
	3910	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	3911	if (error == 0)
	3912	error = nlookup(&nd);
	3913	if (error == 0)
	3914	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	3915	if (error) {
	3916	nlookup_done(&nd);
	3917	return (error);
	3918	}
	3919
	3920	iovlen = uap->iovcnt * sizeof (struct iovec);
	3921	needfree = NULL;
	3922	if (uap->iovcnt > UIO_SMALLIOV) {
	3923	if (uap->iovcnt > UIO_MAXIOV) {
	3924	error = EINVAL;
	3925	goto done;
	3926	}
	3927	MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
	3928	needfree = iov;
	3929	} else {
	3930	iov = aiov;
	3931	}
	3932	auio.uio_iov = iov;
	3933	auio.uio_iovcnt = uap->iovcnt;
	3934	auio.uio_rw = UIO_READ;
	3935	auio.uio_segflg = UIO_USERSPACE;
	3936	auio.uio_td = nd.nl_td;
	3937	auio.uio_offset = 0;
	3938	if ((error = copyin(uap->iovp, iov, iovlen)))
	3939	goto done;
	3940	auio.uio_resid = 0;
	3941	for (i = 0; i < uap->iovcnt; i++) {
	3942	if (iov->iov_len > INT_MAX - auio.uio_resid) {
	3943	error = EINVAL;
	3944	goto done;
	3945	}
	3946	auio.uio_resid += iov->iov_len;
	3947	iov++;
	3948	}
	3949	cnt = auio.uio_resid;
	3950	error = VOP_GETEXTATTR(vp, attrname, &auio, nd.nl_cred);
	3951	cnt -= auio.uio_resid;
	3952	uap->sysmsg_result = cnt;
	3953	done:
	3954	vput(vp);
	3955	nlookup_done(&nd);
	3956	if (needfree)
	3957	FREE(needfree, M_IOV);
	3958	return(error);
	3959	}
	3960
	3961	/*
	3962	* Syscall to delete a named extended attribute from a file or directory.
	3963	* Accepts attribute name. The real work happens in VOP_SETEXTATTR().
	3964	*/
	3965	int
	3966	sys_extattr_delete_file(struct extattr_delete_file_args *uap)
	3967	{
	3968	char attrname[EXTATTR_MAXNAMELEN];
	3969	struct nlookupdata nd;
	3970	struct vnode *vp;
	3971	int error;
	3972
	3973	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	3974	if (error)
	3975	return(error);
	3976
	3977	vp = NULL;
	3978	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	3979	if (error == 0)
	3980	error = nlookup(&nd);
	3981	if (error == 0)
	3982	error = ncp_writechk(&nd.nl_nch);
	3983	if (error == 0)
	3984	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	3985	if (error) {
	3986	nlookup_done(&nd);
	3987	return (error);
	3988	}
	3989
	3990	error = VOP_SETEXTATTR(vp, attrname, NULL, nd.nl_cred);
	3991	vput(vp);
	3992	nlookup_done(&nd);
	3993	return(error);
	3994	}
	3995
	3996	/*
	3997	* Determine if the mount is visible to the process.
	3998	*/
	3999	static int
	4000	chroot_visible_mnt(struct mount mp, struct proc p)
	4001	{
	4002	struct nchandle nch;
	4003
	4004	/*
	4005	* Traverse from the mount point upwards. If we hit the process
	4006	* root then the mount point is visible to the process.
	4007	*/
	4008	nch = mp->mnt_ncmountpt;
	4009	while (nch.ncp) {
	4010	if (nch.mount == p->p_fd->fd_nrdir.mount &&
	4011	nch.ncp == p->p_fd->fd_nrdir.ncp) {
	4012	return(1);
	4013	}
	4014	if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) {
	4015	nch = nch.mount->mnt_ncmounton;
	4016	} else {
	4017	nch.ncp = nch.ncp->nc_parent;
	4018	}
	4019	}
	4020
	4021	/*
	4022	* If the mount point is not visible to the process, but the
	4023	* process root is in a subdirectory of the mount, return
	4024	* TRUE anyway.
	4025	*/
	4026	if (p->p_fd->fd_nrdir.mount == mp)
	4027	return(1);
	4028
	4029	return(0);
	4030	}
	4031