gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 1989, 1993
	3	* The Regents of the University of California. All rights reserved.
	4	* (c) UNIX System Laboratories, Inc.
	5	* All or some portions of this file are derived from material licensed
	6	* to the University of California by American Telephone and Telegraph
	7	* Co. or Unix System Laboratories, Inc. and are reproduced herein with
	8	* the permission of UNIX System Laboratories, Inc.
	9	*
	10	* Redistribution and use in source and binary forms, with or without
	11	* modification, are permitted provided that the following conditions
	12	* are met:
	13	* 1. Redistributions of source code must retain the above copyright
	14	* notice, this list of conditions and the following disclaimer.
	15	* 2. Redistributions in binary form must reproduce the above copyright
	16	* notice, this list of conditions and the following disclaimer in the
	17	* documentation and/or other materials provided with the distribution.
	18	* 3. All advertising materials mentioning features or use of this software
	19	* must display the following acknowledgement:
	20	* This product includes software developed by the University of
	21	* California, Berkeley and its contributors.
	22	* 4. Neither the name of the University nor the names of its contributors
	23	* may be used to endorse or promote products derived from this software
	24	* without specific prior written permission.
	25	*
	26	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	27	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	28	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	29	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	30	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	31	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	32	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	33	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	34	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	35	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	36	* SUCH DAMAGE.
	37	*
	38	* @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
	39	* $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
	40	* $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.135 2008/11/11 00:55:49 pavalos Exp $
	41	*/
	42
	43	#include <sys/param.h>
	44	#include <sys/systm.h>
	45	#include <sys/buf.h>
	46	#include <sys/conf.h>
	47	#include <sys/sysent.h>
	48	#include <sys/malloc.h>
	49	#include <sys/mount.h>
	50	#include <sys/mountctl.h>
	51	#include <sys/sysproto.h>
	52	#include <sys/filedesc.h>
	53	#include <sys/kernel.h>
	54	#include <sys/fcntl.h>
	55	#include <sys/file.h>
	56	#include <sys/linker.h>
	57	#include <sys/stat.h>
	58	#include <sys/unistd.h>
	59	#include <sys/vnode.h>
	60	#include <sys/proc.h>
	61	#include <sys/priv.h>
	62	#include <sys/namei.h>
	63	#include <sys/nlookup.h>
	64	#include <sys/dirent.h>
	65	#include <sys/extattr.h>
	66	#include <sys/spinlock.h>
	67	#include <sys/kern_syscall.h>
	68	#include <sys/objcache.h>
	69	#include <sys/sysctl.h>
	70
	71	#include <sys/buf2.h>
	72	#include <sys/file2.h>
	73	#include <sys/spinlock2.h>
	74
	75	#include <vm/vm.h>
	76	#include <vm/vm_object.h>
	77	#include <vm/vm_page.h>
	78
	79	#include <machine/limits.h>
	80	#include <machine/stdarg.h>
	81
	82	#include <vfs/union/union.h>
	83
	84	static void mount_warning(struct mount mp, const char ctl, ...);
	85	static int mount_path(struct proc p, struct mount mp, char rb, char fb);
	86	static int checkvp_chdir (struct vnode vn, struct thread td);
	87	static void checkdirs (struct nchandle old_nch, struct nchandle new_nch);
	88	static int chroot_refuse_vdir_fds (struct filedesc *fdp);
	89	static int chroot_visible_mnt(struct mount mp, struct proc p);
	90	static int getutimes (const struct timeval , struct timespec );
	91	static int setfown (struct vnode *, uid_t, gid_t);
	92	static int setfmode (struct vnode *, int);
	93	static int setfflags (struct vnode *, int);
	94	static int setutimes (struct vnode , const struct timespec , int);
	95	static int usermount = 0; /* if 1, non-root can mount fs. */
	96
	97	int (union_dircheckp) (struct thread , struct vnode *, struct file );
	98
	99	SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
	100
	101	/*
	102	* Virtual File System System Calls
	103	*/
	104
	105	/*
	106	* Mount a file system.
	107	*/
	108	/*
	109	* mount_args(char type, char path, int flags, caddr_t data)
	110	*/
	111	/* ARGSUSED */
	112	int
	113	sys_mount(struct mount_args *uap)
	114	{
	115	struct thread *td = curthread;
	116	struct proc *p = td->td_proc;
	117	struct vnode *vp;
	118	struct nchandle nch;
	119	struct mount *mp;
	120	struct vfsconf *vfsp;
	121	int error, flag = 0, flag2 = 0;
	122	int hasmount;
	123	struct vattr va;
	124	struct nlookupdata nd;
	125	char fstypename[MFSNAMELEN];
	126	struct ucred *cred = p->p_ucred;
	127
	128	KKASSERT(p);
	129	if (jailed(cred))
	130	return (EPERM);
	131	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
	132	return (error);
	133	/*
	134	* Do not allow NFS export by non-root users.
	135	*/
	136	if (uap->flags & MNT_EXPORTED) {
	137	error = priv_check(td, PRIV_ROOT);
	138	if (error)
	139	return (error);
	140	}
	141	/*
	142	* Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	143	*/
	144	if (priv_check(td, PRIV_ROOT))
	145	uap->flags \|= MNT_NOSUID \| MNT_NODEV;
	146
	147	/*
	148	* Lookup the requested path and extract the nch and vnode.
	149	*/
	150	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	151	if (error == 0) {
	152	if ((error = nlookup(&nd)) == 0) {
	153	if (nd.nl_nch.ncp->nc_vp == NULL)
	154	error = ENOENT;
	155	}
	156	}
	157	if (error) {
	158	nlookup_done(&nd);
	159	return (error);
	160	}
	161
	162	/*
	163	* Extract the locked+refd ncp and cleanup the nd structure
	164	*/
	165	nch = nd.nl_nch;
	166	cache_zero(&nd.nl_nch);
	167	nlookup_done(&nd);
	168
	169	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && cache_findmount(&nch))
	170	hasmount = 1;
	171	else
	172	hasmount = 0;
	173
	174
	175	/*
	176	* now we have the locked ref'd nch and unreferenced vnode.
	177	*/
	178	vp = nch.ncp->nc_vp;
	179	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
	180	cache_put(&nch);
	181	return (error);
	182	}
	183	cache_unlock(&nch);
	184
	185	/*
	186	* Now we have an unlocked ref'd nch and a locked ref'd vp
	187	*/
	188	if (uap->flags & MNT_UPDATE) {
	189	if ((vp->v_flag & (VROOT\|VPFSROOT)) == 0) {
	190	cache_drop(&nch);
	191	vput(vp);
	192	return (EINVAL);
	193	}
	194	mp = vp->v_mount;
	195	flag = mp->mnt_flag;
	196	flag2 = mp->mnt_kern_flag;
	197	/*
	198	* We only allow the filesystem to be reloaded if it
	199	* is currently mounted read-only.
	200	*/
	201	if ((uap->flags & MNT_RELOAD) &&
	202	((mp->mnt_flag & MNT_RDONLY) == 0)) {
	203	cache_drop(&nch);
	204	vput(vp);
	205	return (EOPNOTSUPP); /* Needs translation */
	206	}
	207	/*
	208	* Only root, or the user that did the original mount is
	209	* permitted to update it.
	210	*/
	211	if (mp->mnt_stat.f_owner != cred->cr_uid &&
	212	(error = priv_check(td, PRIV_ROOT))) {
	213	cache_drop(&nch);
	214	vput(vp);
	215	return (error);
	216	}
	217	if (vfs_busy(mp, LK_NOWAIT)) {
	218	cache_drop(&nch);
	219	vput(vp);
	220	return (EBUSY);
	221	}
	222	if ((vp->v_flag & VMOUNT) != 0 \|\| hasmount) {
	223	cache_drop(&nch);
	224	vfs_unbusy(mp);
	225	vput(vp);
	226	return (EBUSY);
	227	}
	228	vp->v_flag \|= VMOUNT;
	229	mp->mnt_flag \|=
	230	uap->flags & (MNT_RELOAD \| MNT_FORCE \| MNT_UPDATE);
	231	vn_unlock(vp);
	232	goto update;
	233	}
	234	/*
	235	* If the user is not root, ensure that they own the directory
	236	* onto which we are attempting to mount.
	237	*/
	238	if ((error = VOP_GETATTR(vp, &va)) \|\|
	239	(va.va_uid != cred->cr_uid && (error = priv_check(td, PRIV_ROOT)))) {
	240	cache_drop(&nch);
	241	vput(vp);
	242	return (error);
	243	}
	244	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
	245	cache_drop(&nch);
	246	vput(vp);
	247	return (error);
	248	}
	249	if (vp->v_type != VDIR) {
	250	cache_drop(&nch);
	251	vput(vp);
	252	return (ENOTDIR);
	253	}
	254	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
	255	cache_drop(&nch);
	256	vput(vp);
	257	return (EPERM);
	258	}
	259	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
	260	cache_drop(&nch);
	261	vput(vp);
	262	return (error);
	263	}
	264	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
	265	if (!strcmp(vfsp->vfc_name, fstypename))
	266	break;
	267	}
	268	if (vfsp == NULL) {
	269	linker_file_t lf;
	270
	271	/* Only load modules for root (very important!) */
	272	if ((error = priv_check(td, PRIV_ROOT)) != 0) {
	273	cache_drop(&nch);
	274	vput(vp);
	275	return error;
	276	}
	277	error = linker_load_file(fstypename, &lf);
	278	if (error \|\| lf == NULL) {
	279	cache_drop(&nch);
	280	vput(vp);
	281	if (lf == NULL)
	282	error = ENODEV;
	283	return error;
	284	}
	285	lf->userrefs++;
	286	/* lookup again, see if the VFS was loaded */
	287	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
	288	if (!strcmp(vfsp->vfc_name, fstypename))
	289	break;
	290	}
	291	if (vfsp == NULL) {
	292	lf->userrefs--;
	293	linker_file_unload(lf);
	294	cache_drop(&nch);
	295	vput(vp);
	296	return (ENODEV);
	297	}
	298	}
	299	if ((vp->v_flag & VMOUNT) != 0 \|\| hasmount) {
	300	cache_drop(&nch);
	301	vput(vp);
	302	return (EBUSY);
	303	}
	304	vp->v_flag \|= VMOUNT;
	305
	306	/*
	307	* Allocate and initialize the filesystem.
	308	*/
	309	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO\|M_WAITOK);
	310	TAILQ_INIT(&mp->mnt_nvnodelist);
	311	TAILQ_INIT(&mp->mnt_reservedvnlist);
	312	TAILQ_INIT(&mp->mnt_jlist);
	313	mp->mnt_nvnodelistsize = 0;
	314	lockinit(&mp->mnt_lock, "vfslock", 0, 0);
	315	vfs_busy(mp, LK_NOWAIT);
	316	mp->mnt_op = vfsp->vfc_vfsops;
	317	mp->mnt_vfc = vfsp;
	318	vfsp->vfc_refcount++;
	319	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	320	mp->mnt_flag \|= vfsp->vfc_flags & MNT_VISFLAGMASK;
	321	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	322	mp->mnt_stat.f_owner = cred->cr_uid;
	323	mp->mnt_iosize_max = DFLTPHYS;
	324	vn_unlock(vp);
	325	update:
	326	/*
	327	* Set the mount level flags.
	328	*/
	329	if (uap->flags & MNT_RDONLY)
	330	mp->mnt_flag \|= MNT_RDONLY;
	331	else if (mp->mnt_flag & MNT_RDONLY)
	332	mp->mnt_kern_flag \|= MNTK_WANTRDWR;
	333	mp->mnt_flag &=~ (MNT_NOSUID \| MNT_NOEXEC \| MNT_NODEV \|
	334	MNT_SYNCHRONOUS \| MNT_UNION \| MNT_ASYNC \| MNT_NOATIME \|
	335	MNT_NOSYMFOLLOW \| MNT_IGNORE \|
	336	MNT_NOCLUSTERR \| MNT_NOCLUSTERW \| MNT_SUIDDIR);
	337	mp->mnt_flag \|= uap->flags & (MNT_NOSUID \| MNT_NOEXEC \|
	338	MNT_NODEV \| MNT_SYNCHRONOUS \| MNT_UNION \| MNT_ASYNC \| MNT_FORCE \|
	339	MNT_NOSYMFOLLOW \| MNT_IGNORE \|
	340	MNT_NOATIME \| MNT_NOCLUSTERR \| MNT_NOCLUSTERW \| MNT_SUIDDIR);
	341	/*
	342	* Mount the filesystem.
	343	* XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	344	* get.
	345	*/
	346	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	347	if (mp->mnt_flag & MNT_UPDATE) {
	348	if (mp->mnt_kern_flag & MNTK_WANTRDWR)
	349	mp->mnt_flag &= ~MNT_RDONLY;
	350	mp->mnt_flag &=~ (MNT_UPDATE \| MNT_RELOAD \| MNT_FORCE);
	351	mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
	352	if (error) {
	353	mp->mnt_flag = flag;
	354	mp->mnt_kern_flag = flag2;
	355	}
	356	vfs_unbusy(mp);
	357	vp->v_flag &= ~VMOUNT;
	358	vrele(vp);
	359	cache_drop(&nch);
	360	return (error);
	361	}
	362	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	363	/*
	364	* Put the new filesystem on the mount list after root. The mount
	365	* point gets its own mnt_ncmountpt (unless the VFS already set one
	366	* up) which represents the root of the mount. The lookup code
	367	* detects the mount point going forward and checks the root of
	368	* the mount going backwards.
	369	*
	370	* It is not necessary to invalidate or purge the vnode underneath
	371	* because elements under the mount will be given their own glue
	372	* namecache record.
	373	*/
	374	if (!error) {
	375	if (mp->mnt_ncmountpt.ncp == NULL) {
	376	/*
	377	* allocate, then unlock, but leave the ref intact
	378	*/
	379	cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
	380	cache_unlock(&mp->mnt_ncmountpt);
	381	}
	382	mp->mnt_ncmounton = nch; /* inherits ref */
	383	nch.ncp->nc_flag \|= NCF_ISMOUNTPT;
	384
	385	/* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */
	386	vp->v_flag &= ~VMOUNT;
	387	mountlist_insert(mp, MNTINS_LAST);
	388	checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
	389	vn_unlock(vp);
	390	error = vfs_allocate_syncvnode(mp);
	391	vfs_unbusy(mp);
	392	error = VFS_START(mp, 0);
	393	vrele(vp);
	394	} else {
	395	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	396	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	397	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	398	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	399	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
	400	vp->v_flag &= ~VMOUNT;
	401	mp->mnt_vfc->vfc_refcount--;
	402	vfs_unbusy(mp);
	403	kfree(mp, M_MOUNT);
	404	cache_drop(&nch);
	405	vput(vp);
	406	}
	407	return (error);
	408	}
	409
	410	/*
	411	* Scan all active processes to see if any of them have a current
	412	* or root directory onto which the new filesystem has just been
	413	* mounted. If so, replace them with the new mount point.
	414	*
	415	* The passed ncp is ref'd and locked (from the mount code) and
	416	* must be associated with the vnode representing the root of the
	417	* mount point.
	418	*/
	419	struct checkdirs_info {
	420	struct nchandle old_nch;
	421	struct nchandle new_nch;
	422	struct vnode *old_vp;
	423	struct vnode *new_vp;
	424	};
	425
	426	static int checkdirs_callback(struct proc p, void data);
	427
	428	static void
	429	checkdirs(struct nchandle old_nch, struct nchandle new_nch)
	430	{
	431	struct checkdirs_info info;
	432	struct vnode *olddp;
	433	struct vnode *newdp;
	434	struct mount *mp;
	435
	436	/*
	437	* If the old mount point's vnode has a usecount of 1, it is not
	438	* being held as a descriptor anywhere.
	439	*/
	440	olddp = old_nch->ncp->nc_vp;
	441	if (olddp == NULL \|\| olddp->v_sysref.refcnt == 1)
	442	return;
	443
	444	/*
	445	* Force the root vnode of the new mount point to be resolved
	446	* so we can update any matching processes.
	447	*/
	448	mp = new_nch->mount;
	449	if (VFS_ROOT(mp, &newdp))
	450	panic("mount: lost mount");
	451	cache_setunresolved(new_nch);
	452	cache_setvp(new_nch, newdp);
	453
	454	/*
	455	* Special handling of the root node
	456	*/
	457	if (rootvnode == olddp) {
	458	vref(newdp);
	459	vfs_cache_setroot(newdp, cache_hold(new_nch));
	460	}
	461
	462	/*
	463	* Pass newdp separately so the callback does not have to access
	464	* it via new_nch->ncp->nc_vp.
	465	*/
	466	info.old_nch = *old_nch;
	467	info.new_nch = *new_nch;
	468	info.new_vp = newdp;
	469	allproc_scan(checkdirs_callback, &info);
	470	vput(newdp);
	471	}
	472
	473	/*
	474	* NOTE: callback is not MP safe because the scanned process's filedesc
	475	* structure can be ripped out from under us, amoung other things.
	476	*/
	477	static int
	478	checkdirs_callback(struct proc p, void data)
	479	{
	480	struct checkdirs_info *info = data;
	481	struct filedesc *fdp;
	482	struct nchandle ncdrop1;
	483	struct nchandle ncdrop2;
	484	struct vnode *vprele1;
	485	struct vnode *vprele2;
	486
	487	if ((fdp = p->p_fd) != NULL) {
	488	cache_zero(&ncdrop1);
	489	cache_zero(&ncdrop2);
	490	vprele1 = NULL;
	491	vprele2 = NULL;
	492
	493	/*
	494	* MPUNSAFE - XXX fdp can be pulled out from under a
	495	* foreign process.
	496	*
	497	* A shared filedesc is ok, we don't have to copy it
	498	* because we are making this change globally.
	499	*/
	500	spin_lock_wr(&fdp->fd_spin);
	501	if (fdp->fd_ncdir.mount == info->old_nch.mount &&
	502	fdp->fd_ncdir.ncp == info->old_nch.ncp) {
	503	vprele1 = fdp->fd_cdir;
	504	vref(info->new_vp);
	505	fdp->fd_cdir = info->new_vp;
	506	ncdrop1 = fdp->fd_ncdir;
	507	cache_copy(&info->new_nch, &fdp->fd_ncdir);
	508	}
	509	if (fdp->fd_nrdir.mount == info->old_nch.mount &&
	510	fdp->fd_nrdir.ncp == info->old_nch.ncp) {
	511	vprele2 = fdp->fd_rdir;
	512	vref(info->new_vp);
	513	fdp->fd_rdir = info->new_vp;
	514	ncdrop2 = fdp->fd_nrdir;
	515	cache_copy(&info->new_nch, &fdp->fd_nrdir);
	516	}
	517	spin_unlock_wr(&fdp->fd_spin);
	518	if (ncdrop1.ncp)
	519	cache_drop(&ncdrop1);
	520	if (ncdrop2.ncp)
	521	cache_drop(&ncdrop2);
	522	if (vprele1)
	523	vrele(vprele1);
	524	if (vprele2)
	525	vrele(vprele2);
	526	}
	527	return(0);
	528	}
	529
	530	/*
	531	* Unmount a file system.
	532	*
	533	* Note: unmount takes a path to the vnode mounted on as argument,
	534	* not special file (as before).
	535	*/
	536	/*
	537	* umount_args(char *path, int flags)
	538	*/
	539	/* ARGSUSED */
	540	int
	541	sys_unmount(struct unmount_args *uap)
	542	{
	543	struct thread *td = curthread;
	544	struct proc *p = td->td_proc;
	545	struct mount *mp = NULL;
	546	int error;
	547	struct nlookupdata nd;
	548
	549	KKASSERT(p);
	550	if (p->p_ucred->cr_prison != NULL)
	551	return (EPERM);
	552	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
	553	return (error);
	554
	555	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	556	if (error == 0)
	557	error = nlookup(&nd);
	558	if (error)
	559	goto out;
	560
	561	mp = nd.nl_nch.mount;
	562
	563	/*
	564	* Only root, or the user that did the original mount is
	565	* permitted to unmount this filesystem.
	566	*/
	567	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
	568	(error = priv_check(td, PRIV_ROOT)))
	569	goto out;
	570
	571	/*
	572	* Don't allow unmounting the root file system.
	573	*/
	574	if (mp->mnt_flag & MNT_ROOTFS) {
	575	error = EINVAL;
	576	goto out;
	577	}
	578
	579	/*
	580	* Must be the root of the filesystem
	581	*/
	582	if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
	583	error = EINVAL;
	584	goto out;
	585	}
	586
	587	out:
	588	nlookup_done(&nd);
	589	if (error)
	590	return (error);
	591	return (dounmount(mp, uap->flags));
	592	}
	593
	594	/*
	595	* Do the actual file system unmount.
	596	*/
	597	static int
	598	dounmount_interlock(struct mount *mp)
	599	{
	600	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
	601	return (EBUSY);
	602	mp->mnt_kern_flag \|= MNTK_UNMOUNT;
	603	return(0);
	604	}
	605
	606	int
	607	dounmount(struct mount *mp, int flags)
	608	{
	609	struct namecache *ncp;
	610	struct nchandle nch;
	611	struct vnode *vp;
	612	int error;
	613	int async_flag;
	614	int lflags;
	615	int freeok = 1;
	616
	617	/*
	618	* Exclusive access for unmounting purposes
	619	*/
	620	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
	621	return (error);
	622
	623	/*
	624	* Allow filesystems to detect that a forced unmount is in progress.
	625	*/
	626	if (flags & MNT_FORCE)
	627	mp->mnt_kern_flag \|= MNTK_UNMOUNTF;
	628	lflags = LK_EXCLUSIVE \| ((flags & MNT_FORCE) ? 0 : LK_NOWAIT);
	629	error = lockmgr(&mp->mnt_lock, lflags);
	630	if (error) {
	631	mp->mnt_kern_flag &= ~(MNTK_UNMOUNT \| MNTK_UNMOUNTF);
	632	if (mp->mnt_kern_flag & MNTK_MWAIT)
	633	wakeup(mp);
	634	return (error);
	635	}
	636
	637	if (mp->mnt_flag & MNT_EXPUBLIC)
	638	vfs_setpublicfs(NULL, NULL, NULL);
	639
	640	vfs_msync(mp, MNT_WAIT);
	641	async_flag = mp->mnt_flag & MNT_ASYNC;
	642	mp->mnt_flag &=~ MNT_ASYNC;
	643
	644	/*
	645	* If this filesystem isn't aliasing other filesystems,
	646	* try to invalidate any remaining namecache entries and
	647	* check the count afterwords.
	648	*/
	649	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
	650	cache_lock(&mp->mnt_ncmountpt);
	651	cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY\|CINV_CHILDREN);
	652	cache_unlock(&mp->mnt_ncmountpt);
	653
	654	if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
	655	(ncp->nc_refs != 1 \|\| TAILQ_FIRST(&ncp->nc_list))) {
	656
	657	if ((flags & MNT_FORCE) == 0) {
	658	error = EBUSY;
	659	mount_warning(mp, "Cannot unmount: "
	660	"%d namecache "
	661	"references still "
	662	"present",
	663	ncp->nc_refs - 1);
	664	} else {
	665	mount_warning(mp, "Forced unmount: "
	666	"%d namecache "
	667	"references still "
	668	"present",
	669	ncp->nc_refs - 1);
	670	freeok = 0;
	671	}
	672	}
	673	}
	674
	675	/*
	676	* nchandle records ref the mount structure. Expect a count of 1
	677	* (our mount->mnt_ncmountpt).
	678	*/
	679	if (mp->mnt_refs != 1) {
	680	if ((flags & MNT_FORCE) == 0) {
	681	mount_warning(mp, "Cannot unmount: "
	682	"%d process references still "
	683	"present", mp->mnt_refs);
	684	error = EBUSY;
	685	} else {
	686	mount_warning(mp, "Forced unmount: "
	687	"%d process references still "
	688	"present", mp->mnt_refs);
	689	freeok = 0;
	690	}
	691	}
	692
	693	/*
	694	* Decomission our special mnt_syncer vnode. This also stops
	695	* the vnlru code. If we are unable to unmount we recommission
	696	* the vnode.
	697	*/
	698	if (error == 0) {
	699	if ((vp = mp->mnt_syncer) != NULL) {
	700	mp->mnt_syncer = NULL;
	701	vrele(vp);
	702	}
	703	if (((mp->mnt_flag & MNT_RDONLY) \|\|
	704	(error = VFS_SYNC(mp, MNT_WAIT)) == 0) \|\|
	705	(flags & MNT_FORCE)) {
	706	error = VFS_UNMOUNT(mp, flags);
	707	}
	708	}
	709	if (error) {
	710	if (mp->mnt_syncer == NULL)
	711	vfs_allocate_syncvnode(mp);
	712	mp->mnt_kern_flag &= ~(MNTK_UNMOUNT \| MNTK_UNMOUNTF);
	713	mp->mnt_flag \|= async_flag;
	714	lockmgr(&mp->mnt_lock, LK_RELEASE);
	715	if (mp->mnt_kern_flag & MNTK_MWAIT)
	716	wakeup(mp);
	717	return (error);
	718	}
	719	/*
	720	* Clean up any journals still associated with the mount after
	721	* filesystem activity has ceased.
	722	*/
	723	journal_remove_all_journals(mp,
	724	((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));
	725
	726	mountlist_remove(mp);
	727
	728	/*
	729	* Remove any installed vnode ops here so the individual VFSs don't
	730	* have to.
	731	*/
	732	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	733	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	734	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	735	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	736	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
	737
	738	if (mp->mnt_ncmountpt.ncp != NULL) {
	739	nch = mp->mnt_ncmountpt;
	740	cache_zero(&mp->mnt_ncmountpt);
	741	cache_clrmountpt(&nch);
	742	cache_drop(&nch);
	743	}
	744	if (mp->mnt_ncmounton.ncp != NULL) {
	745	nch = mp->mnt_ncmounton;
	746	cache_zero(&mp->mnt_ncmounton);
	747	cache_clrmountpt(&nch);
	748	cache_drop(&nch);
	749	}
	750
	751	mp->mnt_vfc->vfc_refcount--;
	752	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
	753	panic("unmount: dangling vnode");
	754	lockmgr(&mp->mnt_lock, LK_RELEASE);
	755	if (mp->mnt_kern_flag & MNTK_MWAIT)
	756	wakeup(mp);
	757	if (freeok)
	758	kfree(mp, M_MOUNT);
	759	return (0);
	760	}
	761
	762	static
	763	void
	764	mount_warning(struct mount mp, const char ctl, ...)
	765	{
	766	char *ptr;
	767	char *buf;
	768	__va_list va;
	769
	770	__va_start(va, ctl);
	771	if (cache_fullpath(NULL, &mp->mnt_ncmounton, &ptr, &buf) == 0) {
	772	kprintf("unmount(%s): ", ptr);
	773	kvprintf(ctl, va);
	774	kprintf("\n");
	775	kfree(buf, M_TEMP);
	776	} else {
	777	kprintf("unmount(%p", mp);
	778	if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
	779	kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
	780	kprintf("): ");
	781	kvprintf(ctl, va);
	782	kprintf("\n");
	783	}
	784	__va_end(va);
	785	}
	786
	787	/*
	788	* Shim cache_fullpath() to handle the case where a process is chrooted into
	789	* a subdirectory of a mount. In this case if the root mount matches the
	790	* process root directory's mount we have to specify the process's root
	791	* directory instead of the mount point, because the mount point might
	792	* be above the root directory.
	793	*/
	794	static
	795	int
	796	mount_path(struct proc p, struct mount mp, char rb, char fb)
	797	{
	798	struct nchandle *nch;
	799
	800	if (p && p->p_fd->fd_nrdir.mount == mp)
	801	nch = &p->p_fd->fd_nrdir;
	802	else
	803	nch = &mp->mnt_ncmountpt;
	804	return(cache_fullpath(p, nch, rb, fb));
	805	}
	806
	807	/*
	808	* Sync each mounted filesystem.
	809	*/
	810
	811	#ifdef DEBUG
	812	static int syncprt = 0;
	813	SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
	814	#endif /* DEBUG */
	815
	816	static int sync_callback(struct mount mp, void data);
	817
	818	/* ARGSUSED */
	819	int
	820	sys_sync(struct sync_args *uap)
	821	{
	822	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
	823	#ifdef DEBUG
	824	/*
	825	* print out buffer pool stat information on each sync() call.
	826	*/
	827	if (syncprt)
	828	vfs_bufstats();
	829	#endif /* DEBUG */
	830	return (0);
	831	}
	832
	833	static
	834	int
	835	sync_callback(struct mount mp, void data __unused)
	836	{
	837	int asyncflag;
	838
	839	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
	840	asyncflag = mp->mnt_flag & MNT_ASYNC;
	841	mp->mnt_flag &= ~MNT_ASYNC;
	842	vfs_msync(mp, MNT_NOWAIT);
	843	VFS_SYNC(mp, MNT_NOWAIT);
	844	mp->mnt_flag \|= asyncflag;
	845	}
	846	return(0);
	847	}
	848
	849	/* XXX PRISON: could be per prison flag */
	850	static int prison_quotas;
	851	#if 0
	852	SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
	853	#endif
	854
	855	/*
	856	* quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
	857	*
	858	* Change filesystem quotas.
	859	*/
	860	/* ARGSUSED */
	861	int
	862	sys_quotactl(struct quotactl_args *uap)
	863	{
	864	struct nlookupdata nd;
	865	struct thread *td;
	866	struct proc *p;
	867	struct mount *mp;
	868	int error;
	869
	870	td = curthread;
	871	p = td->td_proc;
	872	if (p->p_ucred->cr_prison && !prison_quotas)
	873	return (EPERM);
	874
	875	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	876	if (error == 0)
	877	error = nlookup(&nd);
	878	if (error == 0) {
	879	mp = nd.nl_nch.mount;
	880	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
	881	uap->arg, nd.nl_cred);
	882	}
	883	nlookup_done(&nd);
	884	return (error);
	885	}
	886
	887	/*
	888	* mountctl(char path, int op, int fd, const void ctl, int ctllen,
	889	* void *buf, int buflen)
	890	*
	891	* This function operates on a mount point and executes the specified
	892	* operation using the specified control data, and possibly returns data.
	893	*
	894	* The actual number of bytes stored in the result buffer is returned, 0
	895	* if none, otherwise an error is returned.
	896	*/
	897	/* ARGSUSED */
	898	int
	899	sys_mountctl(struct mountctl_args *uap)
	900	{
	901	struct thread *td = curthread;
	902	struct proc *p = td->td_proc;
	903	struct file *fp;
	904	void *ctl = NULL;
	905	void *buf = NULL;
	906	char *path = NULL;
	907	int error;
	908
	909	/*
	910	* Sanity and permissions checks. We must be root.
	911	*/
	912	KKASSERT(p);
	913	if (p->p_ucred->cr_prison != NULL)
	914	return (EPERM);
	915	if ((error = priv_check(td, PRIV_ROOT)) != 0)
	916	return (error);
	917
	918	/*
	919	* Argument length checks
	920	*/
	921	if (uap->ctllen < 0 \|\| uap->ctllen > 1024)
	922	return (EINVAL);
	923	if (uap->buflen < 0 \|\| uap->buflen > 16 * 1024)
	924	return (EINVAL);
	925	if (uap->path == NULL)
	926	return (EINVAL);
	927
	928	/*
	929	* Allocate the necessary buffers and copyin data
	930	*/
	931	path = objcache_get(namei_oc, M_WAITOK);
	932	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	933	if (error)
	934	goto done;
	935
	936	if (uap->ctllen) {
	937	ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK\|M_ZERO);
	938	error = copyin(uap->ctl, ctl, uap->ctllen);
	939	if (error)
	940	goto done;
	941	}
	942	if (uap->buflen)
	943	buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK\|M_ZERO);
	944
	945	/*
	946	* Validate the descriptor
	947	*/
	948	if (uap->fd >= 0) {
	949	fp = holdfp(p->p_fd, uap->fd, -1);
	950	if (fp == NULL) {
	951	error = EBADF;
	952	goto done;
	953	}
	954	} else {
	955	fp = NULL;
	956	}
	957
	958	/*
	959	* Execute the internal kernel function and clean up.
	960	*/
	961	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result);
	962	if (fp)
	963	fdrop(fp);
	964	if (error == 0 && uap->sysmsg_result > 0)
	965	error = copyout(buf, uap->buf, uap->sysmsg_result);
	966	done:
	967	if (path)
	968	objcache_put(namei_oc, path);
	969	if (ctl)
	970	kfree(ctl, M_TEMP);
	971	if (buf)
	972	kfree(buf, M_TEMP);
	973	return (error);
	974	}
	975
	976	/*
	977	* Execute a mount control operation by resolving the path to a mount point
	978	* and calling vop_mountctl().
	979	*
	980	* Use the mount point from the nch instead of the vnode so nullfs mounts
	981	* can properly spike the VOP.
	982	*/
	983	int
	984	kern_mountctl(const char path, int op, struct file fp,
	985	const void *ctl, int ctllen,
	986	void buf, int buflen, int res)
	987	{
	988	struct vnode *vp;
	989	struct mount *mp;
	990	struct nlookupdata nd;
	991	int error;
	992
	993	*res = 0;
	994	vp = NULL;
	995	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	996	if (error == 0)
	997	error = nlookup(&nd);
	998	if (error == 0)
	999	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	1000	mp = nd.nl_nch.mount;
	1001	nlookup_done(&nd);
	1002	if (error)
	1003	return (error);
	1004
	1005	/*
	1006	* Must be the root of the filesystem
	1007	*/
	1008	if ((vp->v_flag & (VROOT\|VPFSROOT)) == 0) {
	1009	vput(vp);
	1010	return (EINVAL);
	1011	}
	1012	error = vop_mountctl(mp->mnt_vn_use_ops, op, fp, ctl, ctllen,
	1013	buf, buflen, res);
	1014	vput(vp);
	1015	return (error);
	1016	}
	1017
	1018	int
	1019	kern_statfs(struct nlookupdata nd, struct statfs buf)
	1020	{
	1021	struct thread *td = curthread;
	1022	struct proc *p = td->td_proc;
	1023	struct mount *mp;
	1024	struct statfs *sp;
	1025	char fullpath, freepath;
	1026	int error;
	1027
	1028	if ((error = nlookup(nd)) != 0)
	1029	return (error);
	1030	mp = nd->nl_nch.mount;
	1031	sp = &mp->mnt_stat;
	1032	if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0)
	1033	return (error);
	1034
	1035	error = mount_path(p, mp, &fullpath, &freepath);
	1036	if (error)
	1037	return(error);
	1038	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	1039	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	1040	kfree(freepath, M_TEMP);
	1041
	1042	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	1043	bcopy(sp, buf, sizeof(*buf));
	1044	/* Only root should have access to the fsid's. */
	1045	if (priv_check(td, PRIV_ROOT))
	1046	buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	1047	return (0);
	1048	}
	1049
	1050	/*
	1051	* statfs_args(char path, struct statfs buf)
	1052	*
	1053	* Get filesystem statistics.
	1054	*/
	1055	int
	1056	sys_statfs(struct statfs_args *uap)
	1057	{
	1058	struct nlookupdata nd;
	1059	struct statfs buf;
	1060	int error;
	1061
	1062	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1063	if (error == 0)
	1064	error = kern_statfs(&nd, &buf);
	1065	nlookup_done(&nd);
	1066	if (error == 0)
	1067	error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	1068	return (error);
	1069	}
	1070
	1071	int
	1072	kern_fstatfs(int fd, struct statfs *buf)
	1073	{
	1074	struct thread *td = curthread;
	1075	struct proc *p = td->td_proc;
	1076	struct file *fp;
	1077	struct mount *mp;
	1078	struct statfs *sp;
	1079	char fullpath, freepath;
	1080	int error;
	1081
	1082	KKASSERT(p);
	1083	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	1084	return (error);
	1085	mp = ((struct vnode *)fp->f_data)->v_mount;
	1086	if (mp == NULL) {
	1087	error = EBADF;
	1088	goto done;
	1089	}
	1090	if (fp->f_cred == NULL) {
	1091	error = EINVAL;
	1092	goto done;
	1093	}
	1094	sp = &mp->mnt_stat;
	1095	if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0)
	1096	goto done;
	1097
	1098	if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
	1099	goto done;
	1100	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	1101	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	1102	kfree(freepath, M_TEMP);
	1103
	1104	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	1105	bcopy(sp, buf, sizeof(*buf));
	1106
	1107	/* Only root should have access to the fsid's. */
	1108	if (priv_check(td, PRIV_ROOT))
	1109	buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	1110	error = 0;
	1111	done:
	1112	fdrop(fp);
	1113	return (error);
	1114	}
	1115
	1116	/*
	1117	* fstatfs_args(int fd, struct statfs *buf)
	1118	*
	1119	* Get filesystem statistics.
	1120	*/
	1121	int
	1122	sys_fstatfs(struct fstatfs_args *uap)
	1123	{
	1124	struct statfs buf;
	1125	int error;
	1126
	1127	error = kern_fstatfs(uap->fd, &buf);
	1128
	1129	if (error == 0)
	1130	error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	1131	return (error);
	1132	}
	1133
	1134	int
	1135	kern_statvfs(struct nlookupdata nd, struct statvfs buf)
	1136	{
	1137	struct mount *mp;
	1138	struct statvfs *sp;
	1139	int error;
	1140
	1141	if ((error = nlookup(nd)) != 0)
	1142	return (error);
	1143	mp = nd->nl_nch.mount;
	1144	sp = &mp->mnt_vstat;
	1145	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
	1146	return (error);
	1147
	1148	sp->f_flag = 0;
	1149	if (mp->mnt_flag & MNT_RDONLY)
	1150	sp->f_flag \|= ST_RDONLY;
	1151	if (mp->mnt_flag & MNT_NOSUID)
	1152	sp->f_flag \|= ST_NOSUID;
	1153	bcopy(sp, buf, sizeof(*buf));
	1154	return (0);
	1155	}
	1156
	1157	/*
	1158	* statfs_args(char path, struct statfs buf)
	1159	*
	1160	* Get filesystem statistics.
	1161	*/
	1162	int
	1163	sys_statvfs(struct statvfs_args *uap)
	1164	{
	1165	struct nlookupdata nd;
	1166	struct statvfs buf;
	1167	int error;
	1168
	1169	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1170	if (error == 0)
	1171	error = kern_statvfs(&nd, &buf);
	1172	nlookup_done(&nd);
	1173	if (error == 0)
	1174	error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	1175	return (error);
	1176	}
	1177
	1178	int
	1179	kern_fstatvfs(int fd, struct statvfs *buf)
	1180	{
	1181	struct thread *td = curthread;
	1182	struct proc *p = td->td_proc;
	1183	struct file *fp;
	1184	struct mount *mp;
	1185	struct statvfs *sp;
	1186	int error;
	1187
	1188	KKASSERT(p);
	1189	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	1190	return (error);
	1191	mp = ((struct vnode *)fp->f_data)->v_mount;
	1192	if (mp == NULL) {
	1193	error = EBADF;
	1194	goto done;
	1195	}
	1196	if (fp->f_cred == NULL) {
	1197	error = EINVAL;
	1198	goto done;
	1199	}
	1200	sp = &mp->mnt_vstat;
	1201	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
	1202	goto done;
	1203
	1204	sp->f_flag = 0;
	1205	if (mp->mnt_flag & MNT_RDONLY)
	1206	sp->f_flag \|= ST_RDONLY;
	1207	if (mp->mnt_flag & MNT_NOSUID)
	1208	sp->f_flag \|= ST_NOSUID;
	1209
	1210	bcopy(sp, buf, sizeof(*buf));
	1211	error = 0;
	1212	done:
	1213	fdrop(fp);
	1214	return (error);
	1215	}
	1216
	1217	/*
	1218	* fstatfs_args(int fd, struct statfs *buf)
	1219	*
	1220	* Get filesystem statistics.
	1221	*/
	1222	int
	1223	sys_fstatvfs(struct fstatvfs_args *uap)
	1224	{
	1225	struct statvfs buf;
	1226	int error;
	1227
	1228	error = kern_fstatvfs(uap->fd, &buf);
	1229
	1230	if (error == 0)
	1231	error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	1232	return (error);
	1233	}
	1234
	/*
	 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
	 *
	 * Get statistics on all filesystems.
	 */

	/* State shared between sys_getfsstat() and getfsstat_callback(). */
	struct getfsstat_info {
		struct statfs *sfsp;	/* next user-space slot to fill */
		long count;		/* mounts counted so far */
		long maxcount;		/* slots available in the user buffer */
		int error;		/* error handed back by sys_getfsstat() */
		int flags;		/* MNT_* flags supplied by the caller */
		struct proc *p;		/* calling process */
	};

	static int getfsstat_callback(struct mount *, void *);
	1251
	1252	/* ARGSUSED */
	1253	int
	1254	sys_getfsstat(struct getfsstat_args *uap)
	1255	{
	1256	struct thread *td = curthread;
	1257	struct proc *p = td->td_proc;
	1258	struct getfsstat_info info;
	1259
	1260	bzero(&info, sizeof(info));
	1261
	1262	info.maxcount = uap->bufsize / sizeof(struct statfs);
	1263	info.sfsp = uap->buf;
	1264	info.count = 0;
	1265	info.flags = uap->flags;
	1266	info.p = p;
	1267
	1268	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	1269	if (info.sfsp && info.count > info.maxcount)
	1270	uap->sysmsg_result = info.maxcount;
	1271	else
	1272	uap->sysmsg_result = info.count;
	1273	return (info.error);
	1274	}
	1275
	1276	static int
	1277	getfsstat_callback(struct mount mp, void data)
	1278	{
	1279	struct getfsstat_info *info = data;
	1280	struct statfs *sp;
	1281	char *freepath;
	1282	char *fullpath;
	1283	int error;
	1284
	1285	if (info->sfsp && info->count < info->maxcount) {
	1286	if (info->p && !chroot_visible_mnt(mp, info->p))
	1287	return(0);
	1288	sp = &mp->mnt_stat;
	1289
	1290	/*
	1291	* If MNT_NOWAIT or MNT_LAZY is specified, do not
	1292	* refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
	1293	* overrides MNT_WAIT.
	1294	*/
	1295	if (((info->flags & (MNT_LAZY\|MNT_NOWAIT)) == 0 \|\|
	1296	(info->flags & MNT_WAIT)) &&
	1297	(error = VFS_STATFS(mp, sp, info->p->p_ucred))) {
	1298	return(0);
	1299	}
	1300	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	1301
	1302	error = mount_path(info->p, mp, &fullpath, &freepath);
	1303	if (error) {
	1304	info->error = error;
	1305	return(-1);
	1306	}
	1307	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	1308	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	1309	kfree(freepath, M_TEMP);
	1310
	1311	error = copyout(sp, info->sfsp, sizeof(*sp));
	1312	if (error) {
	1313	info->error = error;
	1314	return (-1);
	1315	}
	1316	++info->sfsp;
	1317	}
	1318	info->count++;
	1319	return(0);
	1320	}
	1321
	/*
	 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
	 *		   long vbufsize, int flags)
	 *
	 * Get statistics on all filesystems.
	 */

	/* State shared between sys_getvfsstat() and getvfsstat_callback(). */
	struct getvfsstat_info {
		struct statfs *sfsp;	/* next user-space statfs slot */
		struct statvfs *vsfsp;	/* next user-space statvfs slot */
		long count;		/* mounts counted so far */
		long maxcount;		/* statvfs slots available (from vbufsize) */
		int error;		/* error handed back by sys_getvfsstat() */
		int flags;		/* MNT_* flags supplied by the caller */
		struct proc *p;		/* calling process */
	};

	static int getvfsstat_callback(struct mount *, void *);
	1340
	1341	/* ARGSUSED */
	1342	int
	1343	sys_getvfsstat(struct getvfsstat_args *uap)
	1344	{
	1345	struct thread *td = curthread;
	1346	struct proc *p = td->td_proc;
	1347	struct getvfsstat_info info;
	1348
	1349	bzero(&info, sizeof(info));
	1350
	1351	info.maxcount = uap->vbufsize / sizeof(struct statvfs);
	1352	info.sfsp = uap->buf;
	1353	info.vsfsp = uap->vbuf;
	1354	info.count = 0;
	1355	info.flags = uap->flags;
	1356	info.p = p;
	1357
	1358	mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
	1359	if (info.vsfsp && info.count > info.maxcount)
	1360	uap->sysmsg_result = info.maxcount;
	1361	else
	1362	uap->sysmsg_result = info.count;
	1363	return (info.error);
	1364	}
	1365
	1366	static int
	1367	getvfsstat_callback(struct mount mp, void data)
	1368	{
	1369	struct getvfsstat_info *info = data;
	1370	struct statfs *sp;
	1371	struct statvfs *vsp;
	1372	char *freepath;
	1373	char *fullpath;
	1374	int error;
	1375
	1376	if (info->vsfsp && info->count < info->maxcount) {
	1377	if (info->p && !chroot_visible_mnt(mp, info->p))
	1378	return(0);
	1379	sp = &mp->mnt_stat;
	1380	vsp = &mp->mnt_vstat;
	1381
	1382	/*
	1383	* If MNT_NOWAIT or MNT_LAZY is specified, do not
	1384	* refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
	1385	* overrides MNT_WAIT.
	1386	*/
	1387	if (((info->flags & (MNT_LAZY\|MNT_NOWAIT)) == 0 \|\|
	1388	(info->flags & MNT_WAIT)) &&
	1389	(error = VFS_STATFS(mp, sp, info->p->p_ucred))) {
	1390	return(0);
	1391	}
	1392	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	1393
	1394	if (((info->flags & (MNT_LAZY\|MNT_NOWAIT)) == 0 \|\|
	1395	(info->flags & MNT_WAIT)) &&
	1396	(error = VFS_STATVFS(mp, vsp, info->p->p_ucred))) {
	1397	return(0);
	1398	}
	1399	vsp->f_flag = 0;
	1400	if (mp->mnt_flag & MNT_RDONLY)
	1401	vsp->f_flag \|= ST_RDONLY;
	1402	if (mp->mnt_flag & MNT_NOSUID)
	1403	vsp->f_flag \|= ST_NOSUID;
	1404
	1405	error = mount_path(info->p, mp, &fullpath, &freepath);
	1406	if (error) {
	1407	info->error = error;
	1408	return(-1);
	1409	}
	1410	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	1411	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	1412	kfree(freepath, M_TEMP);
	1413
	1414	error = copyout(sp, info->sfsp, sizeof(*sp));
	1415	if (error == 0)
	1416	error = copyout(vsp, info->vsfsp, sizeof(*vsp));
	1417	if (error) {
	1418	info->error = error;
	1419	return (-1);
	1420	}
	1421	++info->sfsp;
	1422	++info->vsfsp;
	1423	}
	1424	info->count++;
	1425	return(0);
	1426	}
	1427
	1428
	1429	/*
	1430	* fchdir_args(int fd)
	1431	*
	1432	* Change current working directory to a given file descriptor.
	1433	*/
	1434	/* ARGSUSED */
	1435	int
	1436	sys_fchdir(struct fchdir_args *uap)
	1437	{
	1438	struct thread *td = curthread;
	1439	struct proc *p = td->td_proc;
	1440	struct filedesc *fdp = p->p_fd;
	1441	struct vnode vp, ovp;
	1442	struct mount *mp;
	1443	struct file *fp;
	1444	struct nchandle nch, onch, tnch;
	1445	int error;
	1446
	1447	if ((error = holdvnode(fdp, uap->fd, &fp)) != 0)
	1448	return (error);
	1449	vp = (struct vnode *)fp->f_data;
	1450	vref(vp);
	1451	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	1452	if (vp->v_type != VDIR \|\| fp->f_nchandle.ncp == NULL)
	1453	error = ENOTDIR;
	1454	else
	1455	error = VOP_ACCESS(vp, VEXEC, p->p_ucred);
	1456	if (error) {
	1457	vput(vp);
	1458	fdrop(fp);
	1459	return (error);
	1460	}
	1461	cache_copy(&fp->f_nchandle, &nch);
	1462
	1463	/*
	1464	* If the ncp has become a mount point, traverse through
	1465	* the mount point.
	1466	*/
	1467
	1468	while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	1469	(mp = cache_findmount(&nch)) != NULL
	1470	) {
	1471	error = nlookup_mp(mp, &tnch);
	1472	if (error == 0) {
	1473	cache_unlock(&tnch); /* leave ref intact */
	1474	vput(vp);
	1475	vp = tnch.ncp->nc_vp;
	1476	error = vget(vp, LK_SHARED);
	1477	KKASSERT(error == 0);
	1478	cache_drop(&nch);
	1479	nch = tnch;
	1480	}
	1481	}
	1482	if (error == 0) {
	1483	ovp = fdp->fd_cdir;
	1484	onch = fdp->fd_ncdir;
	1485	vn_unlock(vp); /* leave ref intact */
	1486	fdp->fd_cdir = vp;
	1487	fdp->fd_ncdir = nch;
	1488	cache_drop(&onch);
	1489	vrele(ovp);
	1490	} else {
	1491	cache_drop(&nch);
	1492	vput(vp);
	1493	}
	1494	fdrop(fp);
	1495	return (error);
	1496	}
	1497
	1498	int
	1499	kern_chdir(struct nlookupdata *nd)
	1500	{
	1501	struct thread *td = curthread;
	1502	struct proc *p = td->td_proc;
	1503	struct filedesc *fdp = p->p_fd;
	1504	struct vnode vp, ovp;
	1505	struct nchandle onch;
	1506	int error;
	1507
	1508	if ((error = nlookup(nd)) != 0)
	1509	return (error);
	1510	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
	1511	return (ENOENT);
	1512	if ((error = vget(vp, LK_SHARED)) != 0)
	1513	return (error);
	1514
	1515	error = checkvp_chdir(vp, td);
	1516	vn_unlock(vp);
	1517	if (error == 0) {
	1518	ovp = fdp->fd_cdir;
	1519	onch = fdp->fd_ncdir;
	1520	cache_unlock(&nd->nl_nch); /* leave reference intact */
	1521	fdp->fd_ncdir = nd->nl_nch;
	1522	fdp->fd_cdir = vp;
	1523	cache_drop(&onch);
	1524	vrele(ovp);
	1525	cache_zero(&nd->nl_nch);
	1526	} else {
	1527	vrele(vp);
	1528	}
	1529	return (error);
	1530	}
	1531
	1532	/*
	1533	* chdir_args(char *path)
	1534	*
	1535	* Change current working directory (``.'').
	1536	*/
	1537	int
	1538	sys_chdir(struct chdir_args *uap)
	1539	{
	1540	struct nlookupdata nd;
	1541	int error;
	1542
	1543	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1544	if (error == 0)
	1545	error = kern_chdir(&nd);
	1546	nlookup_done(&nd);
	1547	return (error);
	1548	}
	1549
	1550	/*
	1551	* Helper function for raised chroot(2) security function: Refuse if
	1552	* any filedescriptors are open directories.
	1553	*/
	1554	static int
	1555	chroot_refuse_vdir_fds(struct filedesc *fdp)
	1556	{
	1557	struct vnode *vp;
	1558	struct file *fp;
	1559	int error;
	1560	int fd;
	1561
	1562	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
	1563	if ((error = holdvnode(fdp, fd, &fp)) != 0)
	1564	continue;
	1565	vp = (struct vnode *)fp->f_data;
	1566	if (vp->v_type != VDIR) {
	1567	fdrop(fp);
	1568	continue;
	1569	}
	1570	fdrop(fp);
	1571	return(EPERM);
	1572	}
	1573	return (0);
	1574	}
	1575
	1576	/*
	1577	* This sysctl determines if we will allow a process to chroot(2) if it
	1578	* has a directory open:
	1579	* 0: disallowed for all processes.
	1580	* 1: allowed for processes that were not already chroot(2)'ed.
	1581	* 2: allowed for all processes.
	1582	*/
	1583
	1584	static int chroot_allow_open_directories = 1;
	1585
	1586	SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
	1587	&chroot_allow_open_directories, 0, "");
	1588
	1589	/*
	1590	* chroot to the specified namecache entry. We obtain the vp from the
	1591	* namecache data. The passed ncp must be locked and referenced and will
	1592	* remain locked and referenced on return.
	1593	*/
	1594	int
	1595	kern_chroot(struct nchandle *nch)
	1596	{
	1597	struct thread *td = curthread;
	1598	struct proc *p = td->td_proc;
	1599	struct filedesc *fdp = p->p_fd;
	1600	struct vnode *vp;
	1601	int error;
	1602
	1603	/*
	1604	* Only root can chroot
	1605	*/
	1606	if ((error = priv_check_cred(p->p_ucred, PRIV_ROOT, PRISON_ROOT)) != 0)
	1607	return (error);
	1608
	1609	/*
	1610	* Disallow open directory descriptors (fchdir() breakouts).
	1611	*/
	1612	if (chroot_allow_open_directories == 0 \|\|
	1613	(chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
	1614	if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
	1615	return (error);
	1616	}
	1617	if ((vp = nch->ncp->nc_vp) == NULL)
	1618	return (ENOENT);
	1619
	1620	if ((error = vget(vp, LK_SHARED)) != 0)
	1621	return (error);
	1622
	1623	/*
	1624	* Check the validity of vp as a directory to change to and
	1625	* associate it with rdir/jdir.
	1626	*/
	1627	error = checkvp_chdir(vp, td);
	1628	vn_unlock(vp); /* leave reference intact */
	1629	if (error == 0) {
	1630	vrele(fdp->fd_rdir);
	1631	fdp->fd_rdir = vp; /* reference inherited by fd_rdir */
	1632	cache_drop(&fdp->fd_nrdir);
	1633	cache_copy(nch, &fdp->fd_nrdir);
	1634	if (fdp->fd_jdir == NULL) {
	1635	fdp->fd_jdir = vp;
	1636	vref(fdp->fd_jdir);
	1637	cache_copy(nch, &fdp->fd_njdir);
	1638	}
	1639	} else {
	1640	vrele(vp);
	1641	}
	1642	return (error);
	1643	}
	1644
	1645	/*
	1646	* chroot_args(char *path)
	1647	*
	1648	* Change notion of root (``/'') directory.
	1649	*/
	1650	/* ARGSUSED */
	1651	int
	1652	sys_chroot(struct chroot_args *uap)
	1653	{
	1654	struct thread *td = curthread;
	1655	struct nlookupdata nd;
	1656	int error;
	1657
	1658	KKASSERT(td->td_proc);
	1659	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1660	if (error) {
	1661	nlookup_done(&nd);
	1662	return(error);
	1663	}
	1664	error = nlookup(&nd);
	1665	if (error == 0)
	1666	error = kern_chroot(&nd.nl_nch);
	1667	nlookup_done(&nd);
	1668	return(error);
	1669	}
	1670
	1671	/*
	1672	* Common routine for chroot and chdir. Given a locked, referenced vnode,
	1673	* determine whether it is legal to chdir to the vnode. The vnode's state
	1674	* is not changed by this call.
	1675	*/
	1676	int
	1677	checkvp_chdir(struct vnode vp, struct thread td)
	1678	{
	1679	int error;
	1680
	1681	if (vp->v_type != VDIR)
	1682	error = ENOTDIR;
	1683	else
	1684	error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred);
	1685	return (error);
	1686	}
	1687
	1688	int
	1689	kern_open(struct nlookupdata nd, int oflags, int mode, int res)
	1690	{
	1691	struct thread *td = curthread;
	1692	struct proc *p = td->td_proc;
	1693	struct lwp *lp = td->td_lwp;
	1694	struct filedesc *fdp = p->p_fd;
	1695	int cmode, flags;
	1696	struct file *nfp;
	1697	struct file *fp;
	1698	struct vnode *vp;
	1699	int type, indx, error;
	1700	struct flock lf;
	1701
	1702	if ((oflags & O_ACCMODE) == O_ACCMODE)
	1703	return (EINVAL);
	1704	flags = FFLAGS(oflags);
	1705	error = falloc(p, &nfp, NULL);
	1706	if (error)
	1707	return (error);
	1708	fp = nfp;
	1709	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	1710
	1711	/*
	1712	* XXX p_dupfd is a real mess. It allows a device to return a
	1713	* file descriptor to be duplicated rather then doing the open
	1714	* itself.
	1715	*/
	1716	lp->lwp_dupfd = -1;
	1717
	1718	/*
	1719	* Call vn_open() to do the lookup and assign the vnode to the
	1720	* file pointer. vn_open() does not change the ref count on fp
	1721	* and the vnode, on success, will be inherited by the file pointer
	1722	* and unlocked.
	1723	*/
	1724	nd->nl_flags \|= NLC_LOCKVP;
	1725	error = vn_open(nd, fp, flags, cmode);
	1726	nlookup_done(nd);
	1727	if (error) {
	1728	/*
	1729	* handle special fdopen() case. bleh. dupfdopen() is
	1730	* responsible for dropping the old contents of ofiles[indx]
	1731	* if it succeeds.
	1732	*
	1733	* Note that fsetfd() will add a ref to fp which represents
	1734	* the fd_files[] assignment. We must still drop our
	1735	* reference.
	1736	*/
	1737	if ((error == ENODEV \|\| error == ENXIO) && lp->lwp_dupfd >= 0) {
	1738	if (fdalloc(p, 0, &indx) == 0) {
	1739	error = dupfdopen(p, indx, lp->lwp_dupfd, flags, error);
	1740	if (error == 0) {
	1741	*res = indx;
	1742	fdrop(fp); /* our ref */
	1743	return (0);
	1744	}
	1745	fsetfd(p, NULL, indx);
	1746	}
	1747	}
	1748	fdrop(fp); /* our ref */
	1749	if (error == ERESTART)
	1750	error = EINTR;
	1751	return (error);
	1752	}
	1753
	1754	/*
	1755	* ref the vnode for ourselves so it can't be ripped out from under
	1756	* is. XXX need an ND flag to request that the vnode be returned
	1757	* anyway.
	1758	*
	1759	* Reserve a file descriptor but do not assign it until the open
	1760	* succeeds.
	1761	*/
	1762	vp = (struct vnode *)fp->f_data;
	1763	vref(vp);
	1764	if ((error = fdalloc(p, 0, &indx)) != 0) {
	1765	fdrop(fp);
	1766	vrele(vp);
	1767	return (error);
	1768	}
	1769
	1770	/*
	1771	* If no error occurs the vp will have been assigned to the file
	1772	* pointer.
	1773	*/
	1774	lp->lwp_dupfd = 0;
	1775
	1776	if (flags & (O_EXLOCK \| O_SHLOCK)) {
	1777	lf.l_whence = SEEK_SET;
	1778	lf.l_start = 0;
	1779	lf.l_len = 0;
	1780	if (flags & O_EXLOCK)
	1781	lf.l_type = F_WRLCK;
	1782	else
	1783	lf.l_type = F_RDLCK;
	1784	if (flags & FNONBLOCK)
	1785	type = 0;
	1786	else
	1787	type = F_WAIT;
	1788
	1789	if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
	1790	/*
	1791	* lock request failed. Clean up the reserved
	1792	* descriptor.
	1793	*/
	1794	vrele(vp);
	1795	fsetfd(p, NULL, indx);
	1796	fdrop(fp);
	1797	return (error);
	1798	}
	1799	fp->f_flag \|= FHASLOCK;
	1800	}
	1801	#if 0
	1802	/*
	1803	* Assert that all regular file vnodes were created with a object.
	1804	*/
	1805	KASSERT(vp->v_type != VREG \|\| vp->v_object != NULL,
	1806	("open: regular file has no backing object after vn_open"));
	1807	#endif
	1808
	1809	vrele(vp);
	1810
	1811	/*
	1812	* release our private reference, leaving the one associated with the
	1813	* descriptor table intact.
	1814	*/
	1815	fsetfd(p, fp, indx);
	1816	fdrop(fp);
	1817	*res = indx;
	1818	return (0);
	1819	}
	1820
	1821	/*
	1822	* open_args(char *path, int flags, int mode)
	1823	*
	1824	* Check permissions, allocate an open file structure,
	1825	* and call the device open routine if any.
	1826	*/
	1827	int
	1828	sys_open(struct open_args *uap)
	1829	{
	1830	struct nlookupdata nd;
	1831	int error;
	1832
	1833	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1834	if (error == 0) {
	1835	error = kern_open(&nd, uap->flags,
	1836	uap->mode, &uap->sysmsg_result);
	1837	}
	1838	nlookup_done(&nd);
	1839	return (error);
	1840	}
	1841
	1842	int
	1843	kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
	1844	{
	1845	struct thread *td = curthread;
	1846	struct proc *p = td->td_proc;
	1847	struct vnode *vp;
	1848	struct vattr vattr;
	1849	int error;
	1850	int whiteout = 0;
	1851
	1852	KKASSERT(p);
	1853
	1854	switch (mode & S_IFMT) {
	1855	case S_IFCHR:
	1856	case S_IFBLK:
	1857	error = priv_check(td, PRIV_ROOT);
	1858	break;
	1859	default:
	1860	error = priv_check_cred(p->p_ucred, PRIV_ROOT, PRISON_ROOT);
	1861	break;
	1862	}
	1863	if (error)
	1864	return (error);
	1865
	1866	bwillinode(1);
	1867	nd->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	1868	if ((error = nlookup(nd)) != 0)
	1869	return (error);
	1870	if (nd->nl_nch.ncp->nc_vp)
	1871	return (EEXIST);
	1872	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	1873	return (error);
	1874
	1875	VATTR_NULL(&vattr);
	1876	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	1877	vattr.va_rmajor = rmajor;
	1878	vattr.va_rminor = rminor;
	1879	whiteout = 0;
	1880
	1881	switch (mode & S_IFMT) {
	1882	case S_IFMT: /* used by badsect to flag bad sectors */
	1883	vattr.va_type = VBAD;
	1884	break;
	1885	case S_IFCHR:
	1886	vattr.va_type = VCHR;
	1887	break;
	1888	case S_IFBLK:
	1889	vattr.va_type = VBLK;
	1890	break;
	1891	case S_IFWHT:
	1892	whiteout = 1;
	1893	break;
	1894	case S_IFDIR:
	1895	/* special directories support for HAMMER */
	1896	vattr.va_type = VDIR;
	1897	break;
	1898	default:
	1899	error = EINVAL;
	1900	break;
	1901	}
	1902	if (error == 0) {
	1903	if (whiteout) {
	1904	error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
	1905	nd->nl_cred, NAMEI_CREATE);
	1906	} else {
	1907	vp = NULL;
	1908	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
	1909	&vp, nd->nl_cred, &vattr);
	1910	if (error == 0)
	1911	vput(vp);
	1912	}
	1913	}
	1914	return (error);
	1915	}
	1916
	1917	/*
	1918	* mknod_args(char *path, int mode, int dev)
	1919	*
	1920	* Create a special file.
	1921	*/
	1922	int
	1923	sys_mknod(struct mknod_args *uap)
	1924	{
	1925	struct nlookupdata nd;
	1926	int error;
	1927
	1928	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	1929	if (error == 0) {
	1930	error = kern_mknod(&nd, uap->mode,
	1931	umajor(uap->dev), uminor(uap->dev));
	1932	}
	1933	nlookup_done(&nd);
	1934	return (error);
	1935	}
	1936
	1937	int
	1938	kern_mkfifo(struct nlookupdata *nd, int mode)
	1939	{
	1940	struct thread *td = curthread;
	1941	struct proc *p = td->td_proc;
	1942	struct vattr vattr;
	1943	struct vnode *vp;
	1944	int error;
	1945
	1946	bwillinode(1);
	1947
	1948	nd->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	1949	if ((error = nlookup(nd)) != 0)
	1950	return (error);
	1951	if (nd->nl_nch.ncp->nc_vp)
	1952	return (EEXIST);
	1953	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	1954	return (error);
	1955
	1956	VATTR_NULL(&vattr);
	1957	vattr.va_type = VFIFO;
	1958	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	1959	vp = NULL;
	1960	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
	1961	if (error == 0)
	1962	vput(vp);
	1963	return (error);
	1964	}
	1965
	1966	/*
	1967	* mkfifo_args(char *path, int mode)
	1968	*
	1969	* Create a named pipe.
	1970	*/
	1971	int
	1972	sys_mkfifo(struct mkfifo_args *uap)
	1973	{
	1974	struct nlookupdata nd;
	1975	int error;
	1976
	1977	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	1978	if (error == 0)
	1979	error = kern_mkfifo(&nd, uap->mode);
	1980	nlookup_done(&nd);
	1981	return (error);
	1982	}
	1983
	1984	static int hardlink_check_uid = 0;
	1985	SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
	1986	&hardlink_check_uid, 0,
	1987	"Unprivileged processes cannot create hard links to files owned by other "
	1988	"users");
	1989	static int hardlink_check_gid = 0;
	1990	SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
	1991	&hardlink_check_gid, 0,
	1992	"Unprivileged processes cannot create hard links to files owned by other "
	1993	"groups");
	1994
	1995	static int
	1996	can_hardlink(struct vnode vp, struct thread td, struct ucred *cred)
	1997	{
	1998	struct vattr va;
	1999	int error;
	2000
	2001	/*
	2002	* Shortcut if disabled
	2003	*/
	2004	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
	2005	return (0);
	2006
	2007	/*
	2008	* root cred can always hardlink
	2009	*/
	2010	if (priv_check_cred(cred, PRIV_ROOT, PRISON_ROOT) == 0)
	2011	return (0);
	2012
	2013	/*
	2014	* Otherwise only if the originating file is owned by the
	2015	* same user or group. Note that any group is allowed if
	2016	* the file is owned by the caller.
	2017	*/
	2018	error = VOP_GETATTR(vp, &va);
	2019	if (error != 0)
	2020	return (error);
	2021
	2022	if (hardlink_check_uid) {
	2023	if (cred->cr_uid != va.va_uid)
	2024	return (EPERM);
	2025	}
	2026
	2027	if (hardlink_check_gid) {
	2028	if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
	2029	return (EPERM);
	2030	}
	2031
	2032	return (0);
	2033	}
	2034
	2035	int
	2036	kern_link(struct nlookupdata nd, struct nlookupdata linknd)
	2037	{
	2038	struct thread *td = curthread;
	2039	struct vnode *vp;
	2040	int error;
	2041
	2042	/*
	2043	* Lookup the source and obtained a locked vnode.
	2044	*
	2045	* XXX relookup on vget failure / race ?
	2046	*/
	2047	bwillinode(1);
	2048	if ((error = nlookup(nd)) != 0)
	2049	return (error);
	2050	vp = nd->nl_nch.ncp->nc_vp;
	2051	KKASSERT(vp != NULL);
	2052	if (vp->v_type == VDIR)
	2053	return (EPERM); /* POSIX */
	2054	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	2055	return (error);
	2056	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
	2057	return (error);
	2058
	2059	/*
	2060	* Unlock the source so we can lookup the target without deadlocking
	2061	* (XXX vp is locked already, possible other deadlock?). The target
	2062	* must not exist.
	2063	*/
	2064	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	2065	nd->nl_flags &= ~NLC_NCPISLOCKED;
	2066	cache_unlock(&nd->nl_nch);
	2067
	2068	linknd->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	2069	if ((error = nlookup(linknd)) != 0) {
	2070	vput(vp);
	2071	return (error);
	2072	}
	2073	if (linknd->nl_nch.ncp->nc_vp) {
	2074	vput(vp);
	2075	return (EEXIST);
	2076	}
	2077
	2078	/*
	2079	* Finally run the new API VOP.
	2080	*/
	2081	error = can_hardlink(vp, td, td->td_proc->p_ucred);
	2082	if (error == 0) {
	2083	error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
	2084	vp, linknd->nl_cred);
	2085	}
	2086	vput(vp);
	2087	return (error);
	2088	}
	2089
	2090	/*
	2091	* link_args(char path, char link)
	2092	*
	2093	* Make a hard file link.
	2094	*/
	2095	int
	2096	sys_link(struct link_args *uap)
	2097	{
	2098	struct nlookupdata nd, linknd;
	2099	int error;
	2100
	2101	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2102	if (error == 0) {
	2103	error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
	2104	if (error == 0)
	2105	error = kern_link(&nd, &linknd);
	2106	nlookup_done(&linknd);
	2107	}
	2108	nlookup_done(&nd);
	2109	return (error);
	2110	}
	2111
	2112	int
	2113	kern_symlink(struct nlookupdata nd, char path, int mode)
	2114	{
	2115	struct vattr vattr;
	2116	struct vnode *vp;
	2117	struct vnode *dvp;
	2118	int error;
	2119
	2120	bwillinode(1);
	2121	nd->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	2122	if ((error = nlookup(nd)) != 0)
	2123	return (error);
	2124	if (nd->nl_nch.ncp->nc_vp)
	2125	return (EEXIST);
	2126	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	2127	return (error);
	2128	dvp = nd->nl_dvp;
	2129	VATTR_NULL(&vattr);
	2130	vattr.va_mode = mode;
	2131	error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
	2132	if (error == 0)
	2133	vput(vp);
	2134	return (error);
	2135	}
	2136
	2137	/*
	2138	* symlink(char path, char link)
	2139	*
	2140	* Make a symbolic link.
	2141	*/
	2142	int
	2143	sys_symlink(struct symlink_args *uap)
	2144	{
	2145	struct thread *td = curthread;
	2146	struct nlookupdata nd;
	2147	char *path;
	2148	int error;
	2149	int mode;
	2150
	2151	path = objcache_get(namei_oc, M_WAITOK);
	2152	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	2153	if (error == 0) {
	2154	error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
	2155	if (error == 0) {
	2156	mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
	2157	error = kern_symlink(&nd, path, mode);
	2158	}
	2159	nlookup_done(&nd);
	2160	}
	2161	objcache_put(namei_oc, path);
	2162	return (error);
	2163	}
	2164
	2165	/*
	2166	* undelete_args(char *path)
	2167	*
	2168	* Delete a whiteout from the filesystem.
	2169	*/
	2170	/* ARGSUSED */
	2171	int
	2172	sys_undelete(struct undelete_args *uap)
	2173	{
	2174	struct nlookupdata nd;
	2175	int error;
	2176
	2177	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2178	bwillinode(1);
	2179	nd.nl_flags \|= NLC_DELETE \| NLC_REFDVP;
	2180	if (error == 0)
	2181	error = nlookup(&nd);
	2182	if (error == 0)
	2183	error = ncp_writechk(&nd.nl_nch);
	2184	if (error == 0) {
	2185	error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred,
	2186	NAMEI_DELETE);
	2187	}
	2188	nlookup_done(&nd);
	2189	return (error);
	2190	}
	2191
	2192	int
	2193	kern_unlink(struct nlookupdata *nd)
	2194	{
	2195	int error;
	2196
	2197	bwillinode(1);
	2198	nd->nl_flags \|= NLC_DELETE \| NLC_REFDVP;
	2199	if ((error = nlookup(nd)) != 0)
	2200	return (error);
	2201	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	2202	return (error);
	2203	error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	2204	return (error);
	2205	}
	2206
	2207	/*
	2208	* unlink_args(char *path)
	2209	*
	2210	* Delete a name from the filesystem.
	2211	*/
	2212	int
	2213	sys_unlink(struct unlink_args *uap)
	2214	{
	2215	struct nlookupdata nd;
	2216	int error;
	2217
	2218	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2219	if (error == 0)
	2220	error = kern_unlink(&nd);
	2221	nlookup_done(&nd);
	2222	return (error);
	2223	}
	2224
	2225	int
	2226	kern_lseek(int fd, off_t offset, int whence, off_t *res)
	2227	{
	2228	struct thread *td = curthread;
	2229	struct proc *p = td->td_proc;
	2230	struct file *fp;
	2231	struct vnode *vp;
	2232	struct vattr vattr;
	2233	off_t new_offset;
	2234	int error;
	2235
	2236	fp = holdfp(p->p_fd, fd, -1);
	2237	if (fp == NULL)
	2238	return (EBADF);
	2239	if (fp->f_type != DTYPE_VNODE) {
	2240	error = ESPIPE;
	2241	goto done;
	2242	}
	2243	vp = (struct vnode *)fp->f_data;
	2244
	2245	switch (whence) {
	2246	case L_INCR:
	2247	new_offset = fp->f_offset + offset;
	2248	error = 0;
	2249	break;
	2250	case L_XTND:
	2251	error = VOP_GETATTR(vp, &vattr);
	2252	new_offset = offset + vattr.va_size;
	2253	break;
	2254	case L_SET:
	2255	new_offset = offset;
	2256	error = 0;
	2257	break;
	2258	default:
	2259	new_offset = 0;
	2260	error = EINVAL;
	2261	break;
	2262	}
	2263
	2264	/*
	2265	* Validate the seek position. Negative offsets are not allowed
	2266	* for regular files, block specials, or directories.
	2267	*/
	2268	if (error == 0) {
	2269	if (new_offset < 0 &&
	2270	(vp->v_type == VREG \|\| vp->v_type == VDIR \|\|
	2271	vp->v_type == VCHR \|\| vp->v_type == VBLK)) {
	2272	error = EINVAL;
	2273	} else {
	2274	fp->f_offset = new_offset;
	2275	}
	2276	}
	2277	*res = fp->f_offset;
	2278	done:
	2279	fdrop(fp);
	2280	return (error);
	2281	}
	2282
	2283	/*
	2284	* lseek_args(int fd, int pad, off_t offset, int whence)
	2285	*
	2286	* Reposition read/write file offset.
	2287	*/
	2288	int
	2289	sys_lseek(struct lseek_args *uap)
	2290	{
	2291	int error;
	2292
	2293	error = kern_lseek(uap->fd, uap->offset, uap->whence,
	2294	&uap->sysmsg_offset);
	2295
	2296	return (error);
	2297	}
	2298
	2299	int
	2300	kern_access(struct nlookupdata *nd, int aflags)
	2301	{
	2302	struct vnode *vp;
	2303	int error, flags;
	2304
	2305	if ((error = nlookup(nd)) != 0)
	2306	return (error);
	2307	retry:
	2308	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp);
	2309	if (error)
	2310	return (error);
	2311
	2312	/* Flags == 0 means only check for existence. */
	2313	if (aflags) {
	2314	flags = 0;
	2315	if (aflags & R_OK)
	2316	flags \|= VREAD;
	2317	if (aflags & W_OK)
	2318	flags \|= VWRITE;
	2319	if (aflags & X_OK)
	2320	flags \|= VEXEC;
	2321	if ((flags & VWRITE) == 0 \|\|
	2322	(error = vn_writechk(vp, &nd->nl_nch)) == 0)
	2323	error = VOP_ACCESS(vp, flags, nd->nl_cred);
	2324
	2325	/*
	2326	* If the file handle is stale we have to re-resolve the
	2327	* entry. This is a hack at the moment.
	2328	*/
	2329	if (error == ESTALE) {
	2330	vput(vp);
	2331	cache_setunresolved(&nd->nl_nch);
	2332	error = cache_resolve(&nd->nl_nch, nd->nl_cred);
	2333	if (error == 0) {
	2334	vp = NULL;
	2335	goto retry;
	2336	}
	2337	return(error);
	2338	}
	2339	}
	2340	vput(vp);
	2341	return (error);
	2342	}
	2343
	2344	/*
	2345	* access_args(char *path, int flags)
	2346	*
	2347	* Check access permissions.
	2348	*/
	2349	int
	2350	sys_access(struct access_args *uap)
	2351	{
	2352	struct nlookupdata nd;
	2353	int error;
	2354
	2355	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2356	if (error == 0)
	2357	error = kern_access(&nd, uap->flags);
	2358	nlookup_done(&nd);
	2359	return (error);
	2360	}
	2361
	2362	int
	2363	kern_stat(struct nlookupdata nd, struct stat st)
	2364	{
	2365	int error;
	2366	struct vnode *vp;
	2367	thread_t td;
	2368
	2369	if ((error = nlookup(nd)) != 0)
	2370	return (error);
	2371	again:
	2372	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
	2373	return (ENOENT);
	2374
	2375	td = curthread;
	2376	if ((error = vget(vp, LK_SHARED)) != 0)
	2377	return (error);
	2378	error = vn_stat(vp, st, nd->nl_cred);
	2379
	2380	/*
	2381	* If the file handle is stale we have to re-resolve the entry. This
	2382	* is a hack at the moment.
	2383	*/
	2384	if (error == ESTALE) {
	2385	vput(vp);
	2386	cache_setunresolved(&nd->nl_nch);
	2387	error = cache_resolve(&nd->nl_nch, nd->nl_cred);
	2388	if (error == 0)
	2389	goto again;
	2390	} else {
	2391	vput(vp);
	2392	}
	2393	return (error);
	2394	}
	2395
	2396	/*
	2397	* stat_args(char path, struct stat ub)
	2398	*
	2399	* Get file status; this version follows links.
	2400	*/
	2401	int
	2402	sys_stat(struct stat_args *uap)
	2403	{
	2404	struct nlookupdata nd;
	2405	struct stat st;
	2406	int error;
	2407
	2408	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2409	if (error == 0) {
	2410	error = kern_stat(&nd, &st);
	2411	if (error == 0)
	2412	error = copyout(&st, uap->ub, sizeof(*uap->ub));
	2413	}
	2414	nlookup_done(&nd);
	2415	return (error);
	2416	}
	2417
	2418	/*
	2419	* lstat_args(char path, struct stat ub)
	2420	*
	2421	* Get file status; this version does not follow links.
	2422	*/
	2423	int
	2424	sys_lstat(struct lstat_args *uap)
	2425	{
	2426	struct nlookupdata nd;
	2427	struct stat st;
	2428	int error;
	2429
	2430	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2431	if (error == 0) {
	2432	error = kern_stat(&nd, &st);
	2433	if (error == 0)
	2434	error = copyout(&st, uap->ub, sizeof(*uap->ub));
	2435	}
	2436	nlookup_done(&nd);
	2437	return (error);
	2438	}
	2439
	2440	/*
	2441	* pathconf_Args(char *path, int name)
	2442	*
	2443	* Get configurable pathname variables.
	2444	*/
	2445	/* ARGSUSED */
	2446	int
	2447	sys_pathconf(struct pathconf_args *uap)
	2448	{
	2449	struct nlookupdata nd;
	2450	struct vnode *vp;
	2451	int error;
	2452
	2453	vp = NULL;
	2454	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2455	if (error == 0)
	2456	error = nlookup(&nd);
	2457	if (error == 0)
	2458	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	2459	nlookup_done(&nd);
	2460	if (error == 0) {
	2461	error = VOP_PATHCONF(vp, uap->name, uap->sysmsg_fds);
	2462	vput(vp);
	2463	}
	2464	return (error);
	2465	}
	2466
	2467	/*
	2468	* XXX: daver
	2469	* kern_readlink isn't properly split yet. There is a copyin burried
	2470	* in VOP_READLINK().
	2471	*/
	2472	int
	2473	kern_readlink(struct nlookupdata nd, char buf, int count, int *res)
	2474	{
	2475	struct thread *td = curthread;
	2476	struct proc *p = td->td_proc;
	2477	struct vnode *vp;
	2478	struct iovec aiov;
	2479	struct uio auio;
	2480	int error;
	2481
	2482	if ((error = nlookup(nd)) != 0)
	2483	return (error);
	2484	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp);
	2485	if (error)
	2486	return (error);
	2487	if (vp->v_type != VLNK) {
	2488	error = EINVAL;
	2489	} else {
	2490	aiov.iov_base = buf;
	2491	aiov.iov_len = count;
	2492	auio.uio_iov = &aiov;
	2493	auio.uio_iovcnt = 1;
	2494	auio.uio_offset = 0;
	2495	auio.uio_rw = UIO_READ;
	2496	auio.uio_segflg = UIO_USERSPACE;
	2497	auio.uio_td = td;
	2498	auio.uio_resid = count;
	2499	error = VOP_READLINK(vp, &auio, p->p_ucred);
	2500	}
	2501	vput(vp);
	2502	*res = count - auio.uio_resid;
	2503	return (error);
	2504	}
	2505
	2506	/*
	2507	* readlink_args(char path, char buf, int count)
	2508	*
	2509	* Return target name of a symbolic link.
	2510	*/
	2511	int
	2512	sys_readlink(struct readlink_args *uap)
	2513	{
	2514	struct nlookupdata nd;
	2515	int error;
	2516
	2517	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2518	if (error == 0) {
	2519	error = kern_readlink(&nd, uap->buf, uap->count,
	2520	&uap->sysmsg_result);
	2521	}
	2522	nlookup_done(&nd);
	2523	return (error);
	2524	}
	2525
	2526	static int
	2527	setfflags(struct vnode *vp, int flags)
	2528	{
	2529	struct thread *td = curthread;
	2530	struct proc *p = td->td_proc;
	2531	int error;
	2532	struct vattr vattr;
	2533
	2534	/*
	2535	* Prevent non-root users from setting flags on devices. When
	2536	* a device is reused, users can retain ownership of the device
	2537	* if they are allowed to set flags and programs assume that
	2538	* chown can't fail when done as root.
	2539	*/
	2540	if ((vp->v_type == VCHR \|\| vp->v_type == VBLK) &&
	2541	((error = priv_check_cred(p->p_ucred, PRIV_ROOT, PRISON_ROOT)) != 0))
	2542	return (error);
	2543
	2544	/*
	2545	* note: vget is required for any operation that might mod the vnode
	2546	* so VINACTIVE is properly cleared.
	2547	*/
	2548	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
	2549	VATTR_NULL(&vattr);
	2550	vattr.va_flags = flags;
	2551	error = VOP_SETATTR(vp, &vattr, p->p_ucred);
	2552	vput(vp);
	2553	}
	2554	return (error);
	2555	}
	2556
	2557	/*
	2558	* chflags(char *path, int flags)
	2559	*
	2560	* Change flags of a file given a path name.
	2561	*/
	2562	/* ARGSUSED */
	2563	int
	2564	sys_chflags(struct chflags_args *uap)
	2565	{
	2566	struct nlookupdata nd;
	2567	struct vnode *vp;
	2568	int error;
	2569
	2570	vp = NULL;
	2571	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2572	/* XXX Add NLC flag indicating modifying operation? */
	2573	if (error == 0)
	2574	error = nlookup(&nd);
	2575	if (error == 0)
	2576	error = ncp_writechk(&nd.nl_nch);
	2577	if (error == 0)
	2578	error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	2579	nlookup_done(&nd);
	2580	if (error == 0) {
	2581	error = setfflags(vp, uap->flags);
	2582	vrele(vp);
	2583	}
	2584	return (error);
	2585	}
	2586
	2587	/*
	2588	* lchflags(char *path, int flags)
	2589	*
	2590	* Change flags of a file given a path name, but don't follow symlinks.
	2591	*/
	2592	/* ARGSUSED */
	2593	int
	2594	sys_lchflags(struct lchflags_args *uap)
	2595	{
	2596	struct nlookupdata nd;
	2597	struct vnode *vp;
	2598	int error;
	2599
	2600	vp = NULL;
	2601	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2602	/* XXX Add NLC flag indicating modifying operation? */
	2603	if (error == 0)
	2604	error = nlookup(&nd);
	2605	if (error == 0)
	2606	error = ncp_writechk(&nd.nl_nch);
	2607	if (error == 0)
	2608	error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	2609	nlookup_done(&nd);
	2610	if (error == 0) {
	2611	error = setfflags(vp, uap->flags);
	2612	vrele(vp);
	2613	}
	2614	return (error);
	2615	}
	2616
	2617	/*
	2618	* fchflags_args(int fd, int flags)
	2619	*
	2620	* Change flags of a file given a file descriptor.
	2621	*/
	2622	/* ARGSUSED */
	2623	int
	2624	sys_fchflags(struct fchflags_args *uap)
	2625	{
	2626	struct thread *td = curthread;
	2627	struct proc *p = td->td_proc;
	2628	struct file *fp;
	2629	int error;
	2630
	2631	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
	2632	return (error);
	2633	if (fp->f_nchandle.ncp)
	2634	error = ncp_writechk(&fp->f_nchandle);
	2635	if (error == 0)
	2636	error = setfflags((struct vnode *) fp->f_data, uap->flags);
	2637	fdrop(fp);
	2638	return (error);
	2639	}
	2640
	2641	static int
	2642	setfmode(struct vnode *vp, int mode)
	2643	{
	2644	struct thread *td = curthread;
	2645	struct proc *p = td->td_proc;
	2646	int error;
	2647	struct vattr vattr;
	2648
	2649	/*
	2650	* note: vget is required for any operation that might mod the vnode
	2651	* so VINACTIVE is properly cleared.
	2652	*/
	2653	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
	2654	VATTR_NULL(&vattr);
	2655	vattr.va_mode = mode & ALLPERMS;
	2656	error = VOP_SETATTR(vp, &vattr, p->p_ucred);
	2657	vput(vp);
	2658	}
	2659	return error;
	2660	}
	2661
	2662	int
	2663	kern_chmod(struct nlookupdata *nd, int mode)
	2664	{
	2665	struct vnode *vp;
	2666	int error;
	2667
	2668	/* XXX Add NLC flag indicating modifying operation? */
	2669	if ((error = nlookup(nd)) != 0)
	2670	return (error);
	2671	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
	2672	return (error);
	2673	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
	2674	error = setfmode(vp, mode);
	2675	vrele(vp);
	2676	return (error);
	2677	}
	2678
	2679	/*
	2680	* chmod_args(char *path, int mode)
	2681	*
	2682	* Change mode of a file given path name.
	2683	*/
	2684	/* ARGSUSED */
	2685	int
	2686	sys_chmod(struct chmod_args *uap)
	2687	{
	2688	struct nlookupdata nd;
	2689	int error;
	2690
	2691	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2692	if (error == 0)
	2693	error = kern_chmod(&nd, uap->mode);
	2694	nlookup_done(&nd);
	2695	return (error);
	2696	}
	2697
	2698	/*
	2699	* lchmod_args(char *path, int mode)
	2700	*
	2701	* Change mode of a file given path name (don't follow links.)
	2702	*/
	2703	/* ARGSUSED */
	2704	int
	2705	sys_lchmod(struct lchmod_args *uap)
	2706	{
	2707	struct nlookupdata nd;
	2708	int error;
	2709
	2710	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2711	if (error == 0)
	2712	error = kern_chmod(&nd, uap->mode);
	2713	nlookup_done(&nd);
	2714	return (error);
	2715	}
	2716
	2717	/*
	2718	* fchmod_args(int fd, int mode)
	2719	*
	2720	* Change mode of a file given a file descriptor.
	2721	*/
	2722	/* ARGSUSED */
	2723	int
	2724	sys_fchmod(struct fchmod_args *uap)
	2725	{
	2726	struct thread *td = curthread;
	2727	struct proc *p = td->td_proc;
	2728	struct file *fp;
	2729	int error;
	2730
	2731	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
	2732	return (error);
	2733	if (fp->f_nchandle.ncp)
	2734	error = ncp_writechk(&fp->f_nchandle);
	2735	if (error == 0)
	2736	error = setfmode((struct vnode *)fp->f_data, uap->mode);
	2737	fdrop(fp);
	2738	return (error);
	2739	}
	2740
	2741	static int
	2742	setfown(struct vnode *vp, uid_t uid, gid_t gid)
	2743	{
	2744	struct thread *td = curthread;
	2745	struct proc *p = td->td_proc;
	2746	int error;
	2747	struct vattr vattr;
	2748
	2749	/*
	2750	* note: vget is required for any operation that might mod the vnode
	2751	* so VINACTIVE is properly cleared.
	2752	*/
	2753	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
	2754	VATTR_NULL(&vattr);
	2755	vattr.va_uid = uid;
	2756	vattr.va_gid = gid;
	2757	error = VOP_SETATTR(vp, &vattr, p->p_ucred);
	2758	vput(vp);
	2759	}
	2760	return error;
	2761	}
	2762
	2763	int
	2764	kern_chown(struct nlookupdata *nd, int uid, int gid)
	2765	{
	2766	struct vnode *vp;
	2767	int error;
	2768
	2769	/* XXX Add NLC flag indicating modifying operation? */
	2770	if ((error = nlookup(nd)) != 0)
	2771	return (error);
	2772	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
	2773	return (error);
	2774	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
	2775	error = setfown(vp, uid, gid);
	2776	vrele(vp);
	2777	return (error);
	2778	}
	2779
	2780	/*
	2781	* chown(char *path, int uid, int gid)
	2782	*
	2783	* Set ownership given a path name.
	2784	*/
	2785	int
	2786	sys_chown(struct chown_args *uap)
	2787	{
	2788	struct nlookupdata nd;
	2789	int error;
	2790
	2791	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2792	if (error == 0)
	2793	error = kern_chown(&nd, uap->uid, uap->gid);
	2794	nlookup_done(&nd);
	2795	return (error);
	2796	}
	2797
	2798	/*
	2799	* lchown_args(char *path, int uid, int gid)
	2800	*
	2801	* Set ownership given a path name, do not cross symlinks.
	2802	*/
	2803	int
	2804	sys_lchown(struct lchown_args *uap)
	2805	{
	2806	struct nlookupdata nd;
	2807	int error;
	2808
	2809	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2810	if (error == 0)
	2811	error = kern_chown(&nd, uap->uid, uap->gid);
	2812	nlookup_done(&nd);
	2813	return (error);
	2814	}
	2815
	2816	/*
	2817	* fchown_args(int fd, int uid, int gid)
	2818	*
	2819	* Set ownership given a file descriptor.
	2820	*/
	2821	/* ARGSUSED */
	2822	int
	2823	sys_fchown(struct fchown_args *uap)
	2824	{
	2825	struct thread *td = curthread;
	2826	struct proc *p = td->td_proc;
	2827	struct file *fp;
	2828	int error;
	2829
	2830	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
	2831	return (error);
	2832	if (fp->f_nchandle.ncp)
	2833	error = ncp_writechk(&fp->f_nchandle);
	2834	if (error == 0)
	2835	error = setfown((struct vnode *)fp->f_data, uap->uid, uap->gid);
	2836	fdrop(fp);
	2837	return (error);
	2838	}
	2839
	2840	static int
	2841	getutimes(const struct timeval tvp, struct timespec tsp)
	2842	{
	2843	struct timeval tv[2];
	2844
	2845	if (tvp == NULL) {
	2846	microtime(&tv[0]);
	2847	TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
	2848	tsp[1] = tsp[0];
	2849	} else {
	2850	TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
	2851	TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
	2852	}
	2853	return 0;
	2854	}
	2855
	2856	static int
	2857	setutimes(struct vnode vp, const struct timespec ts, int nullflag)
	2858	{
	2859	struct thread *td = curthread;
	2860	struct proc *p = td->td_proc;
	2861	int error;
	2862	struct vattr vattr;
	2863
	2864	/*
	2865	* note: vget is required for any operation that might mod the vnode
	2866	* so VINACTIVE is properly cleared.
	2867	*/
	2868	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
	2869	VATTR_NULL(&vattr);
	2870	vattr.va_atime = ts[0];
	2871	vattr.va_mtime = ts[1];
	2872	if (nullflag)
	2873	vattr.va_vaflags \|= VA_UTIMES_NULL;
	2874	error = VOP_SETATTR(vp, &vattr, p->p_ucred);
	2875	vput(vp);
	2876	}
	2877	return error;
	2878	}
	2879
	2880	int
	2881	kern_utimes(struct nlookupdata nd, struct timeval tptr)
	2882	{
	2883	struct timespec ts[2];
	2884	struct vnode *vp;
	2885	int error;
	2886
	2887	if ((error = getutimes(tptr, ts)) != 0)
	2888	return (error);
	2889	/* XXX Add NLC flag indicating modifying operation? */
	2890	if ((error = nlookup(nd)) != 0)
	2891	return (error);
	2892	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	2893	return (error);
	2894	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
	2895	return (error);
	2896	error = setutimes(vp, ts, tptr == NULL);
	2897	vrele(vp);
	2898	return (error);
	2899	}
	2900
	2901	/*
	2902	* utimes_args(char path, struct timeval tptr)
	2903	*
	2904	* Set the access and modification times of a file.
	2905	*/
	2906	int
	2907	sys_utimes(struct utimes_args *uap)
	2908	{
	2909	struct timeval tv[2];
	2910	struct nlookupdata nd;
	2911	int error;
	2912
	2913	if (uap->tptr) {
	2914	error = copyin(uap->tptr, tv, sizeof(tv));
	2915	if (error)
	2916	return (error);
	2917	}
	2918	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2919	if (error == 0)
	2920	error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	2921	nlookup_done(&nd);
	2922	return (error);
	2923	}
	2924
	2925	/*
	2926	* lutimes_args(char path, struct timeval tptr)
	2927	*
	2928	* Set the access and modification times of a file.
	2929	*/
	2930	int
	2931	sys_lutimes(struct lutimes_args *uap)
	2932	{
	2933	struct timeval tv[2];
	2934	struct nlookupdata nd;
	2935	int error;
	2936
	2937	if (uap->tptr) {
	2938	error = copyin(uap->tptr, tv, sizeof(tv));
	2939	if (error)
	2940	return (error);
	2941	}
	2942	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2943	if (error == 0)
	2944	error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	2945	nlookup_done(&nd);
	2946	return (error);
	2947	}
	2948
	2949	int
	2950	kern_futimes(int fd, struct timeval *tptr)
	2951	{
	2952	struct thread *td = curthread;
	2953	struct proc *p = td->td_proc;
	2954	struct timespec ts[2];
	2955	struct file *fp;
	2956	int error;
	2957
	2958	error = getutimes(tptr, ts);
	2959	if (error)
	2960	return (error);
	2961	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	2962	return (error);
	2963	if (fp->f_nchandle.ncp)
	2964	error = ncp_writechk(&fp->f_nchandle);
	2965	if (error == 0)
	2966	error = setutimes((struct vnode *)fp->f_data, ts, tptr == NULL);
	2967	fdrop(fp);
	2968	return (error);
	2969	}
	2970
	2971	/*
	2972	* futimes_args(int fd, struct timeval *tptr)
	2973	*
	2974	* Set the access and modification times of a file.
	2975	*/
	2976	int
	2977	sys_futimes(struct futimes_args *uap)
	2978	{
	2979	struct timeval tv[2];
	2980	int error;
	2981
	2982	if (uap->tptr) {
	2983	error = copyin(uap->tptr, tv, sizeof(tv));
	2984	if (error)
	2985	return (error);
	2986	}
	2987
	2988	error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);
	2989
	2990	return (error);
	2991	}
	2992
	2993	int
	2994	kern_truncate(struct nlookupdata *nd, off_t length)
	2995	{
	2996	struct vnode *vp;
	2997	struct vattr vattr;
	2998	int error;
	2999
	3000	if (length < 0)
	3001	return(EINVAL);
	3002	/* XXX Add NLC flag indicating modifying operation? */
	3003	if ((error = nlookup(nd)) != 0)
	3004	return (error);
	3005	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	3006	return (error);
	3007	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
	3008	return (error);
	3009	if ((error = vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY)) != 0) {
	3010	vrele(vp);
	3011	return (error);
	3012	}
	3013	if (vp->v_type == VDIR) {
	3014	error = EISDIR;
	3015	} else if ((error = vn_writechk(vp, &nd->nl_nch)) == 0 &&
	3016	(error = VOP_ACCESS(vp, VWRITE, nd->nl_cred)) == 0) {
	3017	VATTR_NULL(&vattr);
	3018	vattr.va_size = length;
	3019	error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
	3020	}
	3021	vput(vp);
	3022	return (error);
	3023	}
	3024
	3025	/*
	3026	* truncate(char *path, int pad, off_t length)
	3027	*
	3028	* Truncate a file given its path name.
	3029	*/
	3030	int
	3031	sys_truncate(struct truncate_args *uap)
	3032	{
	3033	struct nlookupdata nd;
	3034	int error;
	3035
	3036	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	3037	if (error == 0)
	3038	error = kern_truncate(&nd, uap->length);
	3039	nlookup_done(&nd);
	3040	return error;
	3041	}
	3042
	3043	int
	3044	kern_ftruncate(int fd, off_t length)
	3045	{
	3046	struct thread *td = curthread;
	3047	struct proc *p = td->td_proc;
	3048	struct vattr vattr;
	3049	struct vnode *vp;
	3050	struct file *fp;
	3051	int error;
	3052
	3053	if (length < 0)
	3054	return(EINVAL);
	3055	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	3056	return (error);
	3057	if (fp->f_nchandle.ncp) {
	3058	error = ncp_writechk(&fp->f_nchandle);
	3059	if (error)
	3060	goto done;
	3061	}
	3062	if ((fp->f_flag & FWRITE) == 0) {
	3063	error = EINVAL;
	3064	goto done;
	3065	}
	3066	vp = (struct vnode *)fp->f_data;
	3067	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	3068	if (vp->v_type == VDIR) {
	3069	error = EISDIR;
	3070	} else if ((error = vn_writechk(vp, NULL)) == 0) {
	3071	VATTR_NULL(&vattr);
	3072	vattr.va_size = length;
	3073	error = VOP_SETATTR(vp, &vattr, fp->f_cred);
	3074	}
	3075	vn_unlock(vp);
	3076	done:
	3077	fdrop(fp);
	3078	return (error);
	3079	}
	3080
	3081	/*
	3082	* ftruncate_args(int fd, int pad, off_t length)
	3083	*
	3084	* Truncate a file given a file descriptor.
	3085	*/
	3086	int
	3087	sys_ftruncate(struct ftruncate_args *uap)
	3088	{
	3089	int error;
	3090
	3091	error = kern_ftruncate(uap->fd, uap->length);
	3092
	3093	return (error);
	3094	}
	3095
	3096	/*
	3097	* fsync(int fd)
	3098	*
	3099	* Sync an open file.
	3100	*/
	3101	/* ARGSUSED */
	3102	int
	3103	sys_fsync(struct fsync_args *uap)
	3104	{
	3105	struct thread *td = curthread;
	3106	struct proc *p = td->td_proc;
	3107	struct vnode *vp;
	3108	struct file *fp;
	3109	vm_object_t obj;
	3110	int error;
	3111
	3112	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
	3113	return (error);
	3114	vp = (struct vnode *)fp->f_data;
	3115	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	3116	if ((obj = vp->v_object) != NULL)
	3117	vm_object_page_clean(obj, 0, 0, 0);
	3118	if ((error = VOP_FSYNC(vp, MNT_WAIT)) == 0 && vp->v_mount)
	3119	error = buf_fsync(vp);
	3120	vn_unlock(vp);
	3121	fdrop(fp);
	3122	return (error);
	3123	}
	3124
	3125	int
	3126	kern_rename(struct nlookupdata fromnd, struct nlookupdata tond)
	3127	{
	3128	struct nchandle fnchd;
	3129	struct nchandle tnchd;
	3130	struct namecache *ncp;
	3131	struct vnode *fdvp;
	3132	struct vnode *tdvp;
	3133	struct mount *mp;
	3134	int error;
	3135
	3136	bwillinode(1);
	3137	fromnd->nl_flags \|= NLC_REFDVP;
	3138	if ((error = nlookup(fromnd)) != 0)
	3139	return (error);
	3140	if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
	3141	return (ENOENT);
	3142	fnchd.mount = fromnd->nl_nch.mount;
	3143	cache_hold(&fnchd);
	3144
	3145	/*
	3146	* unlock the source nch so we can lookup the target nch without
	3147	* deadlocking. The target may or may not exist so we do not check
	3148	* for a target vp like kern_mkdir() and other creation functions do.
	3149	*
	3150	* The source and target directories are ref'd and rechecked after
	3151	* everything is relocked to determine if the source or target file
	3152	* has been renamed.
	3153	*/
	3154	KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
	3155	fromnd->nl_flags &= ~NLC_NCPISLOCKED;
	3156	cache_unlock(&fromnd->nl_nch);
	3157
	3158	tond->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	3159	if ((error = nlookup(tond)) != 0) {
	3160	cache_drop(&fnchd);
	3161	return (error);
	3162	}
	3163	if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
	3164	cache_drop(&fnchd);
	3165	return (ENOENT);
	3166	}
	3167	tnchd.mount = tond->nl_nch.mount;
	3168	cache_hold(&tnchd);
	3169
	3170	/*
	3171	* If the source and target are the same there is nothing to do
	3172	*/
	3173	if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
	3174	cache_drop(&fnchd);
	3175	cache_drop(&tnchd);
	3176	return (0);
	3177	}
	3178
	3179	/*
	3180	* Mount points cannot be renamed or overwritten
	3181	*/
	3182	if ((fromnd->nl_nch.ncp->nc_flag \| tond->nl_nch.ncp->nc_flag) &
	3183	NCF_ISMOUNTPT
	3184	) {
	3185	cache_drop(&fnchd);
	3186	cache_drop(&tnchd);
	3187	return (EINVAL);
	3188	}
	3189
	3190	/*
	3191	* relock the source ncp. NOTE AFTER RELOCKING: the source ncp
	3192	* may have become invalid while it was unlocked, nc_vp and nc_mount
	3193	* could be NULL.
	3194	*/
	3195	if (cache_lock_nonblock(&fromnd->nl_nch) == 0) {
	3196	cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
	3197	} else if (fromnd->nl_nch.ncp > tond->nl_nch.ncp) {
	3198	cache_lock(&fromnd->nl_nch);
	3199	cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
	3200	} else {
	3201	cache_unlock(&tond->nl_nch);
	3202	cache_lock(&fromnd->nl_nch);
	3203	cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
	3204	cache_lock(&tond->nl_nch);
	3205	cache_resolve(&tond->nl_nch, tond->nl_cred);
	3206	}
	3207	fromnd->nl_flags \|= NLC_NCPISLOCKED;
	3208
	3209	/*
	3210	* make sure the parent directories linkages are the same
	3211	*/
	3212	if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent \|\|
	3213	tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
	3214	cache_drop(&fnchd);
	3215	cache_drop(&tnchd);
	3216	return (ENOENT);
	3217	}
	3218
	3219	/*
	3220	* Both the source and target must be within the same filesystem and
	3221	* in the same filesystem as their parent directories within the
	3222	* namecache topology.
	3223	*
	3224	* NOTE: fromnd's nc_mount or nc_vp could be NULL.
	3225	*/
	3226	mp = fnchd.mount;
	3227	if (mp != tnchd.mount \|\| mp != fromnd->nl_nch.mount \|\|
	3228	mp != tond->nl_nch.mount) {
	3229	cache_drop(&fnchd);
	3230	cache_drop(&tnchd);
	3231	return (EXDEV);
	3232	}
	3233
	3234	/*
	3235	* Make sure the mount point is writable
	3236	*/
	3237	if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
	3238	cache_drop(&fnchd);
	3239	cache_drop(&tnchd);
	3240	return (error);
	3241	}
	3242
	3243	/*
	3244	* If the target exists and either the source or target is a directory,
	3245	* then both must be directories.
	3246	*
	3247	* Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h
	3248	* have become NULL.
	3249	*/
	3250	if (tond->nl_nch.ncp->nc_vp) {
	3251	if (fromnd->nl_nch.ncp->nc_vp == NULL) {
	3252	error = ENOENT;
	3253	} else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
	3254	if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
	3255	error = ENOTDIR;
	3256	} else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
	3257	error = EISDIR;
	3258	}
	3259	}
	3260
	3261	/*
	3262	* You cannot rename a source into itself or a subdirectory of itself.
	3263	* We check this by travsersing the target directory upwards looking
	3264	* for a match against the source.
	3265	*/
	3266	if (error == 0) {
	3267	for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
	3268	if (fromnd->nl_nch.ncp == ncp) {
	3269	error = EINVAL;
	3270	break;
	3271	}
	3272	}
	3273	}
	3274
	3275	cache_drop(&fnchd);
	3276	cache_drop(&tnchd);
	3277
	3278	/*
	3279	* Even though the namespaces are different, they may still represent
	3280	* hardlinks to the same file. The filesystem might have a hard time
	3281	* with this so we issue a NREMOVE of the source instead of a NRENAME
	3282	* when we detect the situation.
	3283	*/
	3284	if (error == 0) {
	3285	fdvp = fromnd->nl_dvp;
	3286	tdvp = tond->nl_dvp;
	3287	if (fdvp == NULL \|\| tdvp == NULL) {
	3288	error = EPERM;
	3289	} else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
	3290	error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
	3291	fromnd->nl_cred);
	3292	} else {
	3293	error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
	3294	fdvp, tdvp, tond->nl_cred);
	3295	}
	3296	}
	3297	return (error);
	3298	}
	3299
	3300	/*
	3301	* rename_args(char from, char to)
	3302	*
	3303	* Rename files. Source and destination must either both be directories,
	3304	* or both not be directories. If target is a directory, it must be empty.
	3305	*/
	3306	int
	3307	sys_rename(struct rename_args *uap)
	3308	{
	3309	struct nlookupdata fromnd, tond;
	3310	int error;
	3311
	3312	error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
	3313	if (error == 0) {
	3314	error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
	3315	if (error == 0)
	3316	error = kern_rename(&fromnd, &tond);
	3317	nlookup_done(&tond);
	3318	}
	3319	nlookup_done(&fromnd);
	3320	return (error);
	3321	}
	3322
	3323	int
	3324	kern_mkdir(struct nlookupdata *nd, int mode)
	3325	{
	3326	struct thread *td = curthread;
	3327	struct proc *p = td->td_proc;
	3328	struct vnode *vp;
	3329	struct vattr vattr;
	3330	int error;
	3331
	3332	bwillinode(1);
	3333	nd->nl_flags \|= NLC_WILLBEDIR \| NLC_CREATE \| NLC_REFDVP;
	3334	if ((error = nlookup(nd)) != 0)
	3335	return (error);
	3336
	3337	if (nd->nl_nch.ncp->nc_vp)
	3338	return (EEXIST);
	3339	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	3340	return (error);
	3341	VATTR_NULL(&vattr);
	3342	vattr.va_type = VDIR;
	3343	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
	3344
	3345	vp = NULL;
	3346	error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, p->p_ucred, &vattr);
	3347	if (error == 0)
	3348	vput(vp);
	3349	return (error);
	3350	}
	3351
	3352	/*
	3353	* mkdir_args(char *path, int mode)
	3354	*
	3355	* Make a directory file.
	3356	*/
	3357	/* ARGSUSED */
	3358	int
	3359	sys_mkdir(struct mkdir_args *uap)
	3360	{
	3361	struct nlookupdata nd;
	3362	int error;
	3363
	3364	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	3365	if (error == 0)
	3366	error = kern_mkdir(&nd, uap->mode);
	3367	nlookup_done(&nd);
	3368	return (error);
	3369	}
	3370
	3371	int
	3372	kern_rmdir(struct nlookupdata *nd)
	3373	{
	3374	int error;
	3375
	3376	bwillinode(1);
	3377	nd->nl_flags \|= NLC_DELETE \| NLC_REFDVP;
	3378	if ((error = nlookup(nd)) != 0)
	3379	return (error);
	3380
	3381	/*
	3382	* Do not allow directories representing mount points to be
	3383	* deleted, even if empty. Check write perms on mount point
	3384	* in case the vnode is aliased (aka nullfs).
	3385	*/
	3386	if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT))
	3387	return (EINVAL);
	3388	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	3389	return (error);
	3390	error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	3391	return (error);
	3392	}
	3393
	3394	/*
	3395	* rmdir_args(char *path)
	3396	*
	3397	* Remove a directory file.
	3398	*/
	3399	/* ARGSUSED */
	3400	int
	3401	sys_rmdir(struct rmdir_args *uap)
	3402	{
	3403	struct nlookupdata nd;
	3404	int error;
	3405
	3406	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	3407	if (error == 0)
	3408	error = kern_rmdir(&nd);
	3409	nlookup_done(&nd);
	3410	return (error);
	3411	}
	3412
	3413	int
	3414	kern_getdirentries(int fd, char buf, u_int count, long basep, int *res,
	3415	enum uio_seg direction)
	3416	{
	3417	struct thread *td = curthread;
	3418	struct proc *p = td->td_proc;
	3419	struct vnode *vp;
	3420	struct file *fp;
	3421	struct uio auio;
	3422	struct iovec aiov;
	3423	off_t loff;
	3424	int error, eofflag;
	3425
	3426	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	3427	return (error);
	3428	if ((fp->f_flag & FREAD) == 0) {
	3429	error = EBADF;
	3430	goto done;
	3431	}
	3432	vp = (struct vnode *)fp->f_data;
	3433	unionread:
	3434	if (vp->v_type != VDIR) {
	3435	error = EINVAL;
	3436	goto done;
	3437	}
	3438	aiov.iov_base = buf;
	3439	aiov.iov_len = count;
	3440	auio.uio_iov = &aiov;
	3441	auio.uio_iovcnt = 1;
	3442	auio.uio_rw = UIO_READ;
	3443	auio.uio_segflg = direction;
	3444	auio.uio_td = td;
	3445	auio.uio_resid = count;
	3446	loff = auio.uio_offset = fp->f_offset;
	3447	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
	3448	fp->f_offset = auio.uio_offset;
	3449	if (error)
	3450	goto done;
	3451	if (count == auio.uio_resid) {
	3452	if (union_dircheckp) {
	3453	error = union_dircheckp(td, &vp, fp);
	3454	if (error == -1)
	3455	goto unionread;
	3456	if (error)
	3457	goto done;
	3458	}
	3459	#if 0
	3460	if ((vp->v_flag & VROOT) &&
	3461	(vp->v_mount->mnt_flag & MNT_UNION)) {
	3462	struct vnode *tvp = vp;
	3463	vp = vp->v_mount->mnt_vnodecovered;
	3464	vref(vp);
	3465	fp->f_data = vp;
	3466	fp->f_offset = 0;
	3467	vrele(tvp);
	3468	goto unionread;
	3469	}
	3470	#endif
	3471	}
	3472
	3473	/*
	3474	* WARNING! *basep may not be wide enough to accomodate the
	3475	* seek offset. XXX should we hack this to return the upper 32 bits
	3476	* for offsets greater then 4G?
	3477	*/
	3478	if (basep) {
	3479	*basep = (long)loff;
	3480	}
	3481	*res = count - auio.uio_resid;
	3482	done:
	3483	fdrop(fp);
	3484	return (error);
	3485	}
	3486
	3487	/*
	3488	* getdirentries_args(int fd, char buf, u_int conut, long basep)
	3489	*
	3490	* Read a block of directory entries in a file system independent format.
	3491	*/
	3492	int
	3493	sys_getdirentries(struct getdirentries_args *uap)
	3494	{
	3495	long base;
	3496	int error;
	3497
	3498	error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
	3499	&uap->sysmsg_result, UIO_USERSPACE);
	3500
	3501	if (error == 0 && uap->basep)
	3502	error = copyout(&base, uap->basep, sizeof(*uap->basep));
	3503	return (error);
	3504	}
	3505
	3506	/*
	3507	* getdents_args(int fd, char *buf, size_t count)
	3508	*/
	3509	int
	3510	sys_getdents(struct getdents_args *uap)
	3511	{
	3512	int error;
	3513
	3514	error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
	3515	&uap->sysmsg_result, UIO_USERSPACE);
	3516
	3517	return (error);
	3518	}
	3519
	3520	/*
	3521	* umask(int newmask)
	3522	*
	3523	* Set the mode mask for creation of filesystem nodes.
	3524	*
	3525	* MP SAFE
	3526	*/
	3527	int
	3528	sys_umask(struct umask_args *uap)
	3529	{
	3530	struct thread *td = curthread;
	3531	struct proc *p = td->td_proc;
	3532	struct filedesc *fdp;
	3533
	3534	fdp = p->p_fd;
	3535	uap->sysmsg_result = fdp->fd_cmask;
	3536	fdp->fd_cmask = uap->newmask & ALLPERMS;
	3537	return (0);
	3538	}
	3539
	3540	/*
	3541	* revoke(char *path)
	3542	*
	3543	* Void all references to file by ripping underlying filesystem
	3544	* away from vnode.
	3545	*/
	3546	/* ARGSUSED */
	3547	int
	3548	sys_revoke(struct revoke_args *uap)
	3549	{
	3550	struct nlookupdata nd;
	3551	struct vattr vattr;
	3552	struct vnode *vp;
	3553	struct ucred *cred;
	3554	int error;
	3555
	3556	vp = NULL;
	3557	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	3558	if (error == 0)
	3559	error = nlookup(&nd);
	3560	if (error == 0)
	3561	error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	3562	cred = crhold(nd.nl_cred);
	3563	nlookup_done(&nd);
	3564	if (error == 0) {
	3565	if (vp->v_type != VCHR && vp->v_type != VBLK)
	3566	error = EINVAL;
	3567	if (error == 0)
	3568	error = VOP_GETATTR(vp, &vattr);
	3569	if (error == 0 && cred->cr_uid != vattr.va_uid)
	3570	error = priv_check_cred(cred, PRIV_ROOT, PRISON_ROOT);
	3571	if (error == 0 && count_udev(vp->v_umajor, vp->v_uminor) > 0) {
	3572	error = 0;
	3573	vx_lock(vp);
	3574	VOP_REVOKE(vp, REVOKEALL);
	3575	vx_unlock(vp);
	3576	}
	3577	vrele(vp);
	3578	}
	3579	if (cred)
	3580	crfree(cred);
	3581	return (error);
	3582	}
	3583
	3584	/*
	3585	* getfh_args(char fname, fhandle_t fhp)
	3586	*
	3587	* Get (NFS) file handle
	3588	*
	3589	* NOTE: We use the fsid of the covering mount, even if it is a nullfs
	3590	* mount. This allows nullfs mounts to be explicitly exported.
	3591	*
	3592	* WARNING: nullfs mounts of HAMMER PFS ROOTs are safe.
	3593	*
	3594	* nullfs mounts of subdirectories are not safe. That is, it will
	3595	* work, but you do not really have protection against access to
	3596	* the related parent directories.
	3597	*/
	3598	int
	3599	sys_getfh(struct getfh_args *uap)
	3600	{
	3601	struct thread *td = curthread;
	3602	struct nlookupdata nd;
	3603	fhandle_t fh;
	3604	struct vnode *vp;
	3605	struct mount *mp;
	3606	int error;
	3607
	3608	/*
	3609	* Must be super user
	3610	*/
	3611	if ((error = priv_check(td, PRIV_ROOT)) != 0)
	3612	return (error);
	3613
	3614	vp = NULL;
	3615	error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW);
	3616	if (error == 0)
	3617	error = nlookup(&nd);
	3618	if (error == 0)
	3619	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	3620	mp = nd.nl_nch.mount;
	3621	nlookup_done(&nd);
	3622	if (error == 0) {
	3623	bzero(&fh, sizeof(fh));
	3624	fh.fh_fsid = mp->mnt_stat.f_fsid;
	3625	error = VFS_VPTOFH(vp, &fh.fh_fid);
	3626	vput(vp);
	3627	if (error == 0)
	3628	error = copyout(&fh, uap->fhp, sizeof(fh));
	3629	}
	3630	return (error);
	3631	}
	3632
	3633	/*
	3634	* fhopen_args(const struct fhandle *u_fhp, int flags)
	3635	*
	3636	* syscall for the rpc.lockd to use to translate a NFS file handle into
	3637	* an open descriptor.
	3638	*
	3639	* warning: do not remove the priv_check() call or this becomes one giant
	3640	* security hole.
	3641	*/
	3642	int
	3643	sys_fhopen(struct fhopen_args *uap)
	3644	{
	3645	struct thread *td = curthread;
	3646	struct proc *p = td->td_proc;
	3647	struct mount *mp;
	3648	struct vnode *vp;
	3649	struct fhandle fhp;
	3650	struct vattr vat;
	3651	struct vattr *vap = &vat;
	3652	struct flock lf;
	3653	int fmode, mode, error, type;
	3654	struct file *nfp;
	3655	struct file *fp;
	3656	int indx;
	3657
	3658	/*
	3659	* Must be super user
	3660	*/
	3661	error = priv_check(td, PRIV_ROOT);
	3662	if (error)
	3663	return (error);
	3664
	3665	fmode = FFLAGS(uap->flags);
	3666	/* why not allow a non-read/write open for our lockd? */
	3667	if (((fmode & (FREAD \| FWRITE)) == 0) \|\| (fmode & O_CREAT))
	3668	return (EINVAL);
	3669	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	3670	if (error)
	3671	return(error);
	3672	/* find the mount point */
	3673	mp = vfs_getvfs(&fhp.fh_fsid);
	3674	if (mp == NULL)
	3675	return (ESTALE);
	3676	/* now give me my vnode, it gets returned to me locked */
	3677	error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp);
	3678	if (error)
	3679	return (error);
	3680	/*
	3681	* from now on we have to make sure not
	3682	* to forget about the vnode
	3683	* any error that causes an abort must vput(vp)
	3684	* just set error = err and 'goto bad;'.
	3685	*/
	3686
	3687	/*
	3688	* from vn_open
	3689	*/
	3690	if (vp->v_type == VLNK) {
	3691	error = EMLINK;
	3692	goto bad;
	3693	}
	3694	if (vp->v_type == VSOCK) {
	3695	error = EOPNOTSUPP;
	3696	goto bad;
	3697	}
	3698	mode = 0;
	3699	if (fmode & (FWRITE \| O_TRUNC)) {
	3700	if (vp->v_type == VDIR) {
	3701	error = EISDIR;
	3702	goto bad;
	3703	}
	3704	error = vn_writechk(vp, NULL);
	3705	if (error)
	3706	goto bad;
	3707	mode \|= VWRITE;
	3708	}
	3709	if (fmode & FREAD)
	3710	mode \|= VREAD;
	3711	if (mode) {
	3712	error = VOP_ACCESS(vp, mode, p->p_ucred);
	3713	if (error)
	3714	goto bad;
	3715	}
	3716	if (fmode & O_TRUNC) {
	3717	vn_unlock(vp); /* XXX */
	3718	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY); /* XXX */
	3719	VATTR_NULL(vap);
	3720	vap->va_size = 0;
	3721	error = VOP_SETATTR(vp, vap, p->p_ucred);
	3722	if (error)
	3723	goto bad;
	3724	}
	3725
	3726	/*
	3727	* VOP_OPEN needs the file pointer so it can potentially override
	3728	* it.
	3729	*
	3730	* WARNING! no f_nchandle will be associated when fhopen()ing a
	3731	* directory. XXX
	3732	*/
	3733	if ((error = falloc(p, &nfp, &indx)) != 0)
	3734	goto bad;
	3735	fp = nfp;
	3736
	3737	error = VOP_OPEN(vp, fmode, p->p_ucred, fp);
	3738	if (error) {
	3739	/*
	3740	* setting f_ops this way prevents VOP_CLOSE from being
	3741	* called or fdrop() releasing the vp from v_data. Since
	3742	* the VOP_OPEN failed we don't want to VOP_CLOSE.
	3743	*/
	3744	fp->f_ops = &badfileops;
	3745	fp->f_data = NULL;
	3746	goto bad_drop;
	3747	}
	3748
	3749	/*
	3750	* The fp is given its own reference, we still have our ref and lock.
	3751	*
	3752	* Assert that all regular files must be created with a VM object.
	3753	*/
	3754	if (vp->v_type == VREG && vp->v_object == NULL) {
	3755	kprintf("fhopen: regular file did not have VM object: %p\n", vp);
	3756	goto bad_drop;
	3757	}
	3758
	3759	/*
	3760	* The open was successful. Handle any locking requirements.
	3761	*/
	3762	if (fmode & (O_EXLOCK \| O_SHLOCK)) {
	3763	lf.l_whence = SEEK_SET;
	3764	lf.l_start = 0;
	3765	lf.l_len = 0;
	3766	if (fmode & O_EXLOCK)
	3767	lf.l_type = F_WRLCK;
	3768	else
	3769	lf.l_type = F_RDLCK;
	3770	if (fmode & FNONBLOCK)
	3771	type = 0;
	3772	else
	3773	type = F_WAIT;
	3774	vn_unlock(vp);
	3775	if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
	3776	/*
	3777	* release our private reference.
	3778	*/
	3779	fsetfd(p, NULL, indx);
	3780	fdrop(fp);
	3781	vrele(vp);
	3782	return (error);
	3783	}
	3784	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	3785	fp->f_flag \|= FHASLOCK;
	3786	}
	3787
	3788	/*
	3789	* Clean up. Associate the file pointer with the previously
	3790	* reserved descriptor and return it.
	3791	*/
	3792	vput(vp);
	3793	fsetfd(p, fp, indx);
	3794	fdrop(fp);
	3795	uap->sysmsg_result = indx;
	3796	return (0);
	3797
	3798	bad_drop:
	3799	fsetfd(p, NULL, indx);
	3800	fdrop(fp);
	3801	bad:
	3802	vput(vp);
	3803	return (error);
	3804	}
	3805
	3806	/*
	3807	* fhstat_args(struct fhandle u_fhp, struct stat sb)
	3808	*/
	3809	int
	3810	sys_fhstat(struct fhstat_args *uap)
	3811	{
	3812	struct thread *td = curthread;
	3813	struct stat sb;
	3814	fhandle_t fh;
	3815	struct mount *mp;
	3816	struct vnode *vp;
	3817	int error;
	3818
	3819	/*
	3820	* Must be super user
	3821	*/
	3822	error = priv_check(td, PRIV_ROOT);
	3823	if (error)
	3824	return (error);
	3825
	3826	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
	3827	if (error)
	3828	return (error);
	3829
	3830	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
	3831	return (ESTALE);
	3832	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
	3833	return (error);
	3834	error = vn_stat(vp, &sb, td->td_proc->p_ucred);
	3835	vput(vp);
	3836	if (error)
	3837	return (error);
	3838	error = copyout(&sb, uap->sb, sizeof(sb));
	3839	return (error);
	3840	}
	3841
	3842	/*
	3843	* fhstatfs_args(struct fhandle u_fhp, struct statfs buf)
	3844	*/
	3845	int
	3846	sys_fhstatfs(struct fhstatfs_args *uap)
	3847	{
	3848	struct thread *td = curthread;
	3849	struct proc *p = td->td_proc;
	3850	struct statfs *sp;
	3851	struct mount *mp;
	3852	struct vnode *vp;
	3853	struct statfs sb;
	3854	char fullpath, freepath;
	3855	fhandle_t fh;
	3856	int error;
	3857
	3858	/*
	3859	* Must be super user
	3860	*/
	3861	if ((error = priv_check(td, PRIV_ROOT)))
	3862	return (error);
	3863
	3864	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
	3865	return (error);
	3866
	3867	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
	3868	return (ESTALE);
	3869
	3870	if (p != NULL && !chroot_visible_mnt(mp, p))
	3871	return (ESTALE);
	3872
	3873	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
	3874	return (error);
	3875	mp = vp->v_mount;
	3876	sp = &mp->mnt_stat;
	3877	vput(vp);
	3878	if ((error = VFS_STATFS(mp, sp, p->p_ucred)) != 0)
	3879	return (error);
	3880
	3881	error = mount_path(p, mp, &fullpath, &freepath);
	3882	if (error)
	3883	return(error);
	3884	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	3885	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	3886	kfree(freepath, M_TEMP);
	3887
	3888	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	3889	if (priv_check(td, PRIV_ROOT)) {
	3890	bcopy(sp, &sb, sizeof(sb));
	3891	sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
	3892	sp = &sb;
	3893	}
	3894	return (copyout(sp, uap->buf, sizeof(*sp)));
	3895	}
	3896
	3897	/*
	3898	* fhstatvfs_args(struct fhandle u_fhp, struct statvfs buf)
	3899	*/
	3900	int
	3901	sys_fhstatvfs(struct fhstatvfs_args *uap)
	3902	{
	3903	struct thread *td = curthread;
	3904	struct proc *p = td->td_proc;
	3905	struct statvfs *sp;
	3906	struct mount *mp;
	3907	struct vnode *vp;
	3908	fhandle_t fh;
	3909	int error;
	3910
	3911	/*
	3912	* Must be super user
	3913	*/
	3914	if ((error = priv_check(td, PRIV_ROOT)))
	3915	return (error);
	3916
	3917	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
	3918	return (error);
	3919
	3920	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
	3921	return (ESTALE);
	3922
	3923	if (p != NULL && !chroot_visible_mnt(mp, p))
	3924	return (ESTALE);
	3925
	3926	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
	3927	return (error);
	3928	mp = vp->v_mount;
	3929	sp = &mp->mnt_vstat;
	3930	vput(vp);
	3931	if ((error = VFS_STATVFS(mp, sp, p->p_ucred)) != 0)
	3932	return (error);
	3933
	3934	sp->f_flag = 0;
	3935	if (mp->mnt_flag & MNT_RDONLY)
	3936	sp->f_flag \|= ST_RDONLY;
	3937	if (mp->mnt_flag & MNT_NOSUID)
	3938	sp->f_flag \|= ST_NOSUID;
	3939
	3940	return (copyout(sp, uap->buf, sizeof(*sp)));
	3941	}
	3942
	3943
	3944	/*
	3945	* Syscall to push extended attribute configuration information into the
	3946	* VFS. Accepts a path, which it converts to a mountpoint, as well as
	3947	* a command (int cmd), and attribute name and misc data. For now, the
	3948	* attribute name is left in userspace for consumption by the VFS_op.
	3949	* It will probably be changed to be copied into sysspace by the
	3950	* syscall in the future, once issues with various consumers of the
	3951	* attribute code have raised their hands.
	3952	*
	3953	* Currently this is used only by UFS Extended Attributes.
	3954	*/
	3955	int
	3956	sys_extattrctl(struct extattrctl_args *uap)
	3957	{
	3958	struct nlookupdata nd;
	3959	struct mount *mp;
	3960	struct vnode *vp;
	3961	int error;
	3962
	3963	vp = NULL;
	3964	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	3965	if (error == 0)
	3966	error = nlookup(&nd);
	3967	if (error == 0) {
	3968	mp = nd.nl_nch.mount;
	3969	error = VFS_EXTATTRCTL(mp, uap->cmd,
	3970	uap->attrname, uap->arg,
	3971	nd.nl_cred);
	3972	}
	3973	nlookup_done(&nd);
	3974	return (error);
	3975	}
	3976
	3977	/*
	3978	* Syscall to set a named extended attribute on a file or directory.
	3979	* Accepts attribute name, and a uio structure pointing to the data to set.
	3980	* The uio is consumed in the style of writev(). The real work happens
	3981	* in VOP_SETEXTATTR().
	3982	*/
	3983	int
	3984	sys_extattr_set_file(struct extattr_set_file_args *uap)
	3985	{
	3986	char attrname[EXTATTR_MAXNAMELEN];
	3987	struct iovec aiov[UIO_SMALLIOV];
	3988	struct iovec *needfree;
	3989	struct nlookupdata nd;
	3990	struct iovec *iov;
	3991	struct vnode *vp;
	3992	struct uio auio;
	3993	u_int iovlen;
	3994	u_int cnt;
	3995	int error;
	3996	int i;
	3997
	3998	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	3999	if (error)
	4000	return (error);
	4001
	4002	vp = NULL;
	4003	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	4004	if (error == 0)
	4005	error = nlookup(&nd);
	4006	if (error == 0)
	4007	error = ncp_writechk(&nd.nl_nch);
	4008	if (error == 0)
	4009	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	4010	if (error) {
	4011	nlookup_done(&nd);
	4012	return (error);
	4013	}
	4014
	4015	needfree = NULL;
	4016	iovlen = uap->iovcnt * sizeof(struct iovec);
	4017	if (uap->iovcnt > UIO_SMALLIOV) {
	4018	if (uap->iovcnt > UIO_MAXIOV) {
	4019	error = EINVAL;
	4020	goto done;
	4021	}
	4022	MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
	4023	needfree = iov;
	4024	} else {
	4025	iov = aiov;
	4026	}
	4027	auio.uio_iov = iov;
	4028	auio.uio_iovcnt = uap->iovcnt;
	4029	auio.uio_rw = UIO_WRITE;
	4030	auio.uio_segflg = UIO_USERSPACE;
	4031	auio.uio_td = nd.nl_td;
	4032	auio.uio_offset = 0;
	4033	if ((error = copyin(uap->iovp, iov, iovlen)))
	4034	goto done;
	4035	auio.uio_resid = 0;
	4036	for (i = 0; i < uap->iovcnt; i++) {
	4037	if (iov->iov_len > INT_MAX - auio.uio_resid) {
	4038	error = EINVAL;
	4039	goto done;
	4040	}
	4041	auio.uio_resid += iov->iov_len;
	4042	iov++;
	4043	}
	4044	cnt = auio.uio_resid;
	4045	error = VOP_SETEXTATTR(vp, attrname, &auio, nd.nl_cred);
	4046	cnt -= auio.uio_resid;
	4047	uap->sysmsg_result = cnt;
	4048	done:
	4049	vput(vp);
	4050	nlookup_done(&nd);
	4051	if (needfree)
	4052	FREE(needfree, M_IOV);
	4053	return (error);
	4054	}
	4055
	4056	/*
	4057	* Syscall to get a named extended attribute on a file or directory.
	4058	* Accepts attribute name, and a uio structure pointing to a buffer for the
	4059	* data. The uio is consumed in the style of readv(). The real work
	4060	* happens in VOP_GETEXTATTR();
	4061	*/
	4062	int
	4063	sys_extattr_get_file(struct extattr_get_file_args *uap)
	4064	{
	4065	char attrname[EXTATTR_MAXNAMELEN];
	4066	struct iovec aiov[UIO_SMALLIOV];
	4067	struct iovec *needfree;
	4068	struct nlookupdata nd;
	4069	struct iovec *iov;
	4070	struct vnode *vp;
	4071	struct uio auio;
	4072	u_int iovlen;
	4073	u_int cnt;
	4074	int error;
	4075	int i;
	4076
	4077	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	4078	if (error)
	4079	return (error);
	4080
	4081	vp = NULL;
	4082	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	4083	if (error == 0)
	4084	error = nlookup(&nd);
	4085	if (error == 0)
	4086	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	4087	if (error) {
	4088	nlookup_done(&nd);
	4089	return (error);
	4090	}
	4091
	4092	iovlen = uap->iovcnt * sizeof (struct iovec);
	4093	needfree = NULL;
	4094	if (uap->iovcnt > UIO_SMALLIOV) {
	4095	if (uap->iovcnt > UIO_MAXIOV) {
	4096	error = EINVAL;
	4097	goto done;
	4098	}
	4099	MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
	4100	needfree = iov;
	4101	} else {
	4102	iov = aiov;
	4103	}
	4104	auio.uio_iov = iov;
	4105	auio.uio_iovcnt = uap->iovcnt;
	4106	auio.uio_rw = UIO_READ;
	4107	auio.uio_segflg = UIO_USERSPACE;
	4108	auio.uio_td = nd.nl_td;
	4109	auio.uio_offset = 0;
	4110	if ((error = copyin(uap->iovp, iov, iovlen)))
	4111	goto done;
	4112	auio.uio_resid = 0;
	4113	for (i = 0; i < uap->iovcnt; i++) {
	4114	if (iov->iov_len > INT_MAX - auio.uio_resid) {
	4115	error = EINVAL;
	4116	goto done;
	4117	}
	4118	auio.uio_resid += iov->iov_len;
	4119	iov++;
	4120	}
	4121	cnt = auio.uio_resid;
	4122	error = VOP_GETEXTATTR(vp, attrname, &auio, nd.nl_cred);
	4123	cnt -= auio.uio_resid;
	4124	uap->sysmsg_result = cnt;
	4125	done:
	4126	vput(vp);
	4127	nlookup_done(&nd);
	4128	if (needfree)
	4129	FREE(needfree, M_IOV);
	4130	return(error);
	4131	}
	4132
	4133	/*
	4134	* Syscall to delete a named extended attribute from a file or directory.
	4135	* Accepts attribute name. The real work happens in VOP_SETEXTATTR().
	4136	*/
	4137	int
	4138	sys_extattr_delete_file(struct extattr_delete_file_args *uap)
	4139	{
	4140	char attrname[EXTATTR_MAXNAMELEN];
	4141	struct nlookupdata nd;
	4142	struct vnode *vp;
	4143	int error;
	4144
	4145	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	4146	if (error)
	4147	return(error);
	4148
	4149	vp = NULL;
	4150	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	4151	if (error == 0)
	4152	error = nlookup(&nd);
	4153	if (error == 0)
	4154	error = ncp_writechk(&nd.nl_nch);
	4155	if (error == 0)
	4156	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	4157	if (error) {
	4158	nlookup_done(&nd);
	4159	return (error);
	4160	}
	4161
	4162	error = VOP_SETEXTATTR(vp, attrname, NULL, nd.nl_cred);
	4163	vput(vp);
	4164	nlookup_done(&nd);
	4165	return(error);
	4166	}
	4167
	4168	/*
	4169	* Determine if the mount is visible to the process.
	4170	*/
	4171	static int
	4172	chroot_visible_mnt(struct mount mp, struct proc p)
	4173	{
	4174	struct nchandle nch;
	4175
	4176	/*
	4177	* Traverse from the mount point upwards. If we hit the process
	4178	* root then the mount point is visible to the process.
	4179	*/
	4180	nch = mp->mnt_ncmountpt;
	4181	while (nch.ncp) {
	4182	if (nch.mount == p->p_fd->fd_nrdir.mount &&
	4183	nch.ncp == p->p_fd->fd_nrdir.ncp) {
	4184	return(1);
	4185	}
	4186	if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) {
	4187	nch = nch.mount->mnt_ncmounton;
	4188	} else {
	4189	nch.ncp = nch.ncp->nc_parent;
	4190	}
	4191	}
	4192
	4193	/*
	4194	* If the mount point is not visible to the process, but the
	4195	* process root is in a subdirectory of the mount, return
	4196	* TRUE anyway.
	4197	*/
	4198	if (p->p_fd->fd_nrdir.mount == mp)
	4199	return(1);
	4200
	4201	return(0);
	4202	}
	4203