gitweb.dragonflybsd.org Git - games.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 1989, 1993
	3	* The Regents of the University of California. All rights reserved.
	4	* (c) UNIX System Laboratories, Inc.
	5	* All or some portions of this file are derived from material licensed
	6	* to the University of California by American Telephone and Telegraph
	7	* Co. or Unix System Laboratories, Inc. and are reproduced herein with
	8	* the permission of UNIX System Laboratories, Inc.
	9	*
	10	* Redistribution and use in source and binary forms, with or without
	11	* modification, are permitted provided that the following conditions
	12	* are met:
	13	* 1. Redistributions of source code must retain the above copyright
	14	* notice, this list of conditions and the following disclaimer.
	15	* 2. Redistributions in binary form must reproduce the above copyright
	16	* notice, this list of conditions and the following disclaimer in the
	17	* documentation and/or other materials provided with the distribution.
	18	* 3. All advertising materials mentioning features or use of this software
	19	* must display the following acknowledgement:
	20	* This product includes software developed by the University of
	21	* California, Berkeley and its contributors.
	22	* 4. Neither the name of the University nor the names of its contributors
	23	* may be used to endorse or promote products derived from this software
	24	* without specific prior written permission.
	25	*
	26	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	27	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	28	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	29	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	30	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	31	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	32	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	33	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	34	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	35	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	36	* SUCH DAMAGE.
	37	*
	38	* @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
	39	* $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
	40	* $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.127 2008/05/18 05:54:25 dillon Exp $
	41	*/
	42
	43	#include <sys/param.h>
	44	#include <sys/systm.h>
	45	#include <sys/buf.h>
	46	#include <sys/conf.h>
	47	#include <sys/sysent.h>
	48	#include <sys/malloc.h>
	49	#include <sys/mount.h>
	50	#include <sys/mountctl.h>
	51	#include <sys/sysproto.h>
	52	#include <sys/filedesc.h>
	53	#include <sys/kernel.h>
	54	#include <sys/fcntl.h>
	55	#include <sys/file.h>
	56	#include <sys/linker.h>
	57	#include <sys/stat.h>
	58	#include <sys/unistd.h>
	59	#include <sys/vnode.h>
	60	#include <sys/proc.h>
	61	#include <sys/namei.h>
	62	#include <sys/nlookup.h>
	63	#include <sys/dirent.h>
	64	#include <sys/extattr.h>
	65	#include <sys/spinlock.h>
	66	#include <sys/kern_syscall.h>
	67	#include <sys/objcache.h>
	68	#include <sys/sysctl.h>
	69
	70	#include <sys/buf2.h>
	71	#include <sys/file2.h>
	72	#include <sys/spinlock2.h>
	73
	74	#include <vm/vm.h>
	75	#include <vm/vm_object.h>
	76	#include <vm/vm_page.h>
	77
	78	#include <machine/limits.h>
	79	#include <machine/stdarg.h>
	80
	81	#include <vfs/union/union.h>
	82
	83	static void mount_warning(struct mount mp, const char ctl, ...);
	84	static int mount_path(struct proc p, struct mount mp, char rb, char fb);
	85	static int checkvp_chdir (struct vnode vn, struct thread td);
	86	static void checkdirs (struct nchandle old_nch, struct nchandle new_nch);
	87	static int chroot_refuse_vdir_fds (struct filedesc *fdp);
	88	static int chroot_visible_mnt(struct mount mp, struct proc p);
	89	static int getutimes (const struct timeval , struct timespec );
	90	static int setfown (struct vnode *, uid_t, gid_t);
	91	static int setfmode (struct vnode *, int);
	92	static int setfflags (struct vnode *, int);
	93	static int setutimes (struct vnode , const struct timespec , int);
	94	static int usermount = 0; /* if 1, non-root can mount fs. */
	95
	96	int (union_dircheckp) (struct thread , struct vnode *, struct file );
	97
	98	SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
	99
	100	/*
	101	* Virtual File System System Calls
	102	*/
	103
	104	/*
	105	* Mount a file system.
	106	*/
	107	/*
	108	* mount_args(char *type, char *path, int flags, caddr_t data)
	109	*/
	110	/* ARGSUSED */
	111	int
	112	sys_mount(struct mount_args *uap)
	113	{
	114	struct thread *td = curthread;
	115	struct proc *p = td->td_proc;
	116	struct vnode *vp;
	117	struct nchandle nch;
	118	struct mount *mp;
	119	struct vfsconf *vfsp;
	120	int error, flag = 0, flag2 = 0;
	121	int hasmount;
	122	struct vattr va;
	123	struct nlookupdata nd;
	124	char fstypename[MFSNAMELEN];
	125	struct ucred *cred = p->p_ucred;
	126
	127	KKASSERT(p);
	128	if (cred->cr_prison != NULL)
	129	return (EPERM);
	130	if (usermount == 0 && (error = suser(td)))
	131	return (error);
	132	/*
	133	* Do not allow NFS export by non-root users.
	134	*/
	135	if (uap->flags & MNT_EXPORTED) {
	136	error = suser(td);
	137	if (error)
	138	return (error);
	139	}
	140	/*
	141	* Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	142	*/
	143	if (suser(td))
	144	uap->flags \|= MNT_NOSUID \| MNT_NODEV;
	145
	146	/*
	147	* Lookup the requested path and extract the nch and vnode.
	148	*/
	149	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	150	if (error == 0) {
	151	if ((error = nlookup(&nd)) == 0) {
	152	if (nd.nl_nch.ncp->nc_vp == NULL)
	153	error = ENOENT;
	154	}
	155	}
	156	if (error) {
	157	nlookup_done(&nd);
	158	return (error);
	159	}
	160
	161	/*
	162	* Extract the locked+refd ncp and cleanup the nd structure
	163	*/
	164	nch = nd.nl_nch;
	165	cache_zero(&nd.nl_nch);
	166	nlookup_done(&nd);
	167
	168	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && cache_findmount(&nch))
	169	hasmount = 1;
	170	else
	171	hasmount = 0;
	172
	173
	174	/*
	175	* now we have the locked ref'd nch and unreferenced vnode.
	176	*/
	177	vp = nch.ncp->nc_vp;
	178	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
	179	cache_put(&nch);
	180	return (error);
	181	}
	182	cache_unlock(&nch);
	183
	184	/*
	185	* Now we have an unlocked ref'd nch and a locked ref'd vp
	186	*/
	187	if (uap->flags & MNT_UPDATE) {
	188	if ((vp->v_flag & VROOT) == 0) {
	189	cache_drop(&nch);
	190	vput(vp);
	191	return (EINVAL);
	192	}
	193	mp = vp->v_mount;
	194	flag = mp->mnt_flag;
	195	flag2 = mp->mnt_kern_flag;
	196	/*
	197	* We only allow the filesystem to be reloaded if it
	198	* is currently mounted read-only.
	199	*/
	200	if ((uap->flags & MNT_RELOAD) &&
	201	((mp->mnt_flag & MNT_RDONLY) == 0)) {
	202	cache_drop(&nch);
	203	vput(vp);
	204	return (EOPNOTSUPP); /* Needs translation */
	205	}
	206	/*
	207	* Only root, or the user that did the original mount is
	208	* permitted to update it.
	209	*/
	210	if (mp->mnt_stat.f_owner != cred->cr_uid &&
	211	(error = suser(td))) {
	212	cache_drop(&nch);
	213	vput(vp);
	214	return (error);
	215	}
	216	if (vfs_busy(mp, LK_NOWAIT)) {
	217	cache_drop(&nch);
	218	vput(vp);
	219	return (EBUSY);
	220	}
	221	if ((vp->v_flag & VMOUNT) != 0 \|\| hasmount) {
	222	cache_drop(&nch);
	223	vfs_unbusy(mp);
	224	vput(vp);
	225	return (EBUSY);
	226	}
	227	vp->v_flag \|= VMOUNT;
	228	mp->mnt_flag \|=
	229	uap->flags & (MNT_RELOAD \| MNT_FORCE \| MNT_UPDATE);
	230	vn_unlock(vp);
	231	goto update;
	232	}
	233	/*
	234	* If the user is not root, ensure that they own the directory
	235	* onto which we are attempting to mount.
	236	*/
	237	if ((error = VOP_GETATTR(vp, &va)) \|\|
	238	(va.va_uid != cred->cr_uid && (error = suser(td)))) {
	239	cache_drop(&nch);
	240	vput(vp);
	241	return (error);
	242	}
	243	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
	244	cache_drop(&nch);
	245	vput(vp);
	246	return (error);
	247	}
	248	if (vp->v_type != VDIR) {
	249	cache_drop(&nch);
	250	vput(vp);
	251	return (ENOTDIR);
	252	}
	253	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
	254	cache_drop(&nch);
	255	vput(vp);
	256	return (EPERM);
	257	}
	258	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
	259	cache_drop(&nch);
	260	vput(vp);
	261	return (error);
	262	}
	263	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
	264	if (!strcmp(vfsp->vfc_name, fstypename))
	265	break;
	266	}
	267	if (vfsp == NULL) {
	268	linker_file_t lf;
	269
	270	/* Only load modules for root (very important!) */
	271	if ((error = suser(td)) != 0) {
	272	cache_drop(&nch);
	273	vput(vp);
	274	return error;
	275	}
	276	error = linker_load_file(fstypename, &lf);
	277	if (error \|\| lf == NULL) {
	278	cache_drop(&nch);
	279	vput(vp);
	280	if (lf == NULL)
	281	error = ENODEV;
	282	return error;
	283	}
	284	lf->userrefs++;
	285	/* lookup again, see if the VFS was loaded */
	286	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
	287	if (!strcmp(vfsp->vfc_name, fstypename))
	288	break;
	289	}
	290	if (vfsp == NULL) {
	291	lf->userrefs--;
	292	linker_file_unload(lf);
	293	cache_drop(&nch);
	294	vput(vp);
	295	return (ENODEV);
	296	}
	297	}
	298	if ((vp->v_flag & VMOUNT) != 0 \|\| hasmount) {
	299	cache_drop(&nch);
	300	vput(vp);
	301	return (EBUSY);
	302	}
	303	vp->v_flag \|= VMOUNT;
	304
	305	/*
	306	* Allocate and initialize the filesystem.
	307	*/
	308	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO\|M_WAITOK);
	309	TAILQ_INIT(&mp->mnt_nvnodelist);
	310	TAILQ_INIT(&mp->mnt_reservedvnlist);
	311	TAILQ_INIT(&mp->mnt_jlist);
	312	mp->mnt_nvnodelistsize = 0;
	313	lockinit(&mp->mnt_lock, "vfslock", 0, 0);
	314	vfs_busy(mp, LK_NOWAIT);
	315	mp->mnt_op = vfsp->vfc_vfsops;
	316	mp->mnt_vfc = vfsp;
	317	vfsp->vfc_refcount++;
	318	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	319	mp->mnt_flag \|= vfsp->vfc_flags & MNT_VISFLAGMASK;
	320	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	321	mp->mnt_stat.f_owner = cred->cr_uid;
	322	mp->mnt_iosize_max = DFLTPHYS;
	323	vn_unlock(vp);
	324	update:
	325	/*
	326	* Set the mount level flags.
	327	*/
	328	if (uap->flags & MNT_RDONLY)
	329	mp->mnt_flag \|= MNT_RDONLY;
	330	else if (mp->mnt_flag & MNT_RDONLY)
	331	mp->mnt_kern_flag \|= MNTK_WANTRDWR;
	332	mp->mnt_flag &=~ (MNT_NOSUID \| MNT_NOEXEC \| MNT_NODEV \|
	333	MNT_SYNCHRONOUS \| MNT_UNION \| MNT_ASYNC \| MNT_NOATIME \|
	334	MNT_NOSYMFOLLOW \| MNT_IGNORE \|
	335	MNT_NOCLUSTERR \| MNT_NOCLUSTERW \| MNT_SUIDDIR);
	336	mp->mnt_flag \|= uap->flags & (MNT_NOSUID \| MNT_NOEXEC \|
	337	MNT_NODEV \| MNT_SYNCHRONOUS \| MNT_UNION \| MNT_ASYNC \| MNT_FORCE \|
	338	MNT_NOSYMFOLLOW \| MNT_IGNORE \|
	339	MNT_NOATIME \| MNT_NOCLUSTERR \| MNT_NOCLUSTERW \| MNT_SUIDDIR);
	340	/*
	341	* Mount the filesystem.
	342	* XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	343	* get.
	344	*/
	345	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	346	if (mp->mnt_flag & MNT_UPDATE) {
	347	if (mp->mnt_kern_flag & MNTK_WANTRDWR)
	348	mp->mnt_flag &= ~MNT_RDONLY;
	349	mp->mnt_flag &=~ (MNT_UPDATE \| MNT_RELOAD \| MNT_FORCE);
	350	mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
	351	if (error) {
	352	mp->mnt_flag = flag;
	353	mp->mnt_kern_flag = flag2;
	354	}
	355	vfs_unbusy(mp);
	356	vp->v_flag &= ~VMOUNT;
	357	vrele(vp);
	358	cache_drop(&nch);
	359	return (error);
	360	}
	361	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	362	/*
	363	* Put the new filesystem on the mount list after root. The mount
	364	* point gets its own mnt_ncmountpt (unless the VFS already set one
	365	* up) which represents the root of the mount. The lookup code
	366	* detects the mount point going forward and checks the root of
	367	* the mount going backwards.
	368	*
	369	* It is not necessary to invalidate or purge the vnode underneath
	370	* because elements under the mount will be given their own glue
	371	* namecache record.
	372	*/
	373	if (!error) {
	374	if (mp->mnt_ncmountpt.ncp == NULL) {
	375	/*
	376	* allocate, then unlock, but leave the ref intact
	377	*/
	378	cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
	379	cache_unlock(&mp->mnt_ncmountpt);
	380	}
	381	mp->mnt_ncmounton = nch; /* inherits ref */
	382	nch.ncp->nc_flag \|= NCF_ISMOUNTPT;
	383
	384	/* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */
	385	vp->v_flag &= ~VMOUNT;
	386	mountlist_insert(mp, MNTINS_LAST);
	387	checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
	388	vn_unlock(vp);
	389	error = vfs_allocate_syncvnode(mp);
	390	vfs_unbusy(mp);
	391	error = VFS_START(mp, 0);
	392	vrele(vp);
	393	} else {
	394	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	395	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	396	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	397	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	398	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
	399	vp->v_flag &= ~VMOUNT;
	400	mp->mnt_vfc->vfc_refcount--;
	401	vfs_unbusy(mp);
	402	kfree(mp, M_MOUNT);
	403	cache_drop(&nch);
	404	vput(vp);
	405	}
	406	return (error);
	407	}
	408
	409	/*
	410	* Scan all active processes to see if any of them have a current
	411	* or root directory onto which the new filesystem has just been
	412	* mounted. If so, replace them with the new mount point.
	413	*
	414	* The passed ncp is ref'd and locked (from the mount code) and
	415	* must be associated with the vnode representing the root of the
	416	* mount point.
	417	*/
	418	struct checkdirs_info {
	419	struct nchandle old_nch;
	420	struct nchandle new_nch;
	421	struct vnode *old_vp;
	422	struct vnode *new_vp;
	423	};
	424
	425	static int checkdirs_callback(struct proc p, void data);
	426
	427	static void
	428	checkdirs(struct nchandle old_nch, struct nchandle new_nch)
	429	{
	430	struct checkdirs_info info;
	431	struct vnode *olddp;
	432	struct vnode *newdp;
	433	struct mount *mp;
	434
	435	/*
	436	* If the old mount point's vnode has a usecount of 1, it is not
	437	* being held as a descriptor anywhere.
	438	*/
	439	olddp = old_nch->ncp->nc_vp;
	440	if (olddp == NULL \|\| olddp->v_sysref.refcnt == 1)
	441	return;
	442
	443	/*
	444	* Force the root vnode of the new mount point to be resolved
	445	* so we can update any matching processes.
	446	*/
	447	mp = new_nch->mount;
	448	if (VFS_ROOT(mp, &newdp))
	449	panic("mount: lost mount");
	450	cache_setunresolved(new_nch);
	451	cache_setvp(new_nch, newdp);
	452
	453	/*
	454	* Special handling of the root node
	455	*/
	456	if (rootvnode == olddp) {
	457	vref(newdp);
	458	vfs_cache_setroot(newdp, cache_hold(new_nch));
	459	}
	460
	461	/*
	462	* Pass newdp separately so the callback does not have to access
	463	* it via new_nch->ncp->nc_vp.
	464	*/
	465	info.old_nch = *old_nch;
	466	info.new_nch = *new_nch;
	467	info.new_vp = newdp;
	468	allproc_scan(checkdirs_callback, &info);
	469	vput(newdp);
	470	}
	471
	472	/*
	473	* NOTE: callback is not MP safe because the scanned process's filedesc
	474	* structure can be ripped out from under us, among other things.
	475	*/
	476	static int
	477	checkdirs_callback(struct proc p, void data)
	478	{
	479	struct checkdirs_info *info = data;
	480	struct filedesc *fdp;
	481	struct nchandle ncdrop1;
	482	struct nchandle ncdrop2;
	483	struct vnode *vprele1;
	484	struct vnode *vprele2;
	485
	486	if ((fdp = p->p_fd) != NULL) {
	487	cache_zero(&ncdrop1);
	488	cache_zero(&ncdrop2);
	489	vprele1 = NULL;
	490	vprele2 = NULL;
	491
	492	/*
	493	* MPUNSAFE - XXX fdp can be pulled out from under a
	494	* foreign process.
	495	*
	496	* A shared filedesc is ok, we don't have to copy it
	497	* because we are making this change globally.
	498	*/
	499	spin_lock_wr(&fdp->fd_spin);
	500	if (fdp->fd_ncdir.mount == info->old_nch.mount &&
	501	fdp->fd_ncdir.ncp == info->old_nch.ncp) {
	502	vprele1 = fdp->fd_cdir;
	503	vref(info->new_vp);
	504	fdp->fd_cdir = info->new_vp;
	505	ncdrop1 = fdp->fd_ncdir;
	506	cache_copy(&info->new_nch, &fdp->fd_ncdir);
	507	}
	508	if (fdp->fd_nrdir.mount == info->old_nch.mount &&
	509	fdp->fd_nrdir.ncp == info->old_nch.ncp) {
	510	vprele2 = fdp->fd_rdir;
	511	vref(info->new_vp);
	512	fdp->fd_rdir = info->new_vp;
	513	ncdrop2 = fdp->fd_nrdir;
	514	cache_copy(&info->new_nch, &fdp->fd_nrdir);
	515	}
	516	spin_unlock_wr(&fdp->fd_spin);
	517	if (ncdrop1.ncp)
	518	cache_drop(&ncdrop1);
	519	if (ncdrop2.ncp)
	520	cache_drop(&ncdrop2);
	521	if (vprele1)
	522	vrele(vprele1);
	523	if (vprele2)
	524	vrele(vprele2);
	525	}
	526	return(0);
	527	}
	528
	529	/*
	530	* Unmount a file system.
	531	*
	532	* Note: unmount takes a path to the vnode mounted on as argument,
	533	* not special file (as before).
	534	*/
	535	/*
	536	* unmount_args(char *path, int flags)
	537	*/
	538	/* ARGSUSED */
	539	int
	540	sys_unmount(struct unmount_args *uap)
	541	{
	542	struct thread *td = curthread;
	543	struct proc *p = td->td_proc;
	544	struct mount *mp = NULL;
	545	int error;
	546	struct nlookupdata nd;
	547
	548	KKASSERT(p);
	549	if (p->p_ucred->cr_prison != NULL)
	550	return (EPERM);
	551	if (usermount == 0 && (error = suser(td)))
	552	return (error);
	553
	554	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	555	if (error == 0)
	556	error = nlookup(&nd);
	557	if (error)
	558	goto out;
	559
	560	mp = nd.nl_nch.mount;
	561
	562	/*
	563	* Only root, or the user that did the original mount is
	564	* permitted to unmount this filesystem.
	565	*/
	566	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
	567	(error = suser(td)))
	568	goto out;
	569
	570	/*
	571	* Don't allow unmounting the root file system.
	572	*/
	573	if (mp->mnt_flag & MNT_ROOTFS) {
	574	error = EINVAL;
	575	goto out;
	576	}
	577
	578	/*
	579	* Must be the root of the filesystem
	580	*/
	581	if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
	582	error = EINVAL;
	583	goto out;
	584	}
	585
	586	out:
	587	nlookup_done(&nd);
	588	if (error)
	589	return (error);
	590	return (dounmount(mp, uap->flags));
	591	}
	592
	593	/*
	594	* Do the actual file system unmount.
	595	*/
	596	static int
	597	dounmount_interlock(struct mount *mp)
	598	{
	599	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
	600	return (EBUSY);
	601	mp->mnt_kern_flag \|= MNTK_UNMOUNT;
	602	return(0);
	603	}
	604
	605	int
	606	dounmount(struct mount *mp, int flags)
	607	{
	608	struct namecache *ncp;
	609	struct nchandle nch;
	610	struct vnode *vp;
	611	int error;
	612	int async_flag;
	613	int lflags;
	614	int freeok = 1;
	615
	616	/*
	617	* Exclusive access for unmounting purposes
	618	*/
	619	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
	620	return (error);
	621
	622	/*
	623	* Allow filesystems to detect that a forced unmount is in progress.
	624	*/
	625	if (flags & MNT_FORCE)
	626	mp->mnt_kern_flag \|= MNTK_UNMOUNTF;
	627	lflags = LK_EXCLUSIVE \| ((flags & MNT_FORCE) ? 0 : LK_NOWAIT);
	628	error = lockmgr(&mp->mnt_lock, lflags);
	629	if (error) {
	630	mp->mnt_kern_flag &= ~(MNTK_UNMOUNT \| MNTK_UNMOUNTF);
	631	if (mp->mnt_kern_flag & MNTK_MWAIT)
	632	wakeup(mp);
	633	return (error);
	634	}
	635
	636	if (mp->mnt_flag & MNT_EXPUBLIC)
	637	vfs_setpublicfs(NULL, NULL, NULL);
	638
	639	vfs_msync(mp, MNT_WAIT);
	640	async_flag = mp->mnt_flag & MNT_ASYNC;
	641	mp->mnt_flag &=~ MNT_ASYNC;
	642
	643	/*
	644	* If this filesystem isn't aliasing other filesystems,
	645	* try to invalidate any remaining namecache entries and
	646	* check the count afterwords.
	647	*/
	648	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
	649	cache_lock(&mp->mnt_ncmountpt);
	650	cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY\|CINV_CHILDREN);
	651	cache_unlock(&mp->mnt_ncmountpt);
	652
	653	if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
	654	(ncp->nc_refs != 1 \|\| TAILQ_FIRST(&ncp->nc_list))) {
	655
	656	if ((flags & MNT_FORCE) == 0) {
	657	error = EBUSY;
	658	mount_warning(mp, "Cannot unmount: "
	659	"%d namecache "
	660	"references still "
	661	"present",
	662	ncp->nc_refs - 1);
	663	} else {
	664	mount_warning(mp, "Forced unmount: "
	665	"%d namecache "
	666	"references still "
	667	"present",
	668	ncp->nc_refs - 1);
	669	freeok = 0;
	670	}
	671	}
	672	}
	673
	674	/*
	675	* nchandle records ref the mount structure. Expect a count of 1
	676	* (our mount->mnt_ncmountpt).
	677	*/
	678	if (mp->mnt_refs != 1) {
	679	if ((flags & MNT_FORCE) == 0) {
	680	mount_warning(mp, "Cannot unmount: "
	681	"%d process references still "
	682	"present", mp->mnt_refs);
	683	error = EBUSY;
	684	} else {
	685	mount_warning(mp, "Forced unmount: "
	686	"%d process references still "
	687	"present", mp->mnt_refs);
	688	freeok = 0;
	689	}
	690	}
	691
	692	/*
	693	* Decomission our special mnt_syncer vnode. This also stops
	694	* the vnlru code. If we are unable to unmount we recommission
	695	* the vnode.
	696	*/
	697	if (error == 0) {
	698	if ((vp = mp->mnt_syncer) != NULL) {
	699	mp->mnt_syncer = NULL;
	700	vrele(vp);
	701	}
	702	if (((mp->mnt_flag & MNT_RDONLY) \|\|
	703	(error = VFS_SYNC(mp, MNT_WAIT)) == 0) \|\|
	704	(flags & MNT_FORCE)) {
	705	error = VFS_UNMOUNT(mp, flags);
	706	}
	707	}
	708	if (error) {
	709	if (mp->mnt_syncer == NULL)
	710	vfs_allocate_syncvnode(mp);
	711	mp->mnt_kern_flag &= ~(MNTK_UNMOUNT \| MNTK_UNMOUNTF);
	712	mp->mnt_flag \|= async_flag;
	713	lockmgr(&mp->mnt_lock, LK_RELEASE);
	714	if (mp->mnt_kern_flag & MNTK_MWAIT)
	715	wakeup(mp);
	716	return (error);
	717	}
	718	/*
	719	* Clean up any journals still associated with the mount after
	720	* filesystem activity has ceased.
	721	*/
	722	journal_remove_all_journals(mp,
	723	((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));
	724
	725	mountlist_remove(mp);
	726
	727	/*
	728	* Remove any installed vnode ops here so the individual VFSs don't
	729	* have to.
	730	*/
	731	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	732	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	733	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	734	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	735	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
	736
	737	if (mp->mnt_ncmountpt.ncp != NULL) {
	738	nch = mp->mnt_ncmountpt;
	739	cache_zero(&mp->mnt_ncmountpt);
	740	cache_clrmountpt(&nch);
	741	cache_drop(&nch);
	742	}
	743	if (mp->mnt_ncmounton.ncp != NULL) {
	744	nch = mp->mnt_ncmounton;
	745	cache_zero(&mp->mnt_ncmounton);
	746	cache_clrmountpt(&nch);
	747	cache_drop(&nch);
	748	}
	749
	750	mp->mnt_vfc->vfc_refcount--;
	751	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
	752	panic("unmount: dangling vnode");
	753	lockmgr(&mp->mnt_lock, LK_RELEASE);
	754	if (mp->mnt_kern_flag & MNTK_MWAIT)
	755	wakeup(mp);
	756	if (freeok)
	757	kfree(mp, M_MOUNT);
	758	return (0);
	759	}
	760
	761	static
	762	void
	763	mount_warning(struct mount mp, const char ctl, ...)
	764	{
	765	char *ptr;
	766	char *buf;
	767	__va_list va;
	768
	769	__va_start(va, ctl);
	770	if (cache_fullpath(NULL, &mp->mnt_ncmounton, &ptr, &buf) == 0) {
	771	kprintf("unmount(%s): ", ptr);
	772	kvprintf(ctl, va);
	773	kprintf("\n");
	774	kfree(buf, M_TEMP);
	775	} else {
	776	kprintf("unmount(%p", mp);
	777	if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
	778	kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
	779	kprintf("): ");
	780	kvprintf(ctl, va);
	781	kprintf("\n");
	782	}
	783	__va_end(va);
	784	}
	785
	786	/*
	787	* Shim cache_fullpath() to handle the case where a process is chrooted into
	788	* a subdirectory of a mount. In this case if the root mount matches the
	789	* process root directory's mount we have to specify the process's root
	790	* directory instead of the mount point, because the mount point might
	791	* be above the root directory.
	792	*/
	793	static
	794	int
	795	mount_path(struct proc p, struct mount mp, char rb, char fb)
	796	{
	797	struct nchandle *nch;
	798
	799	if (p && p->p_fd->fd_nrdir.mount == mp)
	800	nch = &p->p_fd->fd_nrdir;
	801	else
	802	nch = &mp->mnt_ncmountpt;
	803	return(cache_fullpath(p, nch, rb, fb));
	804	}
	805
	806	/*
	807	* Sync each mounted filesystem.
	808	*/
	809
	#ifdef DEBUG
	/* debug.syncprt: when non-zero, sys_sync() calls vfs_bufstats(). */
	static int syncprt = 0;
	SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
	#endif /* DEBUG */

	static int sync_callback(struct mount *mp, void *data);
	816
	817	/* ARGSUSED */
	818	int
	819	sys_sync(struct sync_args *uap)
	820	{
	821	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
	822	#ifdef DEBUG
	823	/*
	824	* print out buffer pool stat information on each sync() call.
	825	*/
	826	if (syncprt)
	827	vfs_bufstats();
	828	#endif /* DEBUG */
	829	return (0);
	830	}
	831
	832	static
	833	int
	834	sync_callback(struct mount mp, void data __unused)
	835	{
	836	int asyncflag;
	837
	838	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
	839	asyncflag = mp->mnt_flag & MNT_ASYNC;
	840	mp->mnt_flag &= ~MNT_ASYNC;
	841	vfs_msync(mp, MNT_NOWAIT);
	842	VFS_SYNC(mp, MNT_NOWAIT);
	843	mp->mnt_flag \|= asyncflag;
	844	}
	845	return(0);
	846	}
	847
	848	/* XXX PRISON: could be per prison flag */
	849	static int prison_quotas;	/* while zero, sys_quotactl() returns EPERM for jailed callers */
	850	#if 0	/* the sysctl that would expose prison_quotas is compiled out */
	851	SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
	852	#endif
	853
	854	/*
	855	* quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
	856	*
	857	* Change filesystem quotas.
	858	*/
	859	/* ARGSUSED */
	860	int
	861	sys_quotactl(struct quotactl_args *uap)
	862	{
	863	struct nlookupdata nd;
	864	struct thread *td;
	865	struct proc *p;
	866	struct mount *mp;
	867	int error;
	868
	869	td = curthread;
	870	p = td->td_proc;
	871	if (p->p_ucred->cr_prison && !prison_quotas)
	872	return (EPERM);
	873
	874	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	875	if (error == 0)
	876	error = nlookup(&nd);
	877	if (error == 0) {
	878	mp = nd.nl_nch.mount;
	879	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
	880	uap->arg, nd.nl_cred);
	881	}
	882	nlookup_done(&nd);
	883	return (error);
	884	}
	885
	886	/*
	887	* mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
	888	* void *buf, int buflen)
	889	*
	890	* This function operates on a mount point and executes the specified
	891	* operation using the specified control data, and possibly returns data.
	892	*
	893	* The actual number of bytes stored in the result buffer is returned, 0
	894	* if none, otherwise an error is returned.
	895	*/
	896	/* ARGSUSED */
	897	int
	898	sys_mountctl(struct mountctl_args *uap)
	899	{
	900	struct thread *td = curthread;
	901	struct proc *p = td->td_proc;
	902	struct file *fp;
	903	void *ctl = NULL;
	904	void *buf = NULL;
	905	char *path = NULL;
	906	int error;
	907
	908	/*
	909	* Sanity and permissions checks. We must be root.
	910	*/
	911	KKASSERT(p);
	912	if (p->p_ucred->cr_prison != NULL)
	913	return (EPERM);
	914	if ((error = suser(td)) != 0)
	915	return (error);
	916
	917	/*
	918	* Argument length checks
	919	*/
	920	if (uap->ctllen < 0 \|\| uap->ctllen > 1024)
	921	return (EINVAL);
	922	if (uap->buflen < 0 \|\| uap->buflen > 16 * 1024)
	923	return (EINVAL);
	924	if (uap->path == NULL)
	925	return (EINVAL);
	926
	927	/*
	928	* Allocate the necessary buffers and copyin data
	929	*/
	930	path = objcache_get(namei_oc, M_WAITOK);
	931	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	932	if (error)
	933	goto done;
	934
	935	if (uap->ctllen) {
	936	ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK\|M_ZERO);
	937	error = copyin(uap->ctl, ctl, uap->ctllen);
	938	if (error)
	939	goto done;
	940	}
	941	if (uap->buflen)
	942	buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK\|M_ZERO);
	943
	944	/*
	945	* Validate the descriptor
	946	*/
	947	if (uap->fd >= 0) {
	948	fp = holdfp(p->p_fd, uap->fd, -1);
	949	if (fp == NULL) {
	950	error = EBADF;
	951	goto done;
	952	}
	953	} else {
	954	fp = NULL;
	955	}
	956
	957	/*
	958	* Execute the internal kernel function and clean up.
	959	*/
	960	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result);
	961	if (fp)
	962	fdrop(fp);
	963	if (error == 0 && uap->sysmsg_result > 0)
	964	error = copyout(buf, uap->buf, uap->sysmsg_result);
	965	done:
	966	if (path)
	967	objcache_put(namei_oc, path);
	968	if (ctl)
	969	kfree(ctl, M_TEMP);
	970	if (buf)
	971	kfree(buf, M_TEMP);
	972	return (error);
	973	}
	974
	975	/*
	976	* Execute a mount control operation by resolving the path to a mount point
	977	* and calling vop_mountctl().
	978	*/
	979	int
	980	kern_mountctl(const char path, int op, struct file fp,
	981	const void *ctl, int ctllen,
	982	void buf, int buflen, int res)
	983	{
	984	struct vnode *vp;
	985	struct mount *mp;
	986	struct nlookupdata nd;
	987	int error;
	988
	989	*res = 0;
	990	vp = NULL;
	991	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	992	if (error == 0)
	993	error = nlookup(&nd);
	994	if (error == 0)
	995	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	996	nlookup_done(&nd);
	997	if (error)
	998	return (error);
	999
	1000	mp = vp->v_mount;
	1001
	1002	/*
	1003	* Must be the root of the filesystem
	1004	*/
	1005	if ((vp->v_flag & VROOT) == 0) {
	1006	vput(vp);
	1007	return (EINVAL);
	1008	}
	1009	error = vop_mountctl(mp->mnt_vn_use_ops, op, fp, ctl, ctllen,
	1010	buf, buflen, res);
	1011	vput(vp);
	1012	return (error);
	1013	}
	1014
	1015	int
	1016	kern_statfs(struct nlookupdata nd, struct statfs buf)
	1017	{
	1018	struct thread *td = curthread;
	1019	struct proc *p = td->td_proc;
	1020	struct mount *mp;
	1021	struct statfs *sp;
	1022	char fullpath, freepath;
	1023	int error;
	1024
	1025	if ((error = nlookup(nd)) != 0)
	1026	return (error);
	1027	mp = nd->nl_nch.mount;
	1028	sp = &mp->mnt_stat;
	1029	if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0)
	1030	return (error);
	1031
	1032	error = mount_path(p, mp, &fullpath, &freepath);
	1033	if (error)
	1034	return(error);
	1035	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	1036	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	1037	kfree(freepath, M_TEMP);
	1038
	1039	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	1040	bcopy(sp, buf, sizeof(*buf));
	1041	/* Only root should have access to the fsid's. */
	1042	if (suser(td))
	1043	buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	1044	return (0);
	1045	}
	1046
	1047	/*
	1048	* statfs_args(char *path, struct statfs *buf)
	1049	*
	1050	* Get filesystem statistics.
	1051	*/
	1052	int
	1053	sys_statfs(struct statfs_args *uap)
	1054	{
	1055	struct nlookupdata nd;
	1056	struct statfs buf;
	1057	int error;
	1058
	1059	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1060	if (error == 0)
	1061	error = kern_statfs(&nd, &buf);
	1062	nlookup_done(&nd);
	1063	if (error == 0)
	1064	error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	1065	return (error);
	1066	}
	1067
	1068	int
	1069	kern_fstatfs(int fd, struct statfs *buf)
	1070	{
	1071	struct thread *td = curthread;
	1072	struct proc *p = td->td_proc;
	1073	struct file *fp;
	1074	struct mount *mp;
	1075	struct statfs *sp;
	1076	char fullpath, freepath;
	1077	int error;
	1078
	1079	KKASSERT(p);
	1080	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	1081	return (error);
	1082	mp = ((struct vnode *)fp->f_data)->v_mount;
	1083	if (mp == NULL) {
	1084	error = EBADF;
	1085	goto done;
	1086	}
	1087	if (fp->f_cred == NULL) {
	1088	error = EINVAL;
	1089	goto done;
	1090	}
	1091	sp = &mp->mnt_stat;
	1092	if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0)
	1093	goto done;
	1094
	1095	if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
	1096	goto done;
	1097	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	1098	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	1099	kfree(freepath, M_TEMP);
	1100
	1101	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	1102	bcopy(sp, buf, sizeof(*buf));
	1103
	1104	/* Only root should have access to the fsid's. */
	1105	if (suser(td))
	1106	buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	1107	error = 0;
	1108	done:
	1109	fdrop(fp);
	1110	return (error);
	1111	}
	1112
	1113	/*
	1114	* fstatfs_args(int fd, struct statfs *buf)
	1115	*
	1116	* Get filesystem statistics.
	1117	*/
	1118	int
	1119	sys_fstatfs(struct fstatfs_args *uap)
	1120	{
	1121	struct statfs buf;
	1122	int error;
	1123
	1124	error = kern_fstatfs(uap->fd, &buf);
	1125
	1126	if (error == 0)
	1127	error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	1128	return (error);
	1129	}
	1130
	1131	/*
	1132	* getfsstat_args(struct statfs *buf, long bufsize, int flags)
	1133	*
	1134	* Get statistics on all filesystems.
	1135	*/
	1136
	1137	struct getfsstat_info {
	1138	struct statfs *sfsp;
	1139	long count;
	1140	long maxcount;
	1141	int error;
	1142	int flags;
	1143	struct proc *p;
	1144	};
	1145
	1146	static int getfsstat_callback(struct mount , void );
	1147
	1148	/* ARGSUSED */
	1149	int
	1150	sys_getfsstat(struct getfsstat_args *uap)
	1151	{
	1152	struct thread *td = curthread;
	1153	struct proc *p = td->td_proc;
	1154	struct getfsstat_info info;
	1155
	1156	bzero(&info, sizeof(info));
	1157
	1158	info.maxcount = uap->bufsize / sizeof(struct statfs);
	1159	info.sfsp = uap->buf;
	1160	info.count = 0;
	1161	info.flags = uap->flags;
	1162	info.p = p;
	1163
	1164	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	1165	if (info.sfsp && info.count > info.maxcount)
	1166	uap->sysmsg_result = info.maxcount;
	1167	else
	1168	uap->sysmsg_result = info.count;
	1169	return (info.error);
	1170	}
	1171
	1172	static int
	1173	getfsstat_callback(struct mount mp, void data)
	1174	{
	1175	struct getfsstat_info *info = data;
	1176	struct statfs *sp;
	1177	char *freepath;
	1178	char *fullpath;
	1179	int error;
	1180
	1181	if (info->sfsp && info->count < info->maxcount) {
	1182	if (info->p && !chroot_visible_mnt(mp, info->p))
	1183	return(0);
	1184	sp = &mp->mnt_stat;
	1185
	1186	/*
	1187	* If MNT_NOWAIT or MNT_LAZY is specified, do not
	1188	* refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
	1189	* overrides MNT_WAIT.
	1190	*/
	1191	if (((info->flags & (MNT_LAZY\|MNT_NOWAIT)) == 0 \|\|
	1192	(info->flags & MNT_WAIT)) &&
	1193	(error = VFS_STATFS(mp, sp, info->p->p_ucred))) {
	1194	return(0);
	1195	}
	1196	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	1197
	1198	error = mount_path(info->p, mp, &fullpath, &freepath);
	1199	if (error) {
	1200	info->error = error;
	1201	return(-1);
	1202	}
	1203	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	1204	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	1205	kfree(freepath, M_TEMP);
	1206
	1207	error = copyout(sp, info->sfsp, sizeof(*sp));
	1208	if (error) {
	1209	info->error = error;
	1210	return (-1);
	1211	}
	1212	++info->sfsp;
	1213	}
	1214	info->count++;
	1215	return(0);
	1216	}
	1217
	1218	/*
	1219	* fchdir_args(int fd)
	1220	*
	1221	* Change current working directory to a given file descriptor.
	1222	*/
	1223	/* ARGSUSED */
	1224	int
	1225	sys_fchdir(struct fchdir_args *uap)
	1226	{
	1227	struct thread *td = curthread;
	1228	struct proc *p = td->td_proc;
	1229	struct filedesc *fdp = p->p_fd;
	1230	struct vnode vp, ovp;
	1231	struct mount *mp;
	1232	struct file *fp;
	1233	struct nchandle nch, onch, tnch;
	1234	int error;
	1235
	1236	if ((error = holdvnode(fdp, uap->fd, &fp)) != 0)
	1237	return (error);
	1238	vp = (struct vnode *)fp->f_data;
	1239	vref(vp);
	1240	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	1241	if (vp->v_type != VDIR \|\| fp->f_nchandle.ncp == NULL)
	1242	error = ENOTDIR;
	1243	else
	1244	error = VOP_ACCESS(vp, VEXEC, p->p_ucred);
	1245	if (error) {
	1246	vput(vp);
	1247	fdrop(fp);
	1248	return (error);
	1249	}
	1250	cache_copy(&fp->f_nchandle, &nch);
	1251
	1252	/*
	1253	* If the ncp has become a mount point, traverse through
	1254	* the mount point.
	1255	*/
	1256
	1257	while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	1258	(mp = cache_findmount(&nch)) != NULL
	1259	) {
	1260	error = nlookup_mp(mp, &tnch);
	1261	if (error == 0) {
	1262	cache_unlock(&tnch); /* leave ref intact */
	1263	vput(vp);
	1264	vp = tnch.ncp->nc_vp;
	1265	error = vget(vp, LK_SHARED);
	1266	KKASSERT(error == 0);
	1267	cache_drop(&nch);
	1268	nch = tnch;
	1269	}
	1270	}
	1271	if (error == 0) {
	1272	ovp = fdp->fd_cdir;
	1273	onch = fdp->fd_ncdir;
	1274	vn_unlock(vp); /* leave ref intact */
	1275	fdp->fd_cdir = vp;
	1276	fdp->fd_ncdir = nch;
	1277	cache_drop(&onch);
	1278	vrele(ovp);
	1279	} else {
	1280	cache_drop(&nch);
	1281	vput(vp);
	1282	}
	1283	fdrop(fp);
	1284	return (error);
	1285	}
	1286
	1287	int
	1288	kern_chdir(struct nlookupdata *nd)
	1289	{
	1290	struct thread *td = curthread;
	1291	struct proc *p = td->td_proc;
	1292	struct filedesc *fdp = p->p_fd;
	1293	struct vnode vp, ovp;
	1294	struct nchandle onch;
	1295	int error;
	1296
	1297	if ((error = nlookup(nd)) != 0)
	1298	return (error);
	1299	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
	1300	return (ENOENT);
	1301	if ((error = vget(vp, LK_SHARED)) != 0)
	1302	return (error);
	1303
	1304	error = checkvp_chdir(vp, td);
	1305	vn_unlock(vp);
	1306	if (error == 0) {
	1307	ovp = fdp->fd_cdir;
	1308	onch = fdp->fd_ncdir;
	1309	cache_unlock(&nd->nl_nch); /* leave reference intact */
	1310	fdp->fd_ncdir = nd->nl_nch;
	1311	fdp->fd_cdir = vp;
	1312	cache_drop(&onch);
	1313	vrele(ovp);
	1314	cache_zero(&nd->nl_nch);
	1315	} else {
	1316	vrele(vp);
	1317	}
	1318	return (error);
	1319	}
	1320
	1321	/*
	1322	* chdir_args(char *path)
	1323	*
	1324	* Change current working directory (``.'').
	1325	*/
	1326	int
	1327	sys_chdir(struct chdir_args *uap)
	1328	{
	1329	struct nlookupdata nd;
	1330	int error;
	1331
	1332	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1333	if (error == 0)
	1334	error = kern_chdir(&nd);
	1335	nlookup_done(&nd);
	1336	return (error);
	1337	}
	1338
	1339	/*
	1340	* Helper function for raised chroot(2) security function: Refuse if
	1341	* any filedescriptors are open directories.
	1342	*/
	1343	static int
	1344	chroot_refuse_vdir_fds(struct filedesc *fdp)
	1345	{
	1346	struct vnode *vp;
	1347	struct file *fp;
	1348	int error;
	1349	int fd;
	1350
	1351	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
	1352	if ((error = holdvnode(fdp, fd, &fp)) != 0)
	1353	continue;
	1354	vp = (struct vnode *)fp->f_data;
	1355	if (vp->v_type != VDIR) {
	1356	fdrop(fp);
	1357	continue;
	1358	}
	1359	fdrop(fp);
	1360	return(EPERM);
	1361	}
	1362	return (0);
	1363	}
	1364
	1365	/*
	1366	* This sysctl determines if we will allow a process to chroot(2) if it
	1367	* has a directory open:
	1368	* 0: disallowed for all processes.
	1369	* 1: allowed for processes that were not already chroot(2)'ed.
	1370	* 2: allowed for all processes.
	1371	*/
	1372
	1373	static int chroot_allow_open_directories = 1;
	1374
	1375	SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
	1376	&chroot_allow_open_directories, 0, "");
	1377
	1378	/*
	1379	* chroot to the specified namecache entry. We obtain the vp from the
	1380	* namecache data. The passed ncp must be locked and referenced and will
	1381	* remain locked and referenced on return.
	1382	*/
	1383	int
	1384	kern_chroot(struct nchandle *nch)
	1385	{
	1386	struct thread *td = curthread;
	1387	struct proc *p = td->td_proc;
	1388	struct filedesc *fdp = p->p_fd;
	1389	struct vnode *vp;
	1390	int error;
	1391
	1392	/*
	1393	* Only root can chroot
	1394	*/
	1395	if ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0)
	1396	return (error);
	1397
	1398	/*
	1399	* Disallow open directory descriptors (fchdir() breakouts).
	1400	*/
	1401	if (chroot_allow_open_directories == 0 \|\|
	1402	(chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
	1403	if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
	1404	return (error);
	1405	}
	1406	if ((vp = nch->ncp->nc_vp) == NULL)
	1407	return (ENOENT);
	1408
	1409	if ((error = vget(vp, LK_SHARED)) != 0)
	1410	return (error);
	1411
	1412	/*
	1413	* Check the validity of vp as a directory to change to and
	1414	* associate it with rdir/jdir.
	1415	*/
	1416	error = checkvp_chdir(vp, td);
	1417	vn_unlock(vp); /* leave reference intact */
	1418	if (error == 0) {
	1419	vrele(fdp->fd_rdir);
	1420	fdp->fd_rdir = vp; /* reference inherited by fd_rdir */
	1421	cache_drop(&fdp->fd_nrdir);
	1422	cache_copy(nch, &fdp->fd_nrdir);
	1423	if (fdp->fd_jdir == NULL) {
	1424	fdp->fd_jdir = vp;
	1425	vref(fdp->fd_jdir);
	1426	cache_copy(nch, &fdp->fd_njdir);
	1427	}
	1428	} else {
	1429	vrele(vp);
	1430	}
	1431	return (error);
	1432	}
	1433
	1434	/*
	1435	* chroot_args(char *path)
	1436	*
	1437	* Change notion of root (``/'') directory.
	1438	*/
	1439	/* ARGSUSED */
	1440	int
	1441	sys_chroot(struct chroot_args *uap)
	1442	{
	1443	struct thread *td = curthread;
	1444	struct nlookupdata nd;
	1445	int error;
	1446
	1447	KKASSERT(td->td_proc);
	1448	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1449	if (error) {
	1450	nlookup_done(&nd);
	1451	return(error);
	1452	}
	1453	error = nlookup(&nd);
	1454	if (error == 0)
	1455	error = kern_chroot(&nd.nl_nch);
	1456	nlookup_done(&nd);
	1457	return(error);
	1458	}
	1459
	1460	/*
	1461	* Common routine for chroot and chdir. Given a locked, referenced vnode,
	1462	* determine whether it is legal to chdir to the vnode. The vnode's state
	1463	* is not changed by this call.
	1464	*/
	1465	int
	1466	checkvp_chdir(struct vnode vp, struct thread td)
	1467	{
	1468	int error;
	1469
	1470	if (vp->v_type != VDIR)
	1471	error = ENOTDIR;
	1472	else
	1473	error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred);
	1474	return (error);
	1475	}
	1476
	1477	int
	1478	kern_open(struct nlookupdata nd, int oflags, int mode, int res)
	1479	{
	1480	struct thread *td = curthread;
	1481	struct proc *p = td->td_proc;
	1482	struct lwp *lp = td->td_lwp;
	1483	struct filedesc *fdp = p->p_fd;
	1484	int cmode, flags;
	1485	struct file *nfp;
	1486	struct file *fp;
	1487	struct vnode *vp;
	1488	int type, indx, error;
	1489	struct flock lf;
	1490
	1491	if ((oflags & O_ACCMODE) == O_ACCMODE)
	1492	return (EINVAL);
	1493	flags = FFLAGS(oflags);
	1494	error = falloc(p, &nfp, NULL);
	1495	if (error)
	1496	return (error);
	1497	fp = nfp;
	1498	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
	1499
	1500	/*
	1501	* XXX p_dupfd is a real mess. It allows a device to return a
	1502	* file descriptor to be duplicated rather then doing the open
	1503	* itself.
	1504	*/
	1505	lp->lwp_dupfd = -1;
	1506
	1507	/*
	1508	* Call vn_open() to do the lookup and assign the vnode to the
	1509	* file pointer. vn_open() does not change the ref count on fp
	1510	* and the vnode, on success, will be inherited by the file pointer
	1511	* and unlocked.
	1512	*/
	1513	nd->nl_flags \|= NLC_LOCKVP;
	1514	error = vn_open(nd, fp, flags, cmode);
	1515	nlookup_done(nd);
	1516	if (error) {
	1517	/*
	1518	* handle special fdopen() case. bleh. dupfdopen() is
	1519	* responsible for dropping the old contents of ofiles[indx]
	1520	* if it succeeds.
	1521	*
	1522	* Note that fsetfd() will add a ref to fp which represents
	1523	* the fd_files[] assignment. We must still drop our
	1524	* reference.
	1525	*/
	1526	if ((error == ENODEV \|\| error == ENXIO) && lp->lwp_dupfd >= 0) {
	1527	if (fdalloc(p, 0, &indx) == 0) {
	1528	error = dupfdopen(p, indx, lp->lwp_dupfd, flags, error);
	1529	if (error == 0) {
	1530	*res = indx;
	1531	fdrop(fp); /* our ref */
	1532	return (0);
	1533	}
	1534	fsetfd(p, NULL, indx);
	1535	}
	1536	}
	1537	fdrop(fp); /* our ref */
	1538	if (error == ERESTART)
	1539	error = EINTR;
	1540	return (error);
	1541	}
	1542
	1543	/*
	1544	* ref the vnode for ourselves so it can't be ripped out from under
	1545	* is. XXX need an ND flag to request that the vnode be returned
	1546	* anyway.
	1547	*
	1548	* Reserve a file descriptor but do not assign it until the open
	1549	* succeeds.
	1550	*/
	1551	vp = (struct vnode *)fp->f_data;
	1552	vref(vp);
	1553	if ((error = fdalloc(p, 0, &indx)) != 0) {
	1554	fdrop(fp);
	1555	vrele(vp);
	1556	return (error);
	1557	}
	1558
	1559	/*
	1560	* If no error occurs the vp will have been assigned to the file
	1561	* pointer.
	1562	*/
	1563	lp->lwp_dupfd = 0;
	1564
	1565	if (flags & (O_EXLOCK \| O_SHLOCK)) {
	1566	lf.l_whence = SEEK_SET;
	1567	lf.l_start = 0;
	1568	lf.l_len = 0;
	1569	if (flags & O_EXLOCK)
	1570	lf.l_type = F_WRLCK;
	1571	else
	1572	lf.l_type = F_RDLCK;
	1573	if (flags & FNONBLOCK)
	1574	type = 0;
	1575	else
	1576	type = F_WAIT;
	1577
	1578	if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
	1579	/*
	1580	* lock request failed. Clean up the reserved
	1581	* descriptor.
	1582	*/
	1583	vrele(vp);
	1584	fsetfd(p, NULL, indx);
	1585	fdrop(fp);
	1586	return (error);
	1587	}
	1588	fp->f_flag \|= FHASLOCK;
	1589	}
	1590	#if 0
	1591	/*
	1592	* Assert that all regular file vnodes were created with a object.
	1593	*/
	1594	KASSERT(vp->v_type != VREG \|\| vp->v_object != NULL,
	1595	("open: regular file has no backing object after vn_open"));
	1596	#endif
	1597
	1598	vrele(vp);
	1599
	1600	/*
	1601	* release our private reference, leaving the one associated with the
	1602	* descriptor table intact.
	1603	*/
	1604	fsetfd(p, fp, indx);
	1605	fdrop(fp);
	1606	*res = indx;
	1607	return (0);
	1608	}
	1609
	1610	/*
	1611	* open_args(char *path, int flags, int mode)
	1612	*
	1613	* Check permissions, allocate an open file structure,
	1614	* and call the device open routine if any.
	1615	*/
	1616	int
	1617	sys_open(struct open_args *uap)
	1618	{
	1619	struct nlookupdata nd;
	1620	int error;
	1621
	1622	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1623	if (error == 0) {
	1624	error = kern_open(&nd, uap->flags,
	1625	uap->mode, &uap->sysmsg_result);
	1626	}
	1627	nlookup_done(&nd);
	1628	return (error);
	1629	}
	1630
	1631	int
	1632	kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
	1633	{
	1634	struct thread *td = curthread;
	1635	struct proc *p = td->td_proc;
	1636	struct vnode *vp;
	1637	struct vattr vattr;
	1638	int error;
	1639	int whiteout = 0;
	1640
	1641	KKASSERT(p);
	1642
	1643	switch (mode & S_IFMT) {
	1644	case S_IFCHR:
	1645	case S_IFBLK:
	1646	error = suser(td);
	1647	break;
	1648	default:
	1649	error = suser_cred(p->p_ucred, PRISON_ROOT);
	1650	break;
	1651	}
	1652	if (error)
	1653	return (error);
	1654
	1655	bwillwrite();
	1656	nd->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	1657	if ((error = nlookup(nd)) != 0)
	1658	return (error);
	1659	if (nd->nl_nch.ncp->nc_vp)
	1660	return (EEXIST);
	1661	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	1662	return (error);
	1663
	1664	VATTR_NULL(&vattr);
	1665	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	1666	vattr.va_rmajor = rmajor;
	1667	vattr.va_rminor = rminor;
	1668	whiteout = 0;
	1669
	1670	switch (mode & S_IFMT) {
	1671	case S_IFMT: /* used by badsect to flag bad sectors */
	1672	vattr.va_type = VBAD;
	1673	break;
	1674	case S_IFCHR:
	1675	vattr.va_type = VCHR;
	1676	break;
	1677	case S_IFBLK:
	1678	vattr.va_type = VBLK;
	1679	break;
	1680	case S_IFWHT:
	1681	whiteout = 1;
	1682	break;
	1683	default:
	1684	error = EINVAL;
	1685	break;
	1686	}
	1687	if (error == 0) {
	1688	if (whiteout) {
	1689	error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
	1690	nd->nl_cred, NAMEI_CREATE);
	1691	} else {
	1692	vp = NULL;
	1693	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
	1694	&vp, nd->nl_cred, &vattr);
	1695	if (error == 0)
	1696	vput(vp);
	1697	}
	1698	}
	1699	return (error);
	1700	}
	1701
	1702	/*
	1703	* mknod_args(char *path, int mode, int dev)
	1704	*
	1705	* Create a special file.
	1706	*/
	1707	int
	1708	sys_mknod(struct mknod_args *uap)
	1709	{
	1710	struct nlookupdata nd;
	1711	int error;
	1712
	1713	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	1714	if (error == 0) {
	1715	error = kern_mknod(&nd, uap->mode,
	1716	umajor(uap->dev), uminor(uap->dev));
	1717	}
	1718	nlookup_done(&nd);
	1719	return (error);
	1720	}
	1721
	1722	int
	1723	kern_mkfifo(struct nlookupdata *nd, int mode)
	1724	{
	1725	struct thread *td = curthread;
	1726	struct proc *p = td->td_proc;
	1727	struct vattr vattr;
	1728	struct vnode *vp;
	1729	int error;
	1730
	1731	bwillwrite();
	1732
	1733	nd->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	1734	if ((error = nlookup(nd)) != 0)
	1735	return (error);
	1736	if (nd->nl_nch.ncp->nc_vp)
	1737	return (EEXIST);
	1738	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	1739	return (error);
	1740
	1741	VATTR_NULL(&vattr);
	1742	vattr.va_type = VFIFO;
	1743	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	1744	vp = NULL;
	1745	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
	1746	if (error == 0)
	1747	vput(vp);
	1748	return (error);
	1749	}
	1750
	1751	/*
	1752	* mkfifo_args(char *path, int mode)
	1753	*
	1754	* Create a named pipe.
	1755	*/
	1756	int
	1757	sys_mkfifo(struct mkfifo_args *uap)
	1758	{
	1759	struct nlookupdata nd;
	1760	int error;
	1761
	1762	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	1763	if (error == 0)
	1764	error = kern_mkfifo(&nd, uap->mode);
	1765	nlookup_done(&nd);
	1766	return (error);
	1767	}
	1768
	1769	static int hardlink_check_uid = 0;
	1770	SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
	1771	&hardlink_check_uid, 0,
	1772	"Unprivileged processes cannot create hard links to files owned by other "
	1773	"users");
	1774	static int hardlink_check_gid = 0;
	1775	SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
	1776	&hardlink_check_gid, 0,
	1777	"Unprivileged processes cannot create hard links to files owned by other "
	1778	"groups");
	1779
	1780	static int
	1781	can_hardlink(struct vnode vp, struct thread td, struct ucred *cred)
	1782	{
	1783	struct vattr va;
	1784	int error;
	1785
	1786	/*
	1787	* Shortcut if disabled
	1788	*/
	1789	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
	1790	return (0);
	1791
	1792	/*
	1793	* root cred can always hardlink
	1794	*/
	1795	if (suser_cred(cred, PRISON_ROOT) == 0)
	1796	return (0);
	1797
	1798	/*
	1799	* Otherwise only if the originating file is owned by the
	1800	* same user or group. Note that any group is allowed if
	1801	* the file is owned by the caller.
	1802	*/
	1803	error = VOP_GETATTR(vp, &va);
	1804	if (error != 0)
	1805	return (error);
	1806
	1807	if (hardlink_check_uid) {
	1808	if (cred->cr_uid != va.va_uid)
	1809	return (EPERM);
	1810	}
	1811
	1812	if (hardlink_check_gid) {
	1813	if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
	1814	return (EPERM);
	1815	}
	1816
	1817	return (0);
	1818	}
	1819
	1820	int
	1821	kern_link(struct nlookupdata nd, struct nlookupdata linknd)
	1822	{
	1823	struct thread *td = curthread;
	1824	struct vnode *vp;
	1825	int error;
	1826
	1827	/*
	1828	* Lookup the source and obtained a locked vnode.
	1829	*
	1830	* XXX relookup on vget failure / race ?
	1831	*/
	1832	bwillwrite();
	1833	if ((error = nlookup(nd)) != 0)
	1834	return (error);
	1835	vp = nd->nl_nch.ncp->nc_vp;
	1836	KKASSERT(vp != NULL);
	1837	if (vp->v_type == VDIR)
	1838	return (EPERM); /* POSIX */
	1839	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	1840	return (error);
	1841	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
	1842	return (error);
	1843
	1844	/*
	1845	* Unlock the source so we can lookup the target without deadlocking
	1846	* (XXX vp is locked already, possible other deadlock?). The target
	1847	* must not exist.
	1848	*/
	1849	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	1850	nd->nl_flags &= ~NLC_NCPISLOCKED;
	1851	cache_unlock(&nd->nl_nch);
	1852
	1853	linknd->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	1854	if ((error = nlookup(linknd)) != 0) {
	1855	vput(vp);
	1856	return (error);
	1857	}
	1858	if (linknd->nl_nch.ncp->nc_vp) {
	1859	vput(vp);
	1860	return (EEXIST);
	1861	}
	1862
	1863	/*
	1864	* Finally run the new API VOP.
	1865	*/
	1866	error = can_hardlink(vp, td, td->td_proc->p_ucred);
	1867	if (error == 0) {
	1868	error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
	1869	vp, linknd->nl_cred);
	1870	}
	1871	vput(vp);
	1872	return (error);
	1873	}
	1874
	1875	/*
	1876	* link_args(char path, char link)
	1877	*
	1878	* Make a hard file link.
	1879	*/
	1880	int
	1881	sys_link(struct link_args *uap)
	1882	{
	1883	struct nlookupdata nd, linknd;
	1884	int error;
	1885
	1886	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1887	if (error == 0) {
	1888	error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
	1889	if (error == 0)
	1890	error = kern_link(&nd, &linknd);
	1891	nlookup_done(&linknd);
	1892	}
	1893	nlookup_done(&nd);
	1894	return (error);
	1895	}
	1896
	1897	int
	1898	kern_symlink(struct nlookupdata nd, char path, int mode)
	1899	{
	1900	struct vattr vattr;
	1901	struct vnode *vp;
	1902	struct vnode *dvp;
	1903	int error;
	1904
	1905	bwillwrite();
	1906	nd->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	1907	if ((error = nlookup(nd)) != 0)
	1908	return (error);
	1909	if (nd->nl_nch.ncp->nc_vp)
	1910	return (EEXIST);
	1911	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	1912	return (error);
	1913	dvp = nd->nl_dvp;
	1914	VATTR_NULL(&vattr);
	1915	vattr.va_mode = mode;
	1916	error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
	1917	if (error == 0)
	1918	vput(vp);
	1919	return (error);
	1920	}
	1921
	1922	/*
	1923	* symlink(char path, char link)
	1924	*
	1925	* Make a symbolic link.
	1926	*/
	1927	int
	1928	sys_symlink(struct symlink_args *uap)
	1929	{
	1930	struct thread *td = curthread;
	1931	struct nlookupdata nd;
	1932	char *path;
	1933	int error;
	1934	int mode;
	1935
	1936	path = objcache_get(namei_oc, M_WAITOK);
	1937	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	1938	if (error == 0) {
	1939	error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
	1940	if (error == 0) {
	1941	mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
	1942	error = kern_symlink(&nd, path, mode);
	1943	}
	1944	nlookup_done(&nd);
	1945	}
	1946	objcache_put(namei_oc, path);
	1947	return (error);
	1948	}
	1949
	1950	/*
	1951	* undelete_args(char *path)
	1952	*
	1953	* Delete a whiteout from the filesystem.
	1954	*/
	1955	/* ARGSUSED */
	1956	int
	1957	sys_undelete(struct undelete_args *uap)
	1958	{
	1959	struct nlookupdata nd;
	1960	int error;
	1961
	1962	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	1963	bwillwrite();
	1964	nd.nl_flags \|= NLC_DELETE \| NLC_REFDVP;
	1965	if (error == 0)
	1966	error = nlookup(&nd);
	1967	if (error == 0)
	1968	error = ncp_writechk(&nd.nl_nch);
	1969	if (error == 0) {
	1970	error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred,
	1971	NAMEI_DELETE);
	1972	}
	1973	nlookup_done(&nd);
	1974	return (error);
	1975	}
	1976
	1977	int
	1978	kern_unlink(struct nlookupdata *nd)
	1979	{
	1980	int error;
	1981
	1982	bwillwrite();
	1983	nd->nl_flags \|= NLC_DELETE \| NLC_REFDVP;
	1984	if ((error = nlookup(nd)) != 0)
	1985	return (error);
	1986	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	1987	return (error);
	1988	error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	1989	return (error);
	1990	}
	1991
	1992	/*
	1993	* unlink_args(char *path)
	1994	*
	1995	* Delete a name from the filesystem.
	1996	*/
	1997	int
	1998	sys_unlink(struct unlink_args *uap)
	1999	{
	2000	struct nlookupdata nd;
	2001	int error;
	2002
	2003	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2004	if (error == 0)
	2005	error = kern_unlink(&nd);
	2006	nlookup_done(&nd);
	2007	return (error);
	2008	}
	2009
	2010	int
	2011	kern_lseek(int fd, off_t offset, int whence, off_t *res)
	2012	{
	2013	struct thread *td = curthread;
	2014	struct proc *p = td->td_proc;
	2015	struct file *fp;
	2016	struct vattr vattr;
	2017	int error;
	2018
	2019	fp = holdfp(p->p_fd, fd, -1);
	2020	if (fp == NULL)
	2021	return (EBADF);
	2022	if (fp->f_type != DTYPE_VNODE) {
	2023	error = ESPIPE;
	2024	goto done;
	2025	}
	2026
	2027	switch (whence) {
	2028	case L_INCR:
	2029	fp->f_offset += offset;
	2030	error = 0;
	2031	break;
	2032	case L_XTND:
	2033	error = VOP_GETATTR((struct vnode *)fp->f_data, &vattr);
	2034	if (error == 0)
	2035	fp->f_offset = offset + vattr.va_size;
	2036	break;
	2037	case L_SET:
	2038	fp->f_offset = offset;
	2039	error = 0;
	2040	break;
	2041	default:
	2042	error = EINVAL;
	2043	break;
	2044	}
	2045	*res = fp->f_offset;
	2046	done:
	2047	fdrop(fp);
	2048	return (error);
	2049	}
	2050
	2051	/*
	2052	* lseek_args(int fd, int pad, off_t offset, int whence)
	2053	*
	2054	* Reposition read/write file offset.
	2055	*/
	2056	int
	2057	sys_lseek(struct lseek_args *uap)
	2058	{
	2059	int error;
	2060
	2061	error = kern_lseek(uap->fd, uap->offset, uap->whence,
	2062	&uap->sysmsg_offset);
	2063
	2064	return (error);
	2065	}
	2066
	2067	int
	2068	kern_access(struct nlookupdata *nd, int aflags)
	2069	{
	2070	struct vnode *vp;
	2071	int error, flags;
	2072
	2073	if ((error = nlookup(nd)) != 0)
	2074	return (error);
	2075	retry:
	2076	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp);
	2077	if (error)
	2078	return (error);
	2079
	2080	/* Flags == 0 means only check for existence. */
	2081	if (aflags) {
	2082	flags = 0;
	2083	if (aflags & R_OK)
	2084	flags \|= VREAD;
	2085	if (aflags & W_OK)
	2086	flags \|= VWRITE;
	2087	if (aflags & X_OK)
	2088	flags \|= VEXEC;
	2089	if ((flags & VWRITE) == 0 \|\|
	2090	(error = vn_writechk(vp, &nd->nl_nch)) == 0)
	2091	error = VOP_ACCESS(vp, flags, nd->nl_cred);
	2092
	2093	/*
	2094	* If the file handle is stale we have to re-resolve the
	2095	* entry. This is a hack at the moment.
	2096	*/
	2097	if (error == ESTALE) {
	2098	vput(vp);
	2099	cache_setunresolved(&nd->nl_nch);
	2100	error = cache_resolve(&nd->nl_nch, nd->nl_cred);
	2101	if (error == 0) {
	2102	vp = NULL;
	2103	goto retry;
	2104	}
	2105	return(error);
	2106	}
	2107	}
	2108	vput(vp);
	2109	return (error);
	2110	}
	2111
	2112	/*
	2113	* access_args(char *path, int flags)
	2114	*
	2115	* Check access permissions.
	2116	*/
	2117	int
	2118	sys_access(struct access_args *uap)
	2119	{
	2120	struct nlookupdata nd;
	2121	int error;
	2122
	2123	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2124	if (error == 0)
	2125	error = kern_access(&nd, uap->flags);
	2126	nlookup_done(&nd);
	2127	return (error);
	2128	}
	2129
	2130	int
	2131	kern_stat(struct nlookupdata nd, struct stat st)
	2132	{
	2133	int error;
	2134	struct vnode *vp;
	2135	thread_t td;
	2136
	2137	if ((error = nlookup(nd)) != 0)
	2138	return (error);
	2139	again:
	2140	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
	2141	return (ENOENT);
	2142
	2143	td = curthread;
	2144	if ((error = vget(vp, LK_SHARED)) != 0)
	2145	return (error);
	2146	error = vn_stat(vp, st, nd->nl_cred);
	2147
	2148	/*
	2149	* If the file handle is stale we have to re-resolve the entry. This
	2150	* is a hack at the moment.
	2151	*/
	2152	if (error == ESTALE) {
	2153	vput(vp);
	2154	cache_setunresolved(&nd->nl_nch);
	2155	error = cache_resolve(&nd->nl_nch, nd->nl_cred);
	2156	if (error == 0)
	2157	goto again;
	2158	} else {
	2159	vput(vp);
	2160	}
	2161	return (error);
	2162	}
	2163
	2164	/*
	2165	* stat_args(char path, struct stat ub)
	2166	*
	2167	* Get file status; this version follows links.
	2168	*/
	2169	int
	2170	sys_stat(struct stat_args *uap)
	2171	{
	2172	struct nlookupdata nd;
	2173	struct stat st;
	2174	int error;
	2175
	2176	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2177	if (error == 0) {
	2178	error = kern_stat(&nd, &st);
	2179	if (error == 0)
	2180	error = copyout(&st, uap->ub, sizeof(*uap->ub));
	2181	}
	2182	nlookup_done(&nd);
	2183	return (error);
	2184	}
	2185
	2186	/*
	2187	* lstat_args(char path, struct stat ub)
	2188	*
	2189	* Get file status; this version does not follow links.
	2190	*/
	2191	int
	2192	sys_lstat(struct lstat_args *uap)
	2193	{
	2194	struct nlookupdata nd;
	2195	struct stat st;
	2196	int error;
	2197
	2198	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2199	if (error == 0) {
	2200	error = kern_stat(&nd, &st);
	2201	if (error == 0)
	2202	error = copyout(&st, uap->ub, sizeof(*uap->ub));
	2203	}
	2204	nlookup_done(&nd);
	2205	return (error);
	2206	}
	2207
	2208	/*
	2209	* pathconf_Args(char *path, int name)
	2210	*
	2211	* Get configurable pathname variables.
	2212	*/
	2213	/* ARGSUSED */
	2214	int
	2215	sys_pathconf(struct pathconf_args *uap)
	2216	{
	2217	struct nlookupdata nd;
	2218	struct vnode *vp;
	2219	int error;
	2220
	2221	vp = NULL;
	2222	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2223	if (error == 0)
	2224	error = nlookup(&nd);
	2225	if (error == 0)
	2226	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	2227	nlookup_done(&nd);
	2228	if (error == 0) {
	2229	error = VOP_PATHCONF(vp, uap->name, uap->sysmsg_fds);
	2230	vput(vp);
	2231	}
	2232	return (error);
	2233	}
	2234
	2235	/*
	2236	* XXX: daver
	2237	* kern_readlink isn't properly split yet. There is a copyin burried
	2238	* in VOP_READLINK().
	2239	*/
	2240	int
	2241	kern_readlink(struct nlookupdata nd, char buf, int count, int *res)
	2242	{
	2243	struct thread *td = curthread;
	2244	struct proc *p = td->td_proc;
	2245	struct vnode *vp;
	2246	struct iovec aiov;
	2247	struct uio auio;
	2248	int error;
	2249
	2250	if ((error = nlookup(nd)) != 0)
	2251	return (error);
	2252	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp);
	2253	if (error)
	2254	return (error);
	2255	if (vp->v_type != VLNK) {
	2256	error = EINVAL;
	2257	} else {
	2258	aiov.iov_base = buf;
	2259	aiov.iov_len = count;
	2260	auio.uio_iov = &aiov;
	2261	auio.uio_iovcnt = 1;
	2262	auio.uio_offset = 0;
	2263	auio.uio_rw = UIO_READ;
	2264	auio.uio_segflg = UIO_USERSPACE;
	2265	auio.uio_td = td;
	2266	auio.uio_resid = count;
	2267	error = VOP_READLINK(vp, &auio, p->p_ucred);
	2268	}
	2269	vput(vp);
	2270	*res = count - auio.uio_resid;
	2271	return (error);
	2272	}
	2273
	2274	/*
	2275	* readlink_args(char path, char buf, int count)
	2276	*
	2277	* Return target name of a symbolic link.
	2278	*/
	2279	int
	2280	sys_readlink(struct readlink_args *uap)
	2281	{
	2282	struct nlookupdata nd;
	2283	int error;
	2284
	2285	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2286	if (error == 0) {
	2287	error = kern_readlink(&nd, uap->buf, uap->count,
	2288	&uap->sysmsg_result);
	2289	}
	2290	nlookup_done(&nd);
	2291	return (error);
	2292	}
	2293
	2294	static int
	2295	setfflags(struct vnode *vp, int flags)
	2296	{
	2297	struct thread *td = curthread;
	2298	struct proc *p = td->td_proc;
	2299	int error;
	2300	struct vattr vattr;
	2301
	2302	/*
	2303	* Prevent non-root users from setting flags on devices. When
	2304	* a device is reused, users can retain ownership of the device
	2305	* if they are allowed to set flags and programs assume that
	2306	* chown can't fail when done as root.
	2307	*/
	2308	if ((vp->v_type == VCHR \|\| vp->v_type == VBLK) &&
	2309	((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0))
	2310	return (error);
	2311
	2312	/*
	2313	* note: vget is required for any operation that might mod the vnode
	2314	* so VINACTIVE is properly cleared.
	2315	*/
	2316	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
	2317	VATTR_NULL(&vattr);
	2318	vattr.va_flags = flags;
	2319	error = VOP_SETATTR(vp, &vattr, p->p_ucred);
	2320	vput(vp);
	2321	}
	2322	return (error);
	2323	}
	2324
	2325	/*
	2326	* chflags(char *path, int flags)
	2327	*
	2328	* Change flags of a file given a path name.
	2329	*/
	2330	/* ARGSUSED */
	2331	int
	2332	sys_chflags(struct chflags_args *uap)
	2333	{
	2334	struct nlookupdata nd;
	2335	struct vnode *vp;
	2336	int error;
	2337
	2338	vp = NULL;
	2339	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2340	/* XXX Add NLC flag indicating modifying operation? */
	2341	if (error == 0)
	2342	error = nlookup(&nd);
	2343	if (error == 0)
	2344	error = ncp_writechk(&nd.nl_nch);
	2345	if (error == 0)
	2346	error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	2347	nlookup_done(&nd);
	2348	if (error == 0) {
	2349	error = setfflags(vp, uap->flags);
	2350	vrele(vp);
	2351	}
	2352	return (error);
	2353	}
	2354
	2355	/*
	2356	* fchflags_args(int fd, int flags)
	2357	*
	2358	* Change flags of a file given a file descriptor.
	2359	*/
	2360	/* ARGSUSED */
	2361	int
	2362	sys_fchflags(struct fchflags_args *uap)
	2363	{
	2364	struct thread *td = curthread;
	2365	struct proc *p = td->td_proc;
	2366	struct file *fp;
	2367	int error;
	2368
	2369	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
	2370	return (error);
	2371	if (fp->f_nchandle.ncp)
	2372	error = ncp_writechk(&fp->f_nchandle);
	2373	if (error == 0)
	2374	error = setfflags((struct vnode *) fp->f_data, uap->flags);
	2375	fdrop(fp);
	2376	return (error);
	2377	}
	2378
	2379	static int
	2380	setfmode(struct vnode *vp, int mode)
	2381	{
	2382	struct thread *td = curthread;
	2383	struct proc *p = td->td_proc;
	2384	int error;
	2385	struct vattr vattr;
	2386
	2387	/*
	2388	* note: vget is required for any operation that might mod the vnode
	2389	* so VINACTIVE is properly cleared.
	2390	*/
	2391	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
	2392	VATTR_NULL(&vattr);
	2393	vattr.va_mode = mode & ALLPERMS;
	2394	error = VOP_SETATTR(vp, &vattr, p->p_ucred);
	2395	vput(vp);
	2396	}
	2397	return error;
	2398	}
	2399
	2400	int
	2401	kern_chmod(struct nlookupdata *nd, int mode)
	2402	{
	2403	struct vnode *vp;
	2404	int error;
	2405
	2406	/* XXX Add NLC flag indicating modifying operation? */
	2407	if ((error = nlookup(nd)) != 0)
	2408	return (error);
	2409	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
	2410	return (error);
	2411	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
	2412	error = setfmode(vp, mode);
	2413	vrele(vp);
	2414	return (error);
	2415	}
	2416
	2417	/*
	2418	* chmod_args(char *path, int mode)
	2419	*
	2420	* Change mode of a file given path name.
	2421	*/
	2422	/* ARGSUSED */
	2423	int
	2424	sys_chmod(struct chmod_args *uap)
	2425	{
	2426	struct nlookupdata nd;
	2427	int error;
	2428
	2429	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2430	if (error == 0)
	2431	error = kern_chmod(&nd, uap->mode);
	2432	nlookup_done(&nd);
	2433	return (error);
	2434	}
	2435
	2436	/*
	2437	* lchmod_args(char *path, int mode)
	2438	*
	2439	* Change mode of a file given path name (don't follow links.)
	2440	*/
	2441	/* ARGSUSED */
	2442	int
	2443	sys_lchmod(struct lchmod_args *uap)
	2444	{
	2445	struct nlookupdata nd;
	2446	int error;
	2447
	2448	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2449	if (error == 0)
	2450	error = kern_chmod(&nd, uap->mode);
	2451	nlookup_done(&nd);
	2452	return (error);
	2453	}
	2454
	2455	/*
	2456	* fchmod_args(int fd, int mode)
	2457	*
	2458	* Change mode of a file given a file descriptor.
	2459	*/
	2460	/* ARGSUSED */
	2461	int
	2462	sys_fchmod(struct fchmod_args *uap)
	2463	{
	2464	struct thread *td = curthread;
	2465	struct proc *p = td->td_proc;
	2466	struct file *fp;
	2467	int error;
	2468
	2469	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
	2470	return (error);
	2471	if (fp->f_nchandle.ncp)
	2472	error = ncp_writechk(&fp->f_nchandle);
	2473	if (error == 0)
	2474	error = setfmode((struct vnode *)fp->f_data, uap->mode);
	2475	fdrop(fp);
	2476	return (error);
	2477	}
	2478
	2479	static int
	2480	setfown(struct vnode *vp, uid_t uid, gid_t gid)
	2481	{
	2482	struct thread *td = curthread;
	2483	struct proc *p = td->td_proc;
	2484	int error;
	2485	struct vattr vattr;
	2486
	2487	/*
	2488	* note: vget is required for any operation that might mod the vnode
	2489	* so VINACTIVE is properly cleared.
	2490	*/
	2491	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
	2492	VATTR_NULL(&vattr);
	2493	vattr.va_uid = uid;
	2494	vattr.va_gid = gid;
	2495	error = VOP_SETATTR(vp, &vattr, p->p_ucred);
	2496	vput(vp);
	2497	}
	2498	return error;
	2499	}
	2500
	2501	int
	2502	kern_chown(struct nlookupdata *nd, int uid, int gid)
	2503	{
	2504	struct vnode *vp;
	2505	int error;
	2506
	2507	/* XXX Add NLC flag indicating modifying operation? */
	2508	if ((error = nlookup(nd)) != 0)
	2509	return (error);
	2510	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
	2511	return (error);
	2512	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
	2513	error = setfown(vp, uid, gid);
	2514	vrele(vp);
	2515	return (error);
	2516	}
	2517
	2518	/*
	2519	* chown(char *path, int uid, int gid)
	2520	*
	2521	* Set ownership given a path name.
	2522	*/
	2523	int
	2524	sys_chown(struct chown_args *uap)
	2525	{
	2526	struct nlookupdata nd;
	2527	int error;
	2528
	2529	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2530	if (error == 0)
	2531	error = kern_chown(&nd, uap->uid, uap->gid);
	2532	nlookup_done(&nd);
	2533	return (error);
	2534	}
	2535
	2536	/*
	2537	* lchown_args(char *path, int uid, int gid)
	2538	*
	2539	* Set ownership given a path name, do not cross symlinks.
	2540	*/
	2541	int
	2542	sys_lchown(struct lchown_args *uap)
	2543	{
	2544	struct nlookupdata nd;
	2545	int error;
	2546
	2547	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2548	if (error == 0)
	2549	error = kern_chown(&nd, uap->uid, uap->gid);
	2550	nlookup_done(&nd);
	2551	return (error);
	2552	}
	2553
	2554	/*
	2555	* fchown_args(int fd, int uid, int gid)
	2556	*
	2557	* Set ownership given a file descriptor.
	2558	*/
	2559	/* ARGSUSED */
	2560	int
	2561	sys_fchown(struct fchown_args *uap)
	2562	{
	2563	struct thread *td = curthread;
	2564	struct proc *p = td->td_proc;
	2565	struct file *fp;
	2566	int error;
	2567
	2568	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
	2569	return (error);
	2570	if (fp->f_nchandle.ncp)
	2571	error = ncp_writechk(&fp->f_nchandle);
	2572	if (error == 0)
	2573	error = setfown((struct vnode *)fp->f_data, uap->uid, uap->gid);
	2574	fdrop(fp);
	2575	return (error);
	2576	}
	2577
	2578	static int
	2579	getutimes(const struct timeval tvp, struct timespec tsp)
	2580	{
	2581	struct timeval tv[2];
	2582
	2583	if (tvp == NULL) {
	2584	microtime(&tv[0]);
	2585	TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
	2586	tsp[1] = tsp[0];
	2587	} else {
	2588	TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
	2589	TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
	2590	}
	2591	return 0;
	2592	}
	2593
	2594	static int
	2595	setutimes(struct vnode vp, const struct timespec ts, int nullflag)
	2596	{
	2597	struct thread *td = curthread;
	2598	struct proc *p = td->td_proc;
	2599	int error;
	2600	struct vattr vattr;
	2601
	2602	/*
	2603	* note: vget is required for any operation that might mod the vnode
	2604	* so VINACTIVE is properly cleared.
	2605	*/
	2606	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
	2607	VATTR_NULL(&vattr);
	2608	vattr.va_atime = ts[0];
	2609	vattr.va_mtime = ts[1];
	2610	if (nullflag)
	2611	vattr.va_vaflags \|= VA_UTIMES_NULL;
	2612	error = VOP_SETATTR(vp, &vattr, p->p_ucred);
	2613	vput(vp);
	2614	}
	2615	return error;
	2616	}
	2617
	2618	int
	2619	kern_utimes(struct nlookupdata nd, struct timeval tptr)
	2620	{
	2621	struct timespec ts[2];
	2622	struct vnode *vp;
	2623	int error;
	2624
	2625	if ((error = getutimes(tptr, ts)) != 0)
	2626	return (error);
	2627	/* XXX Add NLC flag indicating modifying operation? */
	2628	if ((error = nlookup(nd)) != 0)
	2629	return (error);
	2630	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	2631	return (error);
	2632	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
	2633	return (error);
	2634	error = setutimes(vp, ts, tptr == NULL);
	2635	vrele(vp);
	2636	return (error);
	2637	}
	2638
	2639	/*
	2640	* utimes_args(char path, struct timeval tptr)
	2641	*
	2642	* Set the access and modification times of a file.
	2643	*/
	2644	int
	2645	sys_utimes(struct utimes_args *uap)
	2646	{
	2647	struct timeval tv[2];
	2648	struct nlookupdata nd;
	2649	int error;
	2650
	2651	if (uap->tptr) {
	2652	error = copyin(uap->tptr, tv, sizeof(tv));
	2653	if (error)
	2654	return (error);
	2655	}
	2656	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2657	if (error == 0)
	2658	error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	2659	nlookup_done(&nd);
	2660	return (error);
	2661	}
	2662
	2663	/*
	2664	* lutimes_args(char path, struct timeval tptr)
	2665	*
	2666	* Set the access and modification times of a file.
	2667	*/
	2668	int
	2669	sys_lutimes(struct lutimes_args *uap)
	2670	{
	2671	struct timeval tv[2];
	2672	struct nlookupdata nd;
	2673	int error;
	2674
	2675	if (uap->tptr) {
	2676	error = copyin(uap->tptr, tv, sizeof(tv));
	2677	if (error)
	2678	return (error);
	2679	}
	2680	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2681	if (error == 0)
	2682	error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	2683	nlookup_done(&nd);
	2684	return (error);
	2685	}
	2686
	2687	int
	2688	kern_futimes(int fd, struct timeval *tptr)
	2689	{
	2690	struct thread *td = curthread;
	2691	struct proc *p = td->td_proc;
	2692	struct timespec ts[2];
	2693	struct file *fp;
	2694	int error;
	2695
	2696	error = getutimes(tptr, ts);
	2697	if (error)
	2698	return (error);
	2699	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	2700	return (error);
	2701	if (fp->f_nchandle.ncp)
	2702	error = ncp_writechk(&fp->f_nchandle);
	2703	if (error == 0)
	2704	error = setutimes((struct vnode *)fp->f_data, ts, tptr == NULL);
	2705	fdrop(fp);
	2706	return (error);
	2707	}
	2708
	2709	/*
	2710	* futimes_args(int fd, struct timeval *tptr)
	2711	*
	2712	* Set the access and modification times of a file.
	2713	*/
	2714	int
	2715	sys_futimes(struct futimes_args *uap)
	2716	{
	2717	struct timeval tv[2];
	2718	int error;
	2719
	2720	if (uap->tptr) {
	2721	error = copyin(uap->tptr, tv, sizeof(tv));
	2722	if (error)
	2723	return (error);
	2724	}
	2725
	2726	error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);
	2727
	2728	return (error);
	2729	}
	2730
	2731	int
	2732	kern_truncate(struct nlookupdata *nd, off_t length)
	2733	{
	2734	struct vnode *vp;
	2735	struct vattr vattr;
	2736	int error;
	2737
	2738	if (length < 0)
	2739	return(EINVAL);
	2740	/* XXX Add NLC flag indicating modifying operation? */
	2741	if ((error = nlookup(nd)) != 0)
	2742	return (error);
	2743	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	2744	return (error);
	2745	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
	2746	return (error);
	2747	if ((error = vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY)) != 0) {
	2748	vrele(vp);
	2749	return (error);
	2750	}
	2751	if (vp->v_type == VDIR) {
	2752	error = EISDIR;
	2753	} else if ((error = vn_writechk(vp, &nd->nl_nch)) == 0 &&
	2754	(error = VOP_ACCESS(vp, VWRITE, nd->nl_cred)) == 0) {
	2755	VATTR_NULL(&vattr);
	2756	vattr.va_size = length;
	2757	error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
	2758	}
	2759	vput(vp);
	2760	return (error);
	2761	}
	2762
	2763	/*
	2764	* truncate(char *path, int pad, off_t length)
	2765	*
	2766	* Truncate a file given its path name.
	2767	*/
	2768	int
	2769	sys_truncate(struct truncate_args *uap)
	2770	{
	2771	struct nlookupdata nd;
	2772	int error;
	2773
	2774	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2775	if (error == 0)
	2776	error = kern_truncate(&nd, uap->length);
	2777	nlookup_done(&nd);
	2778	return error;
	2779	}
	2780
	2781	int
	2782	kern_ftruncate(int fd, off_t length)
	2783	{
	2784	struct thread *td = curthread;
	2785	struct proc *p = td->td_proc;
	2786	struct vattr vattr;
	2787	struct vnode *vp;
	2788	struct file *fp;
	2789	int error;
	2790
	2791	if (length < 0)
	2792	return(EINVAL);
	2793	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	2794	return (error);
	2795	if (fp->f_nchandle.ncp) {
	2796	error = ncp_writechk(&fp->f_nchandle);
	2797	if (error)
	2798	goto done;
	2799	}
	2800	if ((fp->f_flag & FWRITE) == 0) {
	2801	error = EINVAL;
	2802	goto done;
	2803	}
	2804	vp = (struct vnode *)fp->f_data;
	2805	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	2806	if (vp->v_type == VDIR) {
	2807	error = EISDIR;
	2808	} else if ((error = vn_writechk(vp, NULL)) == 0) {
	2809	VATTR_NULL(&vattr);
	2810	vattr.va_size = length;
	2811	error = VOP_SETATTR(vp, &vattr, fp->f_cred);
	2812	}
	2813	vn_unlock(vp);
	2814	done:
	2815	fdrop(fp);
	2816	return (error);
	2817	}
	2818
	2819	/*
	2820	* ftruncate_args(int fd, int pad, off_t length)
	2821	*
	2822	* Truncate a file given a file descriptor.
	2823	*/
	2824	int
	2825	sys_ftruncate(struct ftruncate_args *uap)
	2826	{
	2827	int error;
	2828
	2829	error = kern_ftruncate(uap->fd, uap->length);
	2830
	2831	return (error);
	2832	}
	2833
	2834	/*
	2835	* fsync(int fd)
	2836	*
	2837	* Sync an open file.
	2838	*/
	2839	/* ARGSUSED */
	2840	int
	2841	sys_fsync(struct fsync_args *uap)
	2842	{
	2843	struct thread *td = curthread;
	2844	struct proc *p = td->td_proc;
	2845	struct vnode *vp;
	2846	struct file *fp;
	2847	vm_object_t obj;
	2848	int error;
	2849
	2850	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
	2851	return (error);
	2852	vp = (struct vnode *)fp->f_data;
	2853	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	2854	if ((obj = vp->v_object) != NULL)
	2855	vm_object_page_clean(obj, 0, 0, 0);
	2856	if ((error = VOP_FSYNC(vp, MNT_WAIT)) == 0 && vp->v_mount)
	2857	error = buf_fsync(vp);
	2858	vn_unlock(vp);
	2859	fdrop(fp);
	2860	return (error);
	2861	}
	2862
	2863	int
	2864	kern_rename(struct nlookupdata fromnd, struct nlookupdata tond)
	2865	{
	2866	struct nchandle fnchd;
	2867	struct nchandle tnchd;
	2868	struct namecache *ncp;
	2869	struct vnode *fdvp;
	2870	struct vnode *tdvp;
	2871	struct mount *mp;
	2872	int error;
	2873
	2874	bwillwrite();
	2875	fromnd->nl_flags \|= NLC_REFDVP;
	2876	if ((error = nlookup(fromnd)) != 0)
	2877	return (error);
	2878	if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
	2879	return (ENOENT);
	2880	fnchd.mount = fromnd->nl_nch.mount;
	2881	cache_hold(&fnchd);
	2882
	2883	/*
	2884	* unlock the source nch so we can lookup the target nch without
	2885	* deadlocking. The target may or may not exist so we do not check
	2886	* for a target vp like kern_mkdir() and other creation functions do.
	2887	*
	2888	* The source and target directories are ref'd and rechecked after
	2889	* everything is relocked to determine if the source or target file
	2890	* has been renamed.
	2891	*/
	2892	KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
	2893	fromnd->nl_flags &= ~NLC_NCPISLOCKED;
	2894	cache_unlock(&fromnd->nl_nch);
	2895
	2896	tond->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	2897	if ((error = nlookup(tond)) != 0) {
	2898	cache_drop(&fnchd);
	2899	return (error);
	2900	}
	2901	if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
	2902	cache_drop(&fnchd);
	2903	return (ENOENT);
	2904	}
	2905	tnchd.mount = tond->nl_nch.mount;
	2906	cache_hold(&tnchd);
	2907
	2908	/*
	2909	* If the source and target are the same there is nothing to do
	2910	*/
	2911	if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
	2912	cache_drop(&fnchd);
	2913	cache_drop(&tnchd);
	2914	return (0);
	2915	}
	2916
	2917	/*
	2918	* Mount points cannot be renamed or overwritten
	2919	*/
	2920	if ((fromnd->nl_nch.ncp->nc_flag \| tond->nl_nch.ncp->nc_flag) &
	2921	NCF_ISMOUNTPT
	2922	) {
	2923	cache_drop(&fnchd);
	2924	cache_drop(&tnchd);
	2925	return (EINVAL);
	2926	}
	2927
	2928	/*
	2929	* relock the source ncp. NOTE AFTER RELOCKING: the source ncp
	2930	* may have become invalid while it was unlocked, nc_vp and nc_mount
	2931	* could be NULL.
	2932	*/
	2933	if (cache_lock_nonblock(&fromnd->nl_nch) == 0) {
	2934	cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
	2935	} else if (fromnd->nl_nch.ncp > tond->nl_nch.ncp) {
	2936	cache_lock(&fromnd->nl_nch);
	2937	cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
	2938	} else {
	2939	cache_unlock(&tond->nl_nch);
	2940	cache_lock(&fromnd->nl_nch);
	2941	cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
	2942	cache_lock(&tond->nl_nch);
	2943	cache_resolve(&tond->nl_nch, tond->nl_cred);
	2944	}
	2945	fromnd->nl_flags \|= NLC_NCPISLOCKED;
	2946
	2947	/*
	2948	* make sure the parent directories linkages are the same
	2949	*/
	2950	if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent \|\|
	2951	tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
	2952	cache_drop(&fnchd);
	2953	cache_drop(&tnchd);
	2954	return (ENOENT);
	2955	}
	2956
	2957	/*
	2958	* Both the source and target must be within the same filesystem and
	2959	* in the same filesystem as their parent directories within the
	2960	* namecache topology.
	2961	*
	2962	* NOTE: fromnd's nc_mount or nc_vp could be NULL.
	2963	*/
	2964	mp = fnchd.mount;
	2965	if (mp != tnchd.mount \|\| mp != fromnd->nl_nch.mount \|\|
	2966	mp != tond->nl_nch.mount) {
	2967	cache_drop(&fnchd);
	2968	cache_drop(&tnchd);
	2969	return (EXDEV);
	2970	}
	2971
	2972	/*
	2973	* Make sure the mount point is writable
	2974	*/
	2975	if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
	2976	cache_drop(&fnchd);
	2977	cache_drop(&tnchd);
	2978	return (error);
	2979	}
	2980
	2981	/*
	2982	* If the target exists and either the source or target is a directory,
	2983	* then both must be directories.
	2984	*
	2985	* Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h
	2986	* have become NULL.
	2987	*/
	2988	if (tond->nl_nch.ncp->nc_vp) {
	2989	if (fromnd->nl_nch.ncp->nc_vp == NULL) {
	2990	error = ENOENT;
	2991	} else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
	2992	if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
	2993	error = ENOTDIR;
	2994	} else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
	2995	error = EISDIR;
	2996	}
	2997	}
	2998
	2999	/*
	3000	* You cannot rename a source into itself or a subdirectory of itself.
	3001	* We check this by travsersing the target directory upwards looking
	3002	* for a match against the source.
	3003	*/
	3004	if (error == 0) {
	3005	for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
	3006	if (fromnd->nl_nch.ncp == ncp) {
	3007	error = EINVAL;
	3008	break;
	3009	}
	3010	}
	3011	}
	3012
	3013	cache_drop(&fnchd);
	3014	cache_drop(&tnchd);
	3015
	3016	/*
	3017	* Even though the namespaces are different, they may still represent
	3018	* hardlinks to the same file. The filesystem might have a hard time
	3019	* with this so we issue a NREMOVE of the source instead of a NRENAME
	3020	* when we detect the situation.
	3021	*/
	3022	if (error == 0) {
	3023	fdvp = fromnd->nl_dvp;
	3024	tdvp = tond->nl_dvp;
	3025	if (fdvp == NULL \|\| tdvp == NULL) {
	3026	error = EPERM;
	3027	} else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
	3028	error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
	3029	fromnd->nl_cred);
	3030	} else {
	3031	error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
	3032	fdvp, tdvp, tond->nl_cred);
	3033	}
	3034	}
	3035	return (error);
	3036	}
	3037
	3038	/*
	3039	* rename_args(char from, char to)
	3040	*
	3041	* Rename files. Source and destination must either both be directories,
	3042	* or both not be directories. If target is a directory, it must be empty.
	3043	*/
	3044	int
	3045	sys_rename(struct rename_args *uap)
	3046	{
	3047	struct nlookupdata fromnd, tond;
	3048	int error;
	3049
	3050	error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
	3051	if (error == 0) {
	3052	error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
	3053	if (error == 0)
	3054	error = kern_rename(&fromnd, &tond);
	3055	nlookup_done(&tond);
	3056	}
	3057	nlookup_done(&fromnd);
	3058	return (error);
	3059	}
	3060
	3061	int
	3062	kern_mkdir(struct nlookupdata *nd, int mode)
	3063	{
	3064	struct thread *td = curthread;
	3065	struct proc *p = td->td_proc;
	3066	struct vnode *vp;
	3067	struct vattr vattr;
	3068	int error;
	3069
	3070	bwillwrite();
	3071	nd->nl_flags \|= NLC_WILLBEDIR \| NLC_CREATE \| NLC_REFDVP;
	3072	if ((error = nlookup(nd)) != 0)
	3073	return (error);
	3074
	3075	if (nd->nl_nch.ncp->nc_vp)
	3076	return (EEXIST);
	3077	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	3078	return (error);
	3079	VATTR_NULL(&vattr);
	3080	vattr.va_type = VDIR;
	3081	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
	3082
	3083	vp = NULL;
	3084	error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, p->p_ucred, &vattr);
	3085	if (error == 0)
	3086	vput(vp);
	3087	return (error);
	3088	}
	3089
	3090	/*
	3091	* mkdir_args(char *path, int mode)
	3092	*
	3093	* Make a directory file.
	3094	*/
	3095	/* ARGSUSED */
	3096	int
	3097	sys_mkdir(struct mkdir_args *uap)
	3098	{
	3099	struct nlookupdata nd;
	3100	int error;
	3101
	3102	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	3103	if (error == 0)
	3104	error = kern_mkdir(&nd, uap->mode);
	3105	nlookup_done(&nd);
	3106	return (error);
	3107	}
	3108
	3109	int
	3110	kern_rmdir(struct nlookupdata *nd)
	3111	{
	3112	int error;
	3113
	3114	bwillwrite();
	3115	nd->nl_flags \|= NLC_DELETE \| NLC_REFDVP;
	3116	if ((error = nlookup(nd)) != 0)
	3117	return (error);
	3118
	3119	/*
	3120	* Do not allow directories representing mount points to be
	3121	* deleted, even if empty. Check write perms on mount point
	3122	* in case the vnode is aliased (aka nullfs).
	3123	*/
	3124	if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT))
	3125	return (EINVAL);
	3126	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	3127	return (error);
	3128	error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	3129	return (error);
	3130	}
	3131
	3132	/*
	3133	* rmdir_args(char *path)
	3134	*
	3135	* Remove a directory file.
	3136	*/
	3137	/* ARGSUSED */
	3138	int
	3139	sys_rmdir(struct rmdir_args *uap)
	3140	{
	3141	struct nlookupdata nd;
	3142	int error;
	3143
	3144	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	3145	if (error == 0)
	3146	error = kern_rmdir(&nd);
	3147	nlookup_done(&nd);
	3148	return (error);
	3149	}
	3150
	3151	int
	3152	kern_getdirentries(int fd, char buf, u_int count, long basep, int *res,
	3153	enum uio_seg direction)
	3154	{
	3155	struct thread *td = curthread;
	3156	struct proc *p = td->td_proc;
	3157	struct vnode *vp;
	3158	struct file *fp;
	3159	struct uio auio;
	3160	struct iovec aiov;
	3161	off_t loff;
	3162	int error, eofflag;
	3163
	3164	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	3165	return (error);
	3166	if ((fp->f_flag & FREAD) == 0) {
	3167	error = EBADF;
	3168	goto done;
	3169	}
	3170	vp = (struct vnode *)fp->f_data;
	3171	unionread:
	3172	if (vp->v_type != VDIR) {
	3173	error = EINVAL;
	3174	goto done;
	3175	}
	3176	aiov.iov_base = buf;
	3177	aiov.iov_len = count;
	3178	auio.uio_iov = &aiov;
	3179	auio.uio_iovcnt = 1;
	3180	auio.uio_rw = UIO_READ;
	3181	auio.uio_segflg = direction;
	3182	auio.uio_td = td;
	3183	auio.uio_resid = count;
	3184	loff = auio.uio_offset = fp->f_offset;
	3185	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
	3186	fp->f_offset = auio.uio_offset;
	3187	if (error)
	3188	goto done;
	3189	if (count == auio.uio_resid) {
	3190	if (union_dircheckp) {
	3191	error = union_dircheckp(td, &vp, fp);
	3192	if (error == -1)
	3193	goto unionread;
	3194	if (error)
	3195	goto done;
	3196	}
	3197	#if 0
	3198	if ((vp->v_flag & VROOT) &&
	3199	(vp->v_mount->mnt_flag & MNT_UNION)) {
	3200	struct vnode *tvp = vp;
	3201	vp = vp->v_mount->mnt_vnodecovered;
	3202	vref(vp);
	3203	fp->f_data = vp;
	3204	fp->f_offset = 0;
	3205	vrele(tvp);
	3206	goto unionread;
	3207	}
	3208	#endif
	3209	}
	3210
	3211	/*
	3212	* WARNING! *basep may not be wide enough to accomodate the
	3213	* seek offset. XXX should we hack this to return the upper 32 bits
	3214	* for offsets greater then 4G?
	3215	*/
	3216	if (basep) {
	3217	*basep = (long)loff;
	3218	}
	3219	*res = count - auio.uio_resid;
	3220	done:
	3221	fdrop(fp);
	3222	return (error);
	3223	}
	3224
	3225	/*
	3226	* getdirentries_args(int fd, char buf, u_int conut, long basep)
	3227	*
	3228	* Read a block of directory entries in a file system independent format.
	3229	*/
	3230	int
	3231	sys_getdirentries(struct getdirentries_args *uap)
	3232	{
	3233	long base;
	3234	int error;
	3235
	3236	error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
	3237	&uap->sysmsg_result, UIO_USERSPACE);
	3238
	3239	if (error == 0 && uap->basep)
	3240	error = copyout(&base, uap->basep, sizeof(*uap->basep));
	3241	return (error);
	3242	}
	3243
	3244	/*
	3245	* getdents_args(int fd, char *buf, size_t count)
	3246	*/
	3247	int
	3248	sys_getdents(struct getdents_args *uap)
	3249	{
	3250	int error;
	3251
	3252	error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
	3253	&uap->sysmsg_result, UIO_USERSPACE);
	3254
	3255	return (error);
	3256	}
	3257
	3258	/*
	3259	* umask(int newmask)
	3260	*
	3261	* Set the mode mask for creation of filesystem nodes.
	3262	*
	3263	* MP SAFE
	3264	*/
	3265	int
	3266	sys_umask(struct umask_args *uap)
	3267	{
	3268	struct thread *td = curthread;
	3269	struct proc *p = td->td_proc;
	3270	struct filedesc *fdp;
	3271
	3272	fdp = p->p_fd;
	3273	uap->sysmsg_result = fdp->fd_cmask;
	3274	fdp->fd_cmask = uap->newmask & ALLPERMS;
	3275	return (0);
	3276	}
	3277
	3278	/*
	3279	* revoke(char *path)
	3280	*
	3281	* Void all references to file by ripping underlying filesystem
	3282	* away from vnode.
	3283	*/
	3284	/* ARGSUSED */
	3285	int
	3286	sys_revoke(struct revoke_args *uap)
	3287	{
	3288	struct nlookupdata nd;
	3289	struct vattr vattr;
	3290	struct vnode *vp;
	3291	struct ucred *cred;
	3292	int error;
	3293
	3294	vp = NULL;
	3295	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	3296	if (error == 0)
	3297	error = nlookup(&nd);
	3298	if (error == 0)
	3299	error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	3300	cred = crhold(nd.nl_cred);
	3301	nlookup_done(&nd);
	3302	if (error == 0) {
	3303	if (vp->v_type != VCHR && vp->v_type != VBLK)
	3304	error = EINVAL;
	3305	if (error == 0)
	3306	error = VOP_GETATTR(vp, &vattr);
	3307	if (error == 0 && cred->cr_uid != vattr.va_uid)
	3308	error = suser_cred(cred, PRISON_ROOT);
	3309	if (error == 0 && count_udev(vp->v_umajor, vp->v_uminor) > 0) {
	3310	error = 0;
	3311	vx_lock(vp);
	3312	VOP_REVOKE(vp, REVOKEALL);
	3313	vx_unlock(vp);
	3314	}
	3315	vrele(vp);
	3316	}
	3317	if (cred)
	3318	crfree(cred);
	3319	return (error);
	3320	}
	3321
	3322	/*
	3323	* getfh_args(char fname, fhandle_t fhp)
	3324	*
	3325	* Get (NFS) file handle
	3326	*/
	3327	int
	3328	sys_getfh(struct getfh_args *uap)
	3329	{
	3330	struct thread *td = curthread;
	3331	struct nlookupdata nd;
	3332	fhandle_t fh;
	3333	struct vnode *vp;
	3334	int error;
	3335
	3336	/*
	3337	* Must be super user
	3338	*/
	3339	if ((error = suser(td)) != 0)
	3340	return (error);
	3341
	3342	vp = NULL;
	3343	error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW);
	3344	if (error == 0)
	3345	error = nlookup(&nd);
	3346	if (error == 0)
	3347	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	3348	nlookup_done(&nd);
	3349	if (error == 0) {
	3350	bzero(&fh, sizeof(fh));
	3351	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
	3352	error = VFS_VPTOFH(vp, &fh.fh_fid);
	3353	vput(vp);
	3354	if (error == 0)
	3355	error = copyout(&fh, uap->fhp, sizeof(fh));
	3356	}
	3357	return (error);
	3358	}
	3359
	3360	/*
	3361	* fhopen_args(const struct fhandle *u_fhp, int flags)
	3362	*
	3363	* syscall for the rpc.lockd to use to translate a NFS file handle into
	3364	* an open descriptor.
	3365	*
	3366	* warning: do not remove the suser() call or this becomes one giant
	3367	* security hole.
	3368	*/
	3369	int
	3370	sys_fhopen(struct fhopen_args *uap)
	3371	{
	3372	struct thread *td = curthread;
	3373	struct proc *p = td->td_proc;
	3374	struct mount *mp;
	3375	struct vnode *vp;
	3376	struct fhandle fhp;
	3377	struct vattr vat;
	3378	struct vattr *vap = &vat;
	3379	struct flock lf;
	3380	int fmode, mode, error, type;
	3381	struct file *nfp;
	3382	struct file *fp;
	3383	int indx;
	3384
	3385	/*
	3386	* Must be super user
	3387	*/
	3388	error = suser(td);
	3389	if (error)
	3390	return (error);
	3391
	3392	fmode = FFLAGS(uap->flags);
	3393	/* why not allow a non-read/write open for our lockd? */
	3394	if (((fmode & (FREAD \| FWRITE)) == 0) \|\| (fmode & O_CREAT))
	3395	return (EINVAL);
	3396	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	3397	if (error)
	3398	return(error);
	3399	/* find the mount point */
	3400	mp = vfs_getvfs(&fhp.fh_fsid);
	3401	if (mp == NULL)
	3402	return (ESTALE);
	3403	/* now give me my vnode, it gets returned to me locked */
	3404	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
	3405	if (error)
	3406	return (error);
	3407	/*
	3408	* from now on we have to make sure not
	3409	* to forget about the vnode
	3410	* any error that causes an abort must vput(vp)
	3411	* just set error = err and 'goto bad;'.
	3412	*/
	3413
	3414	/*
	3415	* from vn_open
	3416	*/
	3417	if (vp->v_type == VLNK) {
	3418	error = EMLINK;
	3419	goto bad;
	3420	}
	3421	if (vp->v_type == VSOCK) {
	3422	error = EOPNOTSUPP;
	3423	goto bad;
	3424	}
	3425	mode = 0;
	3426	if (fmode & (FWRITE \| O_TRUNC)) {
	3427	if (vp->v_type == VDIR) {
	3428	error = EISDIR;
	3429	goto bad;
	3430	}
	3431	error = vn_writechk(vp, NULL);
	3432	if (error)
	3433	goto bad;
	3434	mode \|= VWRITE;
	3435	}
	3436	if (fmode & FREAD)
	3437	mode \|= VREAD;
	3438	if (mode) {
	3439	error = VOP_ACCESS(vp, mode, p->p_ucred);
	3440	if (error)
	3441	goto bad;
	3442	}
	3443	if (fmode & O_TRUNC) {
	3444	vn_unlock(vp); /* XXX */
	3445	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY); /* XXX */
	3446	VATTR_NULL(vap);
	3447	vap->va_size = 0;
	3448	error = VOP_SETATTR(vp, vap, p->p_ucred);
	3449	if (error)
	3450	goto bad;
	3451	}
	3452
	3453	/*
	3454	* VOP_OPEN needs the file pointer so it can potentially override
	3455	* it.
	3456	*
	3457	* WARNING! no f_nchandle will be associated when fhopen()ing a
	3458	* directory. XXX
	3459	*/
	3460	if ((error = falloc(p, &nfp, &indx)) != 0)
	3461	goto bad;
	3462	fp = nfp;
	3463
	3464	error = VOP_OPEN(vp, fmode, p->p_ucred, fp);
	3465	if (error) {
	3466	/*
	3467	* setting f_ops this way prevents VOP_CLOSE from being
	3468	* called or fdrop() releasing the vp from v_data. Since
	3469	* the VOP_OPEN failed we don't want to VOP_CLOSE.
	3470	*/
	3471	fp->f_ops = &badfileops;
	3472	fp->f_data = NULL;
	3473	goto bad_drop;
	3474	}
	3475
	3476	/*
	3477	* The fp is given its own reference, we still have our ref and lock.
	3478	*
	3479	* Assert that all regular files must be created with a VM object.
	3480	*/
	3481	if (vp->v_type == VREG && vp->v_object == NULL) {
	3482	kprintf("fhopen: regular file did not have VM object: %p\n", vp);
	3483	goto bad_drop;
	3484	}
	3485
	3486	/*
	3487	* The open was successful. Handle any locking requirements.
	3488	*/
	3489	if (fmode & (O_EXLOCK \| O_SHLOCK)) {
	3490	lf.l_whence = SEEK_SET;
	3491	lf.l_start = 0;
	3492	lf.l_len = 0;
	3493	if (fmode & O_EXLOCK)
	3494	lf.l_type = F_WRLCK;
	3495	else
	3496	lf.l_type = F_RDLCK;
	3497	if (fmode & FNONBLOCK)
	3498	type = 0;
	3499	else
	3500	type = F_WAIT;
	3501	vn_unlock(vp);
	3502	if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
	3503	/*
	3504	* release our private reference.
	3505	*/
	3506	fsetfd(p, NULL, indx);
	3507	fdrop(fp);
	3508	vrele(vp);
	3509	return (error);
	3510	}
	3511	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	3512	fp->f_flag \|= FHASLOCK;
	3513	}
	3514
	3515	/*
	3516	* Clean up. Associate the file pointer with the previously
	3517	* reserved descriptor and return it.
	3518	*/
	3519	vput(vp);
	3520	fsetfd(p, fp, indx);
	3521	fdrop(fp);
	3522	uap->sysmsg_result = indx;
	3523	return (0);
	3524
	3525	bad_drop:
	3526	fsetfd(p, NULL, indx);
	3527	fdrop(fp);
	3528	bad:
	3529	vput(vp);
	3530	return (error);
	3531	}
	3532
	3533	/*
	3534	* fhstat_args(struct fhandle u_fhp, struct stat sb)
	3535	*/
	3536	int
	3537	sys_fhstat(struct fhstat_args *uap)
	3538	{
	3539	struct thread *td = curthread;
	3540	struct stat sb;
	3541	fhandle_t fh;
	3542	struct mount *mp;
	3543	struct vnode *vp;
	3544	int error;
	3545
	3546	/*
	3547	* Must be super user
	3548	*/
	3549	error = suser(td);
	3550	if (error)
	3551	return (error);
	3552
	3553	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
	3554	if (error)
	3555	return (error);
	3556
	3557	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
	3558	return (ESTALE);
	3559	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
	3560	return (error);
	3561	error = vn_stat(vp, &sb, td->td_proc->p_ucred);
	3562	vput(vp);
	3563	if (error)
	3564	return (error);
	3565	error = copyout(&sb, uap->sb, sizeof(sb));
	3566	return (error);
	3567	}
	3568
	3569	/*
	3570	* fhstatfs_args(struct fhandle u_fhp, struct statfs buf)
	3571	*/
	3572	int
	3573	sys_fhstatfs(struct fhstatfs_args *uap)
	3574	{
	3575	struct thread *td = curthread;
	3576	struct proc *p = td->td_proc;
	3577	struct statfs *sp;
	3578	struct mount *mp;
	3579	struct vnode *vp;
	3580	struct statfs sb;
	3581	char fullpath, freepath;
	3582	fhandle_t fh;
	3583	int error;
	3584
	3585	/*
	3586	* Must be super user
	3587	*/
	3588	if ((error = suser(td)))
	3589	return (error);
	3590
	3591	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
	3592	return (error);
	3593
	3594	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
	3595	return (ESTALE);
	3596
	3597	if (p != NULL && !chroot_visible_mnt(mp, p))
	3598	return (ESTALE);
	3599
	3600	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
	3601	return (error);
	3602	mp = vp->v_mount;
	3603	sp = &mp->mnt_stat;
	3604	vput(vp);
	3605	if ((error = VFS_STATFS(mp, sp, p->p_ucred)) != 0)
	3606	return (error);
	3607
	3608	error = mount_path(p, mp, &fullpath, &freepath);
	3609	if (error)
	3610	return(error);
	3611	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	3612	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	3613	kfree(freepath, M_TEMP);
	3614
	3615	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	3616	if (suser(td)) {
	3617	bcopy(sp, &sb, sizeof(sb));
	3618	sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
	3619	sp = &sb;
	3620	}
	3621	return (copyout(sp, uap->buf, sizeof(*sp)));
	3622	}
	3623
	3624	/*
	3625	* Syscall to push extended attribute configuration information into the
	3626	* VFS. Accepts a path, which it converts to a mountpoint, as well as
	3627	* a command (int cmd), and attribute name and misc data. For now, the
	3628	* attribute name is left in userspace for consumption by the VFS_op.
	3629	* It will probably be changed to be copied into sysspace by the
	3630	* syscall in the future, once issues with various consumers of the
	3631	* attribute code have raised their hands.
	3632	*
	3633	* Currently this is used only by UFS Extended Attributes.
	3634	*/
	3635	int
	3636	sys_extattrctl(struct extattrctl_args *uap)
	3637	{
	3638	struct nlookupdata nd;
	3639	struct mount *mp;
	3640	struct vnode *vp;
	3641	int error;
	3642
	3643	vp = NULL;
	3644	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	3645	if (error == 0)
	3646	error = nlookup(&nd);
	3647	if (error == 0) {
	3648	mp = nd.nl_nch.mount;
	3649	error = VFS_EXTATTRCTL(mp, uap->cmd,
	3650	uap->attrname, uap->arg,
	3651	nd.nl_cred);
	3652	}
	3653	nlookup_done(&nd);
	3654	return (error);
	3655	}
	3656
	3657	/*
	3658	* Syscall to set a named extended attribute on a file or directory.
	3659	* Accepts attribute name, and a uio structure pointing to the data to set.
	3660	* The uio is consumed in the style of writev(). The real work happens
	3661	* in VOP_SETEXTATTR().
	3662	*/
	3663	int
	3664	sys_extattr_set_file(struct extattr_set_file_args *uap)
	3665	{
	3666	char attrname[EXTATTR_MAXNAMELEN];
	3667	struct iovec aiov[UIO_SMALLIOV];
	3668	struct iovec *needfree;
	3669	struct nlookupdata nd;
	3670	struct iovec *iov;
	3671	struct vnode *vp;
	3672	struct uio auio;
	3673	u_int iovlen;
	3674	u_int cnt;
	3675	int error;
	3676	int i;
	3677
	3678	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	3679	if (error)
	3680	return (error);
	3681
	3682	vp = NULL;
	3683	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	3684	if (error == 0)
	3685	error = nlookup(&nd);
	3686	if (error == 0)
	3687	error = ncp_writechk(&nd.nl_nch);
	3688	if (error == 0)
	3689	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	3690	if (error) {
	3691	nlookup_done(&nd);
	3692	return (error);
	3693	}
	3694
	3695	needfree = NULL;
	3696	iovlen = uap->iovcnt * sizeof(struct iovec);
	3697	if (uap->iovcnt > UIO_SMALLIOV) {
	3698	if (uap->iovcnt > UIO_MAXIOV) {
	3699	error = EINVAL;
	3700	goto done;
	3701	}
	3702	MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
	3703	needfree = iov;
	3704	} else {
	3705	iov = aiov;
	3706	}
	3707	auio.uio_iov = iov;
	3708	auio.uio_iovcnt = uap->iovcnt;
	3709	auio.uio_rw = UIO_WRITE;
	3710	auio.uio_segflg = UIO_USERSPACE;
	3711	auio.uio_td = nd.nl_td;
	3712	auio.uio_offset = 0;
	3713	if ((error = copyin(uap->iovp, iov, iovlen)))
	3714	goto done;
	3715	auio.uio_resid = 0;
	3716	for (i = 0; i < uap->iovcnt; i++) {
	3717	if (iov->iov_len > INT_MAX - auio.uio_resid) {
	3718	error = EINVAL;
	3719	goto done;
	3720	}
	3721	auio.uio_resid += iov->iov_len;
	3722	iov++;
	3723	}
	3724	cnt = auio.uio_resid;
	3725	error = VOP_SETEXTATTR(vp, attrname, &auio, nd.nl_cred);
	3726	cnt -= auio.uio_resid;
	3727	uap->sysmsg_result = cnt;
	3728	done:
	3729	vput(vp);
	3730	nlookup_done(&nd);
	3731	if (needfree)
	3732	FREE(needfree, M_IOV);
	3733	return (error);
	3734	}
	3735
	3736	/*
	3737	* Syscall to get a named extended attribute on a file or directory.
	3738	* Accepts attribute name, and a uio structure pointing to a buffer for the
	3739	* data. The uio is consumed in the style of readv(). The real work
	3740	* happens in VOP_GETEXTATTR();
	3741	*/
	3742	int
	3743	sys_extattr_get_file(struct extattr_get_file_args *uap)
	3744	{
	3745	char attrname[EXTATTR_MAXNAMELEN];
	3746	struct iovec aiov[UIO_SMALLIOV];
	3747	struct iovec *needfree;
	3748	struct nlookupdata nd;
	3749	struct iovec *iov;
	3750	struct vnode *vp;
	3751	struct uio auio;
	3752	u_int iovlen;
	3753	u_int cnt;
	3754	int error;
	3755	int i;
	3756
	3757	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	3758	if (error)
	3759	return (error);
	3760
	3761	vp = NULL;
	3762	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	3763	if (error == 0)
	3764	error = nlookup(&nd);
	3765	if (error == 0)
	3766	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	3767	if (error) {
	3768	nlookup_done(&nd);
	3769	return (error);
	3770	}
	3771
	3772	iovlen = uap->iovcnt * sizeof (struct iovec);
	3773	needfree = NULL;
	3774	if (uap->iovcnt > UIO_SMALLIOV) {
	3775	if (uap->iovcnt > UIO_MAXIOV) {
	3776	error = EINVAL;
	3777	goto done;
	3778	}
	3779	MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
	3780	needfree = iov;
	3781	} else {
	3782	iov = aiov;
	3783	}
	3784	auio.uio_iov = iov;
	3785	auio.uio_iovcnt = uap->iovcnt;
	3786	auio.uio_rw = UIO_READ;
	3787	auio.uio_segflg = UIO_USERSPACE;
	3788	auio.uio_td = nd.nl_td;
	3789	auio.uio_offset = 0;
	3790	if ((error = copyin(uap->iovp, iov, iovlen)))
	3791	goto done;
	3792	auio.uio_resid = 0;
	3793	for (i = 0; i < uap->iovcnt; i++) {
	3794	if (iov->iov_len > INT_MAX - auio.uio_resid) {
	3795	error = EINVAL;
	3796	goto done;
	3797	}
	3798	auio.uio_resid += iov->iov_len;
	3799	iov++;
	3800	}
	3801	cnt = auio.uio_resid;
	3802	error = VOP_GETEXTATTR(vp, attrname, &auio, nd.nl_cred);
	3803	cnt -= auio.uio_resid;
	3804	uap->sysmsg_result = cnt;
	3805	done:
	3806	vput(vp);
	3807	nlookup_done(&nd);
	3808	if (needfree)
	3809	FREE(needfree, M_IOV);
	3810	return(error);
	3811	}
	3812
	3813	/*
	3814	* Syscall to delete a named extended attribute from a file or directory.
	3815	* Accepts attribute name. The real work happens in VOP_SETEXTATTR().
	3816	*/
	3817	int
	3818	sys_extattr_delete_file(struct extattr_delete_file_args *uap)
	3819	{
	3820	char attrname[EXTATTR_MAXNAMELEN];
	3821	struct nlookupdata nd;
	3822	struct vnode *vp;
	3823	int error;
	3824
	3825	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	3826	if (error)
	3827	return(error);
	3828
	3829	vp = NULL;
	3830	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	3831	if (error == 0)
	3832	error = nlookup(&nd);
	3833	if (error == 0)
	3834	error = ncp_writechk(&nd.nl_nch);
	3835	if (error == 0)
	3836	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	3837	if (error) {
	3838	nlookup_done(&nd);
	3839	return (error);
	3840	}
	3841
	3842	error = VOP_SETEXTATTR(vp, attrname, NULL, nd.nl_cred);
	3843	vput(vp);
	3844	nlookup_done(&nd);
	3845	return(error);
	3846	}
	3847
	3848	/*
	3849	* Determine if the mount is visible to the process.
	3850	*/
	3851	static int
	3852	chroot_visible_mnt(struct mount mp, struct proc p)
	3853	{
	3854	struct nchandle nch;
	3855
	3856	/*
	3857	* Traverse from the mount point upwards. If we hit the process
	3858	* root then the mount point is visible to the process.
	3859	*/
	3860	nch = mp->mnt_ncmountpt;
	3861	while (nch.ncp) {
	3862	if (nch.mount == p->p_fd->fd_nrdir.mount &&
	3863	nch.ncp == p->p_fd->fd_nrdir.ncp) {
	3864	return(1);
	3865	}
	3866	if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) {
	3867	nch = nch.mount->mnt_ncmounton;
	3868	} else {
	3869	nch.ncp = nch.ncp->nc_parent;
	3870	}
	3871	}
	3872
	3873	/*
	3874	* If the mount point is not visible to the process, but the
	3875	* process root is in a subdirectory of the mount, return
	3876	* TRUE anyway.
	3877	*/
	3878	if (p->p_fd->fd_nrdir.mount == mp)
	3879	return(1);
	3880
	3881	return(0);
	3882	}
	3883