gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 1989, 1993
	3	* The Regents of the University of California. All rights reserved.
	4	* (c) UNIX System Laboratories, Inc.
	5	* All or some portions of this file are derived from material licensed
	6	* to the University of California by American Telephone and Telegraph
	7	* Co. or Unix System Laboratories, Inc. and are reproduced herein with
	8	* the permission of UNIX System Laboratories, Inc.
	9	*
	10	* Redistribution and use in source and binary forms, with or without
	11	* modification, are permitted provided that the following conditions
	12	* are met:
	13	* 1. Redistributions of source code must retain the above copyright
	14	* notice, this list of conditions and the following disclaimer.
	15	* 2. Redistributions in binary form must reproduce the above copyright
	16	* notice, this list of conditions and the following disclaimer in the
	17	* documentation and/or other materials provided with the distribution.
	18	* 3. All advertising materials mentioning features or use of this software
	19	* must display the following acknowledgement:
	20	* This product includes software developed by the University of
	21	* California, Berkeley and its contributors.
	22	* 4. Neither the name of the University nor the names of its contributors
	23	* may be used to endorse or promote products derived from this software
	24	* without specific prior written permission.
	25	*
	26	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	27	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	28	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	29	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	30	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	31	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	32	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	33	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	34	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	35	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	36	* SUCH DAMAGE.
	37	*
	38	* @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
	39	* $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
	40	* $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.135 2008/11/11 00:55:49 pavalos Exp $
	41	*/
	42
	43	#include <sys/param.h>
	44	#include <sys/systm.h>
	45	#include <sys/buf.h>
	46	#include <sys/conf.h>
	47	#include <sys/sysent.h>
	48	#include <sys/malloc.h>
	49	#include <sys/mount.h>
	50	#include <sys/mountctl.h>
	51	#include <sys/sysproto.h>
	52	#include <sys/filedesc.h>
	53	#include <sys/kernel.h>
	54	#include <sys/fcntl.h>
	55	#include <sys/file.h>
	56	#include <sys/linker.h>
	57	#include <sys/stat.h>
	58	#include <sys/unistd.h>
	59	#include <sys/vnode.h>
	60	#include <sys/proc.h>
	61	#include <sys/priv.h>
	62	#include <sys/jail.h>
	63	#include <sys/namei.h>
	64	#include <sys/nlookup.h>
	65	#include <sys/dirent.h>
	66	#include <sys/extattr.h>
	67	#include <sys/spinlock.h>
	68	#include <sys/kern_syscall.h>
	69	#include <sys/objcache.h>
	70	#include <sys/sysctl.h>
	71
	72	#include <sys/buf2.h>
	73	#include <sys/file2.h>
	74	#include <sys/spinlock2.h>
	75
	76	#include <vm/vm.h>
	77	#include <vm/vm_object.h>
	78	#include <vm/vm_page.h>
	79
	80	#include <machine/limits.h>
	81	#include <machine/stdarg.h>
	82
	83	#include <vfs/union/union.h>
	84
	85	static void mount_warning(struct mount mp, const char ctl, ...);
	86	static int mount_path(struct proc p, struct mount mp, char rb, char fb);
	87	static int checkvp_chdir (struct vnode vn, struct thread td);
	88	static void checkdirs (struct nchandle old_nch, struct nchandle new_nch);
	89	static int chroot_refuse_vdir_fds (struct filedesc *fdp);
	90	static int chroot_visible_mnt(struct mount mp, struct proc p);
	91	static int getutimes (const struct timeval , struct timespec );
	92	static int setfown (struct vnode *, uid_t, gid_t);
	93	static int setfmode (struct vnode *, int);
	94	static int setfflags (struct vnode *, int);
	95	static int setutimes (struct vnode , struct vattr ,
	96	const struct timespec *, int);
	97	static int usermount = 0; /* if 1, non-root can mount fs. */
	98
	99	int (union_dircheckp) (struct thread , struct vnode *, struct file );
	100
	101	SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");
	102
	103	/*
	104	* Virtual File System System Calls
	105	*/
	106
	107	/*
	108	* Mount a file system.
	109	*/
	110	/*
	111	* mount_args(char type, char path, int flags, caddr_t data)
	112	*/
	113	/* ARGSUSED */
	114	int
	115	sys_mount(struct mount_args *uap)
	116	{
	117	struct thread *td = curthread;
	118	struct proc *p = td->td_proc;
	119	struct vnode *vp;
	120	struct nchandle nch;
	121	struct mount *mp;
	122	struct vfsconf *vfsp;
	123	int error, flag = 0, flag2 = 0;
	124	int hasmount;
	125	struct vattr va;
	126	struct nlookupdata nd;
	127	char fstypename[MFSNAMELEN];
	128	struct ucred *cred = p->p_ucred;
	129
	130	KKASSERT(p);
	131	if (jailed(cred))
	132	return (EPERM);
	133	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
	134	return (error);
	135	/*
	136	* Do not allow NFS export by non-root users.
	137	*/
	138	if (uap->flags & MNT_EXPORTED) {
	139	error = priv_check(td, PRIV_ROOT);
	140	if (error)
	141	return (error);
	142	}
	143	/*
	144	* Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	145	*/
	146	if (priv_check(td, PRIV_ROOT))
	147	uap->flags \|= MNT_NOSUID \| MNT_NODEV;
	148
	149	/*
	150	* Lookup the requested path and extract the nch and vnode.
	151	*/
	152	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	153	if (error == 0) {
	154	if ((error = nlookup(&nd)) == 0) {
	155	if (nd.nl_nch.ncp->nc_vp == NULL)
	156	error = ENOENT;
	157	}
	158	}
	159	if (error) {
	160	nlookup_done(&nd);
	161	return (error);
	162	}
	163
	164	/*
	165	* Extract the locked+refd ncp and cleanup the nd structure
	166	*/
	167	nch = nd.nl_nch;
	168	cache_zero(&nd.nl_nch);
	169	nlookup_done(&nd);
	170
	171	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && cache_findmount(&nch))
	172	hasmount = 1;
	173	else
	174	hasmount = 0;
	175
	176
	177	/*
	178	* now we have the locked ref'd nch and unreferenced vnode.
	179	*/
	180	vp = nch.ncp->nc_vp;
	181	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
	182	cache_put(&nch);
	183	return (error);
	184	}
	185	cache_unlock(&nch);
	186
	187	/*
	188	* Now we have an unlocked ref'd nch and a locked ref'd vp
	189	*/
	190	if (uap->flags & MNT_UPDATE) {
	191	if ((vp->v_flag & (VROOT\|VPFSROOT)) == 0) {
	192	cache_drop(&nch);
	193	vput(vp);
	194	return (EINVAL);
	195	}
	196	mp = vp->v_mount;
	197	flag = mp->mnt_flag;
	198	flag2 = mp->mnt_kern_flag;
	199	/*
	200	* We only allow the filesystem to be reloaded if it
	201	* is currently mounted read-only.
	202	*/
	203	if ((uap->flags & MNT_RELOAD) &&
	204	((mp->mnt_flag & MNT_RDONLY) == 0)) {
	205	cache_drop(&nch);
	206	vput(vp);
	207	return (EOPNOTSUPP); /* Needs translation */
	208	}
	209	/*
	210	* Only root, or the user that did the original mount is
	211	* permitted to update it.
	212	*/
	213	if (mp->mnt_stat.f_owner != cred->cr_uid &&
	214	(error = priv_check(td, PRIV_ROOT))) {
	215	cache_drop(&nch);
	216	vput(vp);
	217	return (error);
	218	}
	219	if (vfs_busy(mp, LK_NOWAIT)) {
	220	cache_drop(&nch);
	221	vput(vp);
	222	return (EBUSY);
	223	}
	224	if ((vp->v_flag & VMOUNT) != 0 \|\| hasmount) {
	225	cache_drop(&nch);
	226	vfs_unbusy(mp);
	227	vput(vp);
	228	return (EBUSY);
	229	}
	230	vp->v_flag \|= VMOUNT;
	231	mp->mnt_flag \|=
	232	uap->flags & (MNT_RELOAD \| MNT_FORCE \| MNT_UPDATE);
	233	vn_unlock(vp);
	234	goto update;
	235	}
	236	/*
	237	* If the user is not root, ensure that they own the directory
	238	* onto which we are attempting to mount.
	239	*/
	240	if ((error = VOP_GETATTR(vp, &va)) \|\|
	241	(va.va_uid != cred->cr_uid && (error = priv_check(td, PRIV_ROOT)))) {
	242	cache_drop(&nch);
	243	vput(vp);
	244	return (error);
	245	}
	246	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
	247	cache_drop(&nch);
	248	vput(vp);
	249	return (error);
	250	}
	251	if (vp->v_type != VDIR) {
	252	cache_drop(&nch);
	253	vput(vp);
	254	return (ENOTDIR);
	255	}
	256	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
	257	cache_drop(&nch);
	258	vput(vp);
	259	return (EPERM);
	260	}
	261	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
	262	cache_drop(&nch);
	263	vput(vp);
	264	return (error);
	265	}
	266	vfsp = vfsconf_find_by_name(fstypename);
	267	if (vfsp == NULL) {
	268	linker_file_t lf;
	269
	270	/* Only load modules for root (very important!) */
	271	if ((error = priv_check(td, PRIV_ROOT)) != 0) {
	272	cache_drop(&nch);
	273	vput(vp);
	274	return error;
	275	}
	276	error = linker_load_file(fstypename, &lf);
	277	if (error \|\| lf == NULL) {
	278	cache_drop(&nch);
	279	vput(vp);
	280	if (lf == NULL)
	281	error = ENODEV;
	282	return error;
	283	}
	284	lf->userrefs++;
	285	/* lookup again, see if the VFS was loaded */
	286	vfsp = vfsconf_find_by_name(fstypename);
	287	if (vfsp == NULL) {
	288	lf->userrefs--;
	289	linker_file_unload(lf);
	290	cache_drop(&nch);
	291	vput(vp);
	292	return (ENODEV);
	293	}
	294	}
	295	if ((vp->v_flag & VMOUNT) != 0 \|\| hasmount) {
	296	cache_drop(&nch);
	297	vput(vp);
	298	return (EBUSY);
	299	}
	300	vp->v_flag \|= VMOUNT;
	301
	302	/*
	303	* Allocate and initialize the filesystem.
	304	*/
	305	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO\|M_WAITOK);
	306	TAILQ_INIT(&mp->mnt_nvnodelist);
	307	TAILQ_INIT(&mp->mnt_reservedvnlist);
	308	TAILQ_INIT(&mp->mnt_jlist);
	309	mp->mnt_nvnodelistsize = 0;
	310	lockinit(&mp->mnt_lock, "vfslock", 0, 0);
	311	vfs_busy(mp, LK_NOWAIT);
	312	mp->mnt_op = vfsp->vfc_vfsops;
	313	mp->mnt_vfc = vfsp;
	314	vfsp->vfc_refcount++;
	315	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	316	mp->mnt_flag \|= vfsp->vfc_flags & MNT_VISFLAGMASK;
	317	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	318	mp->mnt_stat.f_owner = cred->cr_uid;
	319	mp->mnt_iosize_max = DFLTPHYS;
	320	vn_unlock(vp);
	321	update:
	322	/*
	323	* Set the mount level flags.
	324	*/
	325	if (uap->flags & MNT_RDONLY)
	326	mp->mnt_flag \|= MNT_RDONLY;
	327	else if (mp->mnt_flag & MNT_RDONLY)
	328	mp->mnt_kern_flag \|= MNTK_WANTRDWR;
	329	mp->mnt_flag &=~ (MNT_NOSUID \| MNT_NOEXEC \| MNT_NODEV \|
	330	MNT_SYNCHRONOUS \| MNT_UNION \| MNT_ASYNC \| MNT_NOATIME \|
	331	MNT_NOSYMFOLLOW \| MNT_IGNORE \|
	332	MNT_NOCLUSTERR \| MNT_NOCLUSTERW \| MNT_SUIDDIR);
	333	mp->mnt_flag \|= uap->flags & (MNT_NOSUID \| MNT_NOEXEC \|
	334	MNT_NODEV \| MNT_SYNCHRONOUS \| MNT_UNION \| MNT_ASYNC \| MNT_FORCE \|
	335	MNT_NOSYMFOLLOW \| MNT_IGNORE \|
	336	MNT_NOATIME \| MNT_NOCLUSTERR \| MNT_NOCLUSTERW \| MNT_SUIDDIR);
	337	/*
	338	* Mount the filesystem.
	339	* XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	340	* get.
	341	*/
	342	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	343	if (mp->mnt_flag & MNT_UPDATE) {
	344	if (mp->mnt_kern_flag & MNTK_WANTRDWR)
	345	mp->mnt_flag &= ~MNT_RDONLY;
	346	mp->mnt_flag &=~ (MNT_UPDATE \| MNT_RELOAD \| MNT_FORCE);
	347	mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
	348	if (error) {
	349	mp->mnt_flag = flag;
	350	mp->mnt_kern_flag = flag2;
	351	}
	352	vfs_unbusy(mp);
	353	vp->v_flag &= ~VMOUNT;
	354	vrele(vp);
	355	cache_drop(&nch);
	356	return (error);
	357	}
	358	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	359	/*
	360	* Put the new filesystem on the mount list after root. The mount
	361	* point gets its own mnt_ncmountpt (unless the VFS already set one
	362	* up) which represents the root of the mount. The lookup code
	363	* detects the mount point going forward and checks the root of
	364	* the mount going backwards.
	365	*
	366	* It is not necessary to invalidate or purge the vnode underneath
	367	* because elements under the mount will be given their own glue
	368	* namecache record.
	369	*/
	370	if (!error) {
	371	if (mp->mnt_ncmountpt.ncp == NULL) {
	372	/*
	373	* allocate, then unlock, but leave the ref intact
	374	*/
	375	cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
	376	cache_unlock(&mp->mnt_ncmountpt);
	377	}
	378	mp->mnt_ncmounton = nch; /* inherits ref */
	379	nch.ncp->nc_flag \|= NCF_ISMOUNTPT;
	380
	381	/* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */
	382	vp->v_flag &= ~VMOUNT;
	383	mountlist_insert(mp, MNTINS_LAST);
	384	vn_unlock(vp);
	385	checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
	386	error = vfs_allocate_syncvnode(mp);
	387	vfs_unbusy(mp);
	388	error = VFS_START(mp, 0);
	389	vrele(vp);
	390	} else {
	391	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	392	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	393	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	394	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	395	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
	396	vp->v_flag &= ~VMOUNT;
	397	mp->mnt_vfc->vfc_refcount--;
	398	vfs_unbusy(mp);
	399	kfree(mp, M_MOUNT);
	400	cache_drop(&nch);
	401	vput(vp);
	402	}
	403	return (error);
	404	}
	405
	406	/*
	407	* Scan all active processes to see if any of them have a current
	408	* or root directory onto which the new filesystem has just been
	409	* mounted. If so, replace them with the new mount point.
	410	*
	411	* The passed ncp is ref'd and locked (from the mount code) and
	412	* must be associated with the vnode representing the root of the
	413	* mount point.
	414	*/
	415	struct checkdirs_info {
	416	struct nchandle old_nch;
	417	struct nchandle new_nch;
	418	struct vnode *old_vp;
	419	struct vnode *new_vp;
	420	};
	421
	422	static int checkdirs_callback(struct proc p, void data);
	423
	424	static void
	425	checkdirs(struct nchandle old_nch, struct nchandle new_nch)
	426	{
	427	struct checkdirs_info info;
	428	struct vnode *olddp;
	429	struct vnode *newdp;
	430	struct mount *mp;
	431
	432	/*
	433	* If the old mount point's vnode has a usecount of 1, it is not
	434	* being held as a descriptor anywhere.
	435	*/
	436	olddp = old_nch->ncp->nc_vp;
	437	if (olddp == NULL \|\| olddp->v_sysref.refcnt == 1)
	438	return;
	439
	440	/*
	441	* Force the root vnode of the new mount point to be resolved
	442	* so we can update any matching processes.
	443	*/
	444	mp = new_nch->mount;
	445	if (VFS_ROOT(mp, &newdp))
	446	panic("mount: lost mount");
	447	cache_setunresolved(new_nch);
	448	cache_setvp(new_nch, newdp);
	449
	450	/*
	451	* Special handling of the root node
	452	*/
	453	if (rootvnode == olddp) {
	454	vref(newdp);
	455	vfs_cache_setroot(newdp, cache_hold(new_nch));
	456	}
	457
	458	/*
	459	* Pass newdp separately so the callback does not have to access
	460	* it via new_nch->ncp->nc_vp.
	461	*/
	462	info.old_nch = *old_nch;
	463	info.new_nch = *new_nch;
	464	info.new_vp = newdp;
	465	allproc_scan(checkdirs_callback, &info);
	466	vput(newdp);
	467	}
	468
	469	/*
	470	* NOTE: callback is not MP safe because the scanned process's filedesc
	471	* structure can be ripped out from under us, amoung other things.
	472	*/
	473	static int
	474	checkdirs_callback(struct proc p, void data)
	475	{
	476	struct checkdirs_info *info = data;
	477	struct filedesc *fdp;
	478	struct nchandle ncdrop1;
	479	struct nchandle ncdrop2;
	480	struct vnode *vprele1;
	481	struct vnode *vprele2;
	482
	483	if ((fdp = p->p_fd) != NULL) {
	484	cache_zero(&ncdrop1);
	485	cache_zero(&ncdrop2);
	486	vprele1 = NULL;
	487	vprele2 = NULL;
	488
	489	/*
	490	* MPUNSAFE - XXX fdp can be pulled out from under a
	491	* foreign process.
	492	*
	493	* A shared filedesc is ok, we don't have to copy it
	494	* because we are making this change globally.
	495	*/
	496	spin_lock_wr(&fdp->fd_spin);
	497	if (fdp->fd_ncdir.mount == info->old_nch.mount &&
	498	fdp->fd_ncdir.ncp == info->old_nch.ncp) {
	499	vprele1 = fdp->fd_cdir;
	500	vref(info->new_vp);
	501	fdp->fd_cdir = info->new_vp;
	502	ncdrop1 = fdp->fd_ncdir;
	503	cache_copy(&info->new_nch, &fdp->fd_ncdir);
	504	}
	505	if (fdp->fd_nrdir.mount == info->old_nch.mount &&
	506	fdp->fd_nrdir.ncp == info->old_nch.ncp) {
	507	vprele2 = fdp->fd_rdir;
	508	vref(info->new_vp);
	509	fdp->fd_rdir = info->new_vp;
	510	ncdrop2 = fdp->fd_nrdir;
	511	cache_copy(&info->new_nch, &fdp->fd_nrdir);
	512	}
	513	spin_unlock_wr(&fdp->fd_spin);
	514	if (ncdrop1.ncp)
	515	cache_drop(&ncdrop1);
	516	if (ncdrop2.ncp)
	517	cache_drop(&ncdrop2);
	518	if (vprele1)
	519	vrele(vprele1);
	520	if (vprele2)
	521	vrele(vprele2);
	522	}
	523	return(0);
	524	}
	525
	526	/*
	527	* Unmount a file system.
	528	*
	529	* Note: unmount takes a path to the vnode mounted on as argument,
	530	* not special file (as before).
	531	*/
	532	/*
	533	* umount_args(char *path, int flags)
	534	*/
	535	/* ARGSUSED */
	536	int
	537	sys_unmount(struct unmount_args *uap)
	538	{
	539	struct thread *td = curthread;
	540	struct proc *p = td->td_proc;
	541	struct mount *mp = NULL;
	542	int error;
	543	struct nlookupdata nd;
	544
	545	KKASSERT(p);
	546	if (p->p_ucred->cr_prison != NULL)
	547	return (EPERM);
	548	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
	549	return (error);
	550
	551	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	552	if (error == 0)
	553	error = nlookup(&nd);
	554	if (error)
	555	goto out;
	556
	557	mp = nd.nl_nch.mount;
	558
	559	/*
	560	* Only root, or the user that did the original mount is
	561	* permitted to unmount this filesystem.
	562	*/
	563	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
	564	(error = priv_check(td, PRIV_ROOT)))
	565	goto out;
	566
	567	/*
	568	* Don't allow unmounting the root file system.
	569	*/
	570	if (mp->mnt_flag & MNT_ROOTFS) {
	571	error = EINVAL;
	572	goto out;
	573	}
	574
	575	/*
	576	* Must be the root of the filesystem
	577	*/
	578	if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
	579	error = EINVAL;
	580	goto out;
	581	}
	582
	583	out:
	584	nlookup_done(&nd);
	585	if (error)
	586	return (error);
	587	return (dounmount(mp, uap->flags));
	588	}
	589
	590	/*
	591	* Do the actual file system unmount.
	592	*/
	593	static int
	594	dounmount_interlock(struct mount *mp)
	595	{
	596	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
	597	return (EBUSY);
	598	mp->mnt_kern_flag \|= MNTK_UNMOUNT;
	599	return(0);
	600	}
	601
	602	int
	603	dounmount(struct mount *mp, int flags)
	604	{
	605	struct namecache *ncp;
	606	struct nchandle nch;
	607	struct vnode *vp;
	608	int error;
	609	int async_flag;
	610	int lflags;
	611	int freeok = 1;
	612
	613	/*
	614	* Exclusive access for unmounting purposes
	615	*/
	616	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
	617	return (error);
	618
	619	/*
	620	* Allow filesystems to detect that a forced unmount is in progress.
	621	*/
	622	if (flags & MNT_FORCE)
	623	mp->mnt_kern_flag \|= MNTK_UNMOUNTF;
	624	lflags = LK_EXCLUSIVE \| ((flags & MNT_FORCE) ? 0 : LK_NOWAIT);
	625	error = lockmgr(&mp->mnt_lock, lflags);
	626	if (error) {
	627	mp->mnt_kern_flag &= ~(MNTK_UNMOUNT \| MNTK_UNMOUNTF);
	628	if (mp->mnt_kern_flag & MNTK_MWAIT)
	629	wakeup(mp);
	630	return (error);
	631	}
	632
	633	if (mp->mnt_flag & MNT_EXPUBLIC)
	634	vfs_setpublicfs(NULL, NULL, NULL);
	635
	636	vfs_msync(mp, MNT_WAIT);
	637	async_flag = mp->mnt_flag & MNT_ASYNC;
	638	mp->mnt_flag &=~ MNT_ASYNC;
	639
	640	/*
	641	* If this filesystem isn't aliasing other filesystems,
	642	* try to invalidate any remaining namecache entries and
	643	* check the count afterwords.
	644	*/
	645	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
	646	cache_lock(&mp->mnt_ncmountpt);
	647	cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY\|CINV_CHILDREN);
	648	cache_unlock(&mp->mnt_ncmountpt);
	649
	650	if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
	651	(ncp->nc_refs != 1 \|\| TAILQ_FIRST(&ncp->nc_list))) {
	652
	653	if ((flags & MNT_FORCE) == 0) {
	654	error = EBUSY;
	655	mount_warning(mp, "Cannot unmount: "
	656	"%d namecache "
	657	"references still "
	658	"present",
	659	ncp->nc_refs - 1);
	660	} else {
	661	mount_warning(mp, "Forced unmount: "
	662	"%d namecache "
	663	"references still "
	664	"present",
	665	ncp->nc_refs - 1);
	666	freeok = 0;
	667	}
	668	}
	669	}
	670
	671	/*
	672	* nchandle records ref the mount structure. Expect a count of 1
	673	* (our mount->mnt_ncmountpt).
	674	*/
	675	if (mp->mnt_refs != 1) {
	676	if ((flags & MNT_FORCE) == 0) {
	677	mount_warning(mp, "Cannot unmount: "
	678	"%d process references still "
	679	"present", mp->mnt_refs);
	680	error = EBUSY;
	681	} else {
	682	mount_warning(mp, "Forced unmount: "
	683	"%d process references still "
	684	"present", mp->mnt_refs);
	685	freeok = 0;
	686	}
	687	}
	688
	689	/*
	690	* Decomission our special mnt_syncer vnode. This also stops
	691	* the vnlru code. If we are unable to unmount we recommission
	692	* the vnode.
	693	*/
	694	if (error == 0) {
	695	if ((vp = mp->mnt_syncer) != NULL) {
	696	mp->mnt_syncer = NULL;
	697	vrele(vp);
	698	}
	699	if (((mp->mnt_flag & MNT_RDONLY) \|\|
	700	(error = VFS_SYNC(mp, MNT_WAIT)) == 0) \|\|
	701	(flags & MNT_FORCE)) {
	702	error = VFS_UNMOUNT(mp, flags);
	703	}
	704	}
	705	if (error) {
	706	if (mp->mnt_syncer == NULL)
	707	vfs_allocate_syncvnode(mp);
	708	mp->mnt_kern_flag &= ~(MNTK_UNMOUNT \| MNTK_UNMOUNTF);
	709	mp->mnt_flag \|= async_flag;
	710	lockmgr(&mp->mnt_lock, LK_RELEASE);
	711	if (mp->mnt_kern_flag & MNTK_MWAIT)
	712	wakeup(mp);
	713	return (error);
	714	}
	715	/*
	716	* Clean up any journals still associated with the mount after
	717	* filesystem activity has ceased.
	718	*/
	719	journal_remove_all_journals(mp,
	720	((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));
	721
	722	mountlist_remove(mp);
	723
	724	/*
	725	* Remove any installed vnode ops here so the individual VFSs don't
	726	* have to.
	727	*/
	728	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	729	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	730	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	731	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	732	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
	733
	734	if (mp->mnt_ncmountpt.ncp != NULL) {
	735	nch = mp->mnt_ncmountpt;
	736	cache_zero(&mp->mnt_ncmountpt);
	737	cache_clrmountpt(&nch);
	738	cache_drop(&nch);
	739	}
	740	if (mp->mnt_ncmounton.ncp != NULL) {
	741	nch = mp->mnt_ncmounton;
	742	cache_zero(&mp->mnt_ncmounton);
	743	cache_clrmountpt(&nch);
	744	cache_drop(&nch);
	745	}
	746
	747	mp->mnt_vfc->vfc_refcount--;
	748	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
	749	panic("unmount: dangling vnode");
	750	lockmgr(&mp->mnt_lock, LK_RELEASE);
	751	if (mp->mnt_kern_flag & MNTK_MWAIT)
	752	wakeup(mp);
	753	if (freeok)
	754	kfree(mp, M_MOUNT);
	755	return (0);
	756	}
	757
	758	static
	759	void
	760	mount_warning(struct mount mp, const char ctl, ...)
	761	{
	762	char *ptr;
	763	char *buf;
	764	__va_list va;
	765
	766	__va_start(va, ctl);
	767	if (cache_fullpath(NULL, &mp->mnt_ncmounton, &ptr, &buf) == 0) {
	768	kprintf("unmount(%s): ", ptr);
	769	kvprintf(ctl, va);
	770	kprintf("\n");
	771	kfree(buf, M_TEMP);
	772	} else {
	773	kprintf("unmount(%p", mp);
	774	if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
	775	kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
	776	kprintf("): ");
	777	kvprintf(ctl, va);
	778	kprintf("\n");
	779	}
	780	__va_end(va);
	781	}
	782
	783	/*
	784	* Shim cache_fullpath() to handle the case where a process is chrooted into
	785	* a subdirectory of a mount. In this case if the root mount matches the
	786	* process root directory's mount we have to specify the process's root
	787	* directory instead of the mount point, because the mount point might
	788	* be above the root directory.
	789	*/
	790	static
	791	int
	792	mount_path(struct proc p, struct mount mp, char rb, char fb)
	793	{
	794	struct nchandle *nch;
	795
	796	if (p && p->p_fd->fd_nrdir.mount == mp)
	797	nch = &p->p_fd->fd_nrdir;
	798	else
	799	nch = &mp->mnt_ncmountpt;
	800	return(cache_fullpath(p, nch, rb, fb));
	801	}
	802
	803	/*
	804	* Sync each mounted filesystem.
	805	*/
	806
	#ifdef DEBUG
	/* debug.syncprt: when non-zero, sys_sync() also calls vfs_bufstats(). */
	static int syncprt = 0;
	SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
	#endif /* DEBUG */

	static int sync_callback(struct mount *mp, void *data);
	813
	814	/* ARGSUSED */
	815	int
	816	sys_sync(struct sync_args *uap)
	817	{
	818	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
	819	#ifdef DEBUG
	820	/*
	821	* print out buffer pool stat information on each sync() call.
	822	*/
	823	if (syncprt)
	824	vfs_bufstats();
	825	#endif /* DEBUG */
	826	return (0);
	827	}
	828
	829	static
	830	int
	831	sync_callback(struct mount mp, void data __unused)
	832	{
	833	int asyncflag;
	834
	835	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
	836	asyncflag = mp->mnt_flag & MNT_ASYNC;
	837	mp->mnt_flag &= ~MNT_ASYNC;
	838	vfs_msync(mp, MNT_NOWAIT);
	839	VFS_SYNC(mp, MNT_NOWAIT);
	840	mp->mnt_flag \|= asyncflag;
	841	}
	842	return(0);
	843	}
	844
	/*
	 * While zero, sys_quotactl() returns EPERM to jailed callers.
	 * XXX PRISON: could be per prison flag
	 */
	static int prison_quotas = 0;
	#if 0
	SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
	#endif
	850
	851	/*
	852	* quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
	853	*
	854	* Change filesystem quotas.
	855	*/
	856	/* ARGSUSED */
	857	int
	858	sys_quotactl(struct quotactl_args *uap)
	859	{
	860	struct nlookupdata nd;
	861	struct thread *td;
	862	struct proc *p;
	863	struct mount *mp;
	864	int error;
	865
	866	td = curthread;
	867	p = td->td_proc;
	868	if (p->p_ucred->cr_prison && !prison_quotas)
	869	return (EPERM);
	870
	871	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	872	if (error == 0)
	873	error = nlookup(&nd);
	874	if (error == 0) {
	875	mp = nd.nl_nch.mount;
	876	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
	877	uap->arg, nd.nl_cred);
	878	}
	879	nlookup_done(&nd);
	880	return (error);
	881	}
	882
	883	/*
	884	* mountctl(char path, int op, int fd, const void ctl, int ctllen,
	885	* void *buf, int buflen)
	886	*
	887	* This function operates on a mount point and executes the specified
	888	* operation using the specified control data, and possibly returns data.
	889	*
	890	* The actual number of bytes stored in the result buffer is returned, 0
	891	* if none, otherwise an error is returned.
	892	*/
	893	/* ARGSUSED */
	894	int
	895	sys_mountctl(struct mountctl_args *uap)
	896	{
	897	struct thread *td = curthread;
	898	struct proc *p = td->td_proc;
	899	struct file *fp;
	900	void *ctl = NULL;
	901	void *buf = NULL;
	902	char *path = NULL;
	903	int error;
	904
	905	/*
	906	* Sanity and permissions checks. We must be root.
	907	*/
	908	KKASSERT(p);
	909	if (p->p_ucred->cr_prison != NULL)
	910	return (EPERM);
	911	if ((uap->op != MOUNTCTL_MOUNTFLAGS) &&
	912	(error = priv_check(td, PRIV_ROOT)) != 0)
	913	return (error);
	914
	915	/*
	916	* Argument length checks
	917	*/
	918	if (uap->ctllen < 0 \|\| uap->ctllen > 1024)
	919	return (EINVAL);
	920	if (uap->buflen < 0 \|\| uap->buflen > 16 * 1024)
	921	return (EINVAL);
	922	if (uap->path == NULL)
	923	return (EINVAL);
	924
	925	/*
	926	* Allocate the necessary buffers and copyin data
	927	*/
	928	path = objcache_get(namei_oc, M_WAITOK);
	929	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	930	if (error)
	931	goto done;
	932
	933	if (uap->ctllen) {
	934	ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK\|M_ZERO);
	935	error = copyin(uap->ctl, ctl, uap->ctllen);
	936	if (error)
	937	goto done;
	938	}
	939	if (uap->buflen)
	940	buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK\|M_ZERO);
	941
	942	/*
	943	* Validate the descriptor
	944	*/
	945	if (uap->fd >= 0) {
	946	fp = holdfp(p->p_fd, uap->fd, -1);
	947	if (fp == NULL) {
	948	error = EBADF;
	949	goto done;
	950	}
	951	} else {
	952	fp = NULL;
	953	}
	954
	955	/*
	956	* Execute the internal kernel function and clean up.
	957	*/
	958	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result);
	959	if (fp)
	960	fdrop(fp);
	961	if (error == 0 && uap->sysmsg_result > 0)
	962	error = copyout(buf, uap->buf, uap->sysmsg_result);
	963	done:
	964	if (path)
	965	objcache_put(namei_oc, path);
	966	if (ctl)
	967	kfree(ctl, M_TEMP);
	968	if (buf)
	969	kfree(buf, M_TEMP);
	970	return (error);
	971	}
	972
	973	/*
	974	* Execute a mount control operation by resolving the path to a mount point
	975	* and calling vop_mountctl().
	976	*
	977	* Use the mount point from the nch instead of the vnode so nullfs mounts
	978	* can properly spike the VOP.
	979	*/
	980	int
	981	kern_mountctl(const char path, int op, struct file fp,
	982	const void *ctl, int ctllen,
	983	void buf, int buflen, int res)
	984	{
	985	struct vnode *vp;
	986	struct mount *mp;
	987	struct nlookupdata nd;
	988	int error;
	989
	990	*res = 0;
	991	vp = NULL;
	992	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	993	if (error == 0)
	994	error = nlookup(&nd);
	995	if (error == 0)
	996	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	997	mp = nd.nl_nch.mount;
	998	nlookup_done(&nd);
	999	if (error)
	1000	return (error);
	1001	vn_unlock(vp);
	1002
	1003	/*
	1004	* Must be the root of the filesystem
	1005	*/
	1006	if ((vp->v_flag & (VROOT\|VPFSROOT)) == 0) {
	1007	vrele(vp);
	1008	return (EINVAL);
	1009	}
	1010	error = vop_mountctl(mp->mnt_vn_use_ops, op, fp, ctl, ctllen,
	1011	buf, buflen, res);
	1012	vrele(vp);
	1013	return (error);
	1014	}
	1015
	1016	int
	1017	kern_statfs(struct nlookupdata nd, struct statfs buf)
	1018	{
	1019	struct thread *td = curthread;
	1020	struct proc *p = td->td_proc;
	1021	struct mount *mp;
	1022	struct statfs *sp;
	1023	char fullpath, freepath;
	1024	int error;
	1025
	1026	if ((error = nlookup(nd)) != 0)
	1027	return (error);
	1028	mp = nd->nl_nch.mount;
	1029	sp = &mp->mnt_stat;
	1030	if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0)
	1031	return (error);
	1032
	1033	error = mount_path(p, mp, &fullpath, &freepath);
	1034	if (error)
	1035	return(error);
	1036	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	1037	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	1038	kfree(freepath, M_TEMP);
	1039
	1040	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	1041	bcopy(sp, buf, sizeof(*buf));
	1042	/* Only root should have access to the fsid's. */
	1043	if (priv_check(td, PRIV_ROOT))
	1044	buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	1045	return (0);
	1046	}
	1047
	1048	/*
	1049	* statfs_args(char path, struct statfs buf)
	1050	*
	1051	* Get filesystem statistics.
	1052	*/
	1053	int
	1054	sys_statfs(struct statfs_args *uap)
	1055	{
	1056	struct nlookupdata nd;
	1057	struct statfs buf;
	1058	int error;
	1059
	1060	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1061	if (error == 0)
	1062	error = kern_statfs(&nd, &buf);
	1063	nlookup_done(&nd);
	1064	if (error == 0)
	1065	error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	1066	return (error);
	1067	}
	1068
	1069	int
	1070	kern_fstatfs(int fd, struct statfs *buf)
	1071	{
	1072	struct thread *td = curthread;
	1073	struct proc *p = td->td_proc;
	1074	struct file *fp;
	1075	struct mount *mp;
	1076	struct statfs *sp;
	1077	char fullpath, freepath;
	1078	int error;
	1079
	1080	KKASSERT(p);
	1081	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	1082	return (error);
	1083	mp = ((struct vnode *)fp->f_data)->v_mount;
	1084	if (mp == NULL) {
	1085	error = EBADF;
	1086	goto done;
	1087	}
	1088	if (fp->f_cred == NULL) {
	1089	error = EINVAL;
	1090	goto done;
	1091	}
	1092	sp = &mp->mnt_stat;
	1093	if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0)
	1094	goto done;
	1095
	1096	if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
	1097	goto done;
	1098	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	1099	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	1100	kfree(freepath, M_TEMP);
	1101
	1102	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	1103	bcopy(sp, buf, sizeof(*buf));
	1104
	1105	/* Only root should have access to the fsid's. */
	1106	if (priv_check(td, PRIV_ROOT))
	1107	buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	1108	error = 0;
	1109	done:
	1110	fdrop(fp);
	1111	return (error);
	1112	}
	1113
	1114	/*
	1115	* fstatfs_args(int fd, struct statfs *buf)
	1116	*
	1117	* Get filesystem statistics.
	1118	*/
	1119	int
	1120	sys_fstatfs(struct fstatfs_args *uap)
	1121	{
	1122	struct statfs buf;
	1123	int error;
	1124
	1125	error = kern_fstatfs(uap->fd, &buf);
	1126
	1127	if (error == 0)
	1128	error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	1129	return (error);
	1130	}
	1131
	1132	int
	1133	kern_statvfs(struct nlookupdata nd, struct statvfs buf)
	1134	{
	1135	struct mount *mp;
	1136	struct statvfs *sp;
	1137	int error;
	1138
	1139	if ((error = nlookup(nd)) != 0)
	1140	return (error);
	1141	mp = nd->nl_nch.mount;
	1142	sp = &mp->mnt_vstat;
	1143	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
	1144	return (error);
	1145
	1146	sp->f_flag = 0;
	1147	if (mp->mnt_flag & MNT_RDONLY)
	1148	sp->f_flag \|= ST_RDONLY;
	1149	if (mp->mnt_flag & MNT_NOSUID)
	1150	sp->f_flag \|= ST_NOSUID;
	1151	bcopy(sp, buf, sizeof(*buf));
	1152	return (0);
	1153	}
	1154
	1155	/*
	1156	* statfs_args(char path, struct statfs buf)
	1157	*
	1158	* Get filesystem statistics.
	1159	*/
	1160	int
	1161	sys_statvfs(struct statvfs_args *uap)
	1162	{
	1163	struct nlookupdata nd;
	1164	struct statvfs buf;
	1165	int error;
	1166
	1167	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1168	if (error == 0)
	1169	error = kern_statvfs(&nd, &buf);
	1170	nlookup_done(&nd);
	1171	if (error == 0)
	1172	error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	1173	return (error);
	1174	}
	1175
	1176	int
	1177	kern_fstatvfs(int fd, struct statvfs *buf)
	1178	{
	1179	struct thread *td = curthread;
	1180	struct proc *p = td->td_proc;
	1181	struct file *fp;
	1182	struct mount *mp;
	1183	struct statvfs *sp;
	1184	int error;
	1185
	1186	KKASSERT(p);
	1187	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	1188	return (error);
	1189	mp = ((struct vnode *)fp->f_data)->v_mount;
	1190	if (mp == NULL) {
	1191	error = EBADF;
	1192	goto done;
	1193	}
	1194	if (fp->f_cred == NULL) {
	1195	error = EINVAL;
	1196	goto done;
	1197	}
	1198	sp = &mp->mnt_vstat;
	1199	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
	1200	goto done;
	1201
	1202	sp->f_flag = 0;
	1203	if (mp->mnt_flag & MNT_RDONLY)
	1204	sp->f_flag \|= ST_RDONLY;
	1205	if (mp->mnt_flag & MNT_NOSUID)
	1206	sp->f_flag \|= ST_NOSUID;
	1207
	1208	bcopy(sp, buf, sizeof(*buf));
	1209	error = 0;
	1210	done:
	1211	fdrop(fp);
	1212	return (error);
	1213	}
	1214
	1215	/*
	1216	* fstatfs_args(int fd, struct statfs *buf)
	1217	*
	1218	* Get filesystem statistics.
	1219	*/
	1220	int
	1221	sys_fstatvfs(struct fstatvfs_args *uap)
	1222	{
	1223	struct statvfs buf;
	1224	int error;
	1225
	1226	error = kern_fstatvfs(uap->fd, &buf);
	1227
	1228	if (error == 0)
	1229	error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	1230	return (error);
	1231	}
	1232
	/*
	 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
	 *
	 * Get statistics on all filesystems.
	 */

	/* State shared between sys_getfsstat() and its per-mount callback. */
	struct getfsstat_info {
		struct statfs *sfsp;	/* next user slot to copy out to */
		long count;		/* mounts counted so far */
		long maxcount;		/* number of slots the user buffer holds */
		int error;		/* error recorded by the callback */
		int flags;		/* flags argument from the syscall */
		struct proc *p;		/* calling process */
	};

	static int getfsstat_callback(struct mount *, void *);
	1249
	1250	/* ARGSUSED */
	1251	int
	1252	sys_getfsstat(struct getfsstat_args *uap)
	1253	{
	1254	struct thread *td = curthread;
	1255	struct proc *p = td->td_proc;
	1256	struct getfsstat_info info;
	1257
	1258	bzero(&info, sizeof(info));
	1259
	1260	info.maxcount = uap->bufsize / sizeof(struct statfs);
	1261	info.sfsp = uap->buf;
	1262	info.count = 0;
	1263	info.flags = uap->flags;
	1264	info.p = p;
	1265
	1266	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	1267	if (info.sfsp && info.count > info.maxcount)
	1268	uap->sysmsg_result = info.maxcount;
	1269	else
	1270	uap->sysmsg_result = info.count;
	1271	return (info.error);
	1272	}
	1273
	1274	static int
	1275	getfsstat_callback(struct mount mp, void data)
	1276	{
	1277	struct getfsstat_info *info = data;
	1278	struct statfs *sp;
	1279	char *freepath;
	1280	char *fullpath;
	1281	int error;
	1282
	1283	if (info->sfsp && info->count < info->maxcount) {
	1284	if (info->p && !chroot_visible_mnt(mp, info->p))
	1285	return(0);
	1286	sp = &mp->mnt_stat;
	1287
	1288	/*
	1289	* If MNT_NOWAIT or MNT_LAZY is specified, do not
	1290	* refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
	1291	* overrides MNT_WAIT.
	1292	*/
	1293	if (((info->flags & (MNT_LAZY\|MNT_NOWAIT)) == 0 \|\|
	1294	(info->flags & MNT_WAIT)) &&
	1295	(error = VFS_STATFS(mp, sp, info->p->p_ucred))) {
	1296	return(0);
	1297	}
	1298	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	1299
	1300	error = mount_path(info->p, mp, &fullpath, &freepath);
	1301	if (error) {
	1302	info->error = error;
	1303	return(-1);
	1304	}
	1305	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	1306	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	1307	kfree(freepath, M_TEMP);
	1308
	1309	error = copyout(sp, info->sfsp, sizeof(*sp));
	1310	if (error) {
	1311	info->error = error;
	1312	return (-1);
	1313	}
	1314	++info->sfsp;
	1315	}
	1316	info->count++;
	1317	return(0);
	1318	}
	1319
	/*
	 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
	 *		   long bufsize, int flags)
	 *
	 * Get statistics on all filesystems.
	 */

	/* State shared between sys_getvfsstat() and its per-mount callback. */
	struct getvfsstat_info {
		struct statfs *sfsp;	/* next user statfs slot */
		struct statvfs *vsfsp;	/* next user statvfs slot */
		long count;		/* mounts counted so far */
		long maxcount;		/* number of slots the user buffer holds */
		int error;		/* error recorded by the callback */
		int flags;		/* flags argument from the syscall */
		struct proc *p;		/* calling process */
	};

	static int getvfsstat_callback(struct mount *, void *);
	1338
	1339	/* ARGSUSED */
	1340	int
	1341	sys_getvfsstat(struct getvfsstat_args *uap)
	1342	{
	1343	struct thread *td = curthread;
	1344	struct proc *p = td->td_proc;
	1345	struct getvfsstat_info info;
	1346
	1347	bzero(&info, sizeof(info));
	1348
	1349	info.maxcount = uap->vbufsize / sizeof(struct statvfs);
	1350	info.sfsp = uap->buf;
	1351	info.vsfsp = uap->vbuf;
	1352	info.count = 0;
	1353	info.flags = uap->flags;
	1354	info.p = p;
	1355
	1356	mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
	1357	if (info.vsfsp && info.count > info.maxcount)
	1358	uap->sysmsg_result = info.maxcount;
	1359	else
	1360	uap->sysmsg_result = info.count;
	1361	return (info.error);
	1362	}
	1363
	1364	static int
	1365	getvfsstat_callback(struct mount mp, void data)
	1366	{
	1367	struct getvfsstat_info *info = data;
	1368	struct statfs *sp;
	1369	struct statvfs *vsp;
	1370	char *freepath;
	1371	char *fullpath;
	1372	int error;
	1373
	1374	if (info->vsfsp && info->count < info->maxcount) {
	1375	if (info->p && !chroot_visible_mnt(mp, info->p))
	1376	return(0);
	1377	sp = &mp->mnt_stat;
	1378	vsp = &mp->mnt_vstat;
	1379
	1380	/*
	1381	* If MNT_NOWAIT or MNT_LAZY is specified, do not
	1382	* refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
	1383	* overrides MNT_WAIT.
	1384	*/
	1385	if (((info->flags & (MNT_LAZY\|MNT_NOWAIT)) == 0 \|\|
	1386	(info->flags & MNT_WAIT)) &&
	1387	(error = VFS_STATFS(mp, sp, info->p->p_ucred))) {
	1388	return(0);
	1389	}
	1390	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	1391
	1392	if (((info->flags & (MNT_LAZY\|MNT_NOWAIT)) == 0 \|\|
	1393	(info->flags & MNT_WAIT)) &&
	1394	(error = VFS_STATVFS(mp, vsp, info->p->p_ucred))) {
	1395	return(0);
	1396	}
	1397	vsp->f_flag = 0;
	1398	if (mp->mnt_flag & MNT_RDONLY)
	1399	vsp->f_flag \|= ST_RDONLY;
	1400	if (mp->mnt_flag & MNT_NOSUID)
	1401	vsp->f_flag \|= ST_NOSUID;
	1402
	1403	error = mount_path(info->p, mp, &fullpath, &freepath);
	1404	if (error) {
	1405	info->error = error;
	1406	return(-1);
	1407	}
	1408	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	1409	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	1410	kfree(freepath, M_TEMP);
	1411
	1412	error = copyout(sp, info->sfsp, sizeof(*sp));
	1413	if (error == 0)
	1414	error = copyout(vsp, info->vsfsp, sizeof(*vsp));
	1415	if (error) {
	1416	info->error = error;
	1417	return (-1);
	1418	}
	1419	++info->sfsp;
	1420	++info->vsfsp;
	1421	}
	1422	info->count++;
	1423	return(0);
	1424	}
	1425
	1426
	1427	/*
	1428	* fchdir_args(int fd)
	1429	*
	1430	* Change current working directory to a given file descriptor.
	1431	*/
	1432	/* ARGSUSED */
	1433	int
	1434	sys_fchdir(struct fchdir_args *uap)
	1435	{
	1436	struct thread *td = curthread;
	1437	struct proc *p = td->td_proc;
	1438	struct filedesc *fdp = p->p_fd;
	1439	struct vnode vp, ovp;
	1440	struct mount *mp;
	1441	struct file *fp;
	1442	struct nchandle nch, onch, tnch;
	1443	int error;
	1444
	1445	if ((error = holdvnode(fdp, uap->fd, &fp)) != 0)
	1446	return (error);
	1447	vp = (struct vnode *)fp->f_data;
	1448	vref(vp);
	1449	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	1450	if (vp->v_type != VDIR \|\| fp->f_nchandle.ncp == NULL)
	1451	error = ENOTDIR;
	1452	else
	1453	error = VOP_ACCESS(vp, VEXEC, p->p_ucred);
	1454	if (error) {
	1455	vput(vp);
	1456	fdrop(fp);
	1457	return (error);
	1458	}
	1459	cache_copy(&fp->f_nchandle, &nch);
	1460
	1461	/*
	1462	* If the ncp has become a mount point, traverse through
	1463	* the mount point.
	1464	*/
	1465
	1466	while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	1467	(mp = cache_findmount(&nch)) != NULL
	1468	) {
	1469	error = nlookup_mp(mp, &tnch);
	1470	if (error == 0) {
	1471	cache_unlock(&tnch); /* leave ref intact */
	1472	vput(vp);
	1473	vp = tnch.ncp->nc_vp;
	1474	error = vget(vp, LK_SHARED);
	1475	KKASSERT(error == 0);
	1476	cache_drop(&nch);
	1477	nch = tnch;
	1478	}
	1479	}
	1480	if (error == 0) {
	1481	ovp = fdp->fd_cdir;
	1482	onch = fdp->fd_ncdir;
	1483	vn_unlock(vp); /* leave ref intact */
	1484	fdp->fd_cdir = vp;
	1485	fdp->fd_ncdir = nch;
	1486	cache_drop(&onch);
	1487	vrele(ovp);
	1488	} else {
	1489	cache_drop(&nch);
	1490	vput(vp);
	1491	}
	1492	fdrop(fp);
	1493	return (error);
	1494	}
	1495
	1496	int
	1497	kern_chdir(struct nlookupdata *nd)
	1498	{
	1499	struct thread *td = curthread;
	1500	struct proc *p = td->td_proc;
	1501	struct filedesc *fdp = p->p_fd;
	1502	struct vnode vp, ovp;
	1503	struct nchandle onch;
	1504	int error;
	1505
	1506	if ((error = nlookup(nd)) != 0)
	1507	return (error);
	1508	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
	1509	return (ENOENT);
	1510	if ((error = vget(vp, LK_SHARED)) != 0)
	1511	return (error);
	1512
	1513	error = checkvp_chdir(vp, td);
	1514	vn_unlock(vp);
	1515	if (error == 0) {
	1516	ovp = fdp->fd_cdir;
	1517	onch = fdp->fd_ncdir;
	1518	cache_unlock(&nd->nl_nch); /* leave reference intact */
	1519	fdp->fd_ncdir = nd->nl_nch;
	1520	fdp->fd_cdir = vp;
	1521	cache_drop(&onch);
	1522	vrele(ovp);
	1523	cache_zero(&nd->nl_nch);
	1524	} else {
	1525	vrele(vp);
	1526	}
	1527	return (error);
	1528	}
	1529
	1530	/*
	1531	* chdir_args(char *path)
	1532	*
	1533	* Change current working directory (``.'').
	1534	*/
	1535	int
	1536	sys_chdir(struct chdir_args *uap)
	1537	{
	1538	struct nlookupdata nd;
	1539	int error;
	1540
	1541	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1542	if (error == 0)
	1543	error = kern_chdir(&nd);
	1544	nlookup_done(&nd);
	1545	return (error);
	1546	}
	1547
	1548	/*
	1549	* Helper function for raised chroot(2) security function: Refuse if
	1550	* any filedescriptors are open directories.
	1551	*/
	1552	static int
	1553	chroot_refuse_vdir_fds(struct filedesc *fdp)
	1554	{
	1555	struct vnode *vp;
	1556	struct file *fp;
	1557	int error;
	1558	int fd;
	1559
	1560	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
	1561	if ((error = holdvnode(fdp, fd, &fp)) != 0)
	1562	continue;
	1563	vp = (struct vnode *)fp->f_data;
	1564	if (vp->v_type != VDIR) {
	1565	fdrop(fp);
	1566	continue;
	1567	}
	1568	fdrop(fp);
	1569	return(EPERM);
	1570	}
	1571	return (0);
	1572	}
	1573
	1574	/*
	1575	* This sysctl determines if we will allow a process to chroot(2) if it
	1576	* has a directory open:
	1577	* 0: disallowed for all processes.
	1578	* 1: allowed for processes that were not already chroot(2)'ed.
	1579	* 2: allowed for all processes.
	1580	*/
	1581
	1582	static int chroot_allow_open_directories = 1;
	1583
	1584	SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
	1585	&chroot_allow_open_directories, 0, "");
	1586
	1587	/*
	1588	* chroot to the specified namecache entry. We obtain the vp from the
	1589	* namecache data. The passed ncp must be locked and referenced and will
	1590	* remain locked and referenced on return.
	1591	*/
	1592	int
	1593	kern_chroot(struct nchandle *nch)
	1594	{
	1595	struct thread *td = curthread;
	1596	struct proc *p = td->td_proc;
	1597	struct filedesc *fdp = p->p_fd;
	1598	struct vnode *vp;
	1599	int error;
	1600
	1601	/*
	1602	* Only privileged user can chroot
	1603	*/
	1604	error = priv_check_cred(p->p_ucred, PRIV_VFS_CHROOT, 0);
	1605	if (error)
	1606	return (error);
	1607
	1608	/*
	1609	* Disallow open directory descriptors (fchdir() breakouts).
	1610	*/
	1611	if (chroot_allow_open_directories == 0 \|\|
	1612	(chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
	1613	if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
	1614	return (error);
	1615	}
	1616	if ((vp = nch->ncp->nc_vp) == NULL)
	1617	return (ENOENT);
	1618
	1619	if ((error = vget(vp, LK_SHARED)) != 0)
	1620	return (error);
	1621
	1622	/*
	1623	* Check the validity of vp as a directory to change to and
	1624	* associate it with rdir/jdir.
	1625	*/
	1626	error = checkvp_chdir(vp, td);
	1627	vn_unlock(vp); /* leave reference intact */
	1628	if (error == 0) {
	1629	vrele(fdp->fd_rdir);
	1630	fdp->fd_rdir = vp; /* reference inherited by fd_rdir */
	1631	cache_drop(&fdp->fd_nrdir);
	1632	cache_copy(nch, &fdp->fd_nrdir);
	1633	if (fdp->fd_jdir == NULL) {
	1634	fdp->fd_jdir = vp;
	1635	vref(fdp->fd_jdir);
	1636	cache_copy(nch, &fdp->fd_njdir);
	1637	}
	1638	} else {
	1639	vrele(vp);
	1640	}
	1641	return (error);
	1642	}
	1643
	1644	/*
	1645	* chroot_args(char *path)
	1646	*
	1647	* Change notion of root (``/'') directory.
	1648	*/
	1649	/* ARGSUSED */
	1650	int
	1651	sys_chroot(struct chroot_args *uap)
	1652	{
	1653	struct thread *td = curthread;
	1654	struct nlookupdata nd;
	1655	int error;
	1656
	1657	KKASSERT(td->td_proc);
	1658	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	1659	if (error) {
	1660	nlookup_done(&nd);
	1661	return(error);
	1662	}
	1663	nd.nl_flags \|= NLC_EXEC;
	1664	error = nlookup(&nd);
	1665	if (error == 0)
	1666	error = kern_chroot(&nd.nl_nch);
	1667	nlookup_done(&nd);
	1668	return(error);
	1669	}
	1670
	1671	/*
	1672	* Common routine for chroot and chdir. Given a locked, referenced vnode,
	1673	* determine whether it is legal to chdir to the vnode. The vnode's state
	1674	* is not changed by this call.
	1675	*/
	1676	int
	1677	checkvp_chdir(struct vnode vp, struct thread td)
	1678	{
	1679	int error;
	1680
	1681	if (vp->v_type != VDIR)
	1682	error = ENOTDIR;
	1683	else
	1684	error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred);
	1685	return (error);
	1686	}
	1687
	1688	int
	1689	kern_open(struct nlookupdata nd, int oflags, int mode, int res)
	1690	{
	1691	struct thread *td = curthread;
	1692	struct proc *p = td->td_proc;
	1693	struct lwp *lp = td->td_lwp;
	1694	struct filedesc *fdp = p->p_fd;
	1695	int cmode, flags;
	1696	struct file *nfp;
	1697	struct file *fp;
	1698	struct vnode *vp;
	1699	int type, indx, error;
	1700	struct flock lf;
	1701
	1702	if ((oflags & O_ACCMODE) == O_ACCMODE)
	1703	return (EINVAL);
	1704	flags = FFLAGS(oflags);
	1705	error = falloc(p, &nfp, NULL);
	1706	if (error)
	1707	return (error);
	1708	fp = nfp;
	1709	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
	1710
	1711	/*
	1712	* XXX p_dupfd is a real mess. It allows a device to return a
	1713	* file descriptor to be duplicated rather then doing the open
	1714	* itself.
	1715	*/
	1716	lp->lwp_dupfd = -1;
	1717
	1718	/*
	1719	* Call vn_open() to do the lookup and assign the vnode to the
	1720	* file pointer. vn_open() does not change the ref count on fp
	1721	* and the vnode, on success, will be inherited by the file pointer
	1722	* and unlocked.
	1723	*/
	1724	nd->nl_flags \|= NLC_LOCKVP;
	1725	error = vn_open(nd, fp, flags, cmode);
	1726	nlookup_done(nd);
	1727	if (error) {
	1728	/*
	1729	* handle special fdopen() case. bleh. dupfdopen() is
	1730	* responsible for dropping the old contents of ofiles[indx]
	1731	* if it succeeds.
	1732	*
	1733	* Note that fsetfd() will add a ref to fp which represents
	1734	* the fd_files[] assignment. We must still drop our
	1735	* reference.
	1736	*/
	1737	if ((error == ENODEV \|\| error == ENXIO) && lp->lwp_dupfd >= 0) {
	1738	if (fdalloc(p, 0, &indx) == 0) {
	1739	error = dupfdopen(p, indx, lp->lwp_dupfd, flags, error);
	1740	if (error == 0) {
	1741	*res = indx;
	1742	fdrop(fp); /* our ref */
	1743	return (0);
	1744	}
	1745	fsetfd(p, NULL, indx);
	1746	}
	1747	}
	1748	fdrop(fp); /* our ref */
	1749	if (error == ERESTART)
	1750	error = EINTR;
	1751	return (error);
	1752	}
	1753
	1754	/*
	1755	* ref the vnode for ourselves so it can't be ripped out from under
	1756	* is. XXX need an ND flag to request that the vnode be returned
	1757	* anyway.
	1758	*
	1759	* Reserve a file descriptor but do not assign it until the open
	1760	* succeeds.
	1761	*/
	1762	vp = (struct vnode *)fp->f_data;
	1763	vref(vp);
	1764	if ((error = fdalloc(p, 0, &indx)) != 0) {
	1765	fdrop(fp);
	1766	vrele(vp);
	1767	return (error);
	1768	}
	1769
	1770	/*
	1771	* If no error occurs the vp will have been assigned to the file
	1772	* pointer.
	1773	*/
	1774	lp->lwp_dupfd = 0;
	1775
	1776	if (flags & (O_EXLOCK \| O_SHLOCK)) {
	1777	lf.l_whence = SEEK_SET;
	1778	lf.l_start = 0;
	1779	lf.l_len = 0;
	1780	if (flags & O_EXLOCK)
	1781	lf.l_type = F_WRLCK;
	1782	else
	1783	lf.l_type = F_RDLCK;
	1784	if (flags & FNONBLOCK)
	1785	type = 0;
	1786	else
	1787	type = F_WAIT;
	1788
	1789	if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
	1790	/*
	1791	* lock request failed. Clean up the reserved
	1792	* descriptor.
	1793	*/
	1794	vrele(vp);
	1795	fsetfd(p, NULL, indx);
	1796	fdrop(fp);
	1797	return (error);
	1798	}
	1799	fp->f_flag \|= FHASLOCK;
	1800	}
	1801	#if 0
	1802	/*
	1803	* Assert that all regular file vnodes were created with a object.
	1804	*/
	1805	KASSERT(vp->v_type != VREG \|\| vp->v_object != NULL,
	1806	("open: regular file has no backing object after vn_open"));
	1807	#endif
	1808
	1809	vrele(vp);
	1810
	1811	/*
	1812	* release our private reference, leaving the one associated with the
	1813	* descriptor table intact.
	1814	*/
	1815	fsetfd(p, fp, indx);
	1816	fdrop(fp);
	1817	*res = indx;
	1818	return (0);
	1819	}
	1820
	1821	/*
	1822	* open_args(char *path, int flags, int mode)
	1823	*
	1824	* Check permissions, allocate an open file structure,
	1825	* and call the device open routine if any.
	1826	*/
	1827	int
	1828	sys_open(struct open_args *uap)
	1829	{
	1830	struct nlookupdata nd;
	1831	int error;
	1832
	1833	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	1834	if (error == 0) {
	1835	error = kern_open(&nd, uap->flags,
	1836	uap->mode, &uap->sysmsg_result);
	1837	}
	1838	nlookup_done(&nd);
	1839	return (error);
	1840	}
	1841
	1842	/*
	1843	* openat_args(int fd, char *path, int flags, int mode)
	1844	*/
	1845	int
	1846	sys_openat(struct openat_args *uap)
	1847	{
	1848	struct nlookupdata nd;
	1849	int error;
	1850	struct file *fp;
	1851
	1852	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	1853	if (error == 0) {
	1854	error = kern_open(&nd, uap->flags, uap->mode,
	1855	&uap->sysmsg_result);
	1856	}
	1857	nlookup_done_at(&nd, fp);
	1858	return (error);
	1859	}
	1860
	1861	int
	1862	kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
	1863	{
	1864	struct thread *td = curthread;
	1865	struct proc *p = td->td_proc;
	1866	struct vnode *vp;
	1867	struct vattr vattr;
	1868	int error;
	1869	int whiteout = 0;
	1870
	1871	KKASSERT(p);
	1872
	1873	VATTR_NULL(&vattr);
	1874	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	1875	vattr.va_rmajor = rmajor;
	1876	vattr.va_rminor = rminor;
	1877
	1878	switch (mode & S_IFMT) {
	1879	case S_IFMT: /* used by badsect to flag bad sectors */
	1880	error = priv_check_cred(p->p_ucred, PRIV_VFS_MKNOD_BAD, 0);
	1881	vattr.va_type = VBAD;
	1882	break;
	1883	case S_IFCHR:
	1884	error = priv_check(td, PRIV_VFS_MKNOD_DEV);
	1885	vattr.va_type = VCHR;
	1886	break;
	1887	case S_IFBLK:
	1888	error = priv_check(td, PRIV_VFS_MKNOD_DEV);
	1889	vattr.va_type = VBLK;
	1890	break;
	1891	case S_IFWHT:
	1892	error = priv_check_cred(p->p_ucred, PRIV_VFS_MKNOD_WHT, 0);
	1893	whiteout = 1;
	1894	break;
	1895	case S_IFDIR: /* special directories support for HAMMER */
	1896	error = priv_check_cred(p->p_ucred, PRIV_VFS_MKNOD_DIR, 0);
	1897	vattr.va_type = VDIR;
	1898	break;
	1899	default:
	1900	error = EINVAL;
	1901	break;
	1902	}
	1903
	1904	if (error)
	1905	return (error);
	1906
	1907	bwillinode(1);
	1908	nd->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	1909	if ((error = nlookup(nd)) != 0)
	1910	return (error);
	1911	if (nd->nl_nch.ncp->nc_vp)
	1912	return (EEXIST);
	1913	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	1914	return (error);
	1915
	1916	if (whiteout) {
	1917	error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
	1918	nd->nl_cred, NAMEI_CREATE);
	1919	} else {
	1920	vp = NULL;
	1921	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
	1922	&vp, nd->nl_cred, &vattr);
	1923	if (error == 0)
	1924	vput(vp);
	1925	}
	1926	return (error);
	1927	}
	1928
	1929	/*
	1930	* mknod_args(char *path, int mode, int dev)
	1931	*
	1932	* Create a special file.
	1933	*/
	1934	int
	1935	sys_mknod(struct mknod_args *uap)
	1936	{
	1937	struct nlookupdata nd;
	1938	int error;
	1939
	1940	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	1941	if (error == 0) {
	1942	error = kern_mknod(&nd, uap->mode,
	1943	umajor(uap->dev), uminor(uap->dev));
	1944	}
	1945	nlookup_done(&nd);
	1946	return (error);
	1947	}
	1948
	1949	int
	1950	kern_mkfifo(struct nlookupdata *nd, int mode)
	1951	{
	1952	struct thread *td = curthread;
	1953	struct proc *p = td->td_proc;
	1954	struct vattr vattr;
	1955	struct vnode *vp;
	1956	int error;
	1957
	1958	bwillinode(1);
	1959
	1960	nd->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	1961	if ((error = nlookup(nd)) != 0)
	1962	return (error);
	1963	if (nd->nl_nch.ncp->nc_vp)
	1964	return (EEXIST);
	1965	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	1966	return (error);
	1967
	1968	VATTR_NULL(&vattr);
	1969	vattr.va_type = VFIFO;
	1970	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	1971	vp = NULL;
	1972	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
	1973	if (error == 0)
	1974	vput(vp);
	1975	return (error);
	1976	}
	1977
	1978	/*
	1979	* mkfifo_args(char *path, int mode)
	1980	*
	1981	* Create a named pipe.
	1982	*/
	1983	int
	1984	sys_mkfifo(struct mkfifo_args *uap)
	1985	{
	1986	struct nlookupdata nd;
	1987	int error;
	1988
	1989	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	1990	if (error == 0)
	1991	error = kern_mkfifo(&nd, uap->mode);
	1992	nlookup_done(&nd);
	1993	return (error);
	1994	}
	1995
	1996	static int hardlink_check_uid = 0;
	1997	SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
	1998	&hardlink_check_uid, 0,
	1999	"Unprivileged processes cannot create hard links to files owned by other "
	2000	"users");
	2001	static int hardlink_check_gid = 0;
	2002	SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
	2003	&hardlink_check_gid, 0,
	2004	"Unprivileged processes cannot create hard links to files owned by other "
	2005	"groups");
	2006
	2007	static int
	2008	can_hardlink(struct vnode vp, struct thread td, struct ucred *cred)
	2009	{
	2010	struct vattr va;
	2011	int error;
	2012
	2013	/*
	2014	* Shortcut if disabled
	2015	*/
	2016	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
	2017	return (0);
	2018
	2019	/*
	2020	* Privileged user can always hardlink
	2021	*/
	2022	if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0)
	2023	return (0);
	2024
	2025	/*
	2026	* Otherwise only if the originating file is owned by the
	2027	* same user or group. Note that any group is allowed if
	2028	* the file is owned by the caller.
	2029	*/
	2030	error = VOP_GETATTR(vp, &va);
	2031	if (error != 0)
	2032	return (error);
	2033
	2034	if (hardlink_check_uid) {
	2035	if (cred->cr_uid != va.va_uid)
	2036	return (EPERM);
	2037	}
	2038
	2039	if (hardlink_check_gid) {
	2040	if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
	2041	return (EPERM);
	2042	}
	2043
	2044	return (0);
	2045	}
	2046
	2047	int
	2048	kern_link(struct nlookupdata nd, struct nlookupdata linknd)
	2049	{
	2050	struct thread *td = curthread;
	2051	struct vnode *vp;
	2052	int error;
	2053
	2054	/*
	2055	* Lookup the source and obtained a locked vnode.
	2056	*
	2057	* You may only hardlink a file which you have write permission
	2058	* on or which you own.
	2059	*
	2060	* XXX relookup on vget failure / race ?
	2061	*/
	2062	bwillinode(1);
	2063	nd->nl_flags \|= NLC_WRITE \| NLC_OWN \| NLC_HLINK;
	2064	if ((error = nlookup(nd)) != 0)
	2065	return (error);
	2066	vp = nd->nl_nch.ncp->nc_vp;
	2067	KKASSERT(vp != NULL);
	2068	if (vp->v_type == VDIR)
	2069	return (EPERM); /* POSIX */
	2070	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	2071	return (error);
	2072	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
	2073	return (error);
	2074
	2075	/*
	2076	* Unlock the source so we can lookup the target without deadlocking
	2077	* (XXX vp is locked already, possible other deadlock?). The target
	2078	* must not exist.
	2079	*/
	2080	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	2081	nd->nl_flags &= ~NLC_NCPISLOCKED;
	2082	cache_unlock(&nd->nl_nch);
	2083
	2084	linknd->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	2085	if ((error = nlookup(linknd)) != 0) {
	2086	vput(vp);
	2087	return (error);
	2088	}
	2089	if (linknd->nl_nch.ncp->nc_vp) {
	2090	vput(vp);
	2091	return (EEXIST);
	2092	}
	2093
	2094	/*
	2095	* Finally run the new API VOP.
	2096	*/
	2097	error = can_hardlink(vp, td, td->td_proc->p_ucred);
	2098	if (error == 0) {
	2099	error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
	2100	vp, linknd->nl_cred);
	2101	}
	2102	vput(vp);
	2103	return (error);
	2104	}
	2105
	2106	/*
	2107	* link_args(char path, char link)
	2108	*
	2109	* Make a hard file link.
	2110	*/
	2111	int
	2112	sys_link(struct link_args *uap)
	2113	{
	2114	struct nlookupdata nd, linknd;
	2115	int error;
	2116
	2117	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2118	if (error == 0) {
	2119	error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
	2120	if (error == 0)
	2121	error = kern_link(&nd, &linknd);
	2122	nlookup_done(&linknd);
	2123	}
	2124	nlookup_done(&nd);
	2125	return (error);
	2126	}
	2127
	2128	int
	2129	kern_symlink(struct nlookupdata nd, char path, int mode)
	2130	{
	2131	struct vattr vattr;
	2132	struct vnode *vp;
	2133	struct vnode *dvp;
	2134	int error;
	2135
	2136	bwillinode(1);
	2137	nd->nl_flags \|= NLC_CREATE \| NLC_REFDVP;
	2138	if ((error = nlookup(nd)) != 0)
	2139	return (error);
	2140	if (nd->nl_nch.ncp->nc_vp)
	2141	return (EEXIST);
	2142	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	2143	return (error);
	2144	dvp = nd->nl_dvp;
	2145	VATTR_NULL(&vattr);
	2146	vattr.va_mode = mode;
	2147	error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
	2148	if (error == 0)
	2149	vput(vp);
	2150	return (error);
	2151	}
	2152
	2153	/*
	2154	* symlink(char path, char link)
	2155	*
	2156	* Make a symbolic link.
	2157	*/
	2158	int
	2159	sys_symlink(struct symlink_args *uap)
	2160	{
	2161	struct thread *td = curthread;
	2162	struct nlookupdata nd;
	2163	char *path;
	2164	int error;
	2165	int mode;
	2166
	2167	path = objcache_get(namei_oc, M_WAITOK);
	2168	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	2169	if (error == 0) {
	2170	error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
	2171	if (error == 0) {
	2172	mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
	2173	error = kern_symlink(&nd, path, mode);
	2174	}
	2175	nlookup_done(&nd);
	2176	}
	2177	objcache_put(namei_oc, path);
	2178	return (error);
	2179	}
	2180
	2181	/*
	2182	* undelete_args(char *path)
	2183	*
	2184	* Delete a whiteout from the filesystem.
	2185	*/
	2186	/* ARGSUSED */
	2187	int
	2188	sys_undelete(struct undelete_args *uap)
	2189	{
	2190	struct nlookupdata nd;
	2191	int error;
	2192
	2193	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2194	bwillinode(1);
	2195	nd.nl_flags \|= NLC_DELETE \| NLC_REFDVP;
	2196	if (error == 0)
	2197	error = nlookup(&nd);
	2198	if (error == 0)
	2199	error = ncp_writechk(&nd.nl_nch);
	2200	if (error == 0) {
	2201	error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred,
	2202	NAMEI_DELETE);
	2203	}
	2204	nlookup_done(&nd);
	2205	return (error);
	2206	}
	2207
	2208	int
	2209	kern_unlink(struct nlookupdata *nd)
	2210	{
	2211	int error;
	2212
	2213	bwillinode(1);
	2214	nd->nl_flags \|= NLC_DELETE \| NLC_REFDVP;
	2215	if ((error = nlookup(nd)) != 0)
	2216	return (error);
	2217	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	2218	return (error);
	2219	error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	2220	return (error);
	2221	}
	2222
	2223	/*
	2224	* unlink_args(char *path)
	2225	*
	2226	* Delete a name from the filesystem.
	2227	*/
	2228	int
	2229	sys_unlink(struct unlink_args *uap)
	2230	{
	2231	struct nlookupdata nd;
	2232	int error;
	2233
	2234	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2235	if (error == 0)
	2236	error = kern_unlink(&nd);
	2237	nlookup_done(&nd);
	2238	return (error);
	2239	}
	2240
	2241
	2242	/*
	2243	* unlinkat_args(int fd, char *path, int flags)
	2244	*
	2245	* Delete the file or directory entry pointed to by fd/path.
	2246	*/
	2247	int
	2248	sys_unlinkat(struct unlinkat_args *uap)
	2249	{
	2250	struct nlookupdata nd;
	2251	struct file *fp;
	2252	int error;
	2253
	2254	if (uap->flags & ~AT_REMOVEDIR)
	2255	return (EINVAL);
	2256
	2257	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	2258	if (error == 0) {
	2259	if (uap->flags & AT_REMOVEDIR)
	2260	error = kern_rmdir(&nd);
	2261	else
	2262	error = kern_unlink(&nd);
	2263	}
	2264	nlookup_done_at(&nd, fp);
	2265	return (error);
	2266	}
	2267
	2268	int
	2269	kern_lseek(int fd, off_t offset, int whence, off_t *res)
	2270	{
	2271	struct thread *td = curthread;
	2272	struct proc *p = td->td_proc;
	2273	struct file *fp;
	2274	struct vnode *vp;
	2275	struct vattr vattr;
	2276	off_t new_offset;
	2277	int error;
	2278
	2279	fp = holdfp(p->p_fd, fd, -1);
	2280	if (fp == NULL)
	2281	return (EBADF);
	2282	if (fp->f_type != DTYPE_VNODE) {
	2283	error = ESPIPE;
	2284	goto done;
	2285	}
	2286	vp = (struct vnode *)fp->f_data;
	2287
	2288	switch (whence) {
	2289	case L_INCR:
	2290	new_offset = fp->f_offset + offset;
	2291	error = 0;
	2292	break;
	2293	case L_XTND:
	2294	error = VOP_GETATTR(vp, &vattr);
	2295	new_offset = offset + vattr.va_size;
	2296	break;
	2297	case L_SET:
	2298	new_offset = offset;
	2299	error = 0;
	2300	break;
	2301	default:
	2302	new_offset = 0;
	2303	error = EINVAL;
	2304	break;
	2305	}
	2306
	2307	/*
	2308	* Validate the seek position. Negative offsets are not allowed
	2309	* for regular files or directories.
	2310	*
	2311	* Normally we would also not want to allow negative offsets for
	2312	* character and block-special devices. However kvm addresses
	2313	* on 64 bit architectures might appear to be negative and must
	2314	* be allowed.
	2315	*/
	2316	if (error == 0) {
	2317	if (new_offset < 0 &&
	2318	(vp->v_type == VREG \|\| vp->v_type == VDIR)) {
	2319	error = EINVAL;
	2320	} else {
	2321	fp->f_offset = new_offset;
	2322	}
	2323	}
	2324	*res = fp->f_offset;
	2325	done:
	2326	fdrop(fp);
	2327	return (error);
	2328	}
	2329
	2330	/*
	2331	* lseek_args(int fd, int pad, off_t offset, int whence)
	2332	*
	2333	* Reposition read/write file offset.
	2334	*/
	2335	int
	2336	sys_lseek(struct lseek_args *uap)
	2337	{
	2338	int error;
	2339
	2340	error = kern_lseek(uap->fd, uap->offset, uap->whence,
	2341	&uap->sysmsg_offset);
	2342
	2343	return (error);
	2344	}
	2345
	2346	/*
	2347	* Check if current process can access given file. amode is a bitmask of *_OK
	2348	* access bits. flags is a bitmask of AT_* flags.
	2349	*/
	2350	int
	2351	kern_access(struct nlookupdata *nd, int amode, int flags)
	2352	{
	2353	struct vnode *vp;
	2354	int error, mode;
	2355
	2356	if (flags & ~AT_EACCESS)
	2357	return (EINVAL);
	2358	if ((error = nlookup(nd)) != 0)
	2359	return (error);
	2360	retry:
	2361	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp);
	2362	if (error)
	2363	return (error);
	2364
	2365	/* Flags == 0 means only check for existence. */
	2366	if (amode) {
	2367	mode = 0;
	2368	if (amode & R_OK)
	2369	mode \|= VREAD;
	2370	if (amode & W_OK)
	2371	mode \|= VWRITE;
	2372	if (amode & X_OK)
	2373	mode \|= VEXEC;
	2374	if ((mode & VWRITE) == 0 \|\|
	2375	(error = vn_writechk(vp, &nd->nl_nch)) == 0)
	2376	error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred);
	2377
	2378	/*
	2379	* If the file handle is stale we have to re-resolve the
	2380	* entry. This is a hack at the moment.
	2381	*/
	2382	if (error == ESTALE) {
	2383	vput(vp);
	2384	cache_setunresolved(&nd->nl_nch);
	2385	error = cache_resolve(&nd->nl_nch, nd->nl_cred);
	2386	if (error == 0) {
	2387	vp = NULL;
	2388	goto retry;
	2389	}
	2390	return(error);
	2391	}
	2392	}
	2393	vput(vp);
	2394	return (error);
	2395	}
	2396
	2397	/*
	2398	* access_args(char *path, int flags)
	2399	*
	2400	* Check access permissions.
	2401	*/
	2402	int
	2403	sys_access(struct access_args *uap)
	2404	{
	2405	struct nlookupdata nd;
	2406	int error;
	2407
	2408	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2409	if (error == 0)
	2410	error = kern_access(&nd, uap->flags, 0);
	2411	nlookup_done(&nd);
	2412	return (error);
	2413	}
	2414
	2415
	2416	/*
	2417	* faccessat_args(int fd, char *path, int amode, int flags)
	2418	*
	2419	* Check access permissions.
	2420	*/
	2421	int
	2422	sys_faccessat(struct faccessat_args *uap)
	2423	{
	2424	struct nlookupdata nd;
	2425	struct file *fp;
	2426	int error;
	2427
	2428	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE,
	2429	NLC_FOLLOW);
	2430	if (error == 0)
	2431	error = kern_access(&nd, uap->amode, uap->flags);
	2432	nlookup_done_at(&nd, fp);
	2433	return (error);
	2434	}
	2435
	2436
	2437	int
	2438	kern_stat(struct nlookupdata nd, struct stat st)
	2439	{
	2440	int error;
	2441	struct vnode *vp;
	2442	thread_t td;
	2443
	2444	if ((error = nlookup(nd)) != 0)
	2445	return (error);
	2446	again:
	2447	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
	2448	return (ENOENT);
	2449
	2450	td = curthread;
	2451	if ((error = vget(vp, LK_SHARED)) != 0)
	2452	return (error);
	2453	error = vn_stat(vp, st, nd->nl_cred);
	2454
	2455	/*
	2456	* If the file handle is stale we have to re-resolve the entry. This
	2457	* is a hack at the moment.
	2458	*/
	2459	if (error == ESTALE) {
	2460	vput(vp);
	2461	cache_setunresolved(&nd->nl_nch);
	2462	error = cache_resolve(&nd->nl_nch, nd->nl_cred);
	2463	if (error == 0)
	2464	goto again;
	2465	} else {
	2466	vput(vp);
	2467	}
	2468	return (error);
	2469	}
	2470
	2471	/*
	2472	* stat_args(char path, struct stat ub)
	2473	*
	2474	* Get file status; this version follows links.
	2475	*/
	2476	int
	2477	sys_stat(struct stat_args *uap)
	2478	{
	2479	struct nlookupdata nd;
	2480	struct stat st;
	2481	int error;
	2482
	2483	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2484	if (error == 0) {
	2485	error = kern_stat(&nd, &st);
	2486	if (error == 0)
	2487	error = copyout(&st, uap->ub, sizeof(*uap->ub));
	2488	}
	2489	nlookup_done(&nd);
	2490	return (error);
	2491	}
	2492
	2493	/*
	2494	* lstat_args(char path, struct stat ub)
	2495	*
	2496	* Get file status; this version does not follow links.
	2497	*/
	2498	int
	2499	sys_lstat(struct lstat_args *uap)
	2500	{
	2501	struct nlookupdata nd;
	2502	struct stat st;
	2503	int error;
	2504
	2505	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2506	if (error == 0) {
	2507	error = kern_stat(&nd, &st);
	2508	if (error == 0)
	2509	error = copyout(&st, uap->ub, sizeof(*uap->ub));
	2510	}
	2511	nlookup_done(&nd);
	2512	return (error);
	2513	}
	2514
	2515	/*
	2516	* fstatat_args(int fd, char path, struct stat sb, int flags)
	2517	*
	2518	* Get status of file pointed to by fd/path.
	2519	*/
	2520	int
	2521	sys_fstatat(struct fstatat_args *uap)
	2522	{
	2523	struct nlookupdata nd;
	2524	struct stat st;
	2525	int error;
	2526	int flags;
	2527	struct file *fp;
	2528
	2529	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
	2530	return (EINVAL);
	2531
	2532	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
	2533
	2534	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
	2535	UIO_USERSPACE, flags);
	2536	if (error == 0) {
	2537	error = kern_stat(&nd, &st);
	2538	if (error == 0)
	2539	error = copyout(&st, uap->sb, sizeof(*uap->sb));
	2540	}
	2541	nlookup_done_at(&nd, fp);
	2542	return (error);
	2543	}
	2544
	2545	/*
	2546	* pathconf_Args(char *path, int name)
	2547	*
	2548	* Get configurable pathname variables.
	2549	*/
	2550	/* ARGSUSED */
	2551	int
	2552	sys_pathconf(struct pathconf_args *uap)
	2553	{
	2554	struct nlookupdata nd;
	2555	struct vnode *vp;
	2556	int error;
	2557
	2558	vp = NULL;
	2559	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2560	if (error == 0)
	2561	error = nlookup(&nd);
	2562	if (error == 0)
	2563	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	2564	nlookup_done(&nd);
	2565	if (error == 0) {
	2566	error = VOP_PATHCONF(vp, uap->name, &uap->sysmsg_reg);
	2567	vput(vp);
	2568	}
	2569	return (error);
	2570	}
	2571
	2572	/*
	2573	* XXX: daver
	2574	* kern_readlink isn't properly split yet. There is a copyin burried
	2575	* in VOP_READLINK().
	2576	*/
	2577	int
	2578	kern_readlink(struct nlookupdata nd, char buf, int count, int *res)
	2579	{
	2580	struct thread *td = curthread;
	2581	struct proc *p = td->td_proc;
	2582	struct vnode *vp;
	2583	struct iovec aiov;
	2584	struct uio auio;
	2585	int error;
	2586
	2587	if ((error = nlookup(nd)) != 0)
	2588	return (error);
	2589	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp);
	2590	if (error)
	2591	return (error);
	2592	if (vp->v_type != VLNK) {
	2593	error = EINVAL;
	2594	} else {
	2595	aiov.iov_base = buf;
	2596	aiov.iov_len = count;
	2597	auio.uio_iov = &aiov;
	2598	auio.uio_iovcnt = 1;
	2599	auio.uio_offset = 0;
	2600	auio.uio_rw = UIO_READ;
	2601	auio.uio_segflg = UIO_USERSPACE;
	2602	auio.uio_td = td;
	2603	auio.uio_resid = count;
	2604	error = VOP_READLINK(vp, &auio, p->p_ucred);
	2605	}
	2606	vput(vp);
	2607	*res = count - auio.uio_resid;
	2608	return (error);
	2609	}
	2610
	2611	/*
	2612	* readlink_args(char path, char buf, int count)
	2613	*
	2614	* Return target name of a symbolic link.
	2615	*/
	2616	int
	2617	sys_readlink(struct readlink_args *uap)
	2618	{
	2619	struct nlookupdata nd;
	2620	int error;
	2621
	2622	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2623	if (error == 0) {
	2624	error = kern_readlink(&nd, uap->buf, uap->count,
	2625	&uap->sysmsg_result);
	2626	}
	2627	nlookup_done(&nd);
	2628	return (error);
	2629	}
	2630
	2631	static int
	2632	setfflags(struct vnode *vp, int flags)
	2633	{
	2634	struct thread *td = curthread;
	2635	struct proc *p = td->td_proc;
	2636	int error;
	2637	struct vattr vattr;
	2638
	2639	/*
	2640	* Prevent non-root users from setting flags on devices. When
	2641	* a device is reused, users can retain ownership of the device
	2642	* if they are allowed to set flags and programs assume that
	2643	* chown can't fail when done as root.
	2644	*/
	2645	if ((vp->v_type == VCHR \|\| vp->v_type == VBLK) &&
	2646	((error = priv_check_cred(p->p_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0))
	2647	return (error);
	2648
	2649	/*
	2650	* note: vget is required for any operation that might mod the vnode
	2651	* so VINACTIVE is properly cleared.
	2652	*/
	2653	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
	2654	VATTR_NULL(&vattr);
	2655	vattr.va_flags = flags;
	2656	error = VOP_SETATTR(vp, &vattr, p->p_ucred);
	2657	vput(vp);
	2658	}
	2659	return (error);
	2660	}
	2661
	2662	/*
	2663	* chflags(char *path, int flags)
	2664	*
	2665	* Change flags of a file given a path name.
	2666	*/
	2667	/* ARGSUSED */
	2668	int
	2669	sys_chflags(struct chflags_args *uap)
	2670	{
	2671	struct nlookupdata nd;
	2672	struct vnode *vp;
	2673	int error;
	2674
	2675	vp = NULL;
	2676	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2677	if (error == 0)
	2678	error = nlookup(&nd);
	2679	if (error == 0)
	2680	error = ncp_writechk(&nd.nl_nch);
	2681	if (error == 0)
	2682	error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	2683	nlookup_done(&nd);
	2684	if (error == 0) {
	2685	error = setfflags(vp, uap->flags);
	2686	vrele(vp);
	2687	}
	2688	return (error);
	2689	}
	2690
	2691	/*
	2692	* lchflags(char *path, int flags)
	2693	*
	2694	* Change flags of a file given a path name, but don't follow symlinks.
	2695	*/
	2696	/* ARGSUSED */
	2697	int
	2698	sys_lchflags(struct lchflags_args *uap)
	2699	{
	2700	struct nlookupdata nd;
	2701	struct vnode *vp;
	2702	int error;
	2703
	2704	vp = NULL;
	2705	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2706	if (error == 0)
	2707	error = nlookup(&nd);
	2708	if (error == 0)
	2709	error = ncp_writechk(&nd.nl_nch);
	2710	if (error == 0)
	2711	error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	2712	nlookup_done(&nd);
	2713	if (error == 0) {
	2714	error = setfflags(vp, uap->flags);
	2715	vrele(vp);
	2716	}
	2717	return (error);
	2718	}
	2719
	2720	/*
	2721	* fchflags_args(int fd, int flags)
	2722	*
	2723	* Change flags of a file given a file descriptor.
	2724	*/
	2725	/* ARGSUSED */
	2726	int
	2727	sys_fchflags(struct fchflags_args *uap)
	2728	{
	2729	struct thread *td = curthread;
	2730	struct proc *p = td->td_proc;
	2731	struct file *fp;
	2732	int error;
	2733
	2734	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
	2735	return (error);
	2736	if (fp->f_nchandle.ncp)
	2737	error = ncp_writechk(&fp->f_nchandle);
	2738	if (error == 0)
	2739	error = setfflags((struct vnode *) fp->f_data, uap->flags);
	2740	fdrop(fp);
	2741	return (error);
	2742	}
	2743
	2744	static int
	2745	setfmode(struct vnode *vp, int mode)
	2746	{
	2747	struct thread *td = curthread;
	2748	struct proc *p = td->td_proc;
	2749	int error;
	2750	struct vattr vattr;
	2751
	2752	/*
	2753	* note: vget is required for any operation that might mod the vnode
	2754	* so VINACTIVE is properly cleared.
	2755	*/
	2756	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
	2757	VATTR_NULL(&vattr);
	2758	vattr.va_mode = mode & ALLPERMS;
	2759	error = VOP_SETATTR(vp, &vattr, p->p_ucred);
	2760	vput(vp);
	2761	}
	2762	return error;
	2763	}
	2764
	2765	int
	2766	kern_chmod(struct nlookupdata *nd, int mode)
	2767	{
	2768	struct vnode *vp;
	2769	int error;
	2770
	2771	if ((error = nlookup(nd)) != 0)
	2772	return (error);
	2773	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
	2774	return (error);
	2775	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
	2776	error = setfmode(vp, mode);
	2777	vrele(vp);
	2778	return (error);
	2779	}
	2780
	2781	/*
	2782	* chmod_args(char *path, int mode)
	2783	*
	2784	* Change mode of a file given path name.
	2785	*/
	2786	/* ARGSUSED */
	2787	int
	2788	sys_chmod(struct chmod_args *uap)
	2789	{
	2790	struct nlookupdata nd;
	2791	int error;
	2792
	2793	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2794	if (error == 0)
	2795	error = kern_chmod(&nd, uap->mode);
	2796	nlookup_done(&nd);
	2797	return (error);
	2798	}
	2799
	2800	/*
	2801	* lchmod_args(char *path, int mode)
	2802	*
	2803	* Change mode of a file given path name (don't follow links.)
	2804	*/
	2805	/* ARGSUSED */
	2806	int
	2807	sys_lchmod(struct lchmod_args *uap)
	2808	{
	2809	struct nlookupdata nd;
	2810	int error;
	2811
	2812	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2813	if (error == 0)
	2814	error = kern_chmod(&nd, uap->mode);
	2815	nlookup_done(&nd);
	2816	return (error);
	2817	}
	2818
	2819	/*
	2820	* fchmod_args(int fd, int mode)
	2821	*
	2822	* Change mode of a file given a file descriptor.
	2823	*/
	2824	/* ARGSUSED */
	2825	int
	2826	sys_fchmod(struct fchmod_args *uap)
	2827	{
	2828	struct thread *td = curthread;
	2829	struct proc *p = td->td_proc;
	2830	struct file *fp;
	2831	int error;
	2832
	2833	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
	2834	return (error);
	2835	if (fp->f_nchandle.ncp)
	2836	error = ncp_writechk(&fp->f_nchandle);
	2837	if (error == 0)
	2838	error = setfmode((struct vnode *)fp->f_data, uap->mode);
	2839	fdrop(fp);
	2840	return (error);
	2841	}
	2842
	2843	/*
	2844	* fchmodat_args(char *path, int mode)
	2845	*
	2846	* Change mode of a file pointed to by fd/path.
	2847	*/
	2848	int
	2849	sys_fchmodat(struct fchmodat_args *uap)
	2850	{
	2851	struct nlookupdata nd;
	2852	struct file *fp;
	2853	int error;
	2854	int flags;
	2855
	2856	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
	2857	return (EINVAL);
	2858	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
	2859
	2860	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
	2861	UIO_USERSPACE, flags);
	2862	if (error == 0)
	2863	error = kern_chmod(&nd, uap->mode);
	2864	nlookup_done_at(&nd, fp);
	2865	return (error);
	2866	}
	2867
	2868	static int
	2869	setfown(struct vnode *vp, uid_t uid, gid_t gid)
	2870	{
	2871	struct thread *td = curthread;
	2872	struct proc *p = td->td_proc;
	2873	int error;
	2874	struct vattr vattr;
	2875
	2876	/*
	2877	* note: vget is required for any operation that might mod the vnode
	2878	* so VINACTIVE is properly cleared.
	2879	*/
	2880	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
	2881	VATTR_NULL(&vattr);
	2882	vattr.va_uid = uid;
	2883	vattr.va_gid = gid;
	2884	error = VOP_SETATTR(vp, &vattr, p->p_ucred);
	2885	vput(vp);
	2886	}
	2887	return error;
	2888	}
	2889
	2890	int
	2891	kern_chown(struct nlookupdata *nd, int uid, int gid)
	2892	{
	2893	struct vnode *vp;
	2894	int error;
	2895
	2896	if ((error = nlookup(nd)) != 0)
	2897	return (error);
	2898	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
	2899	return (error);
	2900	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
	2901	error = setfown(vp, uid, gid);
	2902	vrele(vp);
	2903	return (error);
	2904	}
	2905
	2906	/*
	2907	* chown(char *path, int uid, int gid)
	2908	*
	2909	* Set ownership given a path name.
	2910	*/
	2911	int
	2912	sys_chown(struct chown_args *uap)
	2913	{
	2914	struct nlookupdata nd;
	2915	int error;
	2916
	2917	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	2918	if (error == 0)
	2919	error = kern_chown(&nd, uap->uid, uap->gid);
	2920	nlookup_done(&nd);
	2921	return (error);
	2922	}
	2923
	2924	/*
	2925	* lchown_args(char *path, int uid, int gid)
	2926	*
	2927	* Set ownership given a path name, do not cross symlinks.
	2928	*/
	2929	int
	2930	sys_lchown(struct lchown_args *uap)
	2931	{
	2932	struct nlookupdata nd;
	2933	int error;
	2934
	2935	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	2936	if (error == 0)
	2937	error = kern_chown(&nd, uap->uid, uap->gid);
	2938	nlookup_done(&nd);
	2939	return (error);
	2940	}
	2941
	2942	/*
	2943	* fchown_args(int fd, int uid, int gid)
	2944	*
	2945	* Set ownership given a file descriptor.
	2946	*/
	2947	/* ARGSUSED */
	2948	int
	2949	sys_fchown(struct fchown_args *uap)
	2950	{
	2951	struct thread *td = curthread;
	2952	struct proc *p = td->td_proc;
	2953	struct file *fp;
	2954	int error;
	2955
	2956	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
	2957	return (error);
	2958	if (fp->f_nchandle.ncp)
	2959	error = ncp_writechk(&fp->f_nchandle);
	2960	if (error == 0)
	2961	error = setfown((struct vnode *)fp->f_data, uap->uid, uap->gid);
	2962	fdrop(fp);
	2963	return (error);
	2964	}
	2965
	2966	/*
	2967	* fchownat(int fd, char *path, int uid, int gid, int flags)
	2968	*
	2969	* Set ownership of file pointed to by fd/path.
	2970	*/
	2971	int
	2972	sys_fchownat(struct fchownat_args *uap)
	2973	{
	2974	struct nlookupdata nd;
	2975	struct file *fp;
	2976	int error;
	2977	int flags;
	2978
	2979	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
	2980	return (EINVAL);
	2981	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
	2982
	2983	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
	2984	UIO_USERSPACE, flags);
	2985	if (error == 0)
	2986	error = kern_chown(&nd, uap->uid, uap->gid);
	2987	nlookup_done_at(&nd, fp);
	2988	return (error);
	2989	}
	2990
	2991
	2992	static int
	2993	getutimes(const struct timeval tvp, struct timespec tsp)
	2994	{
	2995	struct timeval tv[2];
	2996
	2997	if (tvp == NULL) {
	2998	microtime(&tv[0]);
	2999	TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
	3000	tsp[1] = tsp[0];
	3001	} else {
	3002	TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
	3003	TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
	3004	}
	3005	return 0;
	3006	}
	3007
	3008	static int
	3009	setutimes(struct vnode vp, struct vattr vattr,
	3010	const struct timespec *ts, int nullflag)
	3011	{
	3012	struct thread *td = curthread;
	3013	struct proc *p = td->td_proc;
	3014	int error;
	3015
	3016	VATTR_NULL(vattr);
	3017	vattr->va_atime = ts[0];
	3018	vattr->va_mtime = ts[1];
	3019	if (nullflag)
	3020	vattr->va_vaflags \|= VA_UTIMES_NULL;
	3021	error = VOP_SETATTR(vp, vattr, p->p_ucred);
	3022
	3023	return error;
	3024	}
	3025
	3026	int
	3027	kern_utimes(struct nlookupdata nd, struct timeval tptr)
	3028	{
	3029	struct timespec ts[2];
	3030	struct vnode *vp;
	3031	struct vattr vattr;
	3032	int error;
	3033
	3034	if ((error = getutimes(tptr, ts)) != 0)
	3035	return (error);
	3036
	3037	/*
	3038	* NOTE: utimes() succeeds for the owner even if the file
	3039	* is not user-writable.
	3040	*/
	3041	nd->nl_flags \|= NLC_OWN \| NLC_WRITE;
	3042
	3043	if ((error = nlookup(nd)) != 0)
	3044	return (error);
	3045	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	3046	return (error);
	3047	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
	3048	return (error);
	3049
	3050	/*
	3051	* note: vget is required for any operation that might mod the vnode
	3052	* so VINACTIVE is properly cleared.
	3053	*/
	3054	if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
	3055	error = vget(vp, LK_EXCLUSIVE);
	3056	if (error == 0) {
	3057	error = setutimes(vp, &vattr, ts, (tptr == NULL));
	3058	vput(vp);
	3059	}
	3060	}
	3061	vrele(vp);
	3062	return (error);
	3063	}
	3064
	3065	/*
	3066	* utimes_args(char path, struct timeval tptr)
	3067	*
	3068	* Set the access and modification times of a file.
	3069	*/
	3070	int
	3071	sys_utimes(struct utimes_args *uap)
	3072	{
	3073	struct timeval tv[2];
	3074	struct nlookupdata nd;
	3075	int error;
	3076
	3077	if (uap->tptr) {
	3078	error = copyin(uap->tptr, tv, sizeof(tv));
	3079	if (error)
	3080	return (error);
	3081	}
	3082	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	3083	if (error == 0)
	3084	error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	3085	nlookup_done(&nd);
	3086	return (error);
	3087	}
	3088
	3089	/*
	3090	* lutimes_args(char path, struct timeval tptr)
	3091	*
	3092	* Set the access and modification times of a file.
	3093	*/
	3094	int
	3095	sys_lutimes(struct lutimes_args *uap)
	3096	{
	3097	struct timeval tv[2];
	3098	struct nlookupdata nd;
	3099	int error;
	3100
	3101	if (uap->tptr) {
	3102	error = copyin(uap->tptr, tv, sizeof(tv));
	3103	if (error)
	3104	return (error);
	3105	}
	3106	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	3107	if (error == 0)
	3108	error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	3109	nlookup_done(&nd);
	3110	return (error);
	3111	}
	3112
	3113	/*
	3114	* Set utimes on a file descriptor. The creds used to open the
	3115	* file are used to determine whether the operation is allowed
	3116	* or not.
	3117	*/
	3118	int
	3119	kern_futimes(int fd, struct timeval *tptr)
	3120	{
	3121	struct thread *td = curthread;
	3122	struct proc *p = td->td_proc;
	3123	struct timespec ts[2];
	3124	struct file *fp;
	3125	struct vnode *vp;
	3126	struct vattr vattr;
	3127	int error;
	3128
	3129	error = getutimes(tptr, ts);
	3130	if (error)
	3131	return (error);
	3132	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	3133	return (error);
	3134	if (fp->f_nchandle.ncp)
	3135	error = ncp_writechk(&fp->f_nchandle);
	3136	if (error == 0) {
	3137	vp = fp->f_data;
	3138	error = vget(vp, LK_EXCLUSIVE);
	3139	if (error == 0) {
	3140	error = VOP_GETATTR(vp, &vattr);
	3141	if (error == 0) {
	3142	error = naccess_va(&vattr, NLC_OWN \| NLC_WRITE,
	3143	fp->f_cred);
	3144	}
	3145	if (error == 0) {
	3146	error = setutimes(vp, &vattr, ts,
	3147	(tptr == NULL));
	3148	}
	3149	vput(vp);
	3150	}
	3151	}
	3152	fdrop(fp);
	3153	return (error);
	3154	}
	3155
	3156	/*
	3157	* futimes_args(int fd, struct timeval *tptr)
	3158	*
	3159	* Set the access and modification times of a file.
	3160	*/
	3161	int
	3162	sys_futimes(struct futimes_args *uap)
	3163	{
	3164	struct timeval tv[2];
	3165	int error;
	3166
	3167	if (uap->tptr) {
	3168	error = copyin(uap->tptr, tv, sizeof(tv));
	3169	if (error)
	3170	return (error);
	3171	}
	3172
	3173	error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);
	3174
	3175	return (error);
	3176	}
	3177
	3178	int
	3179	kern_truncate(struct nlookupdata *nd, off_t length)
	3180	{
	3181	struct vnode *vp;
	3182	struct vattr vattr;
	3183	int error;
	3184
	3185	if (length < 0)
	3186	return(EINVAL);
	3187	nd->nl_flags \|= NLC_WRITE \| NLC_TRUNCATE;
	3188	if ((error = nlookup(nd)) != 0)
	3189	return (error);
	3190	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	3191	return (error);
	3192	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
	3193	return (error);
	3194	if ((error = vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY)) != 0) {
	3195	vrele(vp);
	3196	return (error);
	3197	}
	3198	if (vp->v_type == VDIR) {
	3199	error = EISDIR;
	3200	} else if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
	3201	VATTR_NULL(&vattr);
	3202	vattr.va_size = length;
	3203	error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
	3204	}
	3205	vput(vp);
	3206	return (error);
	3207	}
	3208
	3209	/*
	3210	* truncate(char *path, int pad, off_t length)
	3211	*
	3212	* Truncate a file given its path name.
	3213	*/
	3214	int
	3215	sys_truncate(struct truncate_args *uap)
	3216	{
	3217	struct nlookupdata nd;
	3218	int error;
	3219
	3220	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	3221	if (error == 0)
	3222	error = kern_truncate(&nd, uap->length);
	3223	nlookup_done(&nd);
	3224	return error;
	3225	}
	3226
	3227	int
	3228	kern_ftruncate(int fd, off_t length)
	3229	{
	3230	struct thread *td = curthread;
	3231	struct proc *p = td->td_proc;
	3232	struct vattr vattr;
	3233	struct vnode *vp;
	3234	struct file *fp;
	3235	int error;
	3236
	3237	if (length < 0)
	3238	return(EINVAL);
	3239	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	3240	return (error);
	3241	if (fp->f_nchandle.ncp) {
	3242	error = ncp_writechk(&fp->f_nchandle);
	3243	if (error)
	3244	goto done;
	3245	}
	3246	if ((fp->f_flag & FWRITE) == 0) {
	3247	error = EINVAL;
	3248	goto done;
	3249	}
	3250	if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */
	3251	error = EINVAL;
	3252	goto done;
	3253	}
	3254	vp = (struct vnode *)fp->f_data;
	3255	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	3256	if (vp->v_type == VDIR) {
	3257	error = EISDIR;
	3258	} else if ((error = vn_writechk(vp, NULL)) == 0) {
	3259	VATTR_NULL(&vattr);
	3260	vattr.va_size = length;
	3261	error = VOP_SETATTR(vp, &vattr, fp->f_cred);
	3262	}
	3263	vn_unlock(vp);
	3264	done:
	3265	fdrop(fp);
	3266	return (error);
	3267	}
	3268
	3269	/*
	3270	* ftruncate_args(int fd, int pad, off_t length)
	3271	*
	3272	* Truncate a file given a file descriptor.
	3273	*/
	3274	int
	3275	sys_ftruncate(struct ftruncate_args *uap)
	3276	{
	3277	int error;
	3278
	3279	error = kern_ftruncate(uap->fd, uap->length);
	3280
	3281	return (error);
	3282	}
	3283
	3284	/*
	3285	* fsync(int fd)
	3286	*
	3287	* Sync an open file.
	3288	*/
	3289	/* ARGSUSED */
	3290	int
	3291	sys_fsync(struct fsync_args *uap)
	3292	{
	3293	struct thread *td = curthread;
	3294	struct proc *p = td->td_proc;
	3295	struct vnode *vp;
	3296	struct file *fp;
	3297	vm_object_t obj;
	3298	int error;
	3299
	3300	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
	3301	return (error);
	3302	vp = (struct vnode *)fp->f_data;
	3303	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	3304	if ((obj = vp->v_object) != NULL)
	3305	vm_object_page_clean(obj, 0, 0, 0);
	3306	error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL);
	3307	if (error == 0 && vp->v_mount)
	3308	error = buf_fsync(vp);
	3309	vn_unlock(vp);
	3310	fdrop(fp);
	3311	return (error);
	3312	}
	3313
	3314	int
	3315	kern_rename(struct nlookupdata fromnd, struct nlookupdata tond)
	3316	{
	3317	struct nchandle fnchd;
	3318	struct nchandle tnchd;
	3319	struct namecache *ncp;
	3320	struct vnode *fdvp;
	3321	struct vnode *tdvp;
	3322	struct mount *mp;
	3323	int error;
	3324
	3325	bwillinode(1);
	3326	fromnd->nl_flags \|= NLC_REFDVP \| NLC_RENAME_SRC;
	3327	if ((error = nlookup(fromnd)) != 0)
	3328	return (error);
	3329	if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
	3330	return (ENOENT);
	3331	fnchd.mount = fromnd->nl_nch.mount;
	3332	cache_hold(&fnchd);
	3333
	3334	/*
	3335	* unlock the source nch so we can lookup the target nch without
	3336	* deadlocking. The target may or may not exist so we do not check
	3337	* for a target vp like kern_mkdir() and other creation functions do.
	3338	*
	3339	* The source and target directories are ref'd and rechecked after
	3340	* everything is relocked to determine if the source or target file
	3341	* has been renamed.
	3342	*/
	3343	KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
	3344	fromnd->nl_flags &= ~NLC_NCPISLOCKED;
	3345	cache_unlock(&fromnd->nl_nch);
	3346
	3347	tond->nl_flags \|= NLC_RENAME_DST \| NLC_REFDVP;
	3348	if ((error = nlookup(tond)) != 0) {
	3349	cache_drop(&fnchd);
	3350	return (error);
	3351	}
	3352	if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
	3353	cache_drop(&fnchd);
	3354	return (ENOENT);
	3355	}
	3356	tnchd.mount = tond->nl_nch.mount;
	3357	cache_hold(&tnchd);
	3358
	3359	/*
	3360	* If the source and target are the same there is nothing to do
	3361	*/
	3362	if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
	3363	cache_drop(&fnchd);
	3364	cache_drop(&tnchd);
	3365	return (0);
	3366	}
	3367
	3368	/*
	3369	* Mount points cannot be renamed or overwritten
	3370	*/
	3371	if ((fromnd->nl_nch.ncp->nc_flag \| tond->nl_nch.ncp->nc_flag) &
	3372	NCF_ISMOUNTPT
	3373	) {
	3374	cache_drop(&fnchd);
	3375	cache_drop(&tnchd);
	3376	return (EINVAL);
	3377	}
	3378
	3379	/*
	3380	* relock the source ncp. NOTE AFTER RELOCKING: the source ncp
	3381	* may have become invalid while it was unlocked, nc_vp and nc_mount
	3382	* could be NULL.
	3383	*/
	3384	if (cache_lock_nonblock(&fromnd->nl_nch) == 0) {
	3385	cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
	3386	} else if (fromnd->nl_nch.ncp > tond->nl_nch.ncp) {
	3387	cache_lock(&fromnd->nl_nch);
	3388	cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
	3389	} else {
	3390	cache_unlock(&tond->nl_nch);
	3391	cache_lock(&fromnd->nl_nch);
	3392	cache_resolve(&fromnd->nl_nch, fromnd->nl_cred);
	3393	cache_lock(&tond->nl_nch);
	3394	cache_resolve(&tond->nl_nch, tond->nl_cred);
	3395	}
	3396	fromnd->nl_flags \|= NLC_NCPISLOCKED;
	3397
	3398	/*
	3399	* make sure the parent directories linkages are the same
	3400	*/
	3401	if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent \|\|
	3402	tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
	3403	cache_drop(&fnchd);
	3404	cache_drop(&tnchd);
	3405	return (ENOENT);
	3406	}
	3407
	3408	/*
	3409	* Both the source and target must be within the same filesystem and
	3410	* in the same filesystem as their parent directories within the
	3411	* namecache topology.
	3412	*
	3413	* NOTE: fromnd's nc_mount or nc_vp could be NULL.
	3414	*/
	3415	mp = fnchd.mount;
	3416	if (mp != tnchd.mount \|\| mp != fromnd->nl_nch.mount \|\|
	3417	mp != tond->nl_nch.mount) {
	3418	cache_drop(&fnchd);
	3419	cache_drop(&tnchd);
	3420	return (EXDEV);
	3421	}
	3422
	3423	/*
	3424	* Make sure the mount point is writable
	3425	*/
	3426	if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
	3427	cache_drop(&fnchd);
	3428	cache_drop(&tnchd);
	3429	return (error);
	3430	}
	3431
	3432	/*
	3433	* If the target exists and either the source or target is a directory,
	3434	* then both must be directories.
	3435	*
	3436	* Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h
	3437	* have become NULL.
	3438	*/
	3439	if (tond->nl_nch.ncp->nc_vp) {
	3440	if (fromnd->nl_nch.ncp->nc_vp == NULL) {
	3441	error = ENOENT;
	3442	} else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
	3443	if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
	3444	error = ENOTDIR;
	3445	} else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
	3446	error = EISDIR;
	3447	}
	3448	}
	3449
	3450	/*
	3451	* You cannot rename a source into itself or a subdirectory of itself.
	3452	* We check this by travsersing the target directory upwards looking
	3453	* for a match against the source.
	3454	*/
	3455	if (error == 0) {
	3456	for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
	3457	if (fromnd->nl_nch.ncp == ncp) {
	3458	error = EINVAL;
	3459	break;
	3460	}
	3461	}
	3462	}
	3463
	3464	cache_drop(&fnchd);
	3465	cache_drop(&tnchd);
	3466
	3467	/*
	3468	* Even though the namespaces are different, they may still represent
	3469	* hardlinks to the same file. The filesystem might have a hard time
	3470	* with this so we issue a NREMOVE of the source instead of a NRENAME
	3471	* when we detect the situation.
	3472	*/
	3473	if (error == 0) {
	3474	fdvp = fromnd->nl_dvp;
	3475	tdvp = tond->nl_dvp;
	3476	if (fdvp == NULL \|\| tdvp == NULL) {
	3477	error = EPERM;
	3478	} else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
	3479	error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
	3480	fromnd->nl_cred);
	3481	} else {
	3482	error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
	3483	fdvp, tdvp, tond->nl_cred);
	3484	}
	3485	}
	3486	return (error);
	3487	}
	3488
	3489	/*
	3490	* rename_args(char from, char to)
	3491	*
	3492	* Rename files. Source and destination must either both be directories,
	3493	* or both not be directories. If target is a directory, it must be empty.
	3494	*/
	3495	int
	3496	sys_rename(struct rename_args *uap)
	3497	{
	3498	struct nlookupdata fromnd, tond;
	3499	int error;
	3500
	3501	error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
	3502	if (error == 0) {
	3503	error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
	3504	if (error == 0)
	3505	error = kern_rename(&fromnd, &tond);
	3506	nlookup_done(&tond);
	3507	}
	3508	nlookup_done(&fromnd);
	3509	return (error);
	3510	}
	3511
	3512	int
	3513	kern_mkdir(struct nlookupdata *nd, int mode)
	3514	{
	3515	struct thread *td = curthread;
	3516	struct proc *p = td->td_proc;
	3517	struct vnode *vp;
	3518	struct vattr vattr;
	3519	int error;
	3520
	3521	bwillinode(1);
	3522	nd->nl_flags \|= NLC_WILLBEDIR \| NLC_CREATE \| NLC_REFDVP;
	3523	if ((error = nlookup(nd)) != 0)
	3524	return (error);
	3525
	3526	if (nd->nl_nch.ncp->nc_vp)
	3527	return (EEXIST);
	3528	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	3529	return (error);
	3530	VATTR_NULL(&vattr);
	3531	vattr.va_type = VDIR;
	3532	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
	3533
	3534	vp = NULL;
	3535	error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, p->p_ucred, &vattr);
	3536	if (error == 0)
	3537	vput(vp);
	3538	return (error);
	3539	}
	3540
	3541	/*
	3542	* mkdir_args(char *path, int mode)
	3543	*
	3544	* Make a directory file.
	3545	*/
	3546	/* ARGSUSED */
	3547	int
	3548	sys_mkdir(struct mkdir_args *uap)
	3549	{
	3550	struct nlookupdata nd;
	3551	int error;
	3552
	3553	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	3554	if (error == 0)
	3555	error = kern_mkdir(&nd, uap->mode);
	3556	nlookup_done(&nd);
	3557	return (error);
	3558	}
	3559
	3560	int
	3561	kern_rmdir(struct nlookupdata *nd)
	3562	{
	3563	int error;
	3564
	3565	bwillinode(1);
	3566	nd->nl_flags \|= NLC_DELETE \| NLC_REFDVP;
	3567	if ((error = nlookup(nd)) != 0)
	3568	return (error);
	3569
	3570	/*
	3571	* Do not allow directories representing mount points to be
	3572	* deleted, even if empty. Check write perms on mount point
	3573	* in case the vnode is aliased (aka nullfs).
	3574	*/
	3575	if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT))
	3576	return (EINVAL);
	3577	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
	3578	return (error);
	3579	error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	3580	return (error);
	3581	}
	3582
	3583	/*
	3584	* rmdir_args(char *path)
	3585	*
	3586	* Remove a directory file.
	3587	*/
	3588	/* ARGSUSED */
	3589	int
	3590	sys_rmdir(struct rmdir_args *uap)
	3591	{
	3592	struct nlookupdata nd;
	3593	int error;
	3594
	3595	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	3596	if (error == 0)
	3597	error = kern_rmdir(&nd);
	3598	nlookup_done(&nd);
	3599	return (error);
	3600	}
	3601
	3602	int
	3603	kern_getdirentries(int fd, char buf, u_int count, long basep, int *res,
	3604	enum uio_seg direction)
	3605	{
	3606	struct thread *td = curthread;
	3607	struct proc *p = td->td_proc;
	3608	struct vnode *vp;
	3609	struct file *fp;
	3610	struct uio auio;
	3611	struct iovec aiov;
	3612	off_t loff;
	3613	int error, eofflag;
	3614
	3615	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
	3616	return (error);
	3617	if ((fp->f_flag & FREAD) == 0) {
	3618	error = EBADF;
	3619	goto done;
	3620	}
	3621	vp = (struct vnode *)fp->f_data;
	3622	unionread:
	3623	if (vp->v_type != VDIR) {
	3624	error = EINVAL;
	3625	goto done;
	3626	}
	3627	aiov.iov_base = buf;
	3628	aiov.iov_len = count;
	3629	auio.uio_iov = &aiov;
	3630	auio.uio_iovcnt = 1;
	3631	auio.uio_rw = UIO_READ;
	3632	auio.uio_segflg = direction;
	3633	auio.uio_td = td;
	3634	auio.uio_resid = count;
	3635	loff = auio.uio_offset = fp->f_offset;
	3636	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
	3637	fp->f_offset = auio.uio_offset;
	3638	if (error)
	3639	goto done;
	3640	if (count == auio.uio_resid) {
	3641	if (union_dircheckp) {
	3642	error = union_dircheckp(td, &vp, fp);
	3643	if (error == -1)
	3644	goto unionread;
	3645	if (error)
	3646	goto done;
	3647	}
	3648	#if 0
	3649	if ((vp->v_flag & VROOT) &&
	3650	(vp->v_mount->mnt_flag & MNT_UNION)) {
	3651	struct vnode *tvp = vp;
	3652	vp = vp->v_mount->mnt_vnodecovered;
	3653	vref(vp);
	3654	fp->f_data = vp;
	3655	fp->f_offset = 0;
	3656	vrele(tvp);
	3657	goto unionread;
	3658	}
	3659	#endif
	3660	}
	3661
	3662	/*
	3663	* WARNING! *basep may not be wide enough to accomodate the
	3664	* seek offset. XXX should we hack this to return the upper 32 bits
	3665	* for offsets greater then 4G?
	3666	*/
	3667	if (basep) {
	3668	*basep = (long)loff;
	3669	}
	3670	*res = count - auio.uio_resid;
	3671	done:
	3672	fdrop(fp);
	3673	return (error);
	3674	}
	3675
	3676	/*
	3677	* getdirentries_args(int fd, char buf, u_int conut, long basep)
	3678	*
	3679	* Read a block of directory entries in a file system independent format.
	3680	*/
	3681	int
	3682	sys_getdirentries(struct getdirentries_args *uap)
	3683	{
	3684	long base;
	3685	int error;
	3686
	3687	error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
	3688	&uap->sysmsg_result, UIO_USERSPACE);
	3689
	3690	if (error == 0 && uap->basep)
	3691	error = copyout(&base, uap->basep, sizeof(*uap->basep));
	3692	return (error);
	3693	}
	3694
	3695	/*
	3696	* getdents_args(int fd, char *buf, size_t count)
	3697	*/
	3698	int
	3699	sys_getdents(struct getdents_args *uap)
	3700	{
	3701	int error;
	3702
	3703	error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
	3704	&uap->sysmsg_result, UIO_USERSPACE);
	3705
	3706	return (error);
	3707	}
	3708
	3709	/*
	3710	* umask(int newmask)
	3711	*
	3712	* Set the mode mask for creation of filesystem nodes.
	3713	*
	3714	* MP SAFE
	3715	*/
	3716	int
	3717	sys_umask(struct umask_args *uap)
	3718	{
	3719	struct thread *td = curthread;
	3720	struct proc *p = td->td_proc;
	3721	struct filedesc *fdp;
	3722
	3723	fdp = p->p_fd;
	3724	uap->sysmsg_result = fdp->fd_cmask;
	3725	fdp->fd_cmask = uap->newmask & ALLPERMS;
	3726	return (0);
	3727	}
	3728
	3729	/*
	3730	* revoke(char *path)
	3731	*
	3732	* Void all references to file by ripping underlying filesystem
	3733	* away from vnode.
	3734	*/
	3735	/* ARGSUSED */
	3736	int
	3737	sys_revoke(struct revoke_args *uap)
	3738	{
	3739	struct nlookupdata nd;
	3740	struct vattr vattr;
	3741	struct vnode *vp;
	3742	struct ucred *cred;
	3743	int error;
	3744
	3745	vp = NULL;
	3746	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	3747	if (error == 0)
	3748	error = nlookup(&nd);
	3749	if (error == 0)
	3750	error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	3751	cred = crhold(nd.nl_cred);
	3752	nlookup_done(&nd);
	3753	if (error == 0) {
	3754	if (error == 0)
	3755	error = VOP_GETATTR(vp, &vattr);
	3756	if (error == 0 && cred->cr_uid != vattr.va_uid)
	3757	error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0);
	3758	if (error == 0 && (vp->v_type == VCHR \|\| vp->v_type == VBLK)) {
	3759	if (vcount(vp) > 0)
	3760	error = vrevoke(vp, cred);
	3761	} else if (error == 0) {
	3762	error = vrevoke(vp, cred);
	3763	}
	3764	vrele(vp);
	3765	}
	3766	if (cred)
	3767	crfree(cred);
	3768	return (error);
	3769	}
	3770
	3771	/*
	3772	* getfh_args(char fname, fhandle_t fhp)
	3773	*
	3774	* Get (NFS) file handle
	3775	*
	3776	* NOTE: We use the fsid of the covering mount, even if it is a nullfs
	3777	* mount. This allows nullfs mounts to be explicitly exported.
	3778	*
	3779	* WARNING: nullfs mounts of HAMMER PFS ROOTs are safe.
	3780	*
	3781	* nullfs mounts of subdirectories are not safe. That is, it will
	3782	* work, but you do not really have protection against access to
	3783	* the related parent directories.
	3784	*/
	3785	int
	3786	sys_getfh(struct getfh_args *uap)
	3787	{
	3788	struct thread *td = curthread;
	3789	struct nlookupdata nd;
	3790	fhandle_t fh;
	3791	struct vnode *vp;
	3792	struct mount *mp;
	3793	int error;
	3794
	3795	/*
	3796	* Must be super user
	3797	*/
	3798	if ((error = priv_check(td, PRIV_ROOT)) != 0)
	3799	return (error);
	3800
	3801	vp = NULL;
	3802	error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW);
	3803	if (error == 0)
	3804	error = nlookup(&nd);
	3805	if (error == 0)
	3806	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	3807	mp = nd.nl_nch.mount;
	3808	nlookup_done(&nd);
	3809	if (error == 0) {
	3810	bzero(&fh, sizeof(fh));
	3811	fh.fh_fsid = mp->mnt_stat.f_fsid;
	3812	error = VFS_VPTOFH(vp, &fh.fh_fid);
	3813	vput(vp);
	3814	if (error == 0)
	3815	error = copyout(&fh, uap->fhp, sizeof(fh));
	3816	}
	3817	return (error);
	3818	}
	3819
	3820	/*
	3821	* fhopen_args(const struct fhandle *u_fhp, int flags)
	3822	*
	3823	* syscall for the rpc.lockd to use to translate a NFS file handle into
	3824	* an open descriptor.
	3825	*
	3826	* warning: do not remove the priv_check() call or this becomes one giant
	3827	* security hole.
	3828	*/
	3829	int
	3830	sys_fhopen(struct fhopen_args *uap)
	3831	{
	3832	struct thread *td = curthread;
	3833	struct proc *p = td->td_proc;
	3834	struct mount *mp;
	3835	struct vnode *vp;
	3836	struct fhandle fhp;
	3837	struct vattr vat;
	3838	struct vattr *vap = &vat;
	3839	struct flock lf;
	3840	int fmode, mode, error, type;
	3841	struct file *nfp;
	3842	struct file *fp;
	3843	int indx;
	3844
	3845	/*
	3846	* Must be super user
	3847	*/
	3848	error = priv_check(td, PRIV_ROOT);
	3849	if (error)
	3850	return (error);
	3851
	3852	fmode = FFLAGS(uap->flags);
	3853	/* why not allow a non-read/write open for our lockd? */
	3854	if (((fmode & (FREAD \| FWRITE)) == 0) \|\| (fmode & O_CREAT))
	3855	return (EINVAL);
	3856	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	3857	if (error)
	3858	return(error);
	3859	/* find the mount point */
	3860	mp = vfs_getvfs(&fhp.fh_fsid);
	3861	if (mp == NULL)
	3862	return (ESTALE);
	3863	/* now give me my vnode, it gets returned to me locked */
	3864	error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp);
	3865	if (error)
	3866	return (error);
	3867	/*
	3868	* from now on we have to make sure not
	3869	* to forget about the vnode
	3870	* any error that causes an abort must vput(vp)
	3871	* just set error = err and 'goto bad;'.
	3872	*/
	3873
	3874	/*
	3875	* from vn_open
	3876	*/
	3877	if (vp->v_type == VLNK) {
	3878	error = EMLINK;
	3879	goto bad;
	3880	}
	3881	if (vp->v_type == VSOCK) {
	3882	error = EOPNOTSUPP;
	3883	goto bad;
	3884	}
	3885	mode = 0;
	3886	if (fmode & (FWRITE \| O_TRUNC)) {
	3887	if (vp->v_type == VDIR) {
	3888	error = EISDIR;
	3889	goto bad;
	3890	}
	3891	error = vn_writechk(vp, NULL);
	3892	if (error)
	3893	goto bad;
	3894	mode \|= VWRITE;
	3895	}
	3896	if (fmode & FREAD)
	3897	mode \|= VREAD;
	3898	if (mode) {
	3899	error = VOP_ACCESS(vp, mode, p->p_ucred);
	3900	if (error)
	3901	goto bad;
	3902	}
	3903	if (fmode & O_TRUNC) {
	3904	vn_unlock(vp); /* XXX */
	3905	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY); /* XXX */
	3906	VATTR_NULL(vap);
	3907	vap->va_size = 0;
	3908	error = VOP_SETATTR(vp, vap, p->p_ucred);
	3909	if (error)
	3910	goto bad;
	3911	}
	3912
	3913	/*
	3914	* VOP_OPEN needs the file pointer so it can potentially override
	3915	* it.
	3916	*
	3917	* WARNING! no f_nchandle will be associated when fhopen()ing a
	3918	* directory. XXX
	3919	*/
	3920	if ((error = falloc(p, &nfp, &indx)) != 0)
	3921	goto bad;
	3922	fp = nfp;
	3923
	3924	error = VOP_OPEN(vp, fmode, p->p_ucred, fp);
	3925	if (error) {
	3926	/*
	3927	* setting f_ops this way prevents VOP_CLOSE from being
	3928	* called or fdrop() releasing the vp from v_data. Since
	3929	* the VOP_OPEN failed we don't want to VOP_CLOSE.
	3930	*/
	3931	fp->f_ops = &badfileops;
	3932	fp->f_data = NULL;
	3933	goto bad_drop;
	3934	}
	3935
	3936	/*
	3937	* The fp is given its own reference, we still have our ref and lock.
	3938	*
	3939	* Assert that all regular files must be created with a VM object.
	3940	*/
	3941	if (vp->v_type == VREG && vp->v_object == NULL) {
	3942	kprintf("fhopen: regular file did not have VM object: %p\n", vp);
	3943	goto bad_drop;
	3944	}
	3945
	3946	/*
	3947	* The open was successful. Handle any locking requirements.
	3948	*/
	3949	if (fmode & (O_EXLOCK \| O_SHLOCK)) {
	3950	lf.l_whence = SEEK_SET;
	3951	lf.l_start = 0;
	3952	lf.l_len = 0;
	3953	if (fmode & O_EXLOCK)
	3954	lf.l_type = F_WRLCK;
	3955	else
	3956	lf.l_type = F_RDLCK;
	3957	if (fmode & FNONBLOCK)
	3958	type = 0;
	3959	else
	3960	type = F_WAIT;
	3961	vn_unlock(vp);
	3962	if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
	3963	/*
	3964	* release our private reference.
	3965	*/
	3966	fsetfd(p, NULL, indx);
	3967	fdrop(fp);
	3968	vrele(vp);
	3969	return (error);
	3970	}
	3971	vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY);
	3972	fp->f_flag \|= FHASLOCK;
	3973	}
	3974
	3975	/*
	3976	* Clean up. Associate the file pointer with the previously
	3977	* reserved descriptor and return it.
	3978	*/
	3979	vput(vp);
	3980	fsetfd(p, fp, indx);
	3981	fdrop(fp);
	3982	uap->sysmsg_result = indx;
	3983	return (0);
	3984
	3985	bad_drop:
	3986	fsetfd(p, NULL, indx);
	3987	fdrop(fp);
	3988	bad:
	3989	vput(vp);
	3990	return (error);
	3991	}
	3992
	3993	/*
	3994	* fhstat_args(struct fhandle u_fhp, struct stat sb)
	3995	*/
	3996	int
	3997	sys_fhstat(struct fhstat_args *uap)
	3998	{
	3999	struct thread *td = curthread;
	4000	struct stat sb;
	4001	fhandle_t fh;
	4002	struct mount *mp;
	4003	struct vnode *vp;
	4004	int error;
	4005
	4006	/*
	4007	* Must be super user
	4008	*/
	4009	error = priv_check(td, PRIV_ROOT);
	4010	if (error)
	4011	return (error);
	4012
	4013	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
	4014	if (error)
	4015	return (error);
	4016
	4017	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
	4018	return (ESTALE);
	4019	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
	4020	return (error);
	4021	error = vn_stat(vp, &sb, td->td_proc->p_ucred);
	4022	vput(vp);
	4023	if (error)
	4024	return (error);
	4025	error = copyout(&sb, uap->sb, sizeof(sb));
	4026	return (error);
	4027	}
	4028
	4029	/*
	4030	* fhstatfs_args(struct fhandle u_fhp, struct statfs buf)
	4031	*/
	4032	int
	4033	sys_fhstatfs(struct fhstatfs_args *uap)
	4034	{
	4035	struct thread *td = curthread;
	4036	struct proc *p = td->td_proc;
	4037	struct statfs *sp;
	4038	struct mount *mp;
	4039	struct vnode *vp;
	4040	struct statfs sb;
	4041	char fullpath, freepath;
	4042	fhandle_t fh;
	4043	int error;
	4044
	4045	/*
	4046	* Must be super user
	4047	*/
	4048	if ((error = priv_check(td, PRIV_ROOT)))
	4049	return (error);
	4050
	4051	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
	4052	return (error);
	4053
	4054	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
	4055	return (ESTALE);
	4056
	4057	if (p != NULL && !chroot_visible_mnt(mp, p))
	4058	return (ESTALE);
	4059
	4060	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
	4061	return (error);
	4062	mp = vp->v_mount;
	4063	sp = &mp->mnt_stat;
	4064	vput(vp);
	4065	if ((error = VFS_STATFS(mp, sp, p->p_ucred)) != 0)
	4066	return (error);
	4067
	4068	error = mount_path(p, mp, &fullpath, &freepath);
	4069	if (error)
	4070	return(error);
	4071	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	4072	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	4073	kfree(freepath, M_TEMP);
	4074
	4075	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	4076	if (priv_check(td, PRIV_ROOT)) {
	4077	bcopy(sp, &sb, sizeof(sb));
	4078	sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
	4079	sp = &sb;
	4080	}
	4081	return (copyout(sp, uap->buf, sizeof(*sp)));
	4082	}
	4083
	4084	/*
	4085	* fhstatvfs_args(struct fhandle u_fhp, struct statvfs buf)
	4086	*/
	4087	int
	4088	sys_fhstatvfs(struct fhstatvfs_args *uap)
	4089	{
	4090	struct thread *td = curthread;
	4091	struct proc *p = td->td_proc;
	4092	struct statvfs *sp;
	4093	struct mount *mp;
	4094	struct vnode *vp;
	4095	fhandle_t fh;
	4096	int error;
	4097
	4098	/*
	4099	* Must be super user
	4100	*/
	4101	if ((error = priv_check(td, PRIV_ROOT)))
	4102	return (error);
	4103
	4104	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
	4105	return (error);
	4106
	4107	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
	4108	return (ESTALE);
	4109
	4110	if (p != NULL && !chroot_visible_mnt(mp, p))
	4111	return (ESTALE);
	4112
	4113	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
	4114	return (error);
	4115	mp = vp->v_mount;
	4116	sp = &mp->mnt_vstat;
	4117	vput(vp);
	4118	if ((error = VFS_STATVFS(mp, sp, p->p_ucred)) != 0)
	4119	return (error);
	4120
	4121	sp->f_flag = 0;
	4122	if (mp->mnt_flag & MNT_RDONLY)
	4123	sp->f_flag \|= ST_RDONLY;
	4124	if (mp->mnt_flag & MNT_NOSUID)
	4125	sp->f_flag \|= ST_NOSUID;
	4126
	4127	return (copyout(sp, uap->buf, sizeof(*sp)));
	4128	}
	4129
	4130
	4131	/*
	4132	* Syscall to push extended attribute configuration information into the
	4133	* VFS. Accepts a path, which it converts to a mountpoint, as well as
	4134	* a command (int cmd), and attribute name and misc data. For now, the
	4135	* attribute name is left in userspace for consumption by the VFS_op.
	4136	* It will probably be changed to be copied into sysspace by the
	4137	* syscall in the future, once issues with various consumers of the
	4138	* attribute code have raised their hands.
	4139	*
	4140	* Currently this is used only by UFS Extended Attributes.
	4141	*/
	4142	int
	4143	sys_extattrctl(struct extattrctl_args *uap)
	4144	{
	4145	struct nlookupdata nd;
	4146	struct mount *mp;
	4147	struct vnode *vp;
	4148	int error;
	4149
	4150	vp = NULL;
	4151	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	4152	if (error == 0)
	4153	error = nlookup(&nd);
	4154	if (error == 0) {
	4155	mp = nd.nl_nch.mount;
	4156	error = VFS_EXTATTRCTL(mp, uap->cmd,
	4157	uap->attrname, uap->arg,
	4158	nd.nl_cred);
	4159	}
	4160	nlookup_done(&nd);
	4161	return (error);
	4162	}
	4163
	4164	/*
	4165	* Syscall to set a named extended attribute on a file or directory.
	4166	* Accepts attribute name, and a uio structure pointing to the data to set.
	4167	* The uio is consumed in the style of writev(). The real work happens
	4168	* in VOP_SETEXTATTR().
	4169	*/
	4170	int
	4171	sys_extattr_set_file(struct extattr_set_file_args *uap)
	4172	{
	4173	char attrname[EXTATTR_MAXNAMELEN];
	4174	struct iovec aiov[UIO_SMALLIOV];
	4175	struct iovec *needfree;
	4176	struct nlookupdata nd;
	4177	struct iovec *iov;
	4178	struct vnode *vp;
	4179	struct uio auio;
	4180	u_int iovlen;
	4181	u_int cnt;
	4182	int error;
	4183	int i;
	4184
	4185	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	4186	if (error)
	4187	return (error);
	4188
	4189	vp = NULL;
	4190	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	4191	if (error == 0)
	4192	error = nlookup(&nd);
	4193	if (error == 0)
	4194	error = ncp_writechk(&nd.nl_nch);
	4195	if (error == 0)
	4196	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	4197	if (error) {
	4198	nlookup_done(&nd);
	4199	return (error);
	4200	}
	4201
	4202	needfree = NULL;
	4203	iovlen = uap->iovcnt * sizeof(struct iovec);
	4204	if (uap->iovcnt > UIO_SMALLIOV) {
	4205	if (uap->iovcnt > UIO_MAXIOV) {
	4206	error = EINVAL;
	4207	goto done;
	4208	}
	4209	MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
	4210	needfree = iov;
	4211	} else {
	4212	iov = aiov;
	4213	}
	4214	auio.uio_iov = iov;
	4215	auio.uio_iovcnt = uap->iovcnt;
	4216	auio.uio_rw = UIO_WRITE;
	4217	auio.uio_segflg = UIO_USERSPACE;
	4218	auio.uio_td = nd.nl_td;
	4219	auio.uio_offset = 0;
	4220	if ((error = copyin(uap->iovp, iov, iovlen)))
	4221	goto done;
	4222	auio.uio_resid = 0;
	4223	for (i = 0; i < uap->iovcnt; i++) {
	4224	if (iov->iov_len > LONG_MAX - auio.uio_resid) {
	4225	error = EINVAL;
	4226	goto done;
	4227	}
	4228	auio.uio_resid += iov->iov_len;
	4229	iov++;
	4230	}
	4231	cnt = auio.uio_resid;
	4232	error = VOP_SETEXTATTR(vp, attrname, &auio, nd.nl_cred);
	4233	cnt -= auio.uio_resid;
	4234	uap->sysmsg_result = cnt;
	4235	done:
	4236	vput(vp);
	4237	nlookup_done(&nd);
	4238	if (needfree)
	4239	FREE(needfree, M_IOV);
	4240	return (error);
	4241	}
	4242
	4243	/*
	4244	* Syscall to get a named extended attribute on a file or directory.
	4245	* Accepts attribute name, and a uio structure pointing to a buffer for the
	4246	* data. The uio is consumed in the style of readv(). The real work
	4247	* happens in VOP_GETEXTATTR();
	4248	*/
	4249	int
	4250	sys_extattr_get_file(struct extattr_get_file_args *uap)
	4251	{
	4252	char attrname[EXTATTR_MAXNAMELEN];
	4253	struct iovec aiov[UIO_SMALLIOV];
	4254	struct iovec *needfree;
	4255	struct nlookupdata nd;
	4256	struct iovec *iov;
	4257	struct vnode *vp;
	4258	struct uio auio;
	4259	u_int iovlen;
	4260	u_int cnt;
	4261	int error;
	4262	int i;
	4263
	4264	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	4265	if (error)
	4266	return (error);
	4267
	4268	vp = NULL;
	4269	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	4270	if (error == 0)
	4271	error = nlookup(&nd);
	4272	if (error == 0)
	4273	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	4274	if (error) {
	4275	nlookup_done(&nd);
	4276	return (error);
	4277	}
	4278
	4279	iovlen = uap->iovcnt * sizeof (struct iovec);
	4280	needfree = NULL;
	4281	if (uap->iovcnt > UIO_SMALLIOV) {
	4282	if (uap->iovcnt > UIO_MAXIOV) {
	4283	error = EINVAL;
	4284	goto done;
	4285	}
	4286	MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
	4287	needfree = iov;
	4288	} else {
	4289	iov = aiov;
	4290	}
	4291	auio.uio_iov = iov;
	4292	auio.uio_iovcnt = uap->iovcnt;
	4293	auio.uio_rw = UIO_READ;
	4294	auio.uio_segflg = UIO_USERSPACE;
	4295	auio.uio_td = nd.nl_td;
	4296	auio.uio_offset = 0;
	4297	if ((error = copyin(uap->iovp, iov, iovlen)))
	4298	goto done;
	4299	auio.uio_resid = 0;
	4300	for (i = 0; i < uap->iovcnt; i++) {
	4301	if (iov->iov_len > LONG_MAX - auio.uio_resid) {
	4302	error = EINVAL;
	4303	goto done;
	4304	}
	4305	auio.uio_resid += iov->iov_len;
	4306	iov++;
	4307	}
	4308	cnt = auio.uio_resid;
	4309	error = VOP_GETEXTATTR(vp, attrname, &auio, nd.nl_cred);
	4310	cnt -= auio.uio_resid;
	4311	uap->sysmsg_result = cnt;
	4312	done:
	4313	vput(vp);
	4314	nlookup_done(&nd);
	4315	if (needfree)
	4316	FREE(needfree, M_IOV);
	4317	return(error);
	4318	}
	4319
	4320	/*
	4321	* Syscall to delete a named extended attribute from a file or directory.
	4322	* Accepts attribute name. The real work happens in VOP_SETEXTATTR().
	4323	*/
	4324	int
	4325	sys_extattr_delete_file(struct extattr_delete_file_args *uap)
	4326	{
	4327	char attrname[EXTATTR_MAXNAMELEN];
	4328	struct nlookupdata nd;
	4329	struct vnode *vp;
	4330	int error;
	4331
	4332	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	4333	if (error)
	4334	return(error);
	4335
	4336	vp = NULL;
	4337	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	4338	if (error == 0)
	4339	error = nlookup(&nd);
	4340	if (error == 0)
	4341	error = ncp_writechk(&nd.nl_nch);
	4342	if (error == 0)
	4343	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	4344	if (error) {
	4345	nlookup_done(&nd);
	4346	return (error);
	4347	}
	4348
	4349	error = VOP_SETEXTATTR(vp, attrname, NULL, nd.nl_cred);
	4350	vput(vp);
	4351	nlookup_done(&nd);
	4352	return(error);
	4353	}
	4354
	4355	/*
	4356	* Determine if the mount is visible to the process.
	4357	*/
	4358	static int
	4359	chroot_visible_mnt(struct mount mp, struct proc p)
	4360	{
	4361	struct nchandle nch;
	4362
	4363	/*
	4364	* Traverse from the mount point upwards. If we hit the process
	4365	* root then the mount point is visible to the process.
	4366	*/
	4367	nch = mp->mnt_ncmountpt;
	4368	while (nch.ncp) {
	4369	if (nch.mount == p->p_fd->fd_nrdir.mount &&
	4370	nch.ncp == p->p_fd->fd_nrdir.ncp) {
	4371	return(1);
	4372	}
	4373	if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) {
	4374	nch = nch.mount->mnt_ncmounton;
	4375	} else {
	4376	nch.ncp = nch.ncp->nc_parent;
	4377	}
	4378	}
	4379
	4380	/*
	4381	* If the mount point is not visible to the process, but the
	4382	* process root is in a subdirectory of the mount, return
	4383	* TRUE anyway.
	4384	*/
	4385	if (p->p_fd->fd_nrdir.mount == mp)
	4386	return(1);
	4387
	4388	return(0);
	4389	}
	4390