gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
	3	* Copyright (c) 1992, 1993, 1994, 1995
	4	* The Regents of the University of California. All rights reserved.
	5	*
	6	* This code is derived from software contributed to Berkeley by
	7	* Jan-Simon Pendry.
	8	*
	9	* Redistribution and use in source and binary forms, with or without
	10	* modification, are permitted provided that the following conditions
	11	* are met:
	12	* 1. Redistributions of source code must retain the above copyright
	13	* notice, this list of conditions and the following disclaimer.
	14	* 2. Redistributions in binary form must reproduce the above copyright
	15	* notice, this list of conditions and the following disclaimer in the
	16	* documentation and/or other materials provided with the distribution.
	17	* 3. All advertising materials mentioning features or use of this software
	18	* must display the following acknowledgement:
	19	* This product includes software developed by the University of
	20	* California, Berkeley and its contributors.
	21	* 4. Neither the name of the University nor the names of its contributors
	22	* may be used to endorse or promote products derived from this software
	23	* without specific prior written permission.
	24	*
	25	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	26	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	27	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	28	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	29	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	30	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	31	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	32	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	33	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	34	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	35	* SUCH DAMAGE.
	36	*
	37	* @(#)union_vnops.c 8.32 (Berkeley) 6/23/95
	38	* $FreeBSD: src/sys/miscfs/union/union_vnops.c,v 1.72 1999/12/15 23:02:14 eivind Exp $
	39	* $DragonFly: src/sys/vfs/union/union_vnops.c,v 1.36 2006/12/23 00:41:30 swildner Exp $
	40	*/
	41
	42	#include <sys/param.h>
	43	#include <sys/systm.h>
	44	#include <sys/proc.h>
	45	#include <sys/fcntl.h>
	46	#include <sys/stat.h>
	47	#include <sys/kernel.h>
	48	#include <sys/vnode.h>
	49	#include <sys/mount.h>
	50	#include <sys/namei.h>
	51	#include <sys/malloc.h>
	52	#include <sys/buf.h>
	53	#include <sys/lock.h>
	54	#include <sys/sysctl.h>
	55	#include "union.h"
	56
	57	#include <vm/vm.h>
	58	#include <vm/vnode_pager.h>
	59
	60	#include <vm/vm_page.h>
	61	#include <vm/vm_object.h>
	62
	63	int uniondebug = 0;	/* debug setting, starts at 0; exported by the SYSCTL_INT declarations below */
	64
	65	#if UDEBUG_ENABLED
	66	SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RW, &uniondebug, 0, "");	/* read-write when UDEBUG_ENABLED is set */
	67	#else
	68	SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RD, &uniondebug, 0, "");	/* read-only otherwise */
	69	#endif
	70	/* Forward declarations for the static union VOP handlers and the union_lookup1() helper. */
	71	static int union_access (struct vop_access_args *ap);
	72	static int union_advlock (struct vop_advlock_args *ap);
	73	static int union_bmap (struct vop_bmap_args *ap);
	74	static int union_close (struct vop_close_args *ap);
	75	static int union_create (struct vop_old_create_args *ap);
	76	static int union_fsync (struct vop_fsync_args *ap);
	77	static int union_getattr (struct vop_getattr_args *ap);
	78	static int union_inactive (struct vop_inactive_args *ap);
	79	static int union_ioctl (struct vop_ioctl_args *ap);
	80	static int union_link (struct vop_old_link_args *ap);
	81	static int union_lookup (struct vop_old_lookup_args *ap);
	82	static int union_lookup1 (struct vnode *udvp, struct vnode **dvp,
	83	struct vnode **vpp,
	84	struct componentname *cnp);
	85	static int union_mkdir (struct vop_old_mkdir_args *ap);
	86	static int union_mknod (struct vop_old_mknod_args *ap);
	87	static int union_mmap (struct vop_mmap_args *ap);
	88	static int union_open (struct vop_open_args *ap);
	89	static int union_pathconf (struct vop_pathconf_args *ap);
	90	static int union_print (struct vop_print_args *ap);
	91	static int union_read (struct vop_read_args *ap);
	92	static int union_readdir (struct vop_readdir_args *ap);
	93	static int union_readlink (struct vop_readlink_args *ap);
	94	static int union_reclaim (struct vop_reclaim_args *ap);
	95	static int union_remove (struct vop_old_remove_args *ap);
	96	static int union_rename (struct vop_old_rename_args *ap);
	97	static int union_revoke (struct vop_revoke_args *ap);
	98	static int union_rmdir (struct vop_old_rmdir_args *ap);
	99	static int union_poll (struct vop_poll_args *ap);
	100	static int union_setattr (struct vop_setattr_args *ap);
	101	static int union_strategy (struct vop_strategy_args *ap);
	102	static int union_getpages (struct vop_getpages_args *ap);
	103	static int union_putpages (struct vop_putpages_args *ap);
	104	static int union_symlink (struct vop_old_symlink_args *ap);
	105	static int union_whiteout (struct vop_old_whiteout_args *ap);
	106	static int union_write (struct vop_read_args *ap);	/* NOTE(review): declared with vop_read_args, as is its definition -- confirm this is intended */
	107	/* union_lock_upper: return un's upper vnode with an extra reference and an exclusive lock, or NULL if un has none; td is unused. */
	108	static __inline
	109	struct vnode *
	110	union_lock_upper(struct union_node *un, struct thread *td)
	111	{
	112	struct vnode *uppervp;
	113
	114	if ((uppervp = un->un_uppervp) != NULL) {
	115	vref(uppervp);	/* take our own reference before locking */
	116	vn_lock(uppervp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
	117	}
	118	KASSERT((uppervp == NULL || uppervp->v_usecount > 0), ("uppervp usecount is 0"));
	119	return(uppervp);
	120	}
	121	/* union_ref_upper: return un's upper vnode with an extra reference (not locked); NULL if absent, NULLVP if it is flagged VRECLAIMED. */
	122	static __inline
	123	struct vnode *
	124	union_ref_upper(struct union_node *un)
	125	{
	126	struct vnode *vp = un->un_uppervp;
	127
	128	if (vp == NULL)
	129	return (vp);
	130	vref(vp);
	131	if ((vp->v_flag & VRECLAIMED) == 0)
	132	return (vp);
	133	/* the vnode is marked reclaimed: hand the reference straight back */
	134	vrele(vp);
	135	return (NULLVP);
	136	}
	137	/* union_unlock_upper: release a vnode obtained from union_lock_upper() by passing it to vput(); td is unused. */
	138	static __inline
	139	void
	140	union_unlock_upper(struct vnode *uppervp, struct thread *td)
	141	{
	142	vput(uppervp);
	143	}
	144	/* union_lock_other: return the upper vnode if un has one, else the lower one, referenced and exclusively locked; NULL if un has neither. */
	145	static __inline
	146	struct vnode *
	147	union_lock_other(struct union_node *un, struct thread *td)
	148	{
	149	struct vnode *vp;
	150
	151	if (un->un_uppervp != NULL) {
	152	vp = union_lock_upper(un, td);
	153	} else if ((vp = un->un_lowervp) != NULL) {
	154	vref(vp);	/* same ref-then-lock sequence union_lock_upper() uses */
	155	vn_lock(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
	156	}
	157	return(vp);
	158	}
	159	/* union_unlock_other: release a vnode obtained from union_lock_other() by passing it to vput(); td is unused. */
	160	static __inline
	161	void
	162	union_unlock_other(struct vnode *vp, struct thread *td)
	163	{
	164	vput(vp);
	165	}
	166
	167	/*
	168	* union_lookup1:
	169	*
	170	* udvp must be exclusively locked on call and will remain
	171	* exclusively locked on return. This is the mount point
	172	* for our filesystem.
	173	*
	174	* pdvp Points at our base directory, locked and referenced.
	175	* The passed dvp will be dereferenced and unlocked on return
	176	* and a new dvp will be returned which is locked and
	177	* referenced in the same variable.
	178	*
	179	* vpp is filled in with the result if no error occurred,
	180	* locked and ref'd.
	181	*
	182	* If an error is returned, *vpp is set to NULLVP. If no
	183	* error occurs, *vpp is returned with a reference and an
	184	* exclusive lock.
	185	*/
	186
	187	static int
	188	union_lookup1(struct vnode *udvp, struct vnode **pdvp, struct vnode **vpp,
	189	struct componentname *cnp)
	190	{
	191	int error;
	192	struct thread *td = cnp->cn_td;
	193	struct vnode *dvp = *pdvp;
	194	struct vnode *tdvp;
	195	struct mount *mp;
	196
	197	/*
	198	* If stepping up the directory tree, check for going
	199	* back across the mount point, in which case do what
	200	* lookup would do by stepping back down the mount
	201	* hierarchy.
	202	*/
	203	if (cnp->cn_flags & CNP_ISDOTDOT) {
	204	while ((dvp != udvp) && (dvp->v_flag & VROOT)) {
	205	/*
	206	* Don't do the NOCROSSMOUNT check
	207	* at this level. By definition,
	208	* union fs deals with namespaces, not
	209	* filesystems.
	210	*/
	211	tdvp = dvp;
	212	dvp = dvp->v_mount->mnt_vnodecovered;
	213	vref(dvp);	/* reference the covered vnode before letting go of the old one */
	214	vput(tdvp);
	215	vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
	216	}
	217	}
	218
	219	/*
	220	* Set return dvp to be the upperdvp 'parent directory'.
	221	*/
	222	*pdvp = dvp;
	223
	224	/*
	225	* If the VOP_LOOKUP call generates an error, tdvp is invalid and no
	226	* changes will have been made to dvp, so we are set to return.
	227	*/
	228
	229	error = VOP_LOOKUP(dvp, &tdvp, cnp);
	230	if (error) {
	231	UDEBUG(("dvp %p error %d flags %lx\n", dvp, error, cnp->cn_flags));
	232	*vpp = NULL;
	233	return (error);
	234	}
	235
	236	/*
	237	* The parent directory will have been unlocked, unless lookup
	238	* found the last component or if dvp == tdvp (tdvp must be locked).
	239	*
	240	* We want our dvp to remain locked and ref'd. We also want tdvp
	241	* to remain locked and ref'd.
	242	*/
	243	UDEBUG(("parentdir %p result %p flag %lx\n", dvp, tdvp, cnp->cn_flags));
	244
	245	#if 0
	246	if (dvp != tdvp && (cnp->cn_flags & CNP_XXXISLASTCN) == 0)
	247	vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
	248	#endif
	249
	250	/*
	251	* Lastly check if the current node is a mount point in
	252	* which case walk up the mount hierarchy making sure not to
	253	* bump into the root of the mount tree (ie. dvp != udvp).
	254	*
	255	* We use dvp as a temporary variable here, it is no longer related
	256	* to the dvp above. However, we have to ensure that both *pdvp and
	257	* tdvp are locked on return.
	258	*/
	259
	260	dvp = tdvp;
	261	while (
	262	dvp != udvp &&
	263	(dvp->v_type == VDIR) &&
	264	(mp = dvp->v_mountedhere)
	265	) {
	266	int relock_pdvp = 0;
	267
	268	if (vfs_busy(mp, 0))	/* could not busy the mount: re-evaluate the loop condition and retry */
	269	continue;
	270
	271	if (dvp == *pdvp)	/* the vput() below will also unlock *pdvp; remember to relock it */
	272	relock_pdvp = 1;
	273	vput(dvp);
	274	dvp = NULL;
	275	error = VFS_ROOT(mp, &dvp);
	276
	277	vfs_unbusy(mp);
	278
	279	if (relock_pdvp)
	280	vn_lock(*pdvp, LK_EXCLUSIVE | LK_RETRY);
	281
	282	if (error) {
	283	*vpp = NULL;
	284	return (error);
	285	}
	286	}
	287	*vpp = dvp;
	288	return (0);
	289	}
	290
	291	/*
	292	* union_lookup(struct vnode *a_dvp, struct vnode **a_vpp, struct componentname *a_cnp)
	293	* Look the component up in the upper and lower layers and return a union vnode in *a_vpp.
	294	*/
	295	static int
	296	union_lookup(struct vop_old_lookup_args *ap)
	297	{
	298	int error;
	299	int uerror, lerror;
	300	struct vnode *uppervp, *lowervp;
	301	struct vnode *upperdvp, *lowerdvp;
	302	struct vnode *dvp = ap->a_dvp; /* starting dir */
	303	struct union_node *dun = VTOUNION(dvp); /* associated union node */
	304	struct componentname *cnp = ap->a_cnp;
	305	struct thread *td = cnp->cn_td;
	306	int lockparent = cnp->cn_flags & CNP_LOCKPARENT;
	307	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
	308	struct ucred *saved_cred = NULL;
	309	int iswhiteout;
	310	struct vattr va;
	311
	312	*ap->a_vpp = NULLVP;
	313
	314	/*
	315	* Disallow write attempts to the filesystem mounted read-only.
	316	*/
	317	if ((dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	318	(cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME)) {
	319	return (EROFS);
	320	}
	321
	322	/*
	323	* For any lookups we do, always return with the parent locked
	324	*/
	325	cnp->cn_flags |= CNP_LOCKPARENT;
	326
	327	lowerdvp = dun->un_lowervp;
	328	uppervp = NULLVP;
	329	lowervp = NULLVP;
	330	iswhiteout = 0;
	331
	332	uerror = ENOENT;
	333	lerror = ENOENT;
	334
	335	/*
	336	* Get a private lock on uppervp and a reference, effectively
	337	* taking it out of the union_node's control.
	338	*
	339	* We must lock upperdvp while holding our lock on dvp
	340	* to avoid a deadlock.
	341	*/
	342	upperdvp = union_lock_upper(dun, td);
	343
	344	/*
	345	* do the lookup in the upper level.
	346	* if that level consumes additional pathnames,
	347	* then assume that something special is going
	348	* on and just return that vnode.
	349	*/
	350	if (upperdvp != NULLVP) {
	351	/*
	352	* We do not have to worry about the DOTDOT case, we've
	353	* already unlocked dvp.
	354	*/
	355	UDEBUG(("A %p\n", upperdvp));
	356
	357	/*
	358	* Do the lookup. We must supply a locked and referenced
	359	* upperdvp to the function and will get a new locked and
	360	* referenced upperdvp back with the old having been
	361	* dereferenced.
	362	*
	363	* If an error is returned, uppervp will be NULLVP. If no
	364	* error occurs, uppervp will be the locked and referenced
	365	* return vnode or possibly NULL, depending on what is being
	366	* requested. It is possible that the returned uppervp
	367	* will be the same as upperdvp.
	368	*/
	369	uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp);
	370	UDEBUG((
	371	"uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n",
	372	uerror,
	373	upperdvp,
	374	upperdvp->v_usecount,
	375	vn_islocked(upperdvp),
	376	uppervp,
	377	(uppervp ? uppervp->v_usecount : -99),
	378	(uppervp ? vn_islocked(uppervp) : -99)
	379	));
	380
	381	/*
	382	* Disallow write attempts to the filesystem mounted read-only.
	383	*/
	384	if (uerror == EJUSTRETURN &&
	385	(dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	386	(cnp->cn_nameiop == NAMEI_CREATE || cnp->cn_nameiop == NAMEI_RENAME)) {
	387	error = EROFS;
	388	goto out;
	389	}
	390
	391	/*
	392	* Special case. If cn_consume != 0 skip out. The result
	393	* of the lookup is transferred to our return variable. If
	394	* an error occurred we have to throw away the results.
	395	*/
	396
	397	if (cnp->cn_consume != 0) {
	398	if ((error = uerror) == 0) {
	399	*ap->a_vpp = uppervp;
	400	uppervp = NULL;
	401	}
	402	goto out;
	403	}
	404
	405	/*
	406	* Calculate whiteout, fall through
	407	*/
	408
	409	if (uerror == ENOENT || uerror == EJUSTRETURN) {
	410	if (cnp->cn_flags & CNP_ISWHITEOUT) {
	411	iswhiteout = 1;
	412	} else if (lowerdvp != NULLVP) {
	413	int terror;
	414
	415	terror = VOP_GETATTR(upperdvp, &va);
	416	if (terror == 0 && (va.va_flags & OPAQUE))	/* an OPAQUE upper directory is treated like a whiteout */
	417	iswhiteout = 1;
	418	}
	419	}
	420	}
	421
	422	/*
	423	* in a similar way to the upper layer, do the lookup
	424	* in the lower layer. this time, if there is some
	425	* component magic going on, then vput whatever we got
	426	* back from the upper layer and return the lower vnode
	427	* instead.
	428	*/
	429
	430	if (lowerdvp != NULLVP && !iswhiteout) {
	431	int nameiop;
	432
	433	UDEBUG(("B %p\n", lowerdvp));
	434
	435	/*
	436	* Force only LOOKUPs on the lower node, since
	437	* we won't be making changes to it anyway.
	438	*/
	439	nameiop = cnp->cn_nameiop;
	440	cnp->cn_nameiop = NAMEI_LOOKUP;
	441	if (um->um_op == UNMNT_BELOW) {
	442	saved_cred = cnp->cn_cred;
	443	cnp->cn_cred = um->um_cred;
	444	}
	445
	446	/*
	447	* We shouldn't have to worry about locking interactions
	448	* between the lower layer and our union layer (w.r.t.
	449	* `..' processing) because we don't futz with lowervp
	450	* locks in the union-node instantiation code path.
	451	*
	452	* union_lookup1() requires lowervp to be locked on entry,
	453	* and it will be unlocked on return. The ref count will
	454	* not change. On return lowervp doesn't represent anything
	455	* to us so we NULL it out.
	456	*/
	457	vref(lowerdvp);
	458	vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);
	459	lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp);
	460	if (lowerdvp == lowervp)	/* same vnode: drop only the extra reference, keep the lock for lowervp */
	461	vrele(lowerdvp);
	462	else
	463	vput(lowerdvp);
	464	lowerdvp = NULL; /* lowerdvp invalid after vput */
	465
	466	if (um->um_op == UNMNT_BELOW)
	467	cnp->cn_cred = saved_cred;
	468	cnp->cn_nameiop = nameiop;
	469
	470	if (cnp->cn_consume != 0 || lerror == EACCES) {
	471	if ((error = lerror) == 0) {
	472	*ap->a_vpp = lowervp;
	473	lowervp = NULL;
	474	}
	475	goto out;
	476	}
	477	} else {
	478	UDEBUG(("C %p\n", lowerdvp));
	479	if ((cnp->cn_flags & CNP_ISDOTDOT) && dun->un_pvp != NULLVP) {
	480	if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) {
	481	vref(lowervp);
	482	vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
	483	lerror = 0;
	484	}
	485	}
	486	}
	487
	488	/*
	489	* Ok. Now we have uerror, uppervp, upperdvp, lerror, and lowervp.
	490	*
	491	* 1. If both layers returned an error, select the upper layer.
	492	*
	493	* 2. If the upper layer failed and the bottom layer succeeded,
	494	* two subcases occur:
	495	*
	496	* a. The bottom vnode is not a directory, in which case
	497	* just return a new union vnode referencing an
	498	* empty top layer and the existing bottom layer.
	499	*
	500	* b. The bottom vnode is a directory, in which case
	501	* create a new directory in the top layer
	502	* and fall through to case 3.
	503	*
	504	* 3. If the top layer succeeded then return a new union
	505	* vnode referencing whatever the new top layer and
	506	* whatever the bottom layer returned.
	507	*/
	508
	509	/* case 1. */
	510	if ((uerror != 0) && (lerror != 0)) {
	511	error = uerror;
	512	goto out;
	513	}
	514
	515	/* case 2. */
	516	if (uerror != 0 /* && (lerror == 0) */ ) {
	517	if (lowervp->v_type == VDIR) { /* case 2b. */
	518	KASSERT(uppervp == NULL, ("uppervp unexpectedly non-NULL"));
	519	/*
	520	* oops, uppervp has a problem, we may have to shadow.
	521	*/
	522	uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
	523	if (uerror) {
	524	error = uerror;
	525	goto out;
	526	}
	527	}
	528	}
	529
	530	/*
	531	* Must call union_allocvp with both the upper and lower vnodes
	532	* referenced and the upper vnode locked. ap->a_vpp is returned
	533	* referenced and locked. lowervp, uppervp, and upperdvp are
	534	* absorbed by union_allocvp() whether it succeeds or fails.
	535	*
	536	* upperdvp is the parent directory of uppervp which may be
	537	* different, depending on the path, from dvp->un_uppervp. That's
	538	* why it is a separate argument. Note that it must be unlocked.
	539	*
	540	* dvp must be locked on entry to the call and will be locked on
	541	* return.
	542	*/
	543
	544	if (uppervp && uppervp != upperdvp)
	545	vn_unlock(uppervp);
	546	if (lowervp)
	547	vn_unlock(lowervp);
	548	if (upperdvp)
	549	vn_unlock(upperdvp);
	550
	551	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
	552	uppervp, lowervp, 1);
	553
	554	UDEBUG(("Create %p = %p %p refs=%d\n", *ap->a_vpp, uppervp, lowervp, (*ap->a_vpp) ? ((*ap->a_vpp)->v_usecount) : -99));
	555
	556	uppervp = NULL;
	557	upperdvp = NULL;
	558	lowervp = NULL;
	559
	560	/*
	561	* Termination Code
	562	*
	563	* - put away any extra junk laying around. Note that lowervp
	564	* (if not NULL) will never be the same as *ap->a_vpp and
	565	* neither will uppervp, because when we set that state we
	566	* NULL-out lowervp or uppervp. On the other hand, upperdvp
	567	* may match uppervp or *ap->a_vpp.
	568	*
	569	* - relock/unlock dvp if appropriate.
	570	*/
	571
	572	out:
	573	if (upperdvp) {
	574	if (upperdvp == uppervp || upperdvp == *ap->a_vpp)
	575	vrele(upperdvp);
	576	else
	577	vput(upperdvp);
	578	}
	579
	580	if (uppervp)
	581	vput(uppervp);
	582
	583	if (lowervp)
	584	vput(lowervp);
	585
	586	/*
	587	* Restore LOCKPARENT state
	588	*/
	589
	590	if (!lockparent)
	591	cnp->cn_flags &= ~CNP_LOCKPARENT;
	592
	593	UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp,
	594	((*ap->a_vpp) ? (*ap->a_vpp)->v_usecount : -99),
	595	lowervp, uppervp));
	596
	597	/*
	598	* dvp lock state, determine whether to relock dvp. dvp is expected
	599	* to be locked on return if:
	600	*
	601	* - there was an error (except not EJUSTRETURN), or
	602	* - we hit the last component and lockparent is true
	603	*
	604	* dvp_is_locked is the current state of the dvp lock, not counting
	605	* the possibility that *ap->a_vpp == dvp (in which case it is locked
	606	* anyway). Note that *ap->a_vpp == dvp only if no error occurred.
	607	*/
	608
	609	if (*ap->a_vpp != dvp) {
	610	if ((error == 0 || error == EJUSTRETURN) && !lockparent) {
	611	vn_unlock(dvp);
	612	}
	613	}
	614
	615	/*
	616	* Diagnostics
	617	*/
	618
	619	#ifdef DIAGNOSTIC
	620	if (cnp->cn_namelen == 1 &&
	621	cnp->cn_nameptr[0] == '.' &&
	622	*ap->a_vpp != dvp) {
	623	panic("union_lookup returning . (%p) not same as startdir (%p)", *ap->a_vpp, dvp);	/* print the vnode compared above, not the address of the result slot */
	624	}
	625	#endif
	626
	627	return (error);
	628	}
	629
	630	/*
	631	* union_create:
	632	*
	633	* a_dvp is locked on entry and remains locked on return. a_vpp is returned
	634	* locked if no error occurs, otherwise it is garbage.
	635	*
	636	* union_create(struct vnode *a_dvp, struct vnode **a_vpp,
	637	* struct componentname *a_cnp, struct vattr *a_vap)
	638	*/
	639	static int
	640	union_create(struct vop_old_create_args *ap)
	641	{
	642	struct union_node *dun = VTOUNION(ap->a_dvp);
	643	struct componentname *cnp = ap->a_cnp;
	644	struct thread *td = cnp->cn_td;
	645	struct vnode *dvp;
	646	int error = EROFS;	/* returned as-is when the directory has no upper vnode */
	647
	648	if ((dvp = union_lock_upper(dun, td)) != NULL) {
	649	struct vnode *vp;
	650	struct mount *mp;
	651
	652	error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);	/* create in the upper directory */
	653	if (error == 0) {
	654	mp = ap->a_dvp->v_mount;
	655	vn_unlock(vp);	/* vp is unlocked before being handed to union_allocvp() */
	656	UDEBUG(("ALLOCVP-1 FROM %p REFS %d\n", vp, vp->v_usecount));
	657	error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
	658	cnp, vp, NULLVP, 1);	/* vp is passed as the upper vnode, with no lower vnode */
	659	UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vp->v_usecount));
	660	}
	661	union_unlock_upper(dvp, td);	/* release the upper directory on success and failure alike */
	662	}
	663	return (error);
	664	}
	665
	666	/*
	667	* union_whiteout(struct vnode *a_dvp, struct componentname *a_cnp, int a_flags)
	668	* Forward the whiteout request to the upper directory; EOPNOTSUPP if there is none.
	669	*/
	670	static int
	671	union_whiteout(struct vop_old_whiteout_args *ap)
	672	{
	673	struct union_node *un = VTOUNION(ap->a_dvp);
	674	struct componentname *cnp = ap->a_cnp;
	675	struct vnode *uppervp;
	676	int error = EOPNOTSUPP;
	677
	678	if ((uppervp = union_lock_upper(un, cnp->cn_td)) != NULLVP) {
	679	error = VOP_WHITEOUT(uppervp, cnp, ap->a_flags);	/* operate on the vnode we just referenced and locked */
	680	union_unlock_upper(uppervp, cnp->cn_td);
	681	}
	682	return(error);
	683	}
	684
	685	/*
	686	* union_mknod:
	687	*
	688	* a_dvp is locked on entry and should remain locked on return.
	689	* a_vpp is garbage whether an error occurs or not.
	690	*
	691	* union_mknod(struct vnode *a_dvp, struct vnode **a_vpp,
	692	* struct componentname *a_cnp, struct vattr *a_vap)
	693	*/
	694	static int
	695	union_mknod(struct vop_old_mknod_args *ap)
	696	{
	697	struct union_node *dun = VTOUNION(ap->a_dvp);
	698	struct componentname *cnp = ap->a_cnp;
	699	struct vnode *dvp;
	700	int error = EROFS;	/* returned as-is when the directory has no upper vnode */
	701
	702	if ((dvp = union_lock_upper(dun, cnp->cn_td)) != NULL) {
	703	error = VOP_MKNOD(dvp, ap->a_vpp, cnp, ap->a_vap);	/* forwarded unchanged to the upper directory */
	704	union_unlock_upper(dvp, cnp->cn_td);
	705	}
	706	return (error);
	707	}
	708
	709	/*
	710	* union_open:
	711	*
	712	* run open VOP. When opening the underlying vnode we have to mimic
	713	* vn_open. What we really need to do to avoid screwups if the
	714	* open semantics change is to call vn_open(). For example, ufs blows
	715	* up if you open a file but do not vmio it prior to writing.
	716	*
	717	* union_open(struct vnode *a_vp, int a_mode,
	718	* struct ucred *a_cred, struct thread *a_td)
	719	*/
	720	static int
	721	union_open(struct vop_open_args *ap)
	722	{
	723	struct union_node *un = VTOUNION(ap->a_vp);
	724	struct vnode *tvp;
	725	int mode = ap->a_mode;
	726	struct ucred *cred = ap->a_cred;
	727	struct thread *td = ap->a_td;
	728	int error = 0;
	729	int tvpisupper = 1;	/* cleared only when tvp ends up being the lower vnode */
	730
	731	/*
	732	* If there is an existing upper vp then simply open that.
	733	* The upper vp takes precedence over the lower vp. When opening
	734	* a lower vp for writing copy it to the uppervp and then open the
	735	* uppervp.
	736	*
	737	* At the end of this section tvp will be left locked.
	738	*/
	739	if ((tvp = union_lock_upper(un, td)) == NULLVP) {
	740	/*
	741	* If the lower vnode is being opened for writing, then
	742	* copy the file contents to the upper vnode and open that,
	743	* otherwise can simply open the lower vnode.
	744	*/
	745	tvp = un->un_lowervp;
	746	if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
	747	int docopy = !(mode & O_TRUNC);	/* no need to copy data that O_TRUNC would discard */
	748	error = union_copyup(un, docopy, cred, td);
	749	tvp = union_lock_upper(un, td);	/* may still be NULL if the copyup failed; handled at release time */
	750	} else {
	751	un->un_openl++;	/* NOTE(review): not undone here if the VOP_OPEN below fails -- confirm union_close still runs */
	752	vref(tvp);
	753	vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
	754	tvpisupper = 0;
	755	}
	756	}
	757
	758	/*
	759	* We are holding the correct vnode, open it. Note
	760	* that in DragonFly, VOP_OPEN is responsible for associating
	761	* a VM object with the vnode if the vnode is mappable or the
	762	* underlying filesystem uses buffer cache calls on it.
	763	*/
	764	if (error == 0)
	765	error = VOP_OPEN(tvp, mode, cred, NULL);
	766
	767	/*
	768	* Release any locks held
	769	*/
	770	if (tvpisupper) {
	771	if (tvp)
	772	union_unlock_upper(tvp, td);
	773	} else {
	774	vput(tvp);
	775	}
	776	return (error);
	777	}
	778
	779	/*
	780	* union_close:
	781	*
	782	* It is unclear whether a_vp is passed locked or unlocked. Whatever
	783	* the case we do not change it.
	784	*
	785	* union_close(struct vnode *a_vp, int a_fflag, struct ucred *a_cred,
	786	* struct thread *a_td)
	787	*/
	788	static int
	789	union_close(struct vop_close_args *ap)
	790	{
	791	struct union_node *un = VTOUNION(ap->a_vp);
	792	struct vnode *vp;
	793
	794	if ((vp = un->un_uppervp) == NULLVP) {	/* no upper vnode: the close goes to the lower layer */
	795	#ifdef UNION_DIAGNOSTIC
	796	if (un->un_openl <= 0)
	797	panic("union: un_openl cnt");
	798	#endif
	799	--un->un_openl;	/* pairs with the un_openl++ done when union_open opens the lower vnode */
	800	vp = un->un_lowervp;
	801	}
	802	ap->a_head.a_ops = *vp->v_ops;	/* retarget the argument block at the chosen layer's ops ... */
	803	ap->a_vp = vp;	/* ... and vnode, then re-dispatch it */
	804	return(vop_close_ap(ap));
	805	}
	806
	807	/*
	808	* Check access permission on the union vnode.
	809	* The access check being enforced is to check
	810	* against both the underlying vnode, and any
	811	* copied vnode. This ensures that no additional
	812	* file permissions are given away simply because
	813	* the user caused an implicit file copy.
	814	*
	815	* union_access(struct vnode *a_vp, int a_mode,
	816	* struct ucred *a_cred, struct thread *a_td)
	817	*/
	818	static int
	819	union_access(struct vop_access_args *ap)
	820	{
	821	struct union_node *un = VTOUNION(ap->a_vp);
	822	struct thread *td = ap->a_td;
	823	int error = EACCES;	/* result when the node has neither an upper nor a lower vnode */
	824	struct vnode *vp;
	825
	826	/*
	827	* Disallow write attempts on filesystems mounted read-only.
	828	*/
	829	if ((ap->a_mode & VWRITE) &&
	830	(ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) {
	831	switch (ap->a_vp->v_type) {
	832	case VREG:
	833	case VDIR:
	834	case VLNK:
	835	return (EROFS);
	836	default:
	837	break;
	838	}
	839	}
	840
	841	if ((vp = union_lock_upper(un, td)) != NULLVP) {	/* an upper vnode alone decides the result */
	842	ap->a_head.a_ops = *vp->v_ops;
	843	ap->a_vp = vp;
	844	error = vop_access_ap(ap);
	845	union_unlock_upper(vp, td);
	846	return(error);
	847	}
	848
	849	if ((vp = un->un_lowervp) != NULLVP) {
	850	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* locked without an extra reference; paired with vn_unlock() below */
	851	ap->a_head.a_ops = *vp->v_ops;
	852	ap->a_vp = vp;
	853
	854	/*
	855	* Remove VWRITE from a_mode if our mount point is RW, because
	856	* we want to allow writes and lowervp may be read-only.
	857	*/
	858	if ((un->un_vnode->v_mount->mnt_flag & MNT_RDONLY) == 0)
	859	ap->a_mode &= ~VWRITE;
	860
	861	error = vop_access_ap(ap);
	862	if (error == 0) {
	863	struct union_mount *um;
	864
	865	um = MOUNTTOUNIONMOUNT(un->un_vnode->v_mount);
	866
	867	if (um->um_op == UNMNT_BELOW) {	/* for UNMNT_BELOW mounts, check again with the mount's credentials */
	868	ap->a_cred = um->um_cred;
	869	error = vop_access_ap(ap);
	870	}
	871	}
	872	vn_unlock(vp);
	873	}
	874	return(error);
	875	}
	876
	877	/*
	878	* We handle getattr only to change the fsid and
	879	* track object sizes
	880	*
	881	* It's not clear whether VOP_GETATTR is to be
	882	* called with the vnode locked or not. stat() calls
	883	* it with (vp) locked, and fstat calls it with
	884	* (vp) unlocked.
	885	*
	886	* Because of this we cannot use our normal locking functions
	887	* if we do not intend to lock the main a_vp node. At the moment
	888	* we are running without any specific locking at all, but beware
	889	* to any programmer that care must be taken if locking is added
	890	* to this function.
	891	*
	892	* union_getattr(struct vnode *a_vp, struct vattr *a_vap,
	893	* struct ucred *a_cred, struct thread *a_td)
	894	*/
	895	static int
	896	union_getattr(struct vop_getattr_args *ap)
	897	{
	898	int error;
	899	struct union_node *un = VTOUNION(ap->a_vp);
	900	struct vnode *vp;
	901	struct vattr *vap;
	902	struct vattr va;	/* scratch attributes for the lower directory in the two-layer case */
	903
	904	/*
	905	* Some programs walk the filesystem hierarchy by counting
	906	* links to directories to avoid stat'ing all the time.
	907	* This means the link count on directories needs to be "correct".
	908	* The only way to do that is to call getattr on both layers
	909	* and fix up the link count. The link count will not necessarily
	910	* be accurate but will be large enough to defeat the tree walkers.
	911	*/
	912
	913	vap = ap->a_vap;
	914
	915	if ((vp = un->un_uppervp) != NULLVP) {
	916	error = VOP_GETATTR(vp, vap);	/* upper attributes go straight into the caller's buffer */
	917	if (error)
	918	return (error);
	919	/* XXX isn't this dangerous without a lock? */
	920	union_newsize(ap->a_vp, vap->va_size, VNOVAL);
	921	}
	922
	923	if (vp == NULLVP) {	/* no upper vnode: report the lower one into the caller's buffer */
	924	vp = un->un_lowervp;
	925	} else if (vp->v_type == VDIR && un->un_lowervp != NULLVP) {	/* directory present in both layers: query lower into va */
	926	vp = un->un_lowervp;
	927	vap = &va;
	928	} else {
	929	vp = NULLVP;	/* upper vnode was sufficient; skip the second query */
	930	}
	931
	932	if (vp != NULLVP) {
	933	error = VOP_GETATTR(vp, vap);
	934	if (error)
	935	return (error);
	936	/* XXX isn't this dangerous without a lock? */
	937	union_newsize(ap->a_vp, VNOVAL, vap->va_size);
	938	}
	939
	940	if ((vap != ap->a_vap) && (vap->va_type == VDIR))	/* two-layer directory: add the lower link count to the upper one */
	941	ap->a_vap->va_nlink += vap->va_nlink;
	942	return (0);
	943	}
	944
	945	/*
	946	* union_setattr(struct vnode *a_vp, struct vattr *a_vap,
	947	* struct ucred *a_cred, struct thread *a_td)
	948	*/
	949	static int
	950	union_setattr(struct vop_setattr_args *ap)
	951	{
	952	struct union_node *un = VTOUNION(ap->a_vp);
	953	struct thread *td = ap->a_td;
	954	struct vattr *vap = ap->a_vap;
	955	struct vnode *uppervp;
	956	int error;
	957
	958	/*
	959	* Disallow write attempts on filesystems mounted read-only.
	960	*/
	961	if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) &&
	962	(vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
	963	vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
	964	vap->va_mtime.tv_sec != VNOVAL ||
	965	vap->va_mode != (mode_t)VNOVAL)) {
	966	return (EROFS);
	967	}
	968
	969	/*
	970	* Handle case of truncating lower object to zero size,
	971	* by creating a zero length upper object. This is to
	972	* handle the case of open with O_TRUNC and O_CREAT.
	973	*/
	974	if (un->un_uppervp == NULLVP && (un->un_lowervp->v_type == VREG)) {
	975	error = union_copyup(un, (ap->a_vap->va_size != 0),
	976	ap->a_cred, ap->a_td);	/* data is copied only when the requested size is non-zero */
	977	if (error)
	978	return (error);
	979	}
	980
	981	/*
	982	* Try to set attributes in upper layer,
	983	* otherwise return read-only filesystem error.
	984	*/
	985	error = EROFS;
	986	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
	987	error = VOP_SETATTR(uppervp, ap->a_vap, ap->a_cred);	/* operate on the vnode we just referenced and locked */
	988	if ((error == 0) && (ap->a_vap->va_size != VNOVAL))
	989	union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL);
	990	union_unlock_upper(uppervp, td);
	991	}
	992	return (error);
	993	}
	994
	995	/*
	996	* union_getpages: hand the page-in request for a_vp to the generic vnode pager.
	997	*/
	998
	999	static int
	1000	union_getpages(struct vop_getpages_args *ap)
	1001	{
	1002	struct vnode *vp = ap->a_vp;
	1003
	1004	/* nothing union-specific to do: the generic pager's status is ours */
	1005	return (vnode_pager_generic_getpages(vp, ap->a_m,
	1006	ap->a_count, ap->a_reqpage));
	1007	}
	1008
	1009	/*
	1010	* union_putpages: hand the page-out request for a_vp to the generic vnode pager.
	1011	*/
	1012
	1013	static int
	1014	union_putpages(struct vop_putpages_args *ap)
	1015	{
	1016	struct vnode *vp = ap->a_vp;
	1017
	1018	/* nothing union-specific to do: the generic pager's status is ours */
	1019	return (vnode_pager_generic_putpages(vp, ap->a_m, ap->a_count,
	1020	ap->a_sync, ap->a_rtvals));
	1021	}
	1022
	1023	/*
	1024	* union_read(struct vnode *a_vp, struct uio *a_uio, int a_ioflag, struct ucred *a_cred)
	1025	* Read via the upper vnode if the node has one, else the lower, then record any size growth.
	1026	*/
	1027	static int
	1028	union_read(struct vop_read_args *ap)
	1029	{
	1030	struct vnode *vp = ap->a_vp;
	1031	struct uio *uio = ap->a_uio;
	1032	struct thread *td = uio->uio_td;
	1033	struct vnode *backvp;
	1034	struct union_node *node;
	1035	off_t endoff;
	1036	int error;
	1037
	1038	/* reference and lock whichever layer backs this node, upper first */
	1039	backvp = union_lock_other(VTOUNION(vp), td);
	1040	KASSERT(backvp != NULL, ("union_read: backing vnode missing!"));
	1041
	1042	if (vp->v_flag & VOBJBUF)
	1043	union_vm_coherency(vp, uio, 0);
	1044
	1045	error = VOP_READ(backvp, uio, ap->a_ioflag, ap->a_cred);
	1046	union_unlock_other(backvp, td);
	1047	if (error != 0)
	1048	return (error);
	1049
	1050	/*
	1051	* XXX the underlying object may have changed size under our feet;
	1052	* use the offset left in the uio to bump the cached size if needed.
	1053	*/
	1054	node = VTOUNION(vp);
	1055	endoff = uio->uio_offset;
	1056	if (backvp == node->un_uppervp) {
	1057	if (endoff > node->un_uppersz)
	1058	union_newsize(vp, endoff, VNOVAL);
	1059	} else if (endoff > node->un_lowersz) {
	1060	union_newsize(vp, VNOVAL, endoff);
	1061	}
	1062	return (0);
	1063	}
	1064
	1065	/*
	1066	* union_write(struct vnode a_vp, struct uio a_uio, int a_ioflag,
	1067	* struct ucred *a_cred)
	1068	*/
	1069	static int
	1070	union_write(struct vop_read_args *ap)
	1071	{
	1072	struct union_node *un = VTOUNION(ap->a_vp);
	1073	struct thread *td = ap->a_uio->uio_td;
	1074	struct vnode *uppervp;
	1075	int error;
	1076
	1077	if ((uppervp = union_lock_upper(un, td)) == NULLVP)
	1078	panic("union: missing upper layer in write");
	1079
	1080	/*
	1081	* Since our VM pages are associated with our vnode rather then
	1082	* the real vnode, and since we do not run our reads and writes
	1083	* through our own VM cache, we have a VM/VFS coherency problem.
	1084	* We solve them by invalidating or flushing the associated VM
	1085	* pages prior to allowing a normal read or write to occur.
	1086	*
	1087	* VM-backed writes (UIO_NOCOPY) have to be converted to normal
	1088	* writes because we are not cache-coherent. Normal writes need
	1089	* to be made coherent with our VM-backing store, which we do by
	1090	* first flushing any dirty VM pages associated with the write
	1091	* range, and then destroying any clean VM pages associated with
	1092	* the write range.
	1093	*/
	1094
	1095	if (ap->a_uio->uio_segflg == UIO_NOCOPY) {
	1096	ap->a_uio->uio_segflg = UIO_SYSSPACE;
	1097	} else if (ap->a_vp->v_flag & VOBJBUF) {
	1098	union_vm_coherency(ap->a_vp, ap->a_uio, 1);
	1099	}
	1100
	1101	error = VOP_WRITE(uppervp, ap->a_uio, ap->a_ioflag, ap->a_cred);
	1102
	1103	/*
	1104	* the size of the underlying object may be changed by the
	1105	* write.
	1106	*/
	1107	if (error == 0) {
	1108	off_t cur = ap->a_uio->uio_offset;
	1109
	1110	if (cur > un->un_uppersz)
	1111	union_newsize(ap->a_vp, cur, VNOVAL);
	1112	}
	1113	union_unlock_upper(uppervp, td);
	1114	return (error);
	1115	}
	1116
	1117	/*
	1118	* union_ioctl(struct vnode *a_vp, int a_command, caddr_t a_data, int a_fflag,
	1119	* struct ucred a_cred, struct thread a_td)
	1120	*/
	1121	static int
	1122	union_ioctl(struct vop_ioctl_args *ap)
	1123	{
	1124	struct vnode *ovp = OTHERVP(ap->a_vp);
	1125
	1126	ap->a_head.a_ops = *ovp->v_ops;
	1127	ap->a_vp = ovp;
	1128	return(vop_ioctl_ap(ap));
	1129	}
	1130
	1131	/*
	1132	* union_poll(struct vnode a_vp, int a_events, struct ucred a_cred,
	1133	* struct thread *a_td)
	1134	*/
	1135	static int
	1136	union_poll(struct vop_poll_args *ap)
	1137	{
	1138	struct vnode *ovp = OTHERVP(ap->a_vp);
	1139
	1140	ap->a_head.a_ops = *ovp->v_ops;
	1141	ap->a_vp = ovp;
	1142	return(vop_poll_ap(ap));
	1143	}
	1144
	1145	/*
	1146	* union_revoke(struct vnode a_vp, int a_flags, struct thread a_td)
	1147	*/
	1148	static int
	1149	union_revoke(struct vop_revoke_args *ap)
	1150	{
	1151	struct vnode *vp = ap->a_vp;
	1152	struct vnode *vx;
	1153
	1154	if ((vx = UPPERVP(vp)) != NULL) {
	1155	vx_get(vx);
	1156	VOP_REVOKE(vx, ap->a_flags);
	1157	vx_put(vx);
	1158	}
	1159	if ((vx = LOWERVP(vp)) != NULL) {
	1160	vx_get(vx);
	1161	VOP_REVOKE(vx, ap->a_flags);
	1162	vx_put(vx);
	1163	}
	1164	vgone(vp);
	1165	return (0);
	1166	}
	1167
	1168	/*
	1169	* union_mmap(struct vnode a_vp, int a_fflags, struct ucred a_cred,
	1170	* struct thread *a_td)
	1171	*/
	1172	static int
	1173	union_mmap(struct vop_mmap_args *ap)
	1174	{
	1175	struct vnode *ovp = OTHERVP(ap->a_vp);
	1176
	1177	ap->a_head.a_ops = *ovp->v_ops;
	1178	ap->a_vp = ovp;
	1179	return (vop_mmap_ap(ap));
	1180	}
	1181
	1182	/*
	1183	* union_fsync(struct vnode a_vp, struct ucred a_cred, int a_waitfor,
	1184	* struct thread *a_td)
	1185	*/
	1186	static int
	1187	union_fsync(struct vop_fsync_args *ap)
	1188	{
	1189	int error = 0;
	1190	struct thread *td = ap->a_td;
	1191	struct vnode *targetvp;
	1192	struct union_node *un = VTOUNION(ap->a_vp);
	1193
	1194	if ((targetvp = union_lock_other(un, td)) != NULLVP) {
	1195	error = VOP_FSYNC(targetvp, ap->a_waitfor);
	1196	union_unlock_other(targetvp, td);
	1197	}
	1198
	1199	return (error);
	1200	}
	1201
	1202	/*
	1203	* union_remove:
	1204	*
	1205	* Remove the specified cnp. The dvp and vp are passed to us locked
	1206	* and must remain locked on return.
	1207	*
	1208	* union_remove(struct vnode a_dvp, struct vnode a_vp,
	1209	* struct componentname *a_cnp)
	1210	*/
	1211	static int
	1212	union_remove(struct vop_old_remove_args *ap)
	1213	{
	1214	struct union_node *dun = VTOUNION(ap->a_dvp);
	1215	struct union_node *un = VTOUNION(ap->a_vp);
	1216	struct componentname *cnp = ap->a_cnp;
	1217	struct thread *td = cnp->cn_td;
	1218	struct vnode *uppervp;
	1219	struct vnode *upperdvp;
	1220	int error;
	1221
	1222	if ((upperdvp = union_lock_upper(dun, td)) == NULLVP)
	1223	panic("union remove: null upper vnode");
	1224
	1225	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
	1226	if (union_dowhiteout(un, cnp->cn_cred, td))
	1227	cnp->cn_flags \|= CNP_DOWHITEOUT;
	1228	error = VOP_REMOVE(upperdvp, uppervp, cnp);
	1229	#if 0
	1230	/* XXX */
	1231	if (!error)
	1232	union_removed_upper(un);
	1233	#endif
	1234	union_unlock_upper(uppervp, td);
	1235	} else {
	1236	error = union_mkwhiteout(
	1237	MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
	1238	upperdvp, ap->a_cnp, un->un_path);
	1239	}
	1240	union_unlock_upper(upperdvp, td);
	1241	return (error);
	1242	}
	1243
	1244	/*
	1245	* union_link:
	1246	*
	1247	* tdvp will be locked on entry, vp will not be locked on entry.
	1248	* tdvp should remain locked on return and vp should remain unlocked
	1249	* on return.
	1250	*
	1251	* union_link(struct vnode a_tdvp, struct vnode a_vp,
	1252	* struct componentname *a_cnp)
	1253	*/
	1254	static int
	1255	union_link(struct vop_old_link_args *ap)
	1256	{
	1257	struct componentname *cnp = ap->a_cnp;
	1258	struct thread *td = cnp->cn_td;
	1259	struct union_node *dun = VTOUNION(ap->a_tdvp);
	1260	struct vnode *vp;
	1261	struct vnode *tdvp;
	1262	int error = 0;
	1263
	1264	if (ap->a_tdvp->v_ops != ap->a_vp->v_ops) {
	1265	vp = ap->a_vp;
	1266	} else {
	1267	struct union_node *tun = VTOUNION(ap->a_vp);
	1268
	1269	if (tun->un_uppervp == NULLVP) {
	1270	vn_lock(ap->a_vp, LK_EXCLUSIVE \| LK_RETRY);
	1271	#if 0
	1272	if (dun->un_uppervp == tun->un_dirvp) {
	1273	if (dun->un_flags & UN_ULOCK) {
	1274	dun->un_flags &= ~UN_ULOCK;
	1275	vn_unlock(dun->un_uppervp);
	1276	}
	1277	}
	1278	#endif
	1279	error = union_copyup(tun, 1, cnp->cn_cred, td);
	1280	#if 0
	1281	if (dun->un_uppervp == tun->un_dirvp) {
	1282	vn_lock(dun->un_uppervp,
	1283	LK_EXCLUSIVE \| LK_RETRY);
	1284	dun->un_flags \|= UN_ULOCK;
	1285	}
	1286	#endif
	1287	vn_unlock(ap->a_vp);
	1288	}
	1289	vp = tun->un_uppervp;
	1290	}
	1291
	1292	if (error)
	1293	return (error);
	1294
	1295	/*
	1296	* Make sure upper is locked, then unlock the union directory we were
	1297	* called with to avoid a deadlock while we are calling VOP_LINK on
	1298	* the upper (with tdvp locked and vp not locked). Our ap->a_tdvp
	1299	* is expected to be locked on return.
	1300	*/
	1301
	1302	if ((tdvp = union_lock_upper(dun, td)) == NULLVP)
	1303	return (EROFS);
	1304
	1305	vn_unlock(ap->a_tdvp); /* unlock calling node */
	1306	error = VOP_LINK(tdvp, vp, cnp); /* call link on upper */
	1307
	1308	/*
	1309	* We have to unlock tdvp prior to relocking our calling node in
	1310	* order to avoid a deadlock.
	1311	*/
	1312	union_unlock_upper(tdvp, td);
	1313	vn_lock(ap->a_tdvp, LK_EXCLUSIVE \| LK_RETRY);
	1314	return (error);
	1315	}
	1316
	1317	/*
	1318	* union_rename(struct vnode a_fdvp, struct vnode a_fvp,
	1319	* struct componentname a_fcnp, struct vnode a_tdvp,
	1320	* struct vnode a_tvp, struct componentname a_tcnp)
	1321	*/
	1322	static int
	1323	union_rename(struct vop_old_rename_args *ap)
	1324	{
	1325	int error;
	1326	struct vnode *fdvp = ap->a_fdvp;
	1327	struct vnode *fvp = ap->a_fvp;
	1328	struct vnode *tdvp = ap->a_tdvp;
	1329	struct vnode *tvp = ap->a_tvp;
	1330
	1331	/*
	1332	* Figure out what fdvp to pass to our upper or lower vnode. If we
	1333	* replace the fdvp, release the original one and ref the new one.
	1334	*/
	1335
	1336	if (fdvp->v_tag == VT_UNION) { /* always true */
	1337	struct union_node *un = VTOUNION(fdvp);
	1338	if (un->un_uppervp == NULLVP) {
	1339	/*
	1340	* this should never happen in normal
	1341	* operation but might if there was
	1342	* a problem creating the top-level shadow
	1343	* directory.
	1344	*/
	1345	error = EXDEV;
	1346	goto bad;
	1347	}
	1348	fdvp = un->un_uppervp;
	1349	vref(fdvp);
	1350	vrele(ap->a_fdvp);
	1351	}
	1352
	1353	/*
	1354	* Figure out what fvp to pass to our upper or lower vnode. If we
	1355	* replace the fvp, release the original one and ref the new one.
	1356	*/
	1357
	1358	if (fvp->v_tag == VT_UNION) { /* always true */
	1359	struct union_node *un = VTOUNION(fvp);
	1360	#if 0
	1361	struct union_mount *um = MOUNTTOUNIONMOUNT(fvp->v_mount);
	1362	#endif
	1363
	1364	if (un->un_uppervp == NULLVP) {
	1365	switch(fvp->v_type) {
	1366	case VREG:
	1367	vn_lock(un->un_vnode, LK_EXCLUSIVE \| LK_RETRY);
	1368	error = union_copyup(un, 1, ap->a_fcnp->cn_cred, ap->a_fcnp->cn_td);
	1369	vn_unlock(un->un_vnode);
	1370	if (error)
	1371	goto bad;
	1372	break;
	1373	case VDIR:
	1374	/*
	1375	* XXX not yet.
	1376	*
	1377	* There is only one way to rename a directory
	1378	* based in the lowervp, and that is to copy
	1379	* the entire directory hierarchy. Otherwise
	1380	* it would not last across a reboot.
	1381	*/
	1382	#if 0
	1383	vrele(fvp);
	1384	fvp = NULL;
	1385	vn_lock(fdvp, LK_EXCLUSIVE \| LK_RETRY);
	1386	error = union_mkshadow(um, fdvp,
	1387	ap->a_fcnp, &un->un_uppervp);
	1388	vn_unlock(fdvp);
	1389	if (un->un_uppervp)
	1390	vn_unlock(un->un_uppervp);
	1391	if (error)
	1392	goto bad;
	1393	break;
	1394	#endif
	1395	default:
	1396	error = EXDEV;
	1397	goto bad;
	1398	}
	1399	}
	1400
	1401	if (un->un_lowervp != NULLVP)
	1402	ap->a_fcnp->cn_flags \|= CNP_DOWHITEOUT;
	1403	fvp = un->un_uppervp;
	1404	vref(fvp);
	1405	vrele(ap->a_fvp);
	1406	}
	1407
	1408	/*
	1409	* Figure out what tdvp (destination directory) to pass to the
	1410	* lower level. If we replace it with uppervp, we need to vput the
	1411	* old one. The exclusive lock is transfered to what we will pass
	1412	* down in the VOP_RENAME and we replace uppervp with a simple
	1413	* reference.
	1414	*/
	1415
	1416	if (tdvp->v_tag == VT_UNION) {
	1417	struct union_node *un = VTOUNION(tdvp);
	1418
	1419	if (un->un_uppervp == NULLVP) {
	1420	/*
	1421	* this should never happen in normal
	1422	* operation but might if there was
	1423	* a problem creating the top-level shadow
	1424	* directory.
	1425	*/
	1426	error = EXDEV;
	1427	goto bad;
	1428	}
	1429
	1430	/*
	1431	* new tdvp is a lock and reference on uppervp, put away
	1432	* the old tdvp.
	1433	*/
	1434	tdvp = union_lock_upper(un, ap->a_tcnp->cn_td);
	1435	vput(ap->a_tdvp);
	1436	}
	1437
	1438	/*
	1439	* Figure out what tvp (destination file) to pass to the
	1440	* lower level.
	1441	*
	1442	* If the uppervp file does not exist put away the (wrong)
	1443	* file and change tvp to NULL.
	1444	*/
	1445
	1446	if (tvp != NULLVP && tvp->v_tag == VT_UNION) {
	1447	struct union_node *un = VTOUNION(tvp);
	1448
	1449	tvp = union_lock_upper(un, ap->a_tcnp->cn_td);
	1450	vput(ap->a_tvp);
	1451	/* note: tvp may be NULL */
	1452	}
	1453
	1454	/*
	1455	* VOP_RENAME releases/vputs prior to returning, so we have no
	1456	* cleanup to do.
	1457	*/
	1458
	1459	return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));
	1460
	1461	/*
	1462	* Error. We still have to release / vput the various elements.
	1463	*/
	1464
	1465	bad:
	1466	vrele(fdvp);
	1467	if (fvp)
	1468	vrele(fvp);
	1469	vput(tdvp);
	1470	if (tvp != NULLVP) {
	1471	if (tvp != tdvp)
	1472	vput(tvp);
	1473	else
	1474	vrele(tvp);
	1475	}
	1476	return (error);
	1477	}
	1478
	1479	/*
	1480	* union_mkdir(struct vnode a_dvp, struct vnode *a_vpp,
	1481	* struct componentname a_cnp, struct vattr a_vap)
	1482	*/
	1483	static int
	1484	union_mkdir(struct vop_old_mkdir_args *ap)
	1485	{
	1486	struct union_node *dun = VTOUNION(ap->a_dvp);
	1487	struct componentname *cnp = ap->a_cnp;
	1488	struct thread *td = cnp->cn_td;
	1489	struct vnode *upperdvp;
	1490	int error = EROFS;
	1491
	1492	if ((upperdvp = union_lock_upper(dun, td)) != NULLVP) {
	1493	struct vnode *vp;
	1494
	1495	error = VOP_MKDIR(upperdvp, &vp, cnp, ap->a_vap);
	1496	union_unlock_upper(upperdvp, td);
	1497
	1498	if (error == 0) {
	1499	vn_unlock(vp);
	1500	UDEBUG(("ALLOCVP-2 FROM %p REFS %d\n", vp, vp->v_usecount));
	1501	error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount,
	1502	ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1);
	1503	UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vp->v_usecount));
	1504	}
	1505	}
	1506	return (error);
	1507	}
	1508
	1509	/*
	1510	* union_rmdir(struct vnode a_dvp, struct vnode a_vp,
	1511	* struct componentname *a_cnp)
	1512	*/
	1513	static int
	1514	union_rmdir(struct vop_old_rmdir_args *ap)
	1515	{
	1516	struct union_node *dun = VTOUNION(ap->a_dvp);
	1517	struct union_node *un = VTOUNION(ap->a_vp);
	1518	struct componentname *cnp = ap->a_cnp;
	1519	struct thread *td = cnp->cn_td;
	1520	struct vnode *upperdvp;
	1521	struct vnode *uppervp;
	1522	int error;
	1523
	1524	if ((upperdvp = union_lock_upper(dun, td)) == NULLVP)
	1525	panic("union rmdir: null upper vnode");
	1526
	1527	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
	1528	if (union_dowhiteout(un, cnp->cn_cred, td))
	1529	cnp->cn_flags \|= CNP_DOWHITEOUT;
	1530	error = VOP_RMDIR(upperdvp, uppervp, ap->a_cnp);
	1531	union_unlock_upper(uppervp, td);
	1532	} else {
	1533	error = union_mkwhiteout(
	1534	MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
	1535	dun->un_uppervp, ap->a_cnp, un->un_path);
	1536	}
	1537	union_unlock_upper(upperdvp, td);
	1538	return (error);
	1539	}
	1540
	1541	/*
	1542	* union_symlink:
	1543	*
	1544	* dvp is locked on entry and remains locked on return. a_vpp is garbage
	1545	* (unused).
	1546	*
	1547	* union_symlink(struct vnode a_dvp, struct vnode *a_vpp,
	1548	* struct componentname a_cnp, struct vattr a_vap,
	1549	* char *a_target)
	1550	*/
	1551	static int
	1552	union_symlink(struct vop_old_symlink_args *ap)
	1553	{
	1554	struct union_node *dun = VTOUNION(ap->a_dvp);
	1555	struct componentname *cnp = ap->a_cnp;
	1556	struct thread *td = cnp->cn_td;
	1557	struct vnode *dvp;
	1558	int error = EROFS;
	1559
	1560	if ((dvp = union_lock_upper(dun, td)) != NULLVP) {
	1561	error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
	1562	ap->a_target);
	1563	union_unlock_upper(dvp, td);
	1564	}
	1565	return (error);
	1566	}
	1567
	1568	/*
	1569	* union_readdir works in concert with getdirentries and
	1570	* readdir(3) to provide a list of entries in the unioned
	1571	* directories. getdirentries is responsible for walking
	1572	* down the union stack. readdir(3) is responsible for
	1573	* eliminating duplicate names from the returned data stream.
	1574	*
	1575	* union_readdir(struct vnode a_vp, struct uio a_uio, struct ucred *a_cred,
	1576	* int a_eofflag, u_long a_cookies, int a_ncookies)
	1577	*/
	1578	static int
	1579	union_readdir(struct vop_readdir_args *ap)
	1580	{
	1581	struct union_node *un = VTOUNION(ap->a_vp);
	1582	struct thread *td = ap->a_uio->uio_td;
	1583	struct vnode *uvp;
	1584	int error = 0;
	1585
	1586	if ((uvp = union_ref_upper(un)) != NULLVP) {
	1587	ap->a_head.a_ops = *uvp->v_ops;
	1588	ap->a_vp = uvp;
	1589	error = vop_readdir_ap(ap);
	1590	vrele(uvp);
	1591	}
	1592	return(error);
	1593	}
	1594
	1595	/*
	1596	* union_readlink(struct vnode a_vp, struct uio a_uio, struct ucred *a_cred)
	1597	*/
	1598	static int
	1599	union_readlink(struct vop_readlink_args *ap)
	1600	{
	1601	int error;
	1602	struct union_node *un = VTOUNION(ap->a_vp);
	1603	struct uio *uio = ap->a_uio;
	1604	struct thread *td = uio->uio_td;
	1605	struct vnode *vp;
	1606
	1607	vp = union_lock_other(un, td);
	1608	KASSERT(vp != NULL, ("union_readlink: backing vnode missing!"));
	1609
	1610	ap->a_head.a_ops = *vp->v_ops;
	1611	ap->a_vp = vp;
	1612	error = vop_readlink_ap(ap);
	1613	union_unlock_other(vp, td);
	1614
	1615	return (error);
	1616	}
	1617
	1618	/*
	1619	* union_inactive:
	1620	*
	1621	* Called with the vnode locked. We are expected to unlock the vnode.
	1622	*
	1623	* union_inactive(struct vnode a_vp, struct thread a_td)
	1624	*/
	1625	static int
	1626	union_inactive(struct vop_inactive_args *ap)
	1627	{
	1628	struct vnode *vp = ap->a_vp;
	1629	/struct thread td = ap->a_td;*/
	1630	struct union_node *un = VTOUNION(vp);
	1631	struct vnode **vpp;
	1632
	1633	/*
	1634	* Do nothing (and _don't_ bypass).
	1635	* Wait to vrele lowervp until reclaim,
	1636	* so that until then our union_node is in the
	1637	* cache and reusable.
	1638	*
	1639	* NEEDSWORK: Someday, consider inactive'ing
	1640	* the lowervp and then trying to reactivate it
	1641	* with capabilities (v_id)
	1642	* like they do in the name lookup cache code.
	1643	* That's too much work for now.
	1644	*/
	1645
	1646	if (un->un_dircache != 0) {
	1647	for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
	1648	vrele(*vpp);
	1649	kfree (un->un_dircache, M_TEMP);
	1650	un->un_dircache = 0;
	1651	}
	1652
	1653	#if 0
	1654	if ((un->un_flags & UN_ULOCK) && un->un_uppervp) {
	1655	un->un_flags &= ~UN_ULOCK;
	1656	vn_unlock(un->un_uppervp);
	1657	}
	1658	#endif
	1659
	1660	if ((un->un_flags & UN_CACHED) == 0)
	1661	vgone(vp);
	1662
	1663	return (0);
	1664	}
	1665
	1666	/*
	1667	* union_reclaim(struct vnode *a_vp)
	1668	*/
	1669	static int
	1670	union_reclaim(struct vop_reclaim_args *ap)
	1671	{
	1672	union_freevp(ap->a_vp);
	1673
	1674	return (0);
	1675	}
	1676
	1677	/*
	1678	* union_bmap:
	1679	*
	1680	* There isn't much we can do. We cannot push through to the real vnode
	1681	* to get to the underlying device because this will bypass data
	1682	* cached by the real vnode.
	1683	*
	1684	* For some reason we cannot return the 'real' vnode either, it seems
	1685	* to blow up memory maps.
	1686	*
	1687	* union_bmap(struct vnode a_vp, off_t a_loffset, struct vnode *a_vpp,
	1688	* off_t a_doffsetp, int a_runp, int *a_runb)
	1689	*/
	1690	static int
	1691	union_bmap(struct vop_bmap_args *ap)
	1692	{
	1693	return(EOPNOTSUPP);
	1694	}
	1695
	1696	/*
	1697	* union_print(struct vnode *a_vp)
	1698	*/
	1699	static int
	1700	union_print(struct vop_print_args *ap)
	1701	{
	1702	struct vnode *vp = ap->a_vp;
	1703
	1704	kprintf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
	1705	vp, UPPERVP(vp), LOWERVP(vp));
	1706	if (UPPERVP(vp) != NULLVP)
	1707	vprint("union: upper", UPPERVP(vp));
	1708	if (LOWERVP(vp) != NULLVP)
	1709	vprint("union: lower", LOWERVP(vp));
	1710
	1711	return (0);
	1712	}
	1713
	1714	/*
	1715	* union_pathconf(struct vnode a_vp, int a_name, int a_retval)
	1716	*/
	1717	static int
	1718	union_pathconf(struct vop_pathconf_args *ap)
	1719	{
	1720	int error;
	1721	struct thread td = curthread; / XXX */
	1722	struct union_node *un = VTOUNION(ap->a_vp);
	1723	struct vnode *vp;
	1724
	1725	vp = union_lock_other(un, td);
	1726	KASSERT(vp != NULL, ("union_pathconf: backing vnode missing!"));
	1727
	1728	ap->a_head.a_ops = *vp->v_ops;
	1729	ap->a_vp = vp;
	1730	error = vop_pathconf_ap(ap);
	1731	union_unlock_other(vp, td);
	1732
	1733	return (error);
	1734	}
	1735
	1736	/*
	1737	* union_advlock(struct vnode *a_vp, caddr_t a_id, int a_op,
	1738	* struct flock *a_fl, int a_flags)
	1739	*/
	1740	static int
	1741	union_advlock(struct vop_advlock_args *ap)
	1742	{
	1743	struct vnode *ovp = OTHERVP(ap->a_vp);
	1744
	1745	ap->a_head.a_ops = *ovp->v_ops;
	1746	ap->a_vp = ovp;
	1747	return (vop_advlock_ap(ap));
	1748	}
	1749
	1750
	1751	/*
	1752	* XXX - vop_strategy must be hand coded because it has no
	1753	* YYY - and it is not coherent with anything
	1754	*
	1755	* vnode in its arguments.
	1756	* This goes away with a merged VM/buffer cache.
	1757	*
	1758	* union_strategy(struct vnode a_vp, struct bio a_bio)
	1759	*/
	1760	static int
	1761	union_strategy(struct vop_strategy_args *ap)
	1762	{
	1763	struct bio *bio = ap->a_bio;
	1764	struct buf *bp = bio->bio_buf;
	1765	struct vnode *othervp = OTHERVP(ap->a_vp);
	1766
	1767	#ifdef DIAGNOSTIC
	1768	if (othervp == NULLVP)
	1769	panic("union_strategy: nil vp");
	1770	if (bp->b_cmd != BUF_CMD_READ && (othervp == LOWERVP(ap->a_vp)))
	1771	panic("union_strategy: writing to lowervp");
	1772	#endif
	1773	return (vn_strategy(othervp, bio));
	1774	}
	1775
	1776	/*
	1777	* Global vfs data structures
	1778	*/
	1779	struct vop_ops union_vnode_vops = {
	1780	.vop_default = vop_defaultop,
	1781	.vop_access = union_access,
	1782	.vop_advlock = union_advlock,
	1783	.vop_bmap = union_bmap,
	1784	.vop_close = union_close,
	1785	.vop_old_create = union_create,
	1786	.vop_fsync = union_fsync,
	1787	.vop_getpages = union_getpages,
	1788	.vop_putpages = union_putpages,
	1789	.vop_getattr = union_getattr,
	1790	.vop_inactive = union_inactive,
	1791	.vop_ioctl = union_ioctl,
	1792	.vop_old_link = union_link,
	1793	.vop_old_lookup = union_lookup,
	1794	.vop_old_mkdir = union_mkdir,
	1795	.vop_old_mknod = union_mknod,
	1796	.vop_mmap = union_mmap,
	1797	.vop_open = union_open,
	1798	.vop_pathconf = union_pathconf,
	1799	.vop_poll = union_poll,
	1800	.vop_print = union_print,
	1801	.vop_read = union_read,
	1802	.vop_readdir = union_readdir,
	1803	.vop_readlink = union_readlink,
	1804	.vop_reclaim = union_reclaim,
	1805	.vop_old_remove = union_remove,
	1806	.vop_old_rename = union_rename,
	1807	.vop_revoke = union_revoke,
	1808	.vop_old_rmdir = union_rmdir,
	1809	.vop_setattr = union_setattr,
	1810	.vop_strategy = union_strategy,
	1811	.vop_old_symlink = union_symlink,
	1812	.vop_old_whiteout = union_whiteout,
	1813	.vop_write = union_write
	1814	};
	1815