1/*
2 * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.60 2008/05/18 01:48:50 dillon Exp $
35 */
36
37#include "hammer.h"
38#include <vm/vm_extern.h>
39#include <sys/buf.h>
40#include <sys/buf2.h>
41
42static int hammer_unload_inode(struct hammer_inode *ip);
43static void hammer_flush_inode_core(hammer_inode_t ip, int flags);
44static int hammer_setup_child_callback(hammer_record_t rec, void *data);
45static int hammer_setup_parent_inodes(hammer_record_t record);
46
47/*
48 * The kernel is not actively referencing this vnode but is still holding
49 * it cached.
50 *
51 * This is called from the frontend.
52 */
53int
54hammer_vop_inactive(struct vop_inactive_args *ap)
55{
56 struct hammer_inode *ip = VTOI(ap->a_vp);
57
58 /*
59 * Degenerate case
60 */
61 if (ip == NULL) {
62 vrecycle(ap->a_vp);
63 return(0);
64 }
65
66 /*
67 * If the inode no longer has visibility in the filesystem and is
68 * fairly clean, try to recycle it immediately. This can deadlock
69 * in vfsync() if we aren't careful.
70 *
71 * Do not queue the inode to the flusher if we still have visibility,
72 * otherwise namespace calls such as chmod will unnecessarily generate
73 * multiple inode updates.
74 */
75 hammer_inode_unloadable_check(ip, 0);
76 if (ip->ino_data.nlinks == 0) {
77 if (ip->flags & HAMMER_INODE_MODMASK)
78 hammer_flush_inode(ip, 0);
79 else
80 vrecycle(ap->a_vp);
81 }
82 return(0);
83}
84
85/*
86 * Release the vnode association. This is typically (but not always)
87 * the last reference on the inode.
88 *
89 * Once the association is lost we are on our own with regards to
90 * flushing the inode.
91 */
92int
93hammer_vop_reclaim(struct vop_reclaim_args *ap)
94{
95 struct hammer_inode *ip;
96 struct vnode *vp;
97
98 vp = ap->a_vp;
99
100 if ((ip = vp->v_data) != NULL) {
101 vp->v_data = NULL;
102 ip->vp = NULL;
103 hammer_rel_inode(ip, 1);
104 }
105 return(0);
106}
107
108/*
109 * Return a locked vnode for the specified inode. The inode must be
110 * referenced but NOT LOCKED on entry and will remain referenced on
111 * return.
112 *
113 * Called from the frontend.
114 */
115int
116hammer_get_vnode(struct hammer_inode *ip, struct vnode **vpp)
117{
118 struct vnode *vp;
119 int error = 0;
120
121 for (;;) {
122 if ((vp = ip->vp) == NULL) {
123 error = getnewvnode(VT_HAMMER, ip->hmp->mp, vpp, 0, 0);
124 if (error)
125 break;
126 hammer_lock_ex(&ip->lock);
127 if (ip->vp != NULL) {
128 hammer_unlock(&ip->lock);
129 vp->v_type = VBAD;
130 vx_put(vp);
131 continue;
132 }
133 hammer_ref(&ip->lock);
134 vp = *vpp;
135 ip->vp = vp;
136 vp->v_type =
137 hammer_get_vnode_type(ip->ino_data.obj_type);
138
139 switch(ip->ino_data.obj_type) {
140 case HAMMER_OBJTYPE_CDEV:
141 case HAMMER_OBJTYPE_BDEV:
142 vp->v_ops = &ip->hmp->mp->mnt_vn_spec_ops;
143 addaliasu(vp, ip->ino_data.rmajor,
144 ip->ino_data.rminor);
145 break;
146 case HAMMER_OBJTYPE_FIFO:
147 vp->v_ops = &ip->hmp->mp->mnt_vn_fifo_ops;
148 break;
149 default:
150 break;
151 }
152
153 /*
154 * Only mark as the root vnode if the ip is not
155 * historical, otherwise the VFS cache will get
156 * confused. The other half of the special handling
157 * is in hammer_vop_nlookupdotdot().
158 */
159 if (ip->obj_id == HAMMER_OBJID_ROOT &&
160 ip->obj_asof == ip->hmp->asof) {
161 vp->v_flag |= VROOT;
162 }
163
164 vp->v_data = (void *)ip;
165 /* vnode locked by getnewvnode() */
166 /* make related vnode dirty if inode dirty? */
167 hammer_unlock(&ip->lock);
168 if (vp->v_type == VREG)
169 vinitvmio(vp, ip->ino_data.size);
170 break;
171 }
172
173 /*
174 * loop if the vget fails (aka races), or if the vp
175 * no longer matches ip->vp.
176 */
177 if (vget(vp, LK_EXCLUSIVE) == 0) {
178 if (vp == ip->vp)
179 break;
180 vput(vp);
181 }
182 }
183 *vpp = vp;
184 return(error);
185}
186
187/*
188 * Acquire a HAMMER inode. The returned inode is not locked. These functions
189 * do not attach or detach the related vnode (use hammer_get_vnode() for
190 * that).
191 *
192 * The flags argument is only applied for newly created inodes, and only
193 * certain flags are inherited.
194 *
195 * Called from the frontend.
196 */
197struct hammer_inode *
198hammer_get_inode(hammer_transaction_t trans, struct hammer_node **cache,
199 u_int64_t obj_id, hammer_tid_t asof, int flags, int *errorp)
200{
201 hammer_mount_t hmp = trans->hmp;
202 struct hammer_inode_info iinfo;
203 struct hammer_cursor cursor;
204 struct hammer_inode *ip;
205
206 /*
207 * Determine if we already have an inode cached. If we do then
208 * we are golden.
209 */
210 iinfo.obj_id = obj_id;
211 iinfo.obj_asof = asof;
212loop:
213 ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
214 if (ip) {
215 hammer_ref(&ip->lock);
216 *errorp = 0;
217 return(ip);
218 }
219
220 ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
221 ++hammer_count_inodes;
222 ip->obj_id = obj_id;
223 ip->obj_asof = iinfo.obj_asof;
224 ip->hmp = hmp;
225 ip->flags = flags & HAMMER_INODE_RO;
226 if (hmp->ronly)
227 ip->flags |= HAMMER_INODE_RO;
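	/* largest possible offset: no truncation is pending on this inode */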
228 ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
229 RB_INIT(&ip->rec_tree);
230 TAILQ_INIT(&ip->bio_list);
231 TAILQ_INIT(&ip->bio_alt_list);
232 TAILQ_INIT(&ip->target_list);
233
234 /*
235 * Locate the on-disk inode.
236 */
237retry:
238 hammer_init_cursor(trans, &cursor, cache, NULL);
239 cursor.key_beg.localization = HAMMER_LOCALIZE_INODE;
240 cursor.key_beg.obj_id = ip->obj_id;
241 cursor.key_beg.key = 0;
242 cursor.key_beg.create_tid = 0;
243 cursor.key_beg.delete_tid = 0;
244 cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
245 cursor.key_beg.obj_type = 0;
246 cursor.asof = iinfo.obj_asof;
247 cursor.flags = HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_GET_DATA |
248 HAMMER_CURSOR_ASOF;
249
250 *errorp = hammer_btree_lookup(&cursor);
251 if (*errorp == EDEADLK) {
252 hammer_done_cursor(&cursor);
253 goto retry;
254 }
255
256 /*
257 * On success the B-Tree lookup will hold the appropriate
258 * buffer cache buffers and provide a pointer to the requested
259 * information. Copy the information to the in-memory inode
260 * and cache the B-Tree node to improve future operations.
261 */
262 if (*errorp == 0) {
263 ip->ino_leaf = cursor.node->ondisk->elms[cursor.index].leaf;
264 ip->ino_data = cursor.data->inode;
265 hammer_cache_node(cursor.node, &ip->cache[0]);
266 if (cache)
267 hammer_cache_node(cursor.node, cache);
268 }
269
270 /*
 271	 * On success load the inode's record and data and insert the
 272	 * inode into its in-memory RB tree. It is possible to race another
 273	 * lookup's insertion of the same inode, so deal with that condition too.
274 *
275 * The cursor's locked node interlocks against others creating and
276 * destroying ip while we were blocked.
277 */
278 if (*errorp == 0) {
279 hammer_ref(&ip->lock);
280 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
281 hammer_uncache_node(&ip->cache[0]);
282 hammer_uncache_node(&ip->cache[1]);
283 KKASSERT(ip->lock.refs == 1);
284 --hammer_count_inodes;
285 kfree(ip, M_HAMMER);
286 hammer_done_cursor(&cursor);
287 goto loop;
288 }
289 ip->flags |= HAMMER_INODE_ONDISK;
290 } else {
291 /*
292 * Do not panic on read-only accesses which fail, particularly
293 * historical accesses where the snapshot might not have
294 * complete connectivity.
295 */
296 if ((flags & HAMMER_INODE_RO) == 0) {
297 kprintf("hammer_get_inode: failed ip %p obj_id %016llx cursor %p error %d\n",
298 ip, ip->obj_id, &cursor, *errorp);
299 Debugger("x");
300 }
301 --hammer_count_inodes;
302 kfree(ip, M_HAMMER);
303 ip = NULL;
304 }
305 hammer_done_cursor(&cursor);
306 return (ip);
307}
308
309/*
310 * Create a new filesystem object, returning the inode in *ipp. The
311 * returned inode will be referenced.
312 *
313 * The inode is created in-memory.
314 */
315int
316hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
317 struct ucred *cred, hammer_inode_t dip,
318 struct hammer_inode **ipp)
319{
320 hammer_mount_t hmp;
321 hammer_inode_t ip;
322 uid_t xuid;
323
324 hmp = trans->hmp;
325 ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
326 ++hammer_count_inodes;
327 ip->obj_id = hammer_alloc_objid(trans, dip);
328 KKASSERT(ip->obj_id != 0);
329 ip->obj_asof = hmp->asof;
330 ip->hmp = hmp;
331 ip->flush_state = HAMMER_FST_IDLE;
332 ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES;
333
334 ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
335 RB_INIT(&ip->rec_tree);
336 TAILQ_INIT(&ip->bio_list);
337 TAILQ_INIT(&ip->bio_alt_list);
338 TAILQ_INIT(&ip->target_list);
339
340 ip->ino_leaf.atime = trans->time;
341 ip->ino_data.mtime = trans->time;
342 ip->ino_data.size = 0;
343 ip->ino_data.nlinks = 0;
344 /* XXX */
345 ip->ino_leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
346 ip->ino_leaf.base.localization = HAMMER_LOCALIZE_INODE;
347 ip->ino_leaf.base.obj_id = ip->obj_id;
348 ip->ino_leaf.base.key = 0;
349 ip->ino_leaf.base.create_tid = 0;
350 ip->ino_leaf.base.delete_tid = 0;
351 ip->ino_leaf.base.rec_type = HAMMER_RECTYPE_INODE;
352 ip->ino_leaf.base.obj_type = hammer_get_obj_type(vap->va_type);
353
354 ip->ino_data.obj_type = ip->ino_leaf.base.obj_type;
355 ip->ino_data.version = HAMMER_INODE_DATA_VERSION;
356 ip->ino_data.mode = vap->va_mode;
357 ip->ino_data.ctime = trans->time;
358 ip->ino_data.parent_obj_id = (dip) ? dip->ino_leaf.base.obj_id : 0;
359
360 switch(ip->ino_leaf.base.obj_type) {
361 case HAMMER_OBJTYPE_CDEV:
362 case HAMMER_OBJTYPE_BDEV:
363 ip->ino_data.rmajor = vap->va_rmajor;
364 ip->ino_data.rminor = vap->va_rminor;
365 break;
366 default:
367 break;
368 }
369
370 /*
371 * Calculate default uid/gid and overwrite with information from
372 * the vap.
373 */
374 xuid = hammer_to_unix_xid(&dip->ino_data.uid);
375 ip->ino_data.gid = dip->ino_data.gid;
376 xuid = vop_helper_create_uid(hmp->mp, dip->ino_data.mode, xuid, cred,
377 &vap->va_mode);
378 ip->ino_data.mode = vap->va_mode;
379
380 if (vap->va_vaflags & VA_UID_UUID_VALID)
381 ip->ino_data.uid = vap->va_uid_uuid;
382 else if (vap->va_uid != (uid_t)VNOVAL)
383 hammer_guid_to_uuid(&ip->ino_data.uid, xuid);
384 if (vap->va_vaflags & VA_GID_UUID_VALID)
385 ip->ino_data.gid = vap->va_gid_uuid;
386 else if (vap->va_gid != (gid_t)VNOVAL)
387 hammer_guid_to_uuid(&ip->ino_data.gid, vap->va_gid);
388
389 hammer_ref(&ip->lock);
390 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
391 hammer_unref(&ip->lock);
392 panic("hammer_create_inode: duplicate obj_id %llx", ip->obj_id);
393 }
394 *ipp = ip;
395 return(0);
396}
397
398/*
399 * Called by hammer_sync_inode().
400 */
401static int
402hammer_update_inode(hammer_cursor_t cursor, hammer_inode_t ip)
403{
404 hammer_transaction_t trans = cursor->trans;
405 hammer_record_t record;
406 int error;
407
408retry:
409 error = 0;
410
411 /*
412 * If the inode has a presence on-disk then locate it and mark
413 * it deleted, setting DELONDISK.
414 *
415 * The record may or may not be physically deleted, depending on
416 * the retention policy.
417 */
418 if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
419 HAMMER_INODE_ONDISK) {
420 hammer_normalize_cursor(cursor);
421 cursor->key_beg.localization = HAMMER_LOCALIZE_INODE;
422 cursor->key_beg.obj_id = ip->obj_id;
423 cursor->key_beg.key = 0;
424 cursor->key_beg.create_tid = 0;
425 cursor->key_beg.delete_tid = 0;
426 cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE;
427 cursor->key_beg.obj_type = 0;
428 cursor->asof = ip->obj_asof;
429 cursor->flags &= ~HAMMER_CURSOR_INITMASK;
430 cursor->flags |= HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_ASOF;
431 cursor->flags |= HAMMER_CURSOR_BACKEND;
432
433 error = hammer_btree_lookup(cursor);
434 if (hammer_debug_inode)
435 kprintf("IPDEL %p %08x %d", ip, ip->flags, error);
436 if (error) {
437 kprintf("error %d\n", error);
438 Debugger("hammer_update_inode");
439 }
440
441 if (error == 0) {
442 error = hammer_ip_delete_record(cursor, trans->tid);
443 if (hammer_debug_inode)
444 kprintf(" error %d\n", error);
445 if (error && error != EDEADLK) {
446 kprintf("error %d\n", error);
447 Debugger("hammer_update_inode2");
448 }
449 if (error == 0) {
450 ip->flags |= HAMMER_INODE_DELONDISK;
451 }
452 if (cursor->node)
453 hammer_cache_node(cursor->node, &ip->cache[0]);
454 }
455 if (error == EDEADLK) {
456 hammer_done_cursor(cursor);
457 error = hammer_init_cursor(trans, cursor,
458 &ip->cache[0], ip);
459 if (hammer_debug_inode)
460 kprintf("IPDED %p %d\n", ip, error);
461 if (error == 0)
462 goto retry;
463 }
464 }
465
466 /*
467 * Ok, write out the initial record or a new record (after deleting
468 * the old one), unless the DELETED flag is set. This routine will
469 * clear DELONDISK if it writes out a record.
470 *
471 * Update our inode statistics if this is the first application of
472 * the inode on-disk.
473 */
474 if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) {
475 /*
476 * Generate a record and write it to the media
477 */
478 record = hammer_alloc_mem_record(ip, 0);
479 record->type = HAMMER_MEM_RECORD_INODE;
480 record->flush_state = HAMMER_FST_FLUSH;
481 record->leaf = ip->sync_ino_leaf;
482 record->leaf.base.create_tid = trans->tid;
483 record->leaf.data_len = sizeof(ip->sync_ino_data);
484 record->data = (void *)&ip->sync_ino_data;
485 record->flags |= HAMMER_RECF_INTERLOCK_BE;
486 for (;;) {
487 error = hammer_ip_sync_record_cursor(cursor, record);
488 if (hammer_debug_inode)
489 kprintf("GENREC %p rec %08x %d\n",
490 ip, record->flags, error);
491 if (error != EDEADLK)
492 break;
493 hammer_done_cursor(cursor);
494 error = hammer_init_cursor(trans, cursor,
495 &ip->cache[0], ip);
496 if (hammer_debug_inode)
497 kprintf("GENREC reinit %d\n", error);
498 if (error)
499 break;
500 }
501 if (error) {
502 kprintf("error %d\n", error);
503 Debugger("hammer_update_inode3");
504 }
505
506 /*
507 * The record isn't managed by the inode's record tree,
508 * destroy it whether we succeed or fail.
509 */
510 record->flags &= ~HAMMER_RECF_INTERLOCK_BE;
511 record->flags |= HAMMER_RECF_DELETED_FE;
512 record->flush_state = HAMMER_FST_IDLE;
513 hammer_rel_mem_record(record);
514
515 /*
516 * Finish up.
517 */
518 if (error == 0) {
519 if (hammer_debug_inode)
520 kprintf("CLEANDELOND %p %08x\n", ip, ip->flags);
521 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY |
522 HAMMER_INODE_ITIMES);
523 ip->flags &= ~HAMMER_INODE_DELONDISK;
524
525 /*
526 * Root volume count of inodes
527 */
528 if ((ip->flags & HAMMER_INODE_ONDISK) == 0) {
529 hammer_modify_volume_field(trans,
530 trans->rootvol,
531 vol0_stat_inodes);
532 ++ip->hmp->rootvol->ondisk->vol0_stat_inodes;
533 hammer_modify_volume_done(trans->rootvol);
534 ip->flags |= HAMMER_INODE_ONDISK;
535 if (hammer_debug_inode)
536 kprintf("NOWONDISK %p\n", ip);
537 }
538 }
539 }
540
541 /*
542 * If the inode has been destroyed, clean out any left-over flags
543 * that may have been set by the frontend.
544 */
545 if (error == 0 && (ip->flags & HAMMER_INODE_DELETED)) {
546 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY |
547 HAMMER_INODE_ITIMES);
548 }
549 return(error);
550}
551
552/*
 553 * Update only the itimes fields. This is done non-historically. The
554 * record is updated in-place on the disk.
555 */
556static int
557hammer_update_itimes(hammer_cursor_t cursor, hammer_inode_t ip)
558{
559 hammer_transaction_t trans = cursor->trans;
560 struct hammer_btree_leaf_elm *leaf;
561 int error;
562
563retry:
564 error = 0;
565 if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
566 HAMMER_INODE_ONDISK) {
567 hammer_normalize_cursor(cursor);
568 cursor->key_beg.localization = HAMMER_LOCALIZE_INODE;
569 cursor->key_beg.obj_id = ip->obj_id;
570 cursor->key_beg.key = 0;
571 cursor->key_beg.create_tid = 0;
572 cursor->key_beg.delete_tid = 0;
573 cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE;
574 cursor->key_beg.obj_type = 0;
575 cursor->asof = ip->obj_asof;
576 cursor->flags &= ~HAMMER_CURSOR_INITMASK;
577 cursor->flags |= HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_ASOF;
578 cursor->flags |= HAMMER_CURSOR_BACKEND;
579
580 error = hammer_btree_lookup(cursor);
581 if (error) {
582 kprintf("error %d\n", error);
583 Debugger("hammer_update_itimes1");
584 }
585 if (error == 0) {
586 /*
587 * Do not generate UNDO records for atime updates.
588 */
589 leaf = cursor->leaf;
590 hammer_modify_node(trans, cursor->node,
591 &leaf->atime, sizeof(leaf->atime));
592 leaf->atime = ip->sync_ino_leaf.atime;
593 hammer_modify_node_done(cursor->node);
594 /*rec->ino_mtime = ip->sync_ino_rec.ino_mtime;*/
595 ip->sync_flags &= ~HAMMER_INODE_ITIMES;
596 /* XXX recalculate crc */
597 hammer_cache_node(cursor->node, &ip->cache[0]);
598 }
599 if (error == EDEADLK) {
600 hammer_done_cursor(cursor);
601 error = hammer_init_cursor(trans, cursor,
602 &ip->cache[0], ip);
603 if (error == 0)
604 goto retry;
605 }
606 }
607 return(error);
608}
609
610/*
611 * Release a reference on an inode, flush as requested.
612 *
613 * On the last reference we queue the inode to the flusher for its final
614 * disposition.
615 */
616void
617hammer_rel_inode(struct hammer_inode *ip, int flush)
618{
619 hammer_mount_t hmp = ip->hmp;
620
621 /*
622 * Handle disposition when dropping the last ref.
623 */
624 for (;;) {
625 if (ip->lock.refs == 1) {
626 /*
627 * Determine whether on-disk action is needed for
628 * the inode's final disposition.
629 */
630 KKASSERT(ip->vp == NULL);
631 hammer_inode_unloadable_check(ip, 0);
632 if (ip->flags & HAMMER_INODE_MODMASK) {
633 hammer_flush_inode(ip, 0);
634 } else if (ip->lock.refs == 1) {
635 hammer_unload_inode(ip);
636 break;
637 }
638 } else {
639 if (flush)
640 hammer_flush_inode(ip, 0);
641
642 /*
643 * The inode still has multiple refs, try to drop
644 * one ref.
645 */
646 KKASSERT(ip->lock.refs >= 1);
647 if (ip->lock.refs > 1) {
648 hammer_unref(&ip->lock);
649 break;
650 }
651 }
652 }
653
654 /*
655 * XXX bad hack until I add code to track inodes in SETUP. We
656 * can queue a lot of inodes to the syncer but if we don't wake
657 * it up the undo sets will be too large or too many unflushed
658 * records will build up and blow our malloc limit.
659 */
660 if (++hmp->reclaim_count > 256) {
661 hmp->reclaim_count = 0;
662 hammer_flusher_async(hmp);
663 }
664}
665
666/*
667 * Unload and destroy the specified inode. Must be called with one remaining
668 * reference. The reference is disposed of.
669 *
670 * This can only be called in the context of the flusher.
671 */
672static int
673hammer_unload_inode(struct hammer_inode *ip)
674{
675 KASSERT(ip->lock.refs == 1,
676 ("hammer_unload_inode: %d refs\n", ip->lock.refs));
677 KKASSERT(ip->vp == NULL);
678 KKASSERT(ip->flush_state == HAMMER_FST_IDLE);
679 KKASSERT(ip->cursor_ip_refs == 0);
680 KKASSERT(ip->lock.lockcount == 0);
681 KKASSERT((ip->flags & HAMMER_INODE_MODMASK) == 0);
682
683 KKASSERT(RB_EMPTY(&ip->rec_tree));
684 KKASSERT(TAILQ_EMPTY(&ip->target_list));
685 KKASSERT(TAILQ_EMPTY(&ip->bio_list));
686 KKASSERT(TAILQ_EMPTY(&ip->bio_alt_list));
687
688 RB_REMOVE(hammer_ino_rb_tree, &ip->hmp->rb_inos_root, ip);
689
690 hammer_uncache_node(&ip->cache[0]);
691 hammer_uncache_node(&ip->cache[1]);
692 if (ip->objid_cache)
693 hammer_clear_objid(ip);
694 --hammer_count_inodes;
695 kfree(ip, M_HAMMER);
696
697 return(0);
698}
699
700/*
701 * A transaction has modified an inode, requiring updates as specified by
702 * the passed flags.
703 *
704 * HAMMER_INODE_DDIRTY: Inode data has been updated
705 * HAMMER_INODE_XDIRTY: Dirty in-memory records
706 * HAMMER_INODE_BUFS: Dirty buffer cache buffers
707 * HAMMER_INODE_DELETED: Inode record/data must be deleted
708 * HAMMER_INODE_ITIMES: mtime/atime has been updated
709 */
710void
711hammer_modify_inode(hammer_transaction_t trans, hammer_inode_t ip, int flags)
712{
713 KKASSERT ((ip->flags & HAMMER_INODE_RO) == 0 ||
714 (flags & (HAMMER_INODE_DDIRTY |
715 HAMMER_INODE_XDIRTY | HAMMER_INODE_BUFS |
716 HAMMER_INODE_DELETED | HAMMER_INODE_ITIMES)) == 0);
717
718 ip->flags |= flags;
719}
720
721/*
722 * Request that an inode be flushed. This whole mess cannot block and may
723 * recurse. Once requested HAMMER will attempt to actively flush it until
724 * the flush can be done.
725 *
726 * The inode may already be flushing, or may be in a setup state. We can
727 * place the inode in a flushing state if it is currently idle and flag it
728 * to reflush if it is currently flushing.
729 */
730void
731hammer_flush_inode(hammer_inode_t ip, int flags)
732{
733 hammer_record_t depend;
734 int r, good;
735
736 /*
 737	 * Trivial 'nothing to flush' case. If the inode is in a SETUP
738 * state we have to put it back into an IDLE state so we can
739 * drop the extra ref.
740 */
741 if ((ip->flags & HAMMER_INODE_MODMASK) == 0) {
742 if (ip->flush_state == HAMMER_FST_SETUP) {
743 ip->flush_state = HAMMER_FST_IDLE;
744 hammer_rel_inode(ip, 0);
745 }
746 return;
747 }
748
749 /*
750 * Our flush action will depend on the current state.
751 */
752 switch(ip->flush_state) {
753 case HAMMER_FST_IDLE:
754 /*
755 * We have no dependancies and can flush immediately. Some
 756		 * of our children may not be flushable, so we have to re-test
757 * with that additional knowledge.
758 */
759 hammer_flush_inode_core(ip, flags);
760 break;
761 case HAMMER_FST_SETUP:
762 /*
763 * Recurse upwards through dependancies via target_list
764 * and start their flusher actions going if possible.
765 *
766 * 'good' is our connectivity. -1 means we have none and
767 * can't flush, 0 means there weren't any dependancies, and
768 * 1 means we have good connectivity.
769 */
770 good = 0;
771 TAILQ_FOREACH(depend, &ip->target_list, target_entry) {
772 r = hammer_setup_parent_inodes(depend);
773 if (r < 0 && good == 0)
774 good = -1;
775 if (r > 0)
776 good = 1;
777 }
778
779 /*
780 * We can continue if good >= 0. Determine how many records
781 * under our inode can be flushed (and mark them).
782 */
783 if (good >= 0) {
784 hammer_flush_inode_core(ip, flags);
785 } else {
786 ip->flags |= HAMMER_INODE_REFLUSH;
787 if (flags & HAMMER_FLUSH_SIGNAL) {
788 ip->flags |= HAMMER_INODE_RESIGNAL;
789 hammer_flusher_async(ip->hmp);
790 }
791 }
792 break;
793 default:
794 /*
795 * We are already flushing, flag the inode to reflush
796 * if needed after it completes its current flush.
797 */
798 if ((ip->flags & HAMMER_INODE_REFLUSH) == 0)
799 ip->flags |= HAMMER_INODE_REFLUSH;
800 if (flags & HAMMER_FLUSH_SIGNAL) {
801 ip->flags |= HAMMER_INODE_RESIGNAL;
802 hammer_flusher_async(ip->hmp);
803 }
804 break;
805 }
806}
807
808/*
809 * We are asked to recurse upwards and convert the record from SETUP
810 * to FLUSH if possible. record->ip is a parent of the caller's inode,
811 * and record->target_ip is the caller's inode.
812 *
813 * Return 1 if the record gives us connectivity
814 *
815 * Return 0 if the record is not relevant
816 *
817 * Return -1 if we can't resolve the dependancy and there is no connectivity.
818 */
819static int
820hammer_setup_parent_inodes(hammer_record_t record)
821{
822 hammer_mount_t hmp = record->ip->hmp;
823 hammer_record_t depend;
824 hammer_inode_t ip;
825 int r, good;
826
827 KKASSERT(record->flush_state != HAMMER_FST_IDLE);
828 ip = record->ip;
829
830 /*
831 * If the record is already flushing, is it in our flush group?
832 *
833 * If it is in our flush group but it is a general record or a
834 * delete-on-disk, it does not improve our connectivity (return 0),
835 * and if the target inode is not trying to destroy itself we can't
836 * allow the operation yet anyway (the second return -1).
837 */
838 if (record->flush_state == HAMMER_FST_FLUSH) {
839 if (record->flush_group != hmp->flusher_next) {
840 ip->flags |= HAMMER_INODE_REFLUSH;
841 return(-1);
842 }
843 if (record->type == HAMMER_MEM_RECORD_ADD)
844 return(1);
845 /* GENERAL or DEL */
846 return(0);
847 }
848
849 /*
850 * It must be a setup record. Try to resolve the setup dependancies
851 * by recursing upwards so we can place ip on the flush list.
852 */
853 KKASSERT(record->flush_state == HAMMER_FST_SETUP);
854
855 good = 0;
856 TAILQ_FOREACH(depend, &ip->target_list, target_entry) {
857 r = hammer_setup_parent_inodes(depend);
858 if (r < 0 && good == 0)
859 good = -1;
860 if (r > 0)
861 good = 1;
862 }
863
864 /*
865 * We can't flush ip because it has no connectivity (XXX also check
866 * nlinks for pre-existing connectivity!). Flag it so any resolution
867 * recurses back down.
868 */
869 if (good < 0) {
870 ip->flags |= HAMMER_INODE_REFLUSH;
871 return(good);
872 }
873
874 /*
875 * We are go, place the parent inode in a flushing state so we can
876 * place its record in a flushing state. Note that the parent
877 * may already be flushing. The record must be in the same flush
878 * group as the parent.
879 */
880 if (ip->flush_state != HAMMER_FST_FLUSH)
881 hammer_flush_inode_core(ip, HAMMER_FLUSH_RECURSION);
882 KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
883 KKASSERT(record->flush_state == HAMMER_FST_SETUP);
884
885#if 0
886 if (record->type == HAMMER_MEM_RECORD_DEL &&
887 (record->target_ip->flags & (HAMMER_INODE_DELETED|HAMMER_INODE_DELONDISK)) == 0) {
888 /*
889 * Regardless of flushing state we cannot sync this path if the
890 * record represents a delete-on-disk but the target inode
891 * is not ready to sync its own deletion.
892 *
893 * XXX need to count effective nlinks to determine whether
894 * the flush is ok, otherwise removing a hardlink will
895 * just leave the DEL record to rot.
896 */
897 record->target_ip->flags |= HAMMER_INODE_REFLUSH;
898 return(-1);
899 } else
900#endif
901 if (ip->flush_group == ip->hmp->flusher_next) {
902 /*
903 * This is the record we wanted to synchronize.
904 */
905 record->flush_state = HAMMER_FST_FLUSH;
906 record->flush_group = ip->flush_group;
907 hammer_ref(&record->lock);
908 if (record->type == HAMMER_MEM_RECORD_ADD)
909 return(1);
910
911 /*
912 * A general or delete-on-disk record does not contribute
913 * to our visibility. We can still flush it, however.
914 */
915 return(0);
916 } else {
917 /*
918 * We couldn't resolve the dependancies, request that the
919 * inode be flushed when the dependancies can be resolved.
920 */
921 ip->flags |= HAMMER_INODE_REFLUSH;
922 return(-1);
923 }
924}
925
926/*
927 * This is the core routine placing an inode into the FST_FLUSH state.
928 */
929static void
930hammer_flush_inode_core(hammer_inode_t ip, int flags)
931{
932 int go_count;
933
934 /*
935 * Set flush state and prevent the flusher from cycling into
936 * the next flush group. Do not place the ip on the list yet.
937 * Inodes not in the idle state get an extra reference.
938 */
939 KKASSERT(ip->flush_state != HAMMER_FST_FLUSH);
940 if (ip->flush_state == HAMMER_FST_IDLE)
941 hammer_ref(&ip->lock);
942 ip->flush_state = HAMMER_FST_FLUSH;
943 ip->flush_group = ip->hmp->flusher_next;
944 ++ip->hmp->flusher_lock;
945
946 /*
947 * We need to be able to vfsync/truncate from the backend.
948 */
949 KKASSERT((ip->flags & HAMMER_INODE_VHELD) == 0);
950 if (ip->vp && (ip->vp->v_flag & VINACTIVE) == 0) {
951 ip->flags |= HAMMER_INODE_VHELD;
952 vref(ip->vp);
953 }
954
955 /*
956 * Figure out how many in-memory records we can actually flush
957 * (not including inode meta-data, buffers, etc).
958 */
959 if (flags & HAMMER_FLUSH_RECURSION) {
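		/* recursive flush: skip the record scan, assume at least one flushable record */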
960 go_count = 1;
961 } else {
962 go_count = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
963 hammer_setup_child_callback, NULL);
964 }
965
966 /*
967 * This is a more involved test that includes go_count. If we
968 * can't flush, flag the inode and return. If go_count is 0 we
969 * were are unable to flush any records in our rec_tree and
 970	 * were unable to flush any records in our rec_tree and
971 */
972 if (go_count == 0) {
973 if ((ip->flags & HAMMER_INODE_MODMASK_NOXDIRTY) == 0) {
974 ip->flags |= HAMMER_INODE_REFLUSH;
975 ip->flush_state = HAMMER_FST_SETUP;
976 if (ip->flags & HAMMER_INODE_VHELD) {
977 ip->flags &= ~HAMMER_INODE_VHELD;
978 vrele(ip->vp);
979 }
980 if (flags & HAMMER_FLUSH_SIGNAL) {
981 ip->flags |= HAMMER_INODE_RESIGNAL;
982 hammer_flusher_async(ip->hmp);
983 }
984 if (--ip->hmp->flusher_lock == 0)
985 wakeup(&ip->hmp->flusher_lock);
986 return;
987 }
988 }
989
990 /*
991 * Snapshot the state of the inode for the backend flusher.
992 *
993 * The truncation must be retained in the frontend until after
994 * we've actually performed the record deletion.
995 *
996 * NOTE: The DELETING flag is a mod flag, but it is also sticky,
997 * and stays in ip->flags. Once set, it stays set until the
998 * inode is destroyed.
999 */
1000 ip->sync_flags = (ip->flags & HAMMER_INODE_MODMASK);
1001 ip->sync_trunc_off = ip->trunc_off;
1002 ip->sync_ino_leaf = ip->ino_leaf;
1003 ip->sync_ino_data = ip->ino_data;
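	/*
	 * Note: operator precedence yields (~MODMASK | TRUNCATED), so every
	 * mod flag is cleared except TRUNCATED, which is retained as
	 * described above.
	 */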
1004 ip->flags &= ~HAMMER_INODE_MODMASK | HAMMER_INODE_TRUNCATED;
1005
1006 /*
1007 * The flusher list inherits our inode and reference.
1008 */
1009 TAILQ_INSERT_TAIL(&ip->hmp->flush_list, ip, flush_entry);
1010 if (--ip->hmp->flusher_lock == 0)
1011 wakeup(&ip->hmp->flusher_lock);
1012
1013 if (flags & HAMMER_FLUSH_SIGNAL)
1014 hammer_flusher_async(ip->hmp);
1015}
1016
1017/*
1018 * Callback for scan of ip->rec_tree. Try to include each record in our
1019 * flush. ip->flush_group has been set but the inode has not yet been
1020 * moved into a flushing state.
1021 *
1022 * If we get stuck on a record we have to set HAMMER_INODE_REFLUSH on
1023 * both inodes.
1024 *
1025 * We return 1 for any record placed or found in FST_FLUSH, which prevents
1026 * the caller from shortcutting the flush.
1027 */
1028static int
1029hammer_setup_child_callback(hammer_record_t rec, void *data)
1030{
1031 hammer_inode_t target_ip;
1032 hammer_inode_t ip;
1033 int r;
1034
1035 /*
1036 * If the record has been deleted by the backend (it's being held
1037 * by the frontend in a race), just ignore it.
1038 */
1039 if (rec->flags & HAMMER_RECF_DELETED_BE)
1040 return(0);
1041
1042 /*
1043 * If the record is in an idle state it has no dependancies and
1044 * can be flushed.
1045 */
1046 ip = rec->ip;
1047 r = 0;
1048
1049 switch(rec->flush_state) {
1050 case HAMMER_FST_IDLE:
1051 /*
1052 * Record has no setup dependancy, we can flush it.
1053 */
1054 KKASSERT(rec->target_ip == NULL);
1055 rec->flush_state = HAMMER_FST_FLUSH;
1056 rec->flush_group = ip->flush_group;
1057 hammer_ref(&rec->lock);
1058 r = 1;
1059 break;
1060 case HAMMER_FST_SETUP:
1061 /*
1062 * Record has a setup dependancy. Try to include the
1063 * target ip in the flush.
1064 *
1065 * We have to be careful here, if we do not do the right
1066 * thing we can lose track of dirty inodes and the system
1067 * will lockup trying to allocate buffers.
1068 */
1069 target_ip = rec->target_ip;
1070 KKASSERT(target_ip != NULL);
1071 KKASSERT(target_ip->flush_state != HAMMER_FST_IDLE);
1072 if (target_ip->flush_state == HAMMER_FST_FLUSH) {
1073 /*
1074 * If the target IP is already flushing in our group
1075 * we are golden, otherwise make sure the target
1076 * reflushes.
1077 */
1078 if (target_ip->flush_group == ip->flush_group) {
1079 rec->flush_state = HAMMER_FST_FLUSH;
1080 rec->flush_group = ip->flush_group;
1081 hammer_ref(&rec->lock);
1082 r = 1;
1083 } else {
1084 target_ip->flags |= HAMMER_INODE_REFLUSH;
1085 }
1086 } else if (rec->type == HAMMER_MEM_RECORD_ADD) {
1087 /*
 1088			 * If the target IP is not flushing we can force
 1089			 * it to flush. Even if it is unable to write out
 1090			 * any of its own records, we have at least one in
 1091			 * hand that we CAN deal with.
1092 */
1093 rec->flush_state = HAMMER_FST_FLUSH;
1094 rec->flush_group = ip->flush_group;
1095 hammer_ref(&rec->lock);
1096 hammer_flush_inode_core(target_ip,
1097 HAMMER_FLUSH_RECURSION);
1098 r = 1;
1099 } else {
1100 /*
1101 * General or delete-on-disk record.
1102 *
1103 * XXX this needs help. If a delete-on-disk we could
1104 * disconnect the target. If the target has its own
1105 * dependancies they really need to be flushed.
1106 *
1107 * XXX
1108 */
1109 rec->flush_state = HAMMER_FST_FLUSH;
1110 rec->flush_group = ip->flush_group;
1111 hammer_ref(&rec->lock);
1112 hammer_flush_inode_core(target_ip,
1113 HAMMER_FLUSH_RECURSION);
1114 r = 1;
1115 }
1116 break;
1117 case HAMMER_FST_FLUSH:
1118 /*
1119 * Record already associated with a flush group. It had
1120 * better be ours.
1121 */
1122 KKASSERT(rec->flush_group == ip->flush_group);
1123 r = 1;
1124 break;
1125 }
1126 return(r);
1127}
1128
1129/*
1130 * Wait for a previously queued flush to complete
1131 */
1132void
1133hammer_wait_inode(hammer_inode_t ip)
1134{
1135 while (ip->flush_state != HAMMER_FST_IDLE) {
1136 ip->flags |= HAMMER_INODE_FLUSHW;
1137 tsleep(&ip->flags, 0, "hmrwin", 0);
1138 }
1139}
1140
1141/*
1142 * Called by the backend code when a flush has been completed.
1143 * The inode has already been removed from the flush list.
1144 *
1145 * A pipelined flush can occur, in which case we must re-enter the
1146 * inode on the list and re-copy its fields.
1147 */
1148void
1149hammer_flush_inode_done(hammer_inode_t ip)
1150{
1151 struct bio *bio;
1152 int dorel = 0;
1153
1154 KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
1155
1156 /*
1157 * Allow BIOs to queue to the inode's primary bioq again.
1158 */
1159 ip->flags &= ~HAMMER_INODE_WRITE_ALT;
1160
1161 /*
1162 * Merge left-over flags back into the frontend and fix the state.
1163 */
1164 ip->flags |= ip->sync_flags;
1165
1166 /*
1167 * The backend may have adjusted nlinks, so if the adjusted nlinks
 1168	 * does not match the frontend, set the frontend's DDIRTY flag again.
1169 */
1170 if (ip->ino_data.nlinks != ip->sync_ino_data.nlinks)
1171 ip->flags |= HAMMER_INODE_DDIRTY;
1172
1173 /*
1174 * Reflush any BIOs that wound up in the alt list. Our inode will
1175 * also wind up at the end of the flusher's list.
1176 */
1177 while ((bio = TAILQ_FIRST(&ip->bio_alt_list)) != NULL) {
1178 TAILQ_REMOVE(&ip->bio_alt_list, bio, bio_act);
1179 TAILQ_INSERT_TAIL(&ip->bio_list, bio, bio_act);
1180 }
1181 /*
1182 * Fix up the dirty buffer status.
1183 */
1184 if (TAILQ_FIRST(&ip->bio_list) ||
1185 (ip->vp && RB_ROOT(&ip->vp->v_rbdirty_tree))) {
1186 ip->flags |= HAMMER_INODE_BUFS;
1187 }
1188
1189 /*
1190 * Re-set the XDIRTY flag if some of the inode's in-memory records
1191 * could not be flushed.
1192 */
1193 if (RB_ROOT(&ip->rec_tree))
1194 ip->flags |= HAMMER_INODE_XDIRTY;
1195
1196 /*
1197 * Do not lose track of inodes which no longer have vnode
 1198	 * associations, otherwise they may never get flushed again.
1199 */
1200 if ((ip->flags & HAMMER_INODE_MODMASK) && ip->vp == NULL)
1201 ip->flags |= HAMMER_INODE_REFLUSH;
1202
1203 /*
1204 * Adjust flush_state. The target state (idle or setup) shouldn't
1205 * be terribly important since we will reflush if we really need
1206 * to do anything. XXX
1207 */
1208 if (TAILQ_EMPTY(&ip->target_list) && RB_EMPTY(&ip->rec_tree)) {
1209 ip->flush_state = HAMMER_FST_IDLE;
1210 dorel = 1;
1211 } else {
1212 ip->flush_state = HAMMER_FST_SETUP;
1213 }
1214
1215 /*
1216 * Clean up the vnode ref
1217 */
1218 if (ip->flags & HAMMER_INODE_VHELD) {
1219 ip->flags &= ~HAMMER_INODE_VHELD;
1220 vrele(ip->vp);
1221 }
1222
1223 /*
1224 * If the frontend made more changes and requested another flush,
1225 * then try to get it running.
1226 */
1227 if (ip->flags & HAMMER_INODE_REFLUSH) {
1228 ip->flags &= ~HAMMER_INODE_REFLUSH;
1229 if (ip->flags & HAMMER_INODE_RESIGNAL) {
1230 ip->flags &= ~HAMMER_INODE_RESIGNAL;
1231 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1232 } else {
1233 hammer_flush_inode(ip, 0);
1234 }
1235 }
1236
1237 /*
1238 * Finally, if the frontend is waiting for a flush to complete,
1239 * wake it up.
1240 */
1241 if (ip->flush_state != HAMMER_FST_FLUSH) {
1242 if (ip->flags & HAMMER_INODE_FLUSHW) {
1243 ip->flags &= ~HAMMER_INODE_FLUSHW;
1244 wakeup(&ip->flags);
1245 }
1246 }
1247 if (dorel)
1248 hammer_rel_inode(ip, 0);
1249}
1250
1251/*
1252 * Called from hammer_sync_inode() to synchronize in-memory records
1253 * to the media.
1254 */
1255static int
1256hammer_sync_record_callback(hammer_record_t record, void *data)
1257{
1258 hammer_cursor_t cursor = data;
1259 hammer_transaction_t trans = cursor->trans;
1260 int error;
1261
1262 /*
1263 * Skip records that do not belong to the current flush.
1264 */
1265 if (record->flush_state != HAMMER_FST_FLUSH)
1266 return(0);
1267 KKASSERT((record->flags & HAMMER_RECF_DELETED_BE) == 0);
1268#if 1
1269 if (record->flush_group != record->ip->flush_group) {
 1270		kprintf("sync_record %p ip %p bad flush group %d %d\n", record, record->ip, record->flush_group, record->ip->flush_group);
1271 Debugger("blah2");
1272 return(0);
1273 }
1274#endif
1275 KKASSERT(record->flush_group == record->ip->flush_group);
1276
1277 /*
1278 * Interlock the record using the BE flag. Once BE is set the
1279 * frontend cannot change the state of FE.
1280 *
1281 * NOTE: If FE is set prior to us setting BE we still sync the
1282 * record out, but the flush completion code converts it to
1283 * a delete-on-disk record instead of destroying it.
1284 */
1285 KKASSERT((record->flags & HAMMER_RECF_INTERLOCK_BE) == 0);
1286 record->flags |= HAMMER_RECF_INTERLOCK_BE;
1287
1288 /*
 1289	 * If the whole inode is being deleted, all on-disk records will
1290 * be deleted very soon, we can't sync any new records to disk
1291 * because they will be deleted in the same transaction they were
1292 * created in (delete_tid == create_tid), which will assert.
1293 *
1294 * XXX There may be a case with RECORD_ADD with DELETED_FE set
1295 * that we currently panic on.
1296 */
1297 if (record->ip->sync_flags & HAMMER_INODE_DELETING) {
1298 switch(record->type) {
1299 case HAMMER_MEM_RECORD_GENERAL:
1300 record->flags |= HAMMER_RECF_DELETED_FE;
1301 record->flags |= HAMMER_RECF_DELETED_BE;
1302 error = 0;
1303 goto done;
1304 case HAMMER_MEM_RECORD_ADD:
1305 panic("hammer_sync_record_callback: illegal add "
1306 "during inode deletion record %p", record);
1307 break; /* NOT REACHED */
1308 case HAMMER_MEM_RECORD_INODE:
1309 panic("hammer_sync_record_callback: attempt to "
1310 "sync inode record %p?", record);
1311 break; /* NOT REACHED */
1312 case HAMMER_MEM_RECORD_DEL:
1313 /*
1314 * Follow through and issue the on-disk deletion
1315 */
1316 break;
1317 }
1318 }
1319
1320 /*
1321 * If DELETED_FE is set we may have already sent dependant pieces
1322 * to the disk and we must flush the record as if it hadn't been
1323 * deleted. This creates a bit of a mess because we have to
1324 * have ip_sync_record convert the record to MEM_RECORD_DEL before
1325 * it inserts the B-Tree record. Otherwise the media sync might
1326 * be visible to the frontend.
1327 */
1328 if (record->flags & HAMMER_RECF_DELETED_FE) {
1329 if (record->type == HAMMER_MEM_RECORD_ADD) {
1330 record->flags |= HAMMER_RECF_CONVERT_DELETE;
1331 } else {
1332 KKASSERT(record->type != HAMMER_MEM_RECORD_DEL);
1333 return(0);
1334 }
1335 }
1336
1337 /*
1338 * Assign the create_tid for new records. Deletions already
1339 * have the record's entire key properly set up.
1340 */
1341 if (record->type != HAMMER_MEM_RECORD_DEL)
1342 record->leaf.base.create_tid = trans->tid;
1343 for (;;) {
1344 error = hammer_ip_sync_record_cursor(cursor, record);
1345 if (error != EDEADLK)
1346 break;
1347 hammer_done_cursor(cursor);
1348 error = hammer_init_cursor(trans, cursor, &record->ip->cache[0],
1349 record->ip);
1350 if (error)
1351 break;
1352 }
1353 record->flags &= ~HAMMER_RECF_CONVERT_DELETE;
1354
1355 if (error) {
1356 error = -error;
1357 if (error != -ENOSPC) {
1358 kprintf("hammer_sync_record_callback: sync failed rec "
1359 "%p, error %d\n", record, error);
1360 Debugger("sync failed rec");
1361 }
1362 }
1363done:
1364 hammer_flush_record_done(record, error);
1365 return(error);
1366}
1367
1368/*
1369 * XXX error handling
1370 */
1371int
1372hammer_sync_inode(hammer_inode_t ip)
1373{
1374 struct hammer_transaction trans;
1375 struct hammer_cursor cursor;
1376 struct bio *bio;
1377 hammer_record_t depend;
1378 hammer_record_t next;
1379 int error, tmp_error;
1380 u_int64_t nlinks;
1381
1382 if ((ip->sync_flags & HAMMER_INODE_MODMASK) == 0)
1383 return(0);
1384
1385 hammer_start_transaction_fls(&trans, ip->hmp);
1386 error = hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);
1387 if (error)
1388 goto done;
1389
1390 /*
1391 * Any directory records referencing this inode which are not in
1392 * our current flush group must adjust our nlink count for the
1393 * purposes of synchronization to disk.
1394 *
1395 * Records which are in our flush group can be unlinked from our
1396 * inode now, potentially allowing the inode to be physically
1397 * deleted.
1398 */
1399 nlinks = ip->ino_data.nlinks;
1400 next = TAILQ_FIRST(&ip->target_list);
1401 while ((depend = next) != NULL) {
1402 next = TAILQ_NEXT(depend, target_entry);
1403 if (depend->flush_state == HAMMER_FST_FLUSH &&
1404 depend->flush_group == ip->hmp->flusher_act) {
1405 /*
1406 * If this is an ADD that was deleted by the frontend
1407 * the frontend nlinks count will have already been
1408 * decremented, but the backend is going to sync its
1409 * directory entry and must account for it. The
1410 * record will be converted to a delete-on-disk when
1411 * it gets synced.
1412 *
1413 * If the ADD was not deleted by the frontend we
1414 * can remove the dependancy from our target_list.
1415 */
1416 if (depend->flags & HAMMER_RECF_DELETED_FE) {
1417 ++nlinks;
1418 } else {
1419 TAILQ_REMOVE(&ip->target_list, depend,
1420 target_entry);
1421 depend->target_ip = NULL;
1422 }
1423 } else if ((depend->flags & HAMMER_RECF_DELETED_FE) == 0) {
1424 /*
1425 * Not part of our flush group
1426 */
1427 KKASSERT((depend->flags & HAMMER_RECF_DELETED_BE) == 0);
1428 switch(depend->type) {
1429 case HAMMER_MEM_RECORD_ADD:
1430 --nlinks;
1431 break;
1432 case HAMMER_MEM_RECORD_DEL:
1433 ++nlinks;
1434 break;
1435 default:
1436 break;
1437 }
1438 }
1439 }
1440
1441 /*
1442 * Set dirty if we had to modify the link count.
1443 */
1444 if (ip->sync_ino_data.nlinks != nlinks) {
1445 KKASSERT((int64_t)nlinks >= 0);
1446 ip->sync_ino_data.nlinks = nlinks;
1447 ip->sync_flags |= HAMMER_INODE_DDIRTY;
1448 }
1449
1450 /*
1451 * Queue up as many dirty buffers as we can then set a flag to
1452 * cause any further BIOs to go to the alternative queue.
1453 */
1454 if (ip->flags & HAMMER_INODE_VHELD)
1455 error = vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL);
1456 ip->flags |= HAMMER_INODE_WRITE_ALT;
1457
1458 /*
1459 * The buffer cache may contain dirty buffers beyond the inode
1460 * state we copied from the frontend to the backend. Because
1461 * we are syncing our buffer cache on the backend, resync
1462 * the truncation point and the file size so we don't wipe out
1463 * any data.
1464 *
1465 * Syncing the buffer cache on the frontend has serious problems
1466 * because it prevents us from passively queueing dirty inodes
1467 * to the backend (the BIO's could stall indefinitely).
1468 */
1469 if (ip->flags & HAMMER_INODE_TRUNCATED) {
1470 ip->sync_trunc_off = ip->trunc_off;
1471 ip->sync_flags |= HAMMER_INODE_TRUNCATED;
1472 }
1473 if (ip->sync_ino_data.size != ip->ino_data.size) {
1474 ip->sync_ino_data.size = ip->ino_data.size;
1475 ip->sync_flags |= HAMMER_INODE_DDIRTY;
1476 }
1477
1478 /*
 1479	 * If there is a truncation queued, destroy any data past the (aligned)
1480 * truncation point. Userland will have dealt with the buffer
1481 * containing the truncation point for us.
1482 *
1483 * We don't flush pending frontend data buffers until after we've
 1484	 * dealt with the truncation.
1485 *
1486 * Don't bother if the inode is or has been deleted.
1487 */
1488 if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
1489 /*
1490 * Interlock trunc_off. The VOP front-end may continue to
1491 * make adjustments to it while we are blocked.
1492 */
1493 off_t trunc_off;
1494 off_t aligned_trunc_off;
1495
1496 trunc_off = ip->sync_trunc_off;
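		/* round the truncation offset up to a HAMMER buffer boundary */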
1497 aligned_trunc_off = (trunc_off + HAMMER_BUFMASK) &
1498 ~HAMMER_BUFMASK64;
1499
1500 /*
1501 * Delete any whole blocks on-media. The front-end has
1502 * already cleaned out any partial block and made it
1503 * pending. The front-end may have updated trunc_off
1504 * while we were blocked so do not just unconditionally
1505 * set it to the maximum offset.
1506 */
1507 error = hammer_ip_delete_range(&cursor, ip,
1508 aligned_trunc_off,
1509 0x7FFFFFFFFFFFFFFFLL);
1510 if (error)
1511 Debugger("hammer_ip_delete_range errored");
1512 ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
1513 if (ip->trunc_off >= trunc_off) {
1514 ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
1515 ip->flags &= ~HAMMER_INODE_TRUNCATED;
1516 }
1517 } else {
1518 error = 0;
1519 }
1520
1521 /*
1522 * Now sync related records. These will typically be directory
1523 * entries or delete-on-disk records.
1524 *
1525 * Not all records will be flushed, but clear XDIRTY anyway. We
1526 * will set it again in the frontend hammer_flush_inode_done()
1527 * if records remain.
1528 */
1529 if (error == 0) {
1530 tmp_error = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
1531 hammer_sync_record_callback, &cursor);
1532 if (tmp_error < 0)
 1533			tmp_error = -tmp_error;
1534 if (tmp_error)
1535 error = tmp_error;
1536 if (RB_EMPTY(&ip->rec_tree))
1537 ip->sync_flags &= ~HAMMER_INODE_XDIRTY;
1538 }
1539
1540 /*
1541 * If we are deleting the inode the frontend had better not have
1542 * any active references on elements making up the inode.
1543 */
1544 if (error == 0 && ip->sync_ino_data.nlinks == 0 &&
1545 RB_EMPTY(&ip->rec_tree) &&
1546 (ip->sync_flags & HAMMER_INODE_DELETING) &&
1547 (ip->flags & HAMMER_INODE_DELETED) == 0) {
1548 int count1 = 0;
1549
1550 hkprintf("Y");
1551 ip->flags |= HAMMER_INODE_DELETED;
1552 error = hammer_ip_delete_range_all(&cursor, ip, &count1);
1553 if (error == 0) {
1554 ip->sync_flags &= ~HAMMER_INODE_DELETING;
1555 ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
1556 KKASSERT(RB_EMPTY(&ip->rec_tree));
1557
1558 /*
1559 * Set delete_tid in both the frontend and backend
1560 * copy of the inode record. The DELETED flag handles
 1561			 * this; do not set DDIRTY.
1562 */
1563 ip->ino_leaf.base.delete_tid = trans.tid;
1564 ip->sync_ino_leaf.base.delete_tid = trans.tid;
1565
1566 /*
1567 * Adjust the inode count in the volume header
1568 */
1569 if (ip->flags & HAMMER_INODE_ONDISK) {
1570 hammer_modify_volume_field(&trans,
1571 trans.rootvol,
1572 vol0_stat_inodes);
1573 --ip->hmp->rootvol->ondisk->vol0_stat_inodes;
1574 hammer_modify_volume_done(trans.rootvol);
1575 }
1576 } else {
1577 ip->flags &= ~HAMMER_INODE_DELETED;
1578 Debugger("hammer_ip_delete_range_all errored");
1579 }
1580 }
1581
1582 /*
1583 * Flush any queued BIOs. These will just biodone() the IO's if
1584 * the inode has been deleted.
1585 */
1586 while ((bio = TAILQ_FIRST(&ip->bio_list)) != NULL) {
1587 TAILQ_REMOVE(&ip->bio_list, bio, bio_act);
1588 tmp_error = hammer_dowrite(&cursor, ip, bio);
1589 if (tmp_error)
1590 error = tmp_error;
1591 }
1592 ip->sync_flags &= ~HAMMER_INODE_BUFS;
1593
1594 if (error)
1595 Debugger("RB_SCAN errored");
1596
1597 /*
1598 * Now update the inode's on-disk inode-data and/or on-disk record.
1599 * DELETED and ONDISK are managed only in ip->flags.
1600 */
1601 switch(ip->flags & (HAMMER_INODE_DELETED | HAMMER_INODE_ONDISK)) {
1602 case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK:
1603 /*
1604 * If deleted and on-disk, don't set any additional flags.
 1605		 * The delete flag takes care of things.
1606 *
1607 * Clear flags which may have been set by the frontend.
1608 */
1609 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY|
1610 HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
1611 HAMMER_INODE_DELETING);
1612 break;
1613 case HAMMER_INODE_DELETED:
1614 /*
1615 * Take care of the case where a deleted inode was never
1616 * flushed to the disk in the first place.
1617 *
1618 * Clear flags which may have been set by the frontend.
1619 */
1620 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY|
1621 HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
1622 HAMMER_INODE_DELETING);
1623 while (RB_ROOT(&ip->rec_tree)) {
1624 hammer_record_t record = RB_ROOT(&ip->rec_tree);
1625 hammer_ref(&record->lock);
1626 KKASSERT(record->lock.refs == 1);
1627 record->flags |= HAMMER_RECF_DELETED_FE;
1628 record->flags |= HAMMER_RECF_DELETED_BE;
1629 hammer_rel_mem_record(record);
1630 }
1631 break;
1632 case HAMMER_INODE_ONDISK:
1633 /*
1634 * If already on-disk, do not set any additional flags.
1635 */
1636 break;
1637 default:
1638 /*
 1639		 * If not on-disk and not deleted, set the DDIRTY flag to
 1640		 * force an initial record to be written. Also set
1641 * the create_tid for the inode.
1642 *
1643 * Set create_tid in both the frontend and backend
1644 * copy of the inode record.
1645 */
1646 ip->ino_leaf.base.create_tid = trans.tid;
1647 ip->sync_ino_leaf.base.create_tid = trans.tid;
1648 ip->sync_flags |= HAMMER_INODE_DDIRTY;
1649 break;
1650 }
1651
1652 /*
 1653	 * If DDIRTY is set, write out a new record. If the inode
1654 * is already on-disk the old record is marked as deleted.
1655 *
1656 * If DELETED is set hammer_update_inode() will delete the existing
1657 * record without writing out a new one.
1658 *
1659 * If *ONLY* the ITIMES flag is set we can update the record in-place.
1660 */
1661 if (ip->flags & HAMMER_INODE_DELETED) {
1662 error = hammer_update_inode(&cursor, ip);
1663 } else
1664 if ((ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) ==
1665 HAMMER_INODE_ITIMES) {
1666 error = hammer_update_itimes(&cursor, ip);
1667 } else
1668 if (ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) {
1669 error = hammer_update_inode(&cursor, ip);
1670 }
1671 if (error)
1672 Debugger("hammer_update_itimes/inode errored");
1673done:
1674 /*
1675 * Save the TID we used to sync the inode with to make sure we
1676 * do not improperly reuse it.
1677 */
1678 hammer_done_cursor(&cursor);
1679 hammer_done_transaction(&trans);
1680 return(error);
1681}
1682
1683/*
1684 * This routine is called when the OS is no longer actively referencing
1685 * the inode (but might still be keeping it cached), or when releasing
1686 * the last reference to an inode.
1687 *
1688 * At this point if the inode's nlinks count is zero we want to destroy
1689 * it, which may mean destroying it on-media too.
1690 */
1691void
1692hammer_inode_unloadable_check(hammer_inode_t ip, int getvp)
1693{
1694 struct vnode *vp;
1695
1696 /*
1697 * Set the DELETING flag when the link count drops to 0 and the
1698 * OS no longer has any opens on the inode.
1699 *
1700 * The backend will clear DELETING (a mod flag) and set DELETED
1701 * (a state flag) when it is actually able to perform the
1702 * operation.
1703 */
1704 if (ip->ino_data.nlinks == 0 &&
1705 (ip->flags & (HAMMER_INODE_DELETING|HAMMER_INODE_DELETED)) == 0) {
1706 ip->flags |= HAMMER_INODE_DELETING;
1707 ip->flags |= HAMMER_INODE_TRUNCATED;
1708 ip->trunc_off = 0;
1709 vp = NULL;
1710 if (getvp) {
1711 if (hammer_get_vnode(ip, &vp) != 0)
1712 return;
1713 }
1714 if (ip->vp) {
1715 vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
1716 vnode_pager_setsize(ip->vp, 0);
1717 }
1718 if (getvp) {
1719 vput(vp);
1720 }
1721 }
1722}
1723
1724/*
 1725 * Re-test an inode when a dependancy has gone away to see if we
1726 * can chain flush it.
1727 */
1728void
1729hammer_test_inode(hammer_inode_t ip)
1730{
1731 if (ip->flags & HAMMER_INODE_REFLUSH) {
1732 ip->flags &= ~HAMMER_INODE_REFLUSH;
1733 hammer_ref(&ip->lock);
1734 if (ip->flags & HAMMER_INODE_RESIGNAL) {
1735 ip->flags &= ~HAMMER_INODE_RESIGNAL;
1736 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1737 } else {
1738 hammer_flush_inode(ip, 0);
1739 }
1740 hammer_rel_inode(ip, 0);
1741 }
1742}
1743