HAMMER 40B/Many: Inode/link-count sequencer cleanup pass.
sys/vfs/hammer/hammer_inode.c
1 /*
2  * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.46 2008/05/02 06:51:57 dillon Exp $
35  */
36
37 #include "hammer.h"
38 #include <vm/vm_extern.h>
39 #include <sys/buf.h>
40 #include <sys/buf2.h>
41
42 static int hammer_unload_inode(struct hammer_inode *ip);
43 static void hammer_flush_inode_core(hammer_inode_t ip, int flags);
44 static int hammer_setup_child_callback(hammer_record_t rec, void *data);
45 static int hammer_inode_unloadable_check(hammer_inode_t ip);
46 static int hammer_setup_parent_inodes(hammer_record_t record);
47
48 /*
49  * The kernel is not actively referencing this vnode but is still holding
50  * it cached.
51  *
52  * This is called from the frontend.
53  */
54 int
55 hammer_vop_inactive(struct vop_inactive_args *ap)
56 {
57         struct hammer_inode *ip = VTOI(ap->a_vp);
58
59         /*
60          * Degenerate case
61          */
62         if (ip == NULL) {
63                 vrecycle(ap->a_vp);
64                 return(0);
65         }
66
67         /*
68          * If the inode no longer has visibility in the filesystem and is
69          * fairly clean, try to recycle it immediately.  This can deadlock
70          * in vfsync() if we aren't careful.
71          */
72         if (hammer_inode_unloadable_check(ip) && ip->ino_rec.ino_nlinks == 0)
73                 vrecycle(ap->a_vp);
74         return(0);
75 }
76
77 /*
78  * Release the vnode association.  This is typically (but not always)
79  * the last reference on the inode.
80  *
81  * Once the association is lost we are on our own with regards to
82  * flushing the inode.
83  */
84 int
85 hammer_vop_reclaim(struct vop_reclaim_args *ap)
86 {
87         struct hammer_inode *ip;
88         struct vnode *vp;
89
90         vp = ap->a_vp;
91
92         if ((ip = vp->v_data) != NULL) {
93                 vp->v_data = NULL;
94                 ip->vp = NULL;
95                 hammer_rel_inode(ip, 1);
96         }
97         return(0);
98 }
99
100 /*
101  * Return a locked vnode for the specified inode.  The inode must be
102  * referenced but NOT LOCKED on entry and will remain referenced on
103  * return.
104  *
105  * Called from the frontend.
106  */
107 int
108 hammer_get_vnode(struct hammer_inode *ip, int lktype, struct vnode **vpp)
109 {
110         struct vnode *vp;
111         int error = 0;
112
113         for (;;) {
114                 if ((vp = ip->vp) == NULL) {
115                         error = getnewvnode(VT_HAMMER, ip->hmp->mp, vpp, 0, 0);
116                         if (error)
117                                 break;
118                         hammer_lock_ex(&ip->lock);
119                         if (ip->vp != NULL) {
120                                 hammer_unlock(&ip->lock);
121                                 vp->v_type = VBAD;
122                                 vx_put(vp);
123                                 continue;
124                         }
125                         hammer_ref(&ip->lock);
126                         vp = *vpp;
127                         ip->vp = vp;
128                         vp->v_type = hammer_get_vnode_type(
129                                             ip->ino_rec.base.base.obj_type);
130
131                         switch(ip->ino_rec.base.base.obj_type) {
132                         case HAMMER_OBJTYPE_CDEV:
133                         case HAMMER_OBJTYPE_BDEV:
134                                 vp->v_ops = &ip->hmp->mp->mnt_vn_spec_ops;
135                                 addaliasu(vp, ip->ino_data.rmajor,
136                                           ip->ino_data.rminor);
137                                 break;
138                         case HAMMER_OBJTYPE_FIFO:
139                                 vp->v_ops = &ip->hmp->mp->mnt_vn_fifo_ops;
140                                 break;
141                         default:
142                                 break;
143                         }
144
145                         /*
146                          * Only mark as the root vnode if the ip is not
147                          * historical, otherwise the VFS cache will get
148                          * confused.  The other half of the special handling
149                          * is in hammer_vop_nlookupdotdot().
150                          */
151                         if (ip->obj_id == HAMMER_OBJID_ROOT &&
152                             ip->obj_asof == ip->hmp->asof) {
153                                 vp->v_flag |= VROOT;
154                         }
155
156                         vp->v_data = (void *)ip;
157                         /* vnode locked by getnewvnode() */
158                         /* make related vnode dirty if inode dirty? */
159                         hammer_unlock(&ip->lock);
160                         if (vp->v_type == VREG)
161                                 vinitvmio(vp, ip->ino_rec.ino_size);
162                         break;
163                 }
164
165                 /*
166                  * loop if the vget fails (aka races), or if the vp
167                  * no longer matches ip->vp.
168                  */
169                 if (vget(vp, LK_EXCLUSIVE) == 0) {
170                         if (vp == ip->vp)
171                                 break;
172                         vput(vp);
173                 }
174         }
175         *vpp = vp;
176         return(error);
177 }
178
179 /*
180  * Acquire a HAMMER inode.  The returned inode is not locked.  These functions
181  * do not attach or detach the related vnode (use hammer_get_vnode() for
182  * that).
183  *
184  * The flags argument is only applied for newly created inodes, and only
185  * certain flags are inherited.
186  *
187  * Called from the frontend.
188  */
189 struct hammer_inode *
190 hammer_get_inode(hammer_transaction_t trans, struct hammer_node **cache,
191                  u_int64_t obj_id, hammer_tid_t asof, int flags, int *errorp)
192 {
193         hammer_mount_t hmp = trans->hmp;
194         struct hammer_inode_info iinfo;
195         struct hammer_cursor cursor;
196         struct hammer_inode *ip;
197
198         /*
199          * Determine if we already have an inode cached.  If we do then
200          * we are golden.
201          */
202         iinfo.obj_id = obj_id;
203         iinfo.obj_asof = asof;
204 loop:
205         ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
206         if (ip) {
207                 hammer_ref(&ip->lock);
208                 *errorp = 0;
209                 return(ip);
210         }
211
212         ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
213         ++hammer_count_inodes;
214         ip->obj_id = obj_id;
215         ip->obj_asof = iinfo.obj_asof;
216         ip->hmp = hmp;
217         ip->flags = flags & HAMMER_INODE_RO;
218         if (hmp->ronly)
219                 ip->flags |= HAMMER_INODE_RO;
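            /* trunc_off at the maximum offset means no truncation is pending */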
220         ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
221         RB_INIT(&ip->rec_tree);
222         TAILQ_INIT(&ip->bio_list);
223         TAILQ_INIT(&ip->bio_alt_list);
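            /*
             * target_list holds dependent records (e.g. directory entries)
             * whose target_ip is this inode.
             */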
224         TAILQ_INIT(&ip->target_list);
225
226         /*
227          * Locate the on-disk inode.
228          */
229 retry:
230         hammer_init_cursor(trans, &cursor, cache);
231         cursor.key_beg.obj_id = ip->obj_id;
232         cursor.key_beg.key = 0;
233         cursor.key_beg.create_tid = 0;
234         cursor.key_beg.delete_tid = 0;
235         cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
236         cursor.key_beg.obj_type = 0;
237         cursor.asof = iinfo.obj_asof;
238         cursor.flags = HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_GET_DATA |
239                        HAMMER_CURSOR_ASOF;
240
241         *errorp = hammer_btree_lookup(&cursor);
242         if (*errorp == EDEADLK) {
243                 hammer_done_cursor(&cursor);
244                 goto retry;
245         }
246
247         /*
248          * On success the B-Tree lookup will hold the appropriate
249          * buffer cache buffers and provide a pointer to the requested
250          * information.  Copy the information to the in-memory inode
251          * and cache the B-Tree node to improve future operations.
252          */
253         if (*errorp == 0) {
254                 ip->ino_rec = cursor.record->inode;
255                 ip->ino_data = cursor.data->inode;
256                 hammer_cache_node(cursor.node, &ip->cache[0]);
257                 if (cache)
258                         hammer_cache_node(cursor.node, cache);
259         }
260
261         /*
262          * On success, load the inode's record and data and insert the
263          * inode into the in-memory inode RB tree.  It is possible to race
264          * another lookup's insertion of the same inode, so deal with that too.
265          *
266          * The cursor's locked node interlocks against others creating and
267          * destroying ip while we were blocked.
268          */
269         if (*errorp == 0) {
270                 hammer_ref(&ip->lock);
271                 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
272                         hammer_uncache_node(&ip->cache[0]);
273                         hammer_uncache_node(&ip->cache[1]);
274                         KKASSERT(ip->lock.refs == 1);
275                         --hammer_count_inodes;
276                         kfree(ip, M_HAMMER);
277                         hammer_done_cursor(&cursor);
278                         goto loop;
279                 }
280                 ip->flags |= HAMMER_INODE_ONDISK;
281         } else {
282                 --hammer_count_inodes;
283                 kfree(ip, M_HAMMER);
284                 ip = NULL;
285         }
286         hammer_done_cursor(&cursor);
287         return (ip);
288 }
289
290 /*
291  * Create a new filesystem object, returning the inode in *ipp.  The
292  * returned inode will be referenced.
293  *
294  * The inode is created in-memory.
295  */
296 int
297 hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
298                     struct ucred *cred, hammer_inode_t dip,
299                     struct hammer_inode **ipp)
300 {
301         hammer_mount_t hmp;
302         hammer_inode_t ip;
303         uid_t xuid;
304
305         hmp = trans->hmp;
306         ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
307         ++hammer_count_inodes;
308         ip->obj_id = hammer_alloc_objid(trans, dip);
309         KKASSERT(ip->obj_id != 0);
310         ip->obj_asof = hmp->asof;
311         ip->hmp = hmp;
312         ip->flush_state = HAMMER_FST_IDLE;
313         ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_RDIRTY |
314                     HAMMER_INODE_ITIMES;
315
316         ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
317         RB_INIT(&ip->rec_tree);
318         TAILQ_INIT(&ip->bio_list);
319         TAILQ_INIT(&ip->bio_alt_list);
320         TAILQ_INIT(&ip->target_list);
321
322         ip->ino_rec.ino_atime = trans->time;
323         ip->ino_rec.ino_mtime = trans->time;
324         ip->ino_rec.ino_size = 0;
325         ip->ino_rec.ino_nlinks = 0;
326         /* XXX */
327         ip->ino_rec.base.base.btype = HAMMER_BTREE_TYPE_RECORD;
328         ip->ino_rec.base.base.obj_id = ip->obj_id;
329         ip->ino_rec.base.base.key = 0;
330         ip->ino_rec.base.base.create_tid = 0;
331         ip->ino_rec.base.base.delete_tid = 0;
332         ip->ino_rec.base.base.rec_type = HAMMER_RECTYPE_INODE;
333         ip->ino_rec.base.base.obj_type = hammer_get_obj_type(vap->va_type);
334
335         ip->ino_data.version = HAMMER_INODE_DATA_VERSION;
336         ip->ino_data.mode = vap->va_mode;
337         ip->ino_data.ctime = trans->time;
338         ip->ino_data.parent_obj_id = (dip) ? dip->ino_rec.base.base.obj_id : 0;
339
340         switch(ip->ino_rec.base.base.obj_type) {
341         case HAMMER_OBJTYPE_CDEV:
342         case HAMMER_OBJTYPE_BDEV:
343                 ip->ino_data.rmajor = vap->va_rmajor;
344                 ip->ino_data.rminor = vap->va_rminor;
345                 break;
346         default:
347                 break;
348         }
349
350         /*
351          * Calculate default uid/gid and overwrite with information from
352          * the vap.
353          */
354         xuid = hammer_to_unix_xid(&dip->ino_data.uid);
355         ip->ino_data.gid = dip->ino_data.gid;
356         xuid = vop_helper_create_uid(hmp->mp, dip->ino_data.mode, xuid, cred,
357                                      &vap->va_mode);
358         ip->ino_data.mode = vap->va_mode;
359
360         if (vap->va_vaflags & VA_UID_UUID_VALID)
361                 ip->ino_data.uid = vap->va_uid_uuid;
362         else if (vap->va_uid != (uid_t)VNOVAL)
363                 hammer_guid_to_uuid(&ip->ino_data.uid, xuid);
364         if (vap->va_vaflags & VA_GID_UUID_VALID)
365                 ip->ino_data.gid = vap->va_gid_uuid;
366         else if (vap->va_gid != (gid_t)VNOVAL)
367                 hammer_guid_to_uuid(&ip->ino_data.gid, vap->va_gid);
368
369         hammer_ref(&ip->lock);
370         if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
371                 hammer_unref(&ip->lock);
372                 panic("hammer_create_inode: duplicate obj_id %llx", ip->obj_id);
373         }
374         *ipp = ip;
375         return(0);
376 }
377
378 /*
379  * Called by hammer_sync_inode().
380  */
381 static int
382 hammer_update_inode(hammer_transaction_t trans, hammer_inode_t ip)
383 {
384         struct hammer_cursor cursor;
385         hammer_record_t record;
386         int error;
387
388 retry:
389         error = 0;
390
391         /*
392          * If the inode has a presence on-disk then locate it and mark
393          * it deleted, setting DELONDISK.
394          *
395          * The record may or may not be physically deleted, depending on
396          * the retention policy.
397          */
398         if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
399             HAMMER_INODE_ONDISK) {
400                 hammer_init_cursor(trans, &cursor, &ip->cache[0]);
401                 cursor.key_beg.obj_id = ip->obj_id;
402                 cursor.key_beg.key = 0;
403                 cursor.key_beg.create_tid = 0;
404                 cursor.key_beg.delete_tid = 0;
405                 cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
406                 cursor.key_beg.obj_type = 0;
407                 cursor.asof = ip->obj_asof;
408                 cursor.flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF;
409                 cursor.flags |= HAMMER_CURSOR_BACKEND;
410
411                 error = hammer_btree_lookup(&cursor);
412                 if (error) {
413                         kprintf("error %d\n", error);
414                         Debugger("hammer_update_inode");
415                 }
416
417                 if (error == 0) {
418                         error = hammer_ip_delete_record(&cursor, trans->tid);
419                         if (error && error != EDEADLK) {
420                                 kprintf("error %d\n", error);
421                                 Debugger("hammer_update_inode2");
422                         }
423                         if (error == 0) {
424                                 ip->flags |= HAMMER_INODE_DELONDISK;
425                         }
426                         hammer_cache_node(cursor.node, &ip->cache[0]);
427                 }
428                 hammer_done_cursor(&cursor);
429                 if (error == EDEADLK)
430                         goto retry;
431         }
432
433         /*
434          * Ok, write out the initial record or a new record (after deleting
435          * the old one), unless the DELETED flag is set.  This routine will
436          * clear DELONDISK if it writes out a record.
437          *
438          * Update our inode statistics if this is the first application of
439          * the inode on-disk.
440          */
441         if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) {
442                 /*
443                  * Generate a record and write it to the media
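                      * The record carries the backend copies (sync_ino_rec and
                      * sync_ino_data) snapshotted earlier by hammer_flush_inode_core().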
444                  */
445                 record = hammer_alloc_mem_record(ip);
446                 record->flush_state = HAMMER_FST_FLUSH;
447                 record->rec.inode = ip->sync_ino_rec;
448                 record->rec.inode.base.base.create_tid = trans->tid;
449                 record->rec.inode.base.data_len = sizeof(ip->sync_ino_data);
450                 record->data = (void *)&ip->sync_ino_data;
451                 record->flags |= HAMMER_RECF_INTERLOCK_BE;
452                 error = hammer_ip_sync_record(trans, record);
453                 if (error) {
454                         kprintf("error %d\n", error);
455                         Debugger("hammer_update_inode3");
456                 }
457
458                 /*
459                  * The record isn't managed by the inode's record tree,
460                  * destroy it whether we succeed or fail.
461                  */
462                 record->flags &= ~HAMMER_RECF_INTERLOCK_BE;
463                 record->flags |= HAMMER_RECF_DELETED_FE;
464                 record->flush_state = HAMMER_FST_IDLE;
465                 hammer_rel_mem_record(record);
466
467                 /*
468                  * Finish up.
469                  */
470                 if (error == 0) {
471                         ip->sync_flags &= ~(HAMMER_INODE_RDIRTY |
472                                             HAMMER_INODE_DDIRTY |
473                                             HAMMER_INODE_ITIMES);
474                         ip->flags &= ~HAMMER_INODE_DELONDISK;
475
476                         /*
477                          * Root volume count of inodes
478                          */
479                         if ((ip->flags & HAMMER_INODE_ONDISK) == 0) {
480                                 hammer_modify_volume(trans, trans->rootvol,
481                                                      NULL, 0);
482                                 ++ip->hmp->rootvol->ondisk->vol0_stat_inodes;
483                                 hammer_modify_volume_done(trans->rootvol);
484                                 ip->flags |= HAMMER_INODE_ONDISK;
485                         }
486                 }
487         }
488
489         /*
490          * If the inode has been destroyed, clean out any left-over flags
491          * that may have been set by the frontend.
492          */
493         if (error == 0 && (ip->flags & HAMMER_INODE_DELETED)) { 
494                 ip->sync_flags &= ~(HAMMER_INODE_RDIRTY |
495                                     HAMMER_INODE_DDIRTY |
496                                     HAMMER_INODE_ITIMES);
497         }
498         return(error);
499 }
500
501 /*
502  * Update only the itimes fields.  This is done non-historically.  The
503  * record is updated in-place on the disk.
504  */
505 static int
506 hammer_update_itimes(hammer_transaction_t trans, hammer_inode_t ip)
507 {
508         struct hammer_cursor cursor;
509         struct hammer_inode_record *rec;
510         int error;
511
512 retry:
513         error = 0;
514         if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
515             HAMMER_INODE_ONDISK) {
516                 hammer_init_cursor(trans, &cursor, &ip->cache[0]);
517                 cursor.key_beg.obj_id = ip->obj_id;
518                 cursor.key_beg.key = 0;
519                 cursor.key_beg.create_tid = 0;
520                 cursor.key_beg.delete_tid = 0;
521                 cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
522                 cursor.key_beg.obj_type = 0;
523                 cursor.asof = ip->obj_asof;
524                 cursor.flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF;
525                 cursor.flags |= HAMMER_CURSOR_BACKEND;
526
527                 error = hammer_btree_lookup(&cursor);
528                 if (error) {
529                         kprintf("error %d\n", error);
530                         Debugger("hammer_update_itimes1");
531                 }
532                 if (error == 0) {
533                         /*
534                          * Do not generate UNDO records for atime/mtime
535                          * updates.
536                          */
537                         rec = &cursor.record->inode;
538                         hammer_modify_buffer(cursor.trans, cursor.record_buffer,
539                                              NULL, 0);
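                            /*
                             * (The NULL base / zero length passed to
                             * hammer_modify_buffer() suppresses the UNDO
                             * generation mentioned above.)
                             */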
540                         rec->ino_atime = ip->sync_ino_rec.ino_atime;
541                         rec->ino_mtime = ip->sync_ino_rec.ino_mtime;
542                         hammer_modify_buffer_done(cursor.record_buffer);
543                         ip->sync_flags &= ~HAMMER_INODE_ITIMES;
544                         /* XXX recalculate crc */
545                         hammer_cache_node(cursor.node, &ip->cache[0]);
546                 }
547                 hammer_done_cursor(&cursor);
548                 if (error == EDEADLK)
549                         goto retry;
550         }
551         return(error);
552 }
553
554 /*
555  * Release a reference on an inode, flush as requested.
556  *
557  * On the last reference we queue the inode to the flusher for its final
558  * disposition.
559  */
560 void
561 hammer_rel_inode(struct hammer_inode *ip, int flush)
562 {
563         hammer_mount_t hmp = ip->hmp;
564
565         /*
566          * Handle disposition when dropping the last ref.
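             *
             * Loop note: hammer_flush_inode() may itself acquire a reference
             * on the inode, so we re-test on each pass and only exit once the
             * inode has been unloaded or our own reference has been dropped.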
567          */
568         for (;;) {
569                 if (ip->lock.refs == 1) {
570                         /*
571                          * Determine whether on-disk action is needed for
572                          * the inode's final disposition.
573                          */
574                         if (hammer_inode_unloadable_check(ip)) {
575                                 hammer_unload_inode(ip);
576                                 break;
577                         }
578                         hammer_flush_inode(ip, 0);
579                 } else {
580                         /*
581                          * We have to flush inodes which do not have vnode
582                          * associations.
583                          */
584 #if 0
585                         if (ip->vp == NULL) {
586                                 kprintf("v%d:%04x\n", ip->flush_state, ip->flags);
587                                 hammer_flush_inode(ip, 0);
588                         } else 
589 #endif
590                         if (flush) {
591                                 hammer_flush_inode(ip, 0);
592                         }
593                         /*
594                          * The inode still has multiple refs, try to drop
595                          * one ref.
596                          */
597                         KKASSERT(ip->lock.refs >= 1);
598                         if (ip->lock.refs > 1) {
599                                 hammer_unref(&ip->lock);
600                                 break;
601                         }
602                 }
603         }
604
605         /*
606          * XXX bad hack until I add code to track inodes in SETUP.  We
607          * can queue a lot of inodes to the syncer but if we don't wake
608          * it up the undo sets will be too large or too many unflushed
609          * records will build up and blow our malloc limit.
610          */
611         if (++hmp->reclaim_count > 256) {
612                 hmp->reclaim_count = 0;
613                 hammer_flusher_async(hmp);
614         }
615 }
616
617 /*
618  * Unload and destroy the specified inode.  Must be called with one remaining
619  * reference.  The reference is disposed of.
620  *
621  * This can only be called in the context of the flusher.
622  */
623 static int
624 hammer_unload_inode(struct hammer_inode *ip)
625 {
626         KASSERT(ip->lock.refs == 1,
627                 ("hammer_unload_inode: %d refs\n", ip->lock.refs));
628         KKASSERT(ip->vp == NULL);
629         KKASSERT(ip->flush_state == HAMMER_FST_IDLE);
630         KKASSERT(ip->cursor_ip_refs == 0);
631         KKASSERT((ip->flags & HAMMER_INODE_MODMASK) == 0);
632
633         KKASSERT(RB_EMPTY(&ip->rec_tree));
634         KKASSERT(TAILQ_EMPTY(&ip->target_list));
635         KKASSERT(TAILQ_EMPTY(&ip->bio_list));
636         KKASSERT(TAILQ_EMPTY(&ip->bio_alt_list));
637
638         RB_REMOVE(hammer_ino_rb_tree, &ip->hmp->rb_inos_root, ip);
639
640         hammer_uncache_node(&ip->cache[0]);
641         hammer_uncache_node(&ip->cache[1]);
642         if (ip->objid_cache)
643                 hammer_clear_objid(ip);
644         --hammer_count_inodes;
645         kfree(ip, M_HAMMER);
646
647         return(0);
648 }
649
650 /*
651  * A transaction has modified an inode, requiring updates as specified by
652  * the passed flags.
653  *
654  * HAMMER_INODE_RDIRTY: Inode record has been updated
655  * HAMMER_INODE_DDIRTY: Inode data has been updated
656  * HAMMER_INODE_XDIRTY: Dirty in-memory records
657  * HAMMER_INODE_BUFS:   Dirty front-end buffer cache buffers
658  * HAMMER_INODE_DELETED: Inode record/data must be deleted
659  * HAMMER_INODE_ITIMES: mtime/atime has been updated
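     *
     * For example, a caller that only updated the timestamps would pass
     * just HAMMER_INODE_ITIMES.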
660  */
661 void
662 hammer_modify_inode(hammer_transaction_t trans, hammer_inode_t ip, int flags)
663 {
664         KKASSERT ((ip->flags & HAMMER_INODE_RO) == 0 ||
665                   (flags & (HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
666                    HAMMER_INODE_XDIRTY|HAMMER_INODE_BUFS|
667                    HAMMER_INODE_DELETED|HAMMER_INODE_ITIMES)) == 0);
668
669         ip->flags |= flags;
670 }
671
672 /*
673  * Request that an inode be flushed.  This whole mess cannot block and may
674  * recurse.  Once requested HAMMER will attempt to actively flush it until
675  * the flush can be done.
676  *
677  * The inode may already be flushing, or may be in a setup state.  We can
678  * place the inode in a flushing state if it is currently idle and flag it
679  * to reflush if it is currently flushing.
680  */
681 void
682 hammer_flush_inode(hammer_inode_t ip, int flags)
683 {
684         hammer_record_t depend;
685         int r, good;
686
687         /*
688          * Trivial 'nothing to flush' case.  If the inode is in a SETUP
689          * state we have to put it back into an IDLE state so we can
690          * drop the extra ref.
691          */
692         if ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
693             (flags & HAMMER_FLUSH_FORCE) == 0) {
694                 if (ip->flush_state == HAMMER_FST_SETUP) {
695                         ip->flush_state = HAMMER_FST_IDLE;
696                         hammer_rel_inode(ip, 0);
697                 }
698                 return;
699         }
700
701         /*
702          * Our flush action will depend on the current state.
703          */
704         switch(ip->flush_state) {
705         case HAMMER_FST_IDLE:
706                 /*
707                  * We have no dependencies and can flush immediately.  Some
708                  * of our children may not be flushable so we have to re-test
709                  * with that additional knowledge.
710                  */
711                 hammer_flush_inode_core(ip, flags);
712                 break;
713         case HAMMER_FST_SETUP:
714                 /*
715                  * Recurse upwards through dependencies via target_list
716                  * and start their flusher actions going if possible.
717                  *
718                  * 'good' is our connectivity.  -1 means we have none and
719                  * can't flush, 0 means there weren't any dependencies, and
720                  * 1 means we have good connectivity.
721                  */
722                 good = 0;
723                 TAILQ_FOREACH(depend, &ip->target_list, target_entry) {
724                         r = hammer_setup_parent_inodes(depend);
725                         if (r < 0 && good == 0)
726                                 good = -1;
727                         if (r > 0)
728                                 good = 1;
729                 }
730
731                 /*
732                  * We can continue if good >= 0.  Determine how many records
733                  * under our inode can be flushed (and mark them).
734                  */
735                 kprintf("g%d", good);
736                 if (good >= 0) {
737                         hammer_flush_inode_core(ip, flags);
738                 } else {
739                         ip->flags |= HAMMER_INODE_REFLUSH;
740                 }
741                 break;
742         default:
743                 /*
744                  * We are already flushing, flag the inode to reflush
745                  * if needed after it completes its current flush.
746                  */
747                 if ((ip->flags & HAMMER_INODE_REFLUSH) == 0)
748                         ip->flags |= HAMMER_INODE_REFLUSH;
749                 break;
750         }
751 }
752
753 /*
754  * We are asked to recurse upwards and convert the record from SETUP
755  * to FLUSH if possible.  record->ip is a parent of the caller's inode,
756  * and record->target_ip is the caller's inode.
757  *
758  * Return 1 if the record gives us connectivity
759  *
760  * Return 0 if the record is not relevant 
761  *
762  * Return -1 if we can't resolve the dependency and there is no connectivity.
763  */
764 static int
765 hammer_setup_parent_inodes(hammer_record_t record)
766 {
767         hammer_mount_t hmp = record->ip->hmp;
768         hammer_record_t depend;
769         hammer_inode_t ip;
770         int r, good;
771
772         KKASSERT(record->flush_state != HAMMER_FST_IDLE);
773         ip = record->ip;
774
775         /*
776          * If the record is already flushing, is it in our flush group?
777          *
778          * If it is in our flush group but it is a delete-on-disk, it
779          * does not improve our connectivity (return 0), and if the
780          * target inode is not trying to destroy itself we can't allow
781          * the operation yet anyway (the second return -1).
782          */
783         if (record->flush_state == HAMMER_FST_FLUSH) {
784                 if (record->flush_group != hmp->flusher_next) {
785                         ip->flags |= HAMMER_INODE_REFLUSH;
786                         return(-1);
787                 }
788                 if (record->type == HAMMER_MEM_RECORD_ADD)
789                         return(1);
790                 return(0);
791         }
792
793         /*
794  * It must be a setup record.  Try to resolve the setup dependencies
795          * by recursing upwards so we can place ip on the flush list.
796          */
797         KKASSERT(record->flush_state == HAMMER_FST_SETUP);
798
799         good = 0;
800         TAILQ_FOREACH(depend, &ip->target_list, target_entry) {
801                 r = hammer_setup_parent_inodes(depend);
802                 if (r < 0 && good == 0)
803                         good = -1;
804                 if (r > 0)
805                         good = 1;
806         }
807
808         /*
809          * We can't flush ip because it has no connectivity (XXX also check
810          * nlinks for pre-existing connectivity!).  Flag it so any resolution
811          * recurses back down.
812          */
813         if (good < 0) {
814                 ip->flags |= HAMMER_INODE_REFLUSH;
815                 return(good);
816         }
817
818         /*
819          * We are go, place the parent inode in a flushing state so we can
820          * place its record in a flushing state.  Note that the parent
821          * may already be flushing.  The record must be in the same flush
822          * group as the parent.
823          */
824         if (ip->flush_state != HAMMER_FST_FLUSH)
825                 hammer_flush_inode_core(ip, HAMMER_FLUSH_RECURSION);
826         KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
827         KKASSERT(record->flush_state == HAMMER_FST_SETUP);
828
829 #if 0
830         if (record->type == HAMMER_MEM_RECORD_DEL &&
831             (record->target_ip->flags & (HAMMER_INODE_DELETED|HAMMER_INODE_DELONDISK)) == 0) {
832                 /*
833                  * Regardless of flushing state we cannot sync this path if the
834                  * record represents a delete-on-disk but the target inode
835                  * is not ready to sync its own deletion.
836                  *
837                  * XXX need to count effective nlinks to determine whether
838                  * the flush is ok, otherwise removing a hardlink will
839                  * just leave the DEL record to rot.
840                  */
841                 record->target_ip->flags |= HAMMER_INODE_REFLUSH;
842                 return(-1);
843         } else
844 #endif
845         if (ip->flush_group == ip->hmp->flusher_next) {
846                 /*
847                  * This is the record we wanted to synchronize.
848                  */
849                 record->flush_state = HAMMER_FST_FLUSH;
850                 record->flush_group = ip->flush_group;
851                 hammer_ref(&record->lock);
852                 if (record->type == HAMMER_MEM_RECORD_ADD)
853                         return(1);
854
855                 /*
856                  * The record is a delete-on-disk.  It does not contribute
857                  * to our visibility.  We can still flush it.
858                  */
859                 return(0);
860         } else {
861                 /*
862                  * We couldn't resolve the dependencies, request that the
863                  * inode be flushed when the dependencies can be resolved.
864                  */
865                 ip->flags |= HAMMER_INODE_REFLUSH;
866                 return(-1);
867         }
868 }
869
870 /*
871  * This is the core routine placing an inode into the FST_FLUSH state.
872  */
873 static void
874 hammer_flush_inode_core(hammer_inode_t ip, int flags)
875 {
876         int go_count;
877         int error;
878
879         KKASSERT(ip->flush_state != HAMMER_FST_FLUSH);
880         if (ip->flush_state == HAMMER_FST_IDLE)
881                 hammer_ref(&ip->lock);
882         ip->flush_state = HAMMER_FST_FLUSH;
883         ip->flush_group = ip->hmp->flusher_next;
884
885         /*
886          * Figure out how many in-memory records we can actually flush
887          * (not including inode meta-data, buffers, etc).
888          */
889         if (flags & HAMMER_FLUSH_RECURSION) {
890                 go_count = 1;
891         } else {
892                 go_count = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
893                                    hammer_setup_child_callback, NULL);
894         }
895
896         /*
897          * This is a more involved test that includes go_count.  If we
898          * can't flush, flag the inode and return.  If go_count is 0 we
899          * are unable to flush any records in our rec_tree and
900          * must ignore the XDIRTY flag.
901          */
902         if (go_count == 0) {
903                 if ((ip->flags & HAMMER_INODE_MODMASK_NOXDIRTY) == 0) {
904                         ip->flags |= HAMMER_INODE_REFLUSH;
905                         ip->flush_state = HAMMER_FST_SETUP;
906                         return;
907                 }
908         }
909
910         /*
911          * Inodes not in an IDLE state get an extra reference.
912          *
913          * Place the inode in a flush state and sync all frontend
914          * information to the backend.
915          */
916
917         if ((flags & HAMMER_FLUSH_RECURSION) == 0)  {
918                 if (ip->vp != NULL)
919                         error = vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL);
920                 else
921                         error = 0;
922         }
923
924         /*
925          * Any further strategy calls will go into the inode's alternative
926          * bioq.
927          */
928         ip->flags |= HAMMER_INODE_WRITE_ALT;
929
930         /*
931          * Snapshot the state of the inode for the backend flusher.
932          *
933          * The truncation must be retained in the frontend until after
934          * we've actually performed the record deletion.
935          *
936          * NOTE: The DELETING flag is a mod flag, but it is also sticky,
937          * and stays in ip->flags.  Once set, it stays set until the
938          * inode is destroyed.
939          */
940         ip->sync_flags = (ip->flags & HAMMER_INODE_MODMASK);
941         ip->sync_trunc_off = ip->trunc_off;
942         ip->sync_ino_rec = ip->ino_rec;
943         ip->sync_ino_data = ip->ino_data;
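            /*
             * Note: '~' binds tighter than '|', so the statement below clears
             * the MODMASK bits while leaving TRUNCATED and BUFS set; the
             * frontend retains those until the backend has caught up.
             */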
944         ip->flags &= ~HAMMER_INODE_MODMASK |
945                      HAMMER_INODE_TRUNCATED | HAMMER_INODE_BUFS;
946
947         /*
948          * Fix up the dirty buffer status.
949          */
950         if (ip->vp == NULL || RB_ROOT(&ip->vp->v_rbdirty_tree) == NULL) {
951                 if (TAILQ_FIRST(&ip->bio_alt_list) == NULL)
952                         ip->flags &= ~HAMMER_INODE_BUFS;
953         }
954         if (TAILQ_FIRST(&ip->bio_list))
955                 ip->sync_flags |= HAMMER_INODE_BUFS;
956         else
957                 ip->sync_flags &= ~HAMMER_INODE_BUFS;
958
959         /*
960          * The flusher inherits our inode and reference.
961          */
962         TAILQ_INSERT_TAIL(&ip->hmp->flush_list, ip, flush_entry);
963
964         if (flags & HAMMER_FLUSH_SIGNAL)
965                 hammer_flusher_async(ip->hmp);
966 }
967
968 /*
969  * Callback for scan of ip->rec_tree.  Try to include each record in our
970  * flush.  ip->flush_group has been set but the inode has not yet been
971  * moved into a flushing state.
972  *
973  * If we get stuck on a record we have to set HAMMER_INODE_REFLUSH on
974  * both inodes.
975  *
976  * We return 1 for any record placed or found in FST_FLUSH, which prevents
977  * the caller from shortcutting the flush.
978  */
979 static int
980 hammer_setup_child_callback(hammer_record_t rec, void *data)
981 {
982         hammer_inode_t target_ip;
983         hammer_inode_t ip;
984         int r;
985
986         /*
987          * If the record has been deleted by the backend (it's being held
988          * by the frontend in a race), just ignore it.
989          */
990         if (rec->flags & HAMMER_RECF_DELETED_BE)
991                 return(0);
992
993         /*
994          * If the record is in an idle state it has no dependencies and
995          * can be flushed.
996          */
997         ip = rec->ip;
998         r = 0;
999
1000         switch(rec->flush_state) {
1001         case HAMMER_FST_IDLE:
1002                 /*
1003                  * Record has no setup dependency, we can flush it.
1004                  */
1005                 KKASSERT(rec->target_ip == NULL);
1006                 rec->flush_state = HAMMER_FST_FLUSH;
1007                 rec->flush_group = ip->flush_group;
1008                 hammer_ref(&rec->lock);
1009                 r = 1;
1010                 break;
1011         case HAMMER_FST_SETUP:
1012                 /*
1013                  * Record has a setup dependency.  Try to include the
1014                  * target ip in the flush. 
1015                  *
1016                  * We have to be careful here; if we do not do the right
1017                  * thing we can lose track of dirty inodes and the system
1018                  * will lock up trying to allocate buffers.
1019                  */
1020                 target_ip = rec->target_ip;
1021                 KKASSERT(target_ip != NULL);
1022                 KKASSERT(target_ip->flush_state != HAMMER_FST_IDLE);
1023                 if (target_ip->flush_state == HAMMER_FST_FLUSH) {
1024                         /*
1025                          * If the target IP is already flushing in our group
1026                          * we are golden, otherwise make sure the target
1027                          * reflushes.
1028                          */
1029                         if (target_ip->flush_group == ip->flush_group) {
1030                                 rec->flush_state = HAMMER_FST_FLUSH;
1031                                 rec->flush_group = ip->flush_group;
1032                                 hammer_ref(&rec->lock);
1033                                 r = 1;
1034                         } else {
1035                                 target_ip->flags |= HAMMER_INODE_REFLUSH;
1036                         }
1037                 } else if (rec->type == HAMMER_MEM_RECORD_ADD) {
1038                         /*
1039                          * If the target IP is not flushing we can force
1040                          * it to flush, even if it is unable to write out
1041                          * any of its own records we have at least one in
1042                          * hand that we CAN deal with.
1043                          */
1044                         rec->flush_state = HAMMER_FST_FLUSH;
1045                         rec->flush_group = ip->flush_group;
1046                         hammer_ref(&rec->lock);
1047                         hammer_flush_inode_core(target_ip,
1048                                                 HAMMER_FLUSH_RECURSION);
1049                         r = 1;
1050                 } else {
1051                         /*
1052                          * XXX this needs help.  We have a delete-on-disk
1053                          * which could disconnect the target.  If the target
1054                  * has its own dependencies they really need to
1055                          * be flushed.
1056                          *
1057                          * XXX
1058                          */
1059                         rec->flush_state = HAMMER_FST_FLUSH;
1060                         rec->flush_group = ip->flush_group;
1061                         hammer_ref(&rec->lock);
1062                         hammer_flush_inode_core(target_ip,
1063                                                 HAMMER_FLUSH_RECURSION);
1064                         r = 1;
1065                 }
1066                 break;
1067         case HAMMER_FST_FLUSH:
1068                 /* 
1069                  * Record already associated with a flush group.  It had
1070                  * better be ours.
1071                  */
1072                 KKASSERT(rec->flush_group == ip->flush_group);
1073                 r = 1;
1074                 break;
1075         }
1076         return(r);
1077 }
1078
1079 /*
1080  * Wait for a previously queued flush to complete
1081  */
1082 void
1083 hammer_wait_inode(hammer_inode_t ip)
1084 {
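             /*
              * hammer_flush_inode_done() clears FLUSHW and issues the wakeup
              * on &ip->flags when the flush completes.
              */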
1085         while (ip->flush_state == HAMMER_FST_FLUSH) {
1086                 ip->flags |= HAMMER_INODE_FLUSHW;
1087                 tsleep(&ip->flags, 0, "hmrwin", 0);
1088         }
1089 }
1090
1091 /*
1092  * Called by the backend code when a flush has been completed.
1093  * The inode has already been removed from the flush list.
1094  *
1095  * A pipelined flush can occur, in which case we must re-enter the
1096  * inode on the list and re-copy its fields.
1097  */
1098 void
1099 hammer_flush_inode_done(hammer_inode_t ip)
1100 {
1101         struct bio *bio;
1102         int dorel = 0;
1103
1104         KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
1105
1106         /*
1107          * Allow BIOs to queue to the inode's primary bioq again.
1108          */
1109         ip->flags &= ~HAMMER_INODE_WRITE_ALT;
1110
1111         /*
1112          * Merge left-over flags back into the frontend and fix the state.
1113          */
1114         ip->flags |= ip->sync_flags;
1115         if (TAILQ_EMPTY(&ip->target_list) && RB_EMPTY(&ip->rec_tree)) {
1116                 ip->flush_state = HAMMER_FST_IDLE;
1117                 dorel = 1;
1118         } else {
1119                 ip->flush_state = HAMMER_FST_SETUP;
1120         }
1121
1122         /*
1123          * The backend may have adjusted nlinks, so if the adjusted nlinks
1124          * does not match the frontend, set the frontend's RDIRTY flag again.
1125          */
1126         if (ip->ino_rec.ino_nlinks != ip->sync_ino_rec.ino_nlinks)
1127                 ip->flags |= HAMMER_INODE_RDIRTY;
1128
1129         /*
1130          * Reflush any BIOs that wound up in the alt list.  Our inode will
1131          * also wind up at the end of the flusher's list.
1132          */
1133         while ((bio = TAILQ_FIRST(&ip->bio_alt_list)) != NULL) {
1134                 TAILQ_REMOVE(&ip->bio_alt_list, bio, bio_act);
1135                 TAILQ_INSERT_TAIL(&ip->bio_list, bio, bio_act);
1136                 kprintf("d");
1137                 ip->flags |= HAMMER_INODE_BUFS;
1138                 ip->flags |= HAMMER_INODE_REFLUSH;
1139         }
1140
1141         /*
1142          * Re-set the XDIRTY flag if some of the inode's in-memory records
1143          * could not be flushed.
1144          */
1145         if (RB_ROOT(&ip->rec_tree)) {
1146                 ip->flags |= HAMMER_INODE_XDIRTY;
1147                 ip->flags |= HAMMER_INODE_REFLUSH;
1148                 kprintf("e");
1149         }
1150
1151         /*
1152          * If the frontend made more changes and requested another flush,
1153          * do it. 
1154          */
1155         if (ip->flags & HAMMER_INODE_REFLUSH) {
1156                 ip->flags &= ~HAMMER_INODE_REFLUSH;
1157                 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1158                 if (ip->flush_state == HAMMER_FST_IDLE) {
1159                         if (ip->flags & HAMMER_INODE_FLUSHW) {
1160                                 ip->flags &= ~HAMMER_INODE_FLUSHW;
1161                                 wakeup(&ip->flags);
1162                         }
1163                 }
1164         } else {
1165                 if (ip->flags & HAMMER_INODE_FLUSHW) {
1166                         ip->flags &= ~HAMMER_INODE_FLUSHW;
1167                         wakeup(&ip->flags);
1168                 }
1169         }
1170         if (dorel)
1171                 hammer_rel_inode(ip, 0);
1172 }
1173
1174 /*
1175  * Called from hammer_sync_inode() to synchronize in-memory records
1176  * to the media.
1177  */
1178 static int
1179 hammer_sync_record_callback(hammer_record_t record, void *data)
1180 {
1181         hammer_transaction_t trans = data;
1182         int error;
1183
1184         /*
1185          * Skip records that do not belong to the current flush.
1186          */
1187         if (record->flush_state != HAMMER_FST_FLUSH)
1188                 return(0);
1189         KKASSERT((record->flags & HAMMER_RECF_DELETED_BE) == 0);
1190 #if 1
1191         if (record->flush_group != record->ip->flush_group) {
1192                 kprintf("sync_record %p ip %p bad flush group %d %d\n", record, record->ip, record->flush_group, record->ip->flush_group);
1193                 Debugger("blah2");
1194                 return(0);
1195         }
1196 #endif
1197         KKASSERT(record->flush_group == record->ip->flush_group);
1198
1199         /*
1200          * Interlock the record using the BE flag.  Once BE is set the
1201          * frontend cannot change the state of FE.
1202          *
1203          * NOTE: If FE is set prior to us setting BE we still sync the
1204          * record out, but the flush completion code converts it to 
1205          * a delete-on-disk record instead of destroying it.
1206          */
1207         if (record->flags & HAMMER_RECF_INTERLOCK_BE) {
1208                 hammer_flush_record_done(record, 0);
1209                 return(0);
1210         }
1211         record->flags |= HAMMER_RECF_INTERLOCK_BE;
1212
1213         /*
1214          * If DELETED_FE is set we may have already sent dependent pieces
1215          * to the disk and we must flush the record as if it hadn't been
1216          * deleted.  This creates a bit of a mess because we have to
1217          * have ip_sync_record convert the record to MEM_RECORD_DEL before
1218          * it inserts the B-Tree record.  Otherwise the media sync might
1219          * be visible to the frontend.
1220          */
1221         if (record->flags & HAMMER_RECF_DELETED_FE) {
1222                 KKASSERT(record->type == HAMMER_MEM_RECORD_ADD);
1223                 record->flags |= HAMMER_RECF_CONVERT_DELETE;
1224         }
1225
1226         /*
1227          * Assign the create_tid for new records.  Deletions already
1228          * have the record's entire key properly set up.
1229          */
1230         if (record->type != HAMMER_MEM_RECORD_DEL)
1231                 record->rec.inode.base.base.create_tid = trans->tid;
1232         error = hammer_ip_sync_record(trans, record);
1233
1234         if (error) {
1235                 error = -error;
1236                 if (error != -ENOSPC) {
1237                         kprintf("hammer_sync_record_callback: sync failed rec "
1238                                 "%p, error %d\n", record, error);
1239                         Debugger("sync failed rec");
1240                 }
1241         }
1242         hammer_flush_record_done(record, error);
1243         return(error);
1244 }
1245
1246 /*
1247  * XXX error handling
1248  */
1249 int
1250 hammer_sync_inode(hammer_inode_t ip)
1251 {
1252         struct hammer_transaction trans;
1253         struct bio *bio;
1254         hammer_record_t depend;
1255         hammer_record_t next;
1256         int error, tmp_error;
1257         u_int64_t nlinks;
1258
1259         if ((ip->sync_flags & HAMMER_INODE_MODMASK) == 0)
1260                 return(0);
1261
1262         hammer_start_transaction_fls(&trans, ip->hmp);
1263
1264         /*
1265          * Any directory records referencing this inode which are not in
1266          * our current flush group must adjust our nlink count for the
1267          * purposes of synchronization to disk.
1268          *
1269          * Records which are in our flush group can be unlinked from our
1270          * inode now, allowing the inode to be physically deleted.
1271          */
1272         nlinks = ip->ino_rec.ino_nlinks;
1273         next = TAILQ_FIRST(&ip->target_list);
1274         while ((depend = next) != NULL) {
1275                 next = TAILQ_NEXT(depend, target_entry);
1276                 if (depend->flush_state == HAMMER_FST_FLUSH &&
1277                     depend->flush_group == ip->hmp->flusher_act) {
1278                         TAILQ_REMOVE(&ip->target_list, depend, target_entry);
1279                         depend->target_ip = NULL;
1280                         /* no need to signal target_ip, it is us */
1281                 } else if ((depend->flags & HAMMER_RECF_DELETED_FE) == 0) {
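                             /*
                              * Not in this flush group: an ADD represents a
                              * link that has not yet reached the media (do not
                              * count it); a DEL represents a link removal that
                              * has not yet reached the media (count it back in).
                              */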
1282                         switch(depend->type) {
1283                         case HAMMER_MEM_RECORD_ADD:
1284                                 --nlinks;
1285                                 break;
1286                         case HAMMER_MEM_RECORD_DEL:
1287                                 ++nlinks;
1288                                 break;
1289                         }
1290                 }
1291         }
1292
1293         /*
1294          * Set dirty if we had to modify the link count.
1295          */
1296         if (ip->sync_ino_rec.ino_nlinks != nlinks) {
1297                 KKASSERT((int64_t)nlinks >= 0);
1298                 ip->sync_ino_rec.ino_nlinks = nlinks;
1299                 ip->sync_flags |= HAMMER_INODE_RDIRTY;
1300         }
1301
1302         /*
1303          * If there is a truncation queued, destroy any data past the (aligned)
1304          * truncation point.  Userland will have dealt with the buffer
1305          * containing the truncation point for us.
1306          *
1307          * We don't flush pending frontend data buffers until after we've
1308          * dealt with the truncation.
1309          *
1310          * Don't bother if the inode is or has been deleted.
1311          */
1312         if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
1313                 /*
1314                  * Interlock trunc_off.  The VOP front-end may continue to
1315                  * make adjustments to it while we are blocked.
1316                  */
1317                 off_t trunc_off;
1318                 off_t aligned_trunc_off;
1319
1320                 trunc_off = ip->sync_trunc_off;
1321                 aligned_trunc_off = (trunc_off + HAMMER_BUFMASK) &
1322                                     ~HAMMER_BUFMASK64;
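                     /*
                      * aligned_trunc_off is trunc_off rounded up to the next
                      * large-buffer boundary (e.g. with 16KB HAMMER buffers an
                      * offset of 0x2345 rounds up to 0x4000); only whole
                      * buffers beyond that point are deleted below.
                      */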
1323
1324                 /*
1325                  * Delete any whole blocks on-media.  The front-end has
1326                  * already cleaned out any partial block and made it
1327                  * pending.  The front-end may have updated trunc_off
1328                  * while we were blocked so do not just unconditionally
1329                  * set it to the maximum offset.
1330                  */
1331                 error = hammer_ip_delete_range(&trans, ip,
1332                                                 aligned_trunc_off,
1333                                                 0x7FFFFFFFFFFFFFFFLL);
1334                 if (error)
1335                         Debugger("hammer_ip_delete_range errored");
1336                 ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
1337                 if (ip->trunc_off >= trunc_off) {
1338                         ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
1339                         ip->flags &= ~HAMMER_INODE_TRUNCATED;
1340                 }
1341         } else {
1342                 error = 0;
1343         }
1344
1345         /*
1346          * Now sync related records.  These will typically be directory
1347          * entries or delete-on-disk records.
1348          *
1349          * Not all records will be flushed, but clear XDIRTY anyway.  We
1350          * will set it again in the frontend hammer_flush_inode_done() 
1351          * if records remain.
1352          */
1353         if (error == 0) {
1354                 tmp_error = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
1355                                     hammer_sync_record_callback, &trans);
1356                 if (tmp_error < 0)
1357                         tmp_error = -tmp_error;
1358                 if (tmp_error)
1359                         error = tmp_error;
1360                 if (error == 0)
1361                         ip->sync_flags &= ~HAMMER_INODE_XDIRTY;
1362         }
1363
1364         /*
1365          * If we are deleting the inode the frontend had better not have
1366          * any active references on elements making up the inode.
1367          */
1368         if (error == 0 && ip->sync_ino_rec.ino_nlinks == 0 &&
1369                 RB_EMPTY(&ip->rec_tree)  &&
1370             (ip->sync_flags & HAMMER_INODE_DELETING) &&
1371             (ip->flags & HAMMER_INODE_DELETED) == 0) {
1372                 int count1 = 0;
1373
1374                 kprintf("Y");
1375                 ip->flags |= HAMMER_INODE_DELETED;
1376                 error = hammer_ip_delete_range_all(&trans, ip, &count1);
1377                 if (error == 0) {
1378                         ip->sync_flags &= ~HAMMER_INODE_DELETING;
1379                         ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
1380                         KKASSERT(RB_EMPTY(&ip->rec_tree));
1381
1382                         /*
1383                          * Set delete_tid in both the frontend and backend
1384                          * copy of the inode record.  The DELETED flag handles
1385                          * this, do not set RDIRTY.
1386                          */
1387                         ip->ino_rec.base.base.delete_tid = trans.tid;
1388                         ip->sync_ino_rec.base.base.delete_tid = trans.tid;
1389
1390                         /*
1391                          * Adjust the inode count in the volume header
1392                          */
1393                         hammer_modify_volume(&trans, trans.rootvol, NULL, 0);
1394                         --ip->hmp->rootvol->ondisk->vol0_stat_inodes;
1395                         hammer_modify_volume_done(trans.rootvol);
1396                 } else {
1397                         ip->flags &= ~HAMMER_INODE_DELETED;
1398                         Debugger("hammer_ip_delete_range_all errored");
1399                 }
1400         }
1401
1402         /*
1403          * Flush any queued BIOs.  These will just biodone() the IO's if
1404          * the inode has been deleted.
1405          */
1406         while ((bio = TAILQ_FIRST(&ip->bio_list)) != NULL) {
1407                 TAILQ_REMOVE(&ip->bio_list, bio, bio_act);
1408                 tmp_error = hammer_dowrite(&trans, ip, bio);
1409                 if (tmp_error)
1410                         error = tmp_error;
1411         }
1412         ip->sync_flags &= ~HAMMER_INODE_BUFS;
1413
1414         if (error)
1415                 Debugger("RB_SCAN errored");
1416
1417         /*
1418          * Now update the inode's on-disk inode-data and/or on-disk record.
1419          * DELETED and ONDISK are managed only in ip->flags.
1420          */
1421         switch(ip->flags & (HAMMER_INODE_DELETED | HAMMER_INODE_ONDISK)) {
1422         case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK:
1423                 /*
1424                  * If deleted and on-disk, don't set any additional flags.
1425          * The delete flag takes care of things.
1426                  *
1427                  * Clear flags which may have been set by the frontend.
1428                  */
1429                 ip->sync_flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
1430                                     HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
1431                                     HAMMER_INODE_DELETING);
1432                 break;
1433         case HAMMER_INODE_DELETED:
1434                 /*
1435                  * Take care of the case where a deleted inode was never
1436                  * flushed to the disk in the first place.
1437                  *
1438                  * Clear flags which may have been set by the frontend.
1439                  */
1440                 ip->sync_flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
1441                                     HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
1442                                     HAMMER_INODE_DELETING);
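                     /*
                      * Destroy any remaining in-memory records.  Each
                      * record gets a temporary reference (and must be the
                      * only reference), is marked deleted for both the
                      * frontend and the backend, and is then released,
                      * dropping the last reference so it is destroyed.
                      */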
1443                 while (RB_ROOT(&ip->rec_tree)) {
1444                         hammer_record_t record = RB_ROOT(&ip->rec_tree);
1445                         hammer_ref(&record->lock);
1446                         KKASSERT(record->lock.refs == 1);
1447                         record->flags |= HAMMER_RECF_DELETED_FE;
1448                         record->flags |= HAMMER_RECF_DELETED_BE;
1449                         hammer_rel_mem_record(record);
1450                 }
1451                 break;
1452         case HAMMER_INODE_ONDISK:
1453                 /*
1454                  * If already on-disk, do not set any additional flags.
1455                  */
1456                 break;
1457         default:
1458                 /*
1459                  * If not on-disk and not deleted, set both dirty flags
1460                  * to force an initial record to be written.  Also set
1461                  * the create_tid for the inode.
1462                  *
1463                  * Set create_tid in both the frontend and backend
1464                  * copy of the inode record.
1465                  */
1466                 ip->ino_rec.base.base.create_tid = trans.tid;
1467                 ip->sync_ino_rec.base.base.create_tid = trans.tid;
1468                 ip->sync_flags |= HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY;
1469                 break;
1470         }
1471
1472         /*
1473          * If RDIRTY or DDIRTY is set, write out a new record.  If the inode
1474          * is already on-disk the old record is marked as deleted.
1475          *
1476          * If DELETED is set hammer_update_inode() will delete the existing
1477          * record without writing out a new one.
1478          *
1479          * If *ONLY* the ITIMES flag is set we can update the record in-place.
1480          */
1481         if (ip->flags & HAMMER_INODE_DELETED) {
1482                 error = hammer_update_inode(&trans, ip);
1483         } else 
1484         if ((ip->sync_flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
1485                                HAMMER_INODE_ITIMES)) == HAMMER_INODE_ITIMES) {
1486                 error = hammer_update_itimes(&trans, ip);
1487         } else
1488         if (ip->sync_flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
1489                               HAMMER_INODE_ITIMES)) {
1490                 error = hammer_update_inode(&trans, ip);
1491         }
1492         if (error)
1493                 Debugger("hammer_update_itimes/inode errored");
1494
1495         /*
1496          * Save the TID we used to sync the inode with to make sure we
1497          * do not improperly reuse it.
1498          */
1499         hammer_done_transaction(&trans);
1500         return(error);
1501 }
1502
1503 /*
1504  * This routine is called when the OS is no longer actively referencing
1505  * the inode (but might still be keeping it cached), or when releasing
1506  * the last reference to an inode.
1507  *
1508  * At this point if the inode's nlinks count is zero we want to destroy
1509  * it, which may mean destroying it on-media too.
1510  */
1511 static int
1512 hammer_inode_unloadable_check(hammer_inode_t ip)
1513 {
1514         /*
1515          * If the inode is on-media and the link count is 0 we MUST delete
1516          * it on-media.  DELETING is a mod flag, DELETED is a state flag.
1517          */
1518         if (ip->ino_rec.ino_nlinks == 0 &&
1519             (ip->flags & (HAMMER_INODE_DELETING|HAMMER_INODE_DELETED)) == 0) {
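                     /*
                      * Toss any buffers and VM pages still cached on the
                      * vnode, then queue the on-media destruction:
                      * DELETING marks the inode for deletion and the
                      * truncation to offset 0 tells the flusher to destroy
                      * all of the inode's data blocks.
                      */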
1520                 if (ip->vp) {
1521                         vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
1522                         vnode_pager_setsize(ip->vp, 0);
1523                 }
1524                 ip->flags |= HAMMER_INODE_DELETING;
1525                 ip->flags |= HAMMER_INODE_TRUNCATED;
1526                 ip->trunc_off = 0;
1527         }
1528
1529         /*
1530          * If only one ref remains and the inode is not dirty, tell
1531          * the caller that it can dispose of the inode.
1532          */
1533         if (ip->lock.refs == 1 && (ip->flags & HAMMER_INODE_MODMASK) == 0)
1534                 return(1);
1535         return(0);
1536 }
1537
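     /*
      * If a re-flush of this inode has been requested (HAMMER_INODE_REFLUSH
      * is set), clear the flag and initiate another flush.  A temporary
      * reference is held on the inode across the flush call.
      */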
1538 void
1539 hammer_test_inode(hammer_inode_t ip)
1540 {
1541         if (ip->flags & HAMMER_INODE_REFLUSH) {
1542                 ip->flags &= ~HAMMER_INODE_REFLUSH;
1543                 hammer_ref(&ip->lock);
1544                 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1545                 hammer_rel_inode(ip, 0);
1546         }
1547 }
1548