1 /*
2  * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.74 2008/06/13 00:25:33 dillon Exp $
35  */
36
37 #include "hammer.h"
38 #include <vm/vm_extern.h>
39 #include <sys/buf.h>
40 #include <sys/buf2.h>
41
42 static int      hammer_unload_inode(struct hammer_inode *ip);
43 static void     hammer_flush_inode_core(hammer_inode_t ip, int flags);
44 static int      hammer_setup_child_callback(hammer_record_t rec, void *data);
45 static int      hammer_setup_parent_inodes(hammer_record_t record);
46 static void     hammer_inode_wakereclaims(hammer_inode_t ip);
47
48 #ifdef DEBUG_TRUNCATE
49 extern struct hammer_inode *HammerTruncIp;
50 #endif
51
52 /*
53  * The kernel is not actively referencing this vnode but is still holding
54  * it cached.
55  *
56  * This is called from the frontend.
57  */
58 int
59 hammer_vop_inactive(struct vop_inactive_args *ap)
60 {
61         struct hammer_inode *ip = VTOI(ap->a_vp);
62
63         /*
64          * Degenerate case
65          */
66         if (ip == NULL) {
67                 vrecycle(ap->a_vp);
68                 return(0);
69         }
70
71         /*
72          * If the inode no longer has visibility in the filesystem and is
73          * fairly clean, try to recycle it immediately.  This can deadlock
74          * in vfsync() if we aren't careful.
75          * 
76          * Do not queue the inode to the flusher if we still have visibility,
77          * otherwise namespace calls such as chmod will unnecessarily generate
78          * multiple inode updates.
79          */
80         hammer_inode_unloadable_check(ip, 0);
81         if (ip->ino_data.nlinks == 0) {
82                 if (ip->flags & HAMMER_INODE_MODMASK)
83                         hammer_flush_inode(ip, 0);
84                 else
85                         vrecycle(ap->a_vp);
86         }
87         return(0);
88 }
89
90 /*
91  * Release the vnode association.  This is typically (but not always)
92  * the last reference on the inode.
93  *
94  * Once the association is lost we are on our own with regards to
95  * flushing the inode.
96  */
97 int
98 hammer_vop_reclaim(struct vop_reclaim_args *ap)
99 {
100         struct hammer_reclaim reclaim;
101         struct hammer_inode *ip;
102         hammer_mount_t hmp;
103         struct vnode *vp;
104         int delay;
105
106         vp = ap->a_vp;
107
108         if ((ip = vp->v_data) != NULL) {
109                 hmp = ip->hmp;
110                 vp->v_data = NULL;
111                 ip->vp = NULL;
112
113                 /*
114                  * Set up our reclaim pipeline.  We only let so many detached
115                  * (and dirty) inodes build up before we start blocking.  Do
116                  * not bother tracking the immediate increment/decrement if
117                  * the inode is not actually dirty.
118                  *
119                  * When we block we don't care *which* inode has finished
120                  * reclaiming, as long as one does.
121                  */
122                 if ((ip->flags & HAMMER_INODE_RECLAIM) == 0 &&
123                     ((ip->flags|ip->sync_flags) & HAMMER_INODE_MODMASK)) {
124                         ++hammer_count_reclaiming;
125                         ++hmp->inode_reclaims;
126                         ip->flags |= HAMMER_INODE_RECLAIM;
127                         if (hmp->inode_reclaims > HAMMER_RECLAIM_PIPESIZE) {
128                                 reclaim.okydoky = 0;
129                                 TAILQ_INSERT_TAIL(&hmp->reclaim_list,
130                                                   &reclaim, entry);
131                         } else {
132                                 reclaim.okydoky = 1;
133                         }
134                 } else {
135                         reclaim.okydoky = 1;
136                 }
137                 hammer_rel_inode(ip, 1);
138
139                 /*
140                  * Reclaim pipeline.  We can't let too many reclaimed inodes
141                  * build-up in the flusher or the flusher loses its locality
142                  * build up in the flusher or the flusher loses its locality
143                  * of reference, or, worse, blows out our memory.  Once we have
144                  * imposed delay can be cut short if the flusher catches up
145                  * to us.
146                  */
147                 if (reclaim.okydoky == 0) {
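                            /*
                             * The imposed delay grows linearly with how far
                             * the reclaim backlog exceeds
                             * HAMMER_RECLAIM_PIPESIZE, reaching roughly one
                             * second (hz ticks) when the backlog is twice
                             * the pipe size, with a one-tick minimum.
                             */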
148                         delay = (hmp->inode_reclaims -
149                                  HAMMER_RECLAIM_PIPESIZE) * hz /
150                                 HAMMER_RECLAIM_PIPESIZE;
151                         if (delay <= 0)
152                                 delay = 1;
153                         hammer_flusher_async(hmp);
154                         if (reclaim.okydoky == 0) {
155                                 tsleep(&reclaim, 0, "hmrrcm", delay);
156                         }
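                            /*
                             * The wakeup path dequeues the entry and sets
                             * okydoky before waking us.  If okydoky is still
                             * zero the sleep timed out and our stack-local
                             * entry is still on the list, so remove it here.
                             */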
157                         if (reclaim.okydoky == 0) {
158                                 TAILQ_REMOVE(&hmp->reclaim_list, &reclaim,
159                                              entry);
160                         }
161                 }
162         }
163         return(0);
164 }
165
166 /*
167  * Return a locked vnode for the specified inode.  The inode must be
168  * referenced but NOT LOCKED on entry and will remain referenced on
169  * return.
170  *
171  * Called from the frontend.
172  */
173 int
174 hammer_get_vnode(struct hammer_inode *ip, struct vnode **vpp)
175 {
176         hammer_mount_t hmp;
177         struct vnode *vp;
178         int error = 0;
179
180         hmp = ip->hmp;
181
182         for (;;) {
183                 if ((vp = ip->vp) == NULL) {
184                         error = getnewvnode(VT_HAMMER, hmp->mp, vpp, 0, 0);
185                         if (error)
186                                 break;
187                         hammer_lock_ex(&ip->lock);
188                         if (ip->vp != NULL) {
189                                 hammer_unlock(&ip->lock);
190                                 vp->v_type = VBAD;
191                                 vx_put(vp);
192                                 continue;
193                         }
194                         hammer_ref(&ip->lock);
195                         vp = *vpp;
196                         ip->vp = vp;
197                         vp->v_type =
198                                 hammer_get_vnode_type(ip->ino_data.obj_type);
199
200                         hammer_inode_wakereclaims(ip);
201
202                         switch(ip->ino_data.obj_type) {
203                         case HAMMER_OBJTYPE_CDEV:
204                         case HAMMER_OBJTYPE_BDEV:
205                                 vp->v_ops = &hmp->mp->mnt_vn_spec_ops;
206                                 addaliasu(vp, ip->ino_data.rmajor,
207                                           ip->ino_data.rminor);
208                                 break;
209                         case HAMMER_OBJTYPE_FIFO:
210                                 vp->v_ops = &hmp->mp->mnt_vn_fifo_ops;
211                                 break;
212                         default:
213                                 break;
214                         }
215
216                         /*
217                          * Only mark as the root vnode if the ip is not
218                          * historical, otherwise the VFS cache will get
219                          * confused.  The other half of the special handling
220                          * is in hammer_vop_nlookupdotdot().
221                          */
222                         if (ip->obj_id == HAMMER_OBJID_ROOT &&
223                             ip->obj_asof == hmp->asof) {
224                                 vp->v_flag |= VROOT;
225                         }
226
227                         vp->v_data = (void *)ip;
228                         /* vnode locked by getnewvnode() */
229                         /* make related vnode dirty if inode dirty? */
230                         hammer_unlock(&ip->lock);
231                         if (vp->v_type == VREG)
232                                 vinitvmio(vp, ip->ino_data.size);
233                         break;
234                 }
235
236                 /*
237                  * loop if the vget fails (aka races), or if the vp
238                  * no longer matches ip->vp.
239                  */
240                 if (vget(vp, LK_EXCLUSIVE) == 0) {
241                         if (vp == ip->vp)
242                                 break;
243                         vput(vp);
244                 }
245         }
246         *vpp = vp;
247         return(error);
248 }
249
250 /*
251  * Acquire a HAMMER inode.  The returned inode is not locked.  These functions
252  * do not attach or detach the related vnode (use hammer_get_vnode() for
253  * that).
254  *
255  * The flags argument is only applied for newly created inodes, and only
256  * certain flags are inherited.
257  *
258  * Called from the frontend.
259  */
260 struct hammer_inode *
261 hammer_get_inode(hammer_transaction_t trans, struct hammer_node **cache,
262                  u_int64_t obj_id, hammer_tid_t asof, int flags, int *errorp)
263 {
264         hammer_mount_t hmp = trans->hmp;
265         struct hammer_inode_info iinfo;
266         struct hammer_cursor cursor;
267         struct hammer_inode *ip;
268
269         /*
270          * Determine if we already have an inode cached.  If we do then
271          * we are golden.
272          */
273         iinfo.obj_id = obj_id;
274         iinfo.obj_asof = asof;
275 loop:
276         ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
277         if (ip) {
278                 hammer_ref(&ip->lock);
279                 *errorp = 0;
280                 return(ip);
281         }
282
283         /*
284          * Allocate a new inode structure and deal with races later.
285          */
286         ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
287         ++hammer_count_inodes;
288         ++hmp->count_inodes;
289         ip->obj_id = obj_id;
290         ip->obj_asof = iinfo.obj_asof;
291         ip->hmp = hmp;
292         ip->flags = flags & HAMMER_INODE_RO;
293         if (hmp->ronly)
294                 ip->flags |= HAMMER_INODE_RO;
295         ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;   /* no truncation pending */
296         RB_INIT(&ip->rec_tree);
297         TAILQ_INIT(&ip->target_list);
298
299         /*
300          * Locate the on-disk inode.
301          */
302 retry:
303         hammer_init_cursor(trans, &cursor, cache, NULL);
304         cursor.key_beg.localization = HAMMER_LOCALIZE_INODE;
305         cursor.key_beg.obj_id = ip->obj_id;
306         cursor.key_beg.key = 0;
307         cursor.key_beg.create_tid = 0;
308         cursor.key_beg.delete_tid = 0;
309         cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
310         cursor.key_beg.obj_type = 0;
311         cursor.asof = iinfo.obj_asof;
312         cursor.flags = HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_GET_DATA |
313                        HAMMER_CURSOR_ASOF;
314
315         *errorp = hammer_btree_lookup(&cursor);
316         if (*errorp == EDEADLK) {
317                 hammer_done_cursor(&cursor);
318                 goto retry;
319         }
320
321         /*
322          * On success the B-Tree lookup will hold the appropriate
323          * buffer cache buffers and provide a pointer to the requested
324          * information.  Copy the information to the in-memory inode
325          * and cache the B-Tree node to improve future operations.
326          */
327         if (*errorp == 0) {
328                 ip->ino_leaf = cursor.node->ondisk->elms[cursor.index].leaf;
329                 ip->ino_data = cursor.data->inode;
330                 hammer_cache_node(cursor.node, &ip->cache[0]);
331                 if (cache)
332                         hammer_cache_node(cursor.node, cache);
333         }
334
335         /*
336          * On success load the inode's record and data and insert the
337          * inode into the B-Tree.  It is possible to race another lookup
338          * inserting the same inode, so deal with that condition too.
339          *
340          * The cursor's locked node interlocks against others creating and
341          * destroying ip while we were blocked.
342          */
343         if (*errorp == 0) {
344                 hammer_ref(&ip->lock);
345                 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
346                         hammer_uncache_node(&ip->cache[0]);
347                         hammer_uncache_node(&ip->cache[1]);
348                         KKASSERT(ip->lock.refs == 1);
349                         --hammer_count_inodes;
350                         --hmp->count_inodes;
351                         kfree(ip, M_HAMMER);
352                         hammer_done_cursor(&cursor);
353                         goto loop;
354                 }
355                 ip->flags |= HAMMER_INODE_ONDISK;
356         } else {
357                 /*
358                  * Do not panic on read-only accesses which fail, particularly
359                  * historical accesses where the snapshot might not have
360                  * complete connectivity.
361                  */
362                 if ((flags & HAMMER_INODE_RO) == 0) {
363                         kprintf("hammer_get_inode: failed ip %p obj_id %016llx cursor %p error %d\n",
364                                 ip, ip->obj_id, &cursor, *errorp);
365                         Debugger("x");
366                 }
367                 if (ip->flags & HAMMER_INODE_RSV_INODES) {
368                         ip->flags &= ~HAMMER_INODE_RSV_INODES; /* sanity */
369                         --hmp->rsv_inodes;
370                 }
371                 hmp->rsv_databufs -= ip->rsv_databufs;
372                 ip->rsv_databufs = 0;                          /* sanity */
373
374                 --hammer_count_inodes;
375                 --hmp->count_inodes;
376                 kfree(ip, M_HAMMER);
377                 ip = NULL;
378         }
379         hammer_done_cursor(&cursor);
380         return (ip);
381 }
382
383 /*
384  * Create a new filesystem object, returning the inode in *ipp.  The
385  * returned inode will be referenced.
386  *
387  * The inode is created in-memory.
388  */
389 int
390 hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
391                     struct ucred *cred, hammer_inode_t dip,
392                     struct hammer_inode **ipp)
393 {
394         hammer_mount_t hmp;
395         hammer_inode_t ip;
396         uid_t xuid;
397
398         hmp = trans->hmp;
399         ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
400         ++hammer_count_inodes;
401         ++hmp->count_inodes;
402         ip->obj_id = hammer_alloc_objid(trans, dip);
403         KKASSERT(ip->obj_id != 0);
404         ip->obj_asof = hmp->asof;
405         ip->hmp = hmp;
406         ip->flush_state = HAMMER_FST_IDLE;
407         ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES;
408
409         ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;   /* no truncation pending */
410         RB_INIT(&ip->rec_tree);
411         TAILQ_INIT(&ip->target_list);
412
413         ip->ino_leaf.atime = trans->time;
414         ip->ino_data.mtime = trans->time;
415         ip->ino_data.size = 0;
416         ip->ino_data.nlinks = 0;
417
418         /*
419          * A nohistory designator on the parent directory is inherited by
420          * the child.
421          */
422         ip->ino_data.uflags = dip->ino_data.uflags &
423                               (SF_NOHISTORY|UF_NOHISTORY|UF_NODUMP);
424
425         ip->ino_leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
426         ip->ino_leaf.base.localization = HAMMER_LOCALIZE_INODE;
427         ip->ino_leaf.base.obj_id = ip->obj_id;
428         ip->ino_leaf.base.key = 0;
429         ip->ino_leaf.base.create_tid = 0;
430         ip->ino_leaf.base.delete_tid = 0;
431         ip->ino_leaf.base.rec_type = HAMMER_RECTYPE_INODE;
432         ip->ino_leaf.base.obj_type = hammer_get_obj_type(vap->va_type);
433
434         ip->ino_data.obj_type = ip->ino_leaf.base.obj_type;
435         ip->ino_data.version = HAMMER_INODE_DATA_VERSION;
436         ip->ino_data.mode = vap->va_mode;
437         ip->ino_data.ctime = trans->time;
438         ip->ino_data.parent_obj_id = (dip) ? dip->ino_leaf.base.obj_id : 0;
439
440         switch(ip->ino_leaf.base.obj_type) {
441         case HAMMER_OBJTYPE_CDEV:
442         case HAMMER_OBJTYPE_BDEV:
443                 ip->ino_data.rmajor = vap->va_rmajor;
444                 ip->ino_data.rminor = vap->va_rminor;
445                 break;
446         default:
447                 break;
448         }
449
450         /*
451          * Calculate default uid/gid and overwrite with information from
452          * the vap.
453          */
454         xuid = hammer_to_unix_xid(&dip->ino_data.uid);
455         xuid = vop_helper_create_uid(hmp->mp, dip->ino_data.mode, xuid, cred,
456                                      &vap->va_mode);
457         ip->ino_data.mode = vap->va_mode;
458
459         if (vap->va_vaflags & VA_UID_UUID_VALID)
460                 ip->ino_data.uid = vap->va_uid_uuid;
461         else if (vap->va_uid != (uid_t)VNOVAL)
462                 hammer_guid_to_uuid(&ip->ino_data.uid, vap->va_uid);
463         else
464                 hammer_guid_to_uuid(&ip->ino_data.uid, xuid);
465
466         if (vap->va_vaflags & VA_GID_UUID_VALID)
467                 ip->ino_data.gid = vap->va_gid_uuid;
468         else if (vap->va_gid != (gid_t)VNOVAL)
469                 hammer_guid_to_uuid(&ip->ino_data.gid, vap->va_gid);
470         else
471                 ip->ino_data.gid = dip->ino_data.gid;
472
473         hammer_ref(&ip->lock);
474         if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
475                 hammer_unref(&ip->lock);
476                 panic("hammer_create_inode: duplicate obj_id %llx", ip->obj_id);
477         }
478         *ipp = ip;
479         return(0);
480 }
481
482 /*
483  * Called by hammer_sync_inode().
484  */
485 static int
486 hammer_update_inode(hammer_cursor_t cursor, hammer_inode_t ip)
487 {
488         hammer_transaction_t trans = cursor->trans;
489         hammer_record_t record;
490         int error;
491
492 retry:
493         error = 0;
494
495         /*
496          * If the inode has a presence on-disk then locate it and mark
497          * it deleted, setting DELONDISK.
498          *
499          * The record may or may not be physically deleted, depending on
500          * the retention policy.
501          */
502         if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
503             HAMMER_INODE_ONDISK) {
504                 hammer_normalize_cursor(cursor);
505                 cursor->key_beg.localization = HAMMER_LOCALIZE_INODE;
506                 cursor->key_beg.obj_id = ip->obj_id;
507                 cursor->key_beg.key = 0;
508                 cursor->key_beg.create_tid = 0;
509                 cursor->key_beg.delete_tid = 0;
510                 cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE;
511                 cursor->key_beg.obj_type = 0;
512                 cursor->asof = ip->obj_asof;
513                 cursor->flags &= ~HAMMER_CURSOR_INITMASK;
514                 cursor->flags |= HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_ASOF;
515                 cursor->flags |= HAMMER_CURSOR_BACKEND;
516
517                 error = hammer_btree_lookup(cursor);
518                 if (hammer_debug_inode)
519                         kprintf("IPDEL %p %08x %d", ip, ip->flags, error);
520                 if (error) {
521                         kprintf("error %d\n", error);
522                         Debugger("hammer_update_inode");
523                 }
524
525                 if (error == 0) {
526                         error = hammer_ip_delete_record(cursor, ip, trans->tid);
527                         if (hammer_debug_inode)
528                                 kprintf(" error %d\n", error);
529                         if (error && error != EDEADLK) {
530                                 kprintf("error %d\n", error);
531                                 Debugger("hammer_update_inode2");
532                         }
533                         if (error == 0) {
534                                 ip->flags |= HAMMER_INODE_DELONDISK;
535                         }
536                         if (cursor->node)
537                                 hammer_cache_node(cursor->node, &ip->cache[0]);
538                 }
539                 if (error == EDEADLK) {
540                         hammer_done_cursor(cursor);
541                         error = hammer_init_cursor(trans, cursor,
542                                                    &ip->cache[0], ip);
543                         if (hammer_debug_inode)
544                                 kprintf("IPDED %p %d\n", ip, error);
545                         if (error == 0)
546                                 goto retry;
547                 }
548         }
549
550         /*
551          * Ok, write out the initial record or a new record (after deleting
552          * the old one), unless the DELETED flag is set.  This routine will
553          * clear DELONDISK if it writes out a record.
554          *
555          * Update our inode statistics if this is the first application of
556          * the inode on-disk.
557          */
558         if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) {
559                 /*
560                  * Generate a record and write it to the media
561                  */
562                 record = hammer_alloc_mem_record(ip, 0);
563                 record->type = HAMMER_MEM_RECORD_INODE;
564                 record->flush_state = HAMMER_FST_FLUSH;
565                 record->leaf = ip->sync_ino_leaf;
566                 record->leaf.base.create_tid = trans->tid;
567                 record->leaf.data_len = sizeof(ip->sync_ino_data);
568                 record->data = (void *)&ip->sync_ino_data;
569                 record->flags |= HAMMER_RECF_INTERLOCK_BE;
570                 for (;;) {
571                         error = hammer_ip_sync_record_cursor(cursor, record);
572                         if (hammer_debug_inode)
573                                 kprintf("GENREC %p rec %08x %d\n",      
574                                         ip, record->flags, error);
575                         if (error != EDEADLK)
576                                 break;
577                         hammer_done_cursor(cursor);
578                         error = hammer_init_cursor(trans, cursor,
579                                                    &ip->cache[0], ip);
580                         if (hammer_debug_inode)
581                                 kprintf("GENREC reinit %d\n", error);
582                         if (error)
583                                 break;
584                 }
585                 if (error) {
586                         kprintf("error %d\n", error);
587                         Debugger("hammer_update_inode3");
588                 }
589
590                 /*
591                  * The record isn't managed by the inode's record tree,
592                  * destroy it whether we succeed or fail.
593                  */
594                 record->flags &= ~HAMMER_RECF_INTERLOCK_BE;
595                 record->flags |= HAMMER_RECF_DELETED_FE;
596                 record->flush_state = HAMMER_FST_IDLE;
597                 hammer_rel_mem_record(record);
598
599                 /*
600                  * Finish up.
601                  */
602                 if (error == 0) {
603                         if (hammer_debug_inode)
604                                 kprintf("CLEANDELOND %p %08x\n", ip, ip->flags);
605                         ip->sync_flags &= ~(HAMMER_INODE_DDIRTY |
606                                             HAMMER_INODE_ITIMES);
607                         ip->flags &= ~HAMMER_INODE_DELONDISK;
608
609                         /*
610                          * Root volume count of inodes
611                          */
612                         if ((ip->flags & HAMMER_INODE_ONDISK) == 0) {
613                                 hammer_modify_volume_field(trans,
614                                                            trans->rootvol,
615                                                            vol0_stat_inodes);
616                                 ++ip->hmp->rootvol->ondisk->vol0_stat_inodes;
617                                 hammer_modify_volume_done(trans->rootvol);
618                                 ip->flags |= HAMMER_INODE_ONDISK;
619                                 if (hammer_debug_inode)
620                                         kprintf("NOWONDISK %p\n", ip);
621                         }
622                 }
623         }
624
625         /*
626          * If the inode has been destroyed, clean out any left-over flags
627          * that may have been set by the frontend.
628          */
629         if (error == 0 && (ip->flags & HAMMER_INODE_DELETED)) { 
630                 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY |
631                                     HAMMER_INODE_ITIMES);
632         }
633         return(error);
634 }
635
636 /*
637  * Update only the itimes fields.  This is done non-historically.  The
638  * record is updated in-place on the disk.
639  */
640 static int
641 hammer_update_itimes(hammer_cursor_t cursor, hammer_inode_t ip)
642 {
643         hammer_transaction_t trans = cursor->trans;
644         struct hammer_btree_leaf_elm *leaf;
645         int error;
646
647 retry:
648         error = 0;
649         if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
650             HAMMER_INODE_ONDISK) {
651                 hammer_normalize_cursor(cursor);
652                 cursor->key_beg.localization = HAMMER_LOCALIZE_INODE;
653                 cursor->key_beg.obj_id = ip->obj_id;
654                 cursor->key_beg.key = 0;
655                 cursor->key_beg.create_tid = 0;
656                 cursor->key_beg.delete_tid = 0;
657                 cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE;
658                 cursor->key_beg.obj_type = 0;
659                 cursor->asof = ip->obj_asof;
660                 cursor->flags &= ~HAMMER_CURSOR_INITMASK;
661                 cursor->flags |= HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_ASOF;
662                 cursor->flags |= HAMMER_CURSOR_BACKEND;
663
664                 error = hammer_btree_lookup(cursor);
665                 if (error) {
666                         kprintf("error %d\n", error);
667                         Debugger("hammer_update_itimes1");
668                 }
669                 if (error == 0) {
670                         /*
671                          * Do not generate UNDO records for atime updates.
672                          */
673                         leaf = cursor->leaf;
674                         hammer_modify_node(trans, cursor->node, 
675                                            &leaf->atime, sizeof(leaf->atime));
676                         leaf->atime = ip->sync_ino_leaf.atime;
677                         hammer_modify_node_done(cursor->node);
678                         /*rec->ino_mtime = ip->sync_ino_rec.ino_mtime;*/
679                         ip->sync_flags &= ~HAMMER_INODE_ITIMES;
680                         /* XXX recalculate crc */
681                         hammer_cache_node(cursor->node, &ip->cache[0]);
682                 }
683                 if (error == EDEADLK) {
684                         hammer_done_cursor(cursor);
685                         error = hammer_init_cursor(trans, cursor,
686                                                    &ip->cache[0], ip);
687                         if (error == 0)
688                                 goto retry;
689                 }
690         }
691         return(error);
692 }
693
694 /*
695  * Release a reference on an inode, flush as requested.
696  *
697  * On the last reference we queue the inode to the flusher for its final
698  * disposition.
699  */
700 void
701 hammer_rel_inode(struct hammer_inode *ip, int flush)
702 {
703         hammer_mount_t hmp = ip->hmp;
704
705         /*
706          * Handle disposition when dropping the last ref.
707          */
708         for (;;) {
709                 if (ip->lock.refs == 1) {
710                         /*
711                          * Determine whether on-disk action is needed for
712                          * the inode's final disposition.
713                          */
714                         KKASSERT(ip->vp == NULL);
715                         hammer_inode_unloadable_check(ip, 0);
716                         if (ip->flags & HAMMER_INODE_MODMASK) {
717                                 if (hmp->rsv_inodes > desiredvnodes) {
718                                         hammer_flush_inode(ip,
719                                                            HAMMER_FLUSH_SIGNAL);
720                                 } else {
721                                         hammer_flush_inode(ip, 0);
722                                 }
723                         } else if (ip->lock.refs == 1) {
724                                 hammer_unload_inode(ip);
725                                 break;
726                         }
727                 } else {
728                         if (flush)
729                                 hammer_flush_inode(ip, 0);
730
731                         /*
732                          * The inode still has multiple refs, try to drop
733                          * one ref.
734                          */
735                         KKASSERT(ip->lock.refs >= 1);
736                         if (ip->lock.refs > 1) {
737                                 hammer_unref(&ip->lock);
738                                 break;
739                         }
740                 }
741         }
742 }
743
744 /*
745  * Unload and destroy the specified inode.  Must be called with one remaining
746  * reference.  The reference is disposed of.
747  *
748  * This can only be called in the context of the flusher.
749  */
750 static int
751 hammer_unload_inode(struct hammer_inode *ip)
752 {
753         hammer_mount_t hmp = ip->hmp;
754
755         KASSERT(ip->lock.refs == 1,
756                 ("hammer_unload_inode: %d refs\n", ip->lock.refs));
757         KKASSERT(ip->vp == NULL);
758         KKASSERT(ip->flush_state == HAMMER_FST_IDLE);
759         KKASSERT(ip->cursor_ip_refs == 0);
760         KKASSERT(ip->lock.lockcount == 0);
761         KKASSERT((ip->flags & HAMMER_INODE_MODMASK) == 0);
762
763         KKASSERT(RB_EMPTY(&ip->rec_tree));
764         KKASSERT(TAILQ_EMPTY(&ip->target_list));
765
766         RB_REMOVE(hammer_ino_rb_tree, &hmp->rb_inos_root, ip);
767
768         hammer_uncache_node(&ip->cache[0]);
769         hammer_uncache_node(&ip->cache[1]);
770         if (ip->objid_cache)
771                 hammer_clear_objid(ip);
772         --hammer_count_inodes;
773         --hmp->count_inodes;
774
775         hammer_inode_wakereclaims(ip);
776         kfree(ip, M_HAMMER);
777
778         return(0);
779 }
780
781 /*
782  * Called on mount -u when switching from RW to RO or vice versa.  Adjust
783  * the read-only flag for cached inodes.
784  *
785  * This routine is called from a RB_SCAN().
786  */
787 int
788 hammer_reload_inode(hammer_inode_t ip, void *arg __unused)
789 {
790         hammer_mount_t hmp = ip->hmp;
791
792         if (hmp->ronly || hmp->asof != HAMMER_MAX_TID)
793                 ip->flags |= HAMMER_INODE_RO;
794         else
795                 ip->flags &= ~HAMMER_INODE_RO;
796         return(0);
797 }
798
799 /*
800  * A transaction has modified an inode, requiring updates as specified by
801  * the passed flags.
802  *
803  * HAMMER_INODE_DDIRTY: Inode data has been updated
804  * HAMMER_INODE_XDIRTY: Dirty in-memory records
805  * HAMMER_INODE_BUFS:   Dirty buffer cache buffers
806  * HAMMER_INODE_DELETED: Inode record/data must be deleted
807  * HAMMER_INODE_ITIMES: mtime/atime has been updated
808  */
809 void
810 hammer_modify_inode(hammer_inode_t ip, int flags)
811 {
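            /*
             * A read-only inode may not take on any of the modification flags.
             */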
812         KKASSERT ((ip->flags & HAMMER_INODE_RO) == 0 ||
813                   (flags & (HAMMER_INODE_DDIRTY |
814                             HAMMER_INODE_XDIRTY | HAMMER_INODE_BUFS |
815                             HAMMER_INODE_DELETED | HAMMER_INODE_ITIMES)) == 0);
816         if ((ip->flags & HAMMER_INODE_RSV_INODES) == 0) {
817                 ip->flags |= HAMMER_INODE_RSV_INODES;
818                 ++ip->hmp->rsv_inodes;
819         }
820
821         ip->flags |= flags;
822 }
823
824 /*
825  * Request that an inode be flushed.  This whole mess cannot block and may
826  * recurse (if not synchronous).  Once requested HAMMER will attempt to
827  * actively flush the inode until the flush can be done.
828  *
829  * The inode may already be flushing, or may be in a setup state.  We can
830  * place the inode in a flushing state if it is currently idle and flag it
831  * to reflush if it is currently flushing.
832  *
833  * If the HAMMER_FLUSH_SYNCHRONOUS flag is specified we will attempt to
834  * flush the inode synchronously using the caller's context.
835  */
836 void
837 hammer_flush_inode(hammer_inode_t ip, int flags)
838 {
839         hammer_record_t depend;
840         int r, good;
841
842         /*
843          * Trivial 'nothing to flush' case.  If the inode is in a SETUP
844          * state we have to put it back into an IDLE state so we can
845          * drop the extra ref.
846          */
847         if ((ip->flags & HAMMER_INODE_MODMASK) == 0) {
848                 if (ip->flush_state == HAMMER_FST_SETUP) {
849                         ip->flush_state = HAMMER_FST_IDLE;
850                         hammer_rel_inode(ip, 0);
851                 }
852                 return;
853         }
854
855         /*
856          * Our flush action will depend on the current state.
857          */
858         switch(ip->flush_state) {
859         case HAMMER_FST_IDLE:
860                 /*
861                  * We have no dependencies and can flush immediately.  Some of
862                  * our children may not be flushable so we have to re-test
863                  * with that additional knowledge.
864                  */
865                 hammer_flush_inode_core(ip, flags);
866                 break;
867         case HAMMER_FST_SETUP:
868                 /*
869                  * Recurse upwards through dependencies via target_list
870                  * and start their flusher actions going if possible.
871                  *
872                  * 'good' is our connectivity.  -1 means we have none and
873                  * can't flush, 0 means there weren't any dependencies, and
874                  * 1 means we have good connectivity.
875                  */
876                 good = 0;
877                 TAILQ_FOREACH(depend, &ip->target_list, target_entry) {
878                         r = hammer_setup_parent_inodes(depend);
879                         if (r < 0 && good == 0)
880                                 good = -1;
881                         if (r > 0)
882                                 good = 1;
883                 }
884
885                 /*
886                  * We can continue if good >= 0.  Determine how many records
887                  * under our inode can be flushed (and mark them).
888                  */
889                 if (good >= 0) {
890                         hammer_flush_inode_core(ip, flags);
891                 } else {
892                         ip->flags |= HAMMER_INODE_REFLUSH;
893                         if (flags & HAMMER_FLUSH_SIGNAL) {
894                                 ip->flags |= HAMMER_INODE_RESIGNAL;
895                                 hammer_flusher_async(ip->hmp);
896                         }
897                 }
898                 break;
899         default:
900                 /*
901                  * We are already flushing, flag the inode to reflush
902                  * if needed after it completes its current flush.
903                  */
904                 if ((ip->flags & HAMMER_INODE_REFLUSH) == 0)
905                         ip->flags |= HAMMER_INODE_REFLUSH;
906                 if (flags & HAMMER_FLUSH_SIGNAL) {
907                         ip->flags |= HAMMER_INODE_RESIGNAL;
908                         hammer_flusher_async(ip->hmp);
909                 }
910                 break;
911         }
912 }
913
914 /*
915  * We are asked to recurse upwards and convert the record from SETUP
916  * to FLUSH if possible.  record->ip is a parent of the caller's inode,
917  * and record->target_ip is the caller's inode.
918  *
919  * Return 1 if the record gives us connectivity
920  *
921  * Return 0 if the record is not relevant 
922  *
923  * Return -1 if we can't resolve the dependency and there is no connectivity.
924  */
925 static int
926 hammer_setup_parent_inodes(hammer_record_t record)
927 {
928         hammer_mount_t hmp = record->ip->hmp;
929         hammer_record_t depend;
930         hammer_inode_t ip;
931         int r, good;
932
933         KKASSERT(record->flush_state != HAMMER_FST_IDLE);
934         ip = record->ip;
935
936         /*
937          * If the record is already flushing, is it in our flush group?
938          *
939          * If it is in our flush group but it is a general record or a 
940          * delete-on-disk, it does not improve our connectivity (return 0),
941          * and if the target inode is not trying to destroy itself we can't
942          * allow the operation yet anyway (the second return -1).
943          */
944         if (record->flush_state == HAMMER_FST_FLUSH) {
945                 if (record->flush_group != hmp->flusher.next) {
946                         ip->flags |= HAMMER_INODE_REFLUSH;
947                         return(-1);
948                 }
949                 if (record->type == HAMMER_MEM_RECORD_ADD)
950                         return(1);
951                 /* GENERAL or DEL */
952                 return(0);
953         }
954
955         /*
956          * It must be a setup record.  Try to resolve the setup dependencies
957          * by recursing upwards so we can place ip on the flush list.
958          */
959         KKASSERT(record->flush_state == HAMMER_FST_SETUP);
960
961         good = 0;
962         TAILQ_FOREACH(depend, &ip->target_list, target_entry) {
963                 r = hammer_setup_parent_inodes(depend);
964                 if (r < 0 && good == 0)
965                         good = -1;
966                 if (r > 0)
967                         good = 1;
968         }
969
970         /*
971          * We can't flush ip because it has no connectivity (XXX also check
972          * nlinks for pre-existing connectivity!).  Flag it so any resolution
973          * recurses back down.
974          */
975         if (good < 0) {
976                 ip->flags |= HAMMER_INODE_REFLUSH;
977                 return(good);
978         }
979
980         /*
981          * We are go, place the parent inode in a flushing state so we can
982          * place its record in a flushing state.  Note that the parent
983          * may already be flushing.  The record must be in the same flush
984          * group as the parent.
985          */
986         if (ip->flush_state != HAMMER_FST_FLUSH)
987                 hammer_flush_inode_core(ip, HAMMER_FLUSH_RECURSION);
988         KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
989         KKASSERT(record->flush_state == HAMMER_FST_SETUP);
990
991 #if 0
992         if (record->type == HAMMER_MEM_RECORD_DEL &&
993             (record->target_ip->flags & (HAMMER_INODE_DELETED|HAMMER_INODE_DELONDISK)) == 0) {
994                 /*
995                  * Regardless of flushing state we cannot sync this path if the
996                  * record represents a delete-on-disk but the target inode
997                  * is not ready to sync its own deletion.
998                  *
999                  * XXX need to count effective nlinks to determine whether
1000                  * the flush is ok, otherwise removing a hardlink will
1001                  * just leave the DEL record to rot.
1002                  */
1003                 record->target_ip->flags |= HAMMER_INODE_REFLUSH;
1004                 return(-1);
1005         } else
1006 #endif
1007         if (ip->flush_group == ip->hmp->flusher.next) {
1008                 /*
1009                  * This is the record we wanted to synchronize.
1010                  */
1011                 record->flush_state = HAMMER_FST_FLUSH;
1012                 record->flush_group = ip->flush_group;
1013                 hammer_ref(&record->lock);
1014                 if (record->type == HAMMER_MEM_RECORD_ADD)
1015                         return(1);
1016
1017                 /*
1018                  * A general or delete-on-disk record does not contribute
1019                  * to our visibility.  We can still flush it, however.
1020                  */
1021                 return(0);
1022         } else {
1023                 /*
1024                  * We couldn't resolve the dependencies; request that the
1025                  * inode be flushed when the dependencies can be resolved.
1026                  */
1027                 ip->flags |= HAMMER_INODE_REFLUSH;
1028                 return(-1);
1029         }
1030 }
1031
1032 /*
1033  * This is the core routine placing an inode into the FST_FLUSH state.
1034  */
1035 static void
1036 hammer_flush_inode_core(hammer_inode_t ip, int flags)
1037 {
1038         int go_count;
1039
1040         /*
1041          * Set flush state and prevent the flusher from cycling into
1042          * the next flush group.  Do not place the ip on the list yet.
1043          * Inodes not in the idle state get an extra reference.
1044          */
1045         KKASSERT(ip->flush_state != HAMMER_FST_FLUSH);
1046         if (ip->flush_state == HAMMER_FST_IDLE)
1047                 hammer_ref(&ip->lock);
1048         ip->flush_state = HAMMER_FST_FLUSH;
1049         ip->flush_group = ip->hmp->flusher.next;
1050         ++ip->hmp->flusher.group_lock;
1051         ++ip->hmp->count_iqueued;
1052         ++hammer_count_iqueued;
1053
1054         /*
1055          * We need to be able to vfsync/truncate from the backend.
1056          */
1057         KKASSERT((ip->flags & HAMMER_INODE_VHELD) == 0);
1058         if (ip->vp && (ip->vp->v_flag & VINACTIVE) == 0) {
1059                 ip->flags |= HAMMER_INODE_VHELD;
1060                 vref(ip->vp);
1061         }
1062
1063         /*
1064          * Figure out how many in-memory records we can actually flush
1065          * (not including inode meta-data, buffers, etc).
1066          */
1067         if (flags & HAMMER_FLUSH_RECURSION) {
1068                 go_count = 1;
1069         } else {
1070                 go_count = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
1071                                    hammer_setup_child_callback, NULL);
1072         }
1073
1074         /*
1075          * This is a more involved test that includes go_count.  If we
1076          * can't flush, flag the inode and return.  If go_count is 0 we
1077          * were unable to flush any records in our rec_tree and
1078          * must ignore the XDIRTY flag.
1079          */
1080         if (go_count == 0) {
1081                 if ((ip->flags & HAMMER_INODE_MODMASK_NOXDIRTY) == 0) {
1082                         ip->flags |= HAMMER_INODE_REFLUSH;
1083
1084                         --ip->hmp->count_iqueued;
1085                         --hammer_count_iqueued;
1086
1087                         ip->flush_state = HAMMER_FST_SETUP;
1088                         if (ip->flags & HAMMER_INODE_VHELD) {
1089                                 ip->flags &= ~HAMMER_INODE_VHELD;
1090                                 vrele(ip->vp);
1091                         }
1092                         if (flags & HAMMER_FLUSH_SIGNAL) {
1093                                 ip->flags |= HAMMER_INODE_RESIGNAL;
1094                                 hammer_flusher_async(ip->hmp);
1095                         }
1096                         if (--ip->hmp->flusher.group_lock == 0)
1097                                 wakeup(&ip->hmp->flusher.group_lock);
1098                         return;
1099                 }
1100         }
1101
1102         /*
1103          * Snapshot the state of the inode for the backend flusher.
1104          *
1105          * The truncation must be retained in the frontend until after
1106          * we've actually performed the record deletion.
1107          *
1108          * NOTE: The DELETING flag is a mod flag, but it is also sticky,
1109          * and stays in ip->flags.  Once set, it stays set until the
1110          * inode is destroyed.
1111          */
1112         ip->sync_flags = (ip->flags & HAMMER_INODE_MODMASK);
1113         ip->sync_trunc_off = ip->trunc_off;
1114         ip->sync_ino_leaf = ip->ino_leaf;
1115         ip->sync_ino_data = ip->ino_data;
1116         ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
1117         ip->flags &= ~HAMMER_INODE_MODMASK;
1118 #ifdef DEBUG_TRUNCATE
1119         if ((ip->sync_flags & HAMMER_INODE_TRUNCATED) && ip == HammerTruncIp)
1120                 kprintf("truncateS %016llx\n", ip->sync_trunc_off);
1121 #endif
1122
1123         /*
1124          * The flusher list inherits our inode and reference.
1125          */
1126         TAILQ_INSERT_TAIL(&ip->hmp->flush_list, ip, flush_entry);
1127         if (--ip->hmp->flusher.group_lock == 0)
1128                 wakeup(&ip->hmp->flusher.group_lock);
1129
1130         if (flags & HAMMER_FLUSH_SIGNAL) {
1131                 hammer_flusher_async(ip->hmp);
1132         }
1133 }
1134
1135 /*
1136  * Callback for scan of ip->rec_tree.  Try to include each record in our
1137  * flush.  ip->flush_group has been set but the inode has not yet been
1138  * moved into a flushing state.
1139  *
1140  * If we get stuck on a record we have to set HAMMER_INODE_REFLUSH on
1141  * both inodes.
1142  *
1143  * We return 1 for any record placed or found in FST_FLUSH, which prevents
1144  * the caller from shortcutting the flush.
1145  */
1146 static int
1147 hammer_setup_child_callback(hammer_record_t rec, void *data)
1148 {
1149         hammer_inode_t target_ip;
1150         hammer_inode_t ip;
1151         int r;
1152
1153         /*
1154          * Deleted records are ignored.  Note that the flush detects deleted
1155          * front-end records at multiple points to deal with races.  This is
1156          * just the first line of defense.  The only time DELETED_FE cannot
1157          * be set is when HAMMER_RECF_INTERLOCK_BE is set. 
1158          *
1159          * Don't get confused between record deletion and, say, directory
1160          * entry deletion.  The deletion of a directory entry that is on
1161          * the media has nothing to do with the record deletion flags.
1162          */
1163         if (rec->flags & (HAMMER_RECF_DELETED_FE|HAMMER_RECF_DELETED_BE))
1164                 return(0);
1165
1166         /*
1167          * If the record is in an idle state it has no dependencies and
1168          * can be flushed.
1169          */
1170         ip = rec->ip;
1171         r = 0;
1172
1173         switch(rec->flush_state) {
1174         case HAMMER_FST_IDLE:
1175                 /*
1176                  * Record has no setup dependency, we can flush it.
1177                  */
1178                 KKASSERT(rec->target_ip == NULL);
1179                 rec->flush_state = HAMMER_FST_FLUSH;
1180                 rec->flush_group = ip->flush_group;
1181                 hammer_ref(&rec->lock);
1182                 r = 1;
1183                 break;
1184         case HAMMER_FST_SETUP:
1185                 /*
1186                  * Record has a setup dependency.  Try to include the
1187                  * target ip in the flush. 
1188                  *
1189                  * We have to be careful here, if we do not do the right
1190                  * thing we can lose track of dirty inodes and the system
1191                  * will lockup trying to allocate buffers.
1192                  */
1193                 target_ip = rec->target_ip;
1194                 KKASSERT(target_ip != NULL);
1195                 KKASSERT(target_ip->flush_state != HAMMER_FST_IDLE);
1196                 if (target_ip->flush_state == HAMMER_FST_FLUSH) {
1197                         /*
1198                          * If the target IP is already flushing in our group
1199                          * we are golden, otherwise make sure the target
1200                          * reflushes.
1201                          */
1202                         if (target_ip->flush_group == ip->flush_group) {
1203                                 rec->flush_state = HAMMER_FST_FLUSH;
1204                                 rec->flush_group = ip->flush_group;
1205                                 hammer_ref(&rec->lock);
1206                                 r = 1;
1207                         } else {
1208                                 target_ip->flags |= HAMMER_INODE_REFLUSH;
1209                         }
1210                 } else if (rec->type == HAMMER_MEM_RECORD_ADD) {
1211                         /*
1212                          * If the target IP is not flushing we can force
1213                          * it to flush.  Even if it is unable to write out
1214                          * any of its own records, we have at least one in
1215                          * hand that we CAN deal with.
1216                          */
1217                         rec->flush_state = HAMMER_FST_FLUSH;
1218                         rec->flush_group = ip->flush_group;
1219                         hammer_ref(&rec->lock);
1220                         hammer_flush_inode_core(target_ip,
1221                                                 HAMMER_FLUSH_RECURSION);
1222                         r = 1;
1223                 } else {
1224                         /*
1225                          * General or delete-on-disk record.
1226                          *
1227                          * XXX this needs help.  If a delete-on-disk we could
1228                          * disconnect the target.  If the target has its own
1229                  * dependencies they really need to be flushed.
1230                          *
1231                          * XXX
1232                          */
1233                         rec->flush_state = HAMMER_FST_FLUSH;
1234                         rec->flush_group = ip->flush_group;
1235                         hammer_ref(&rec->lock);
1236                         hammer_flush_inode_core(target_ip,
1237                                                 HAMMER_FLUSH_RECURSION);
1238                         r = 1;
1239                 }
1240                 break;
1241         case HAMMER_FST_FLUSH:
1242                 /* 
1243                  * Record already associated with a flush group.  It had
1244                  * better be ours.
1245                  */
1246                 KKASSERT(rec->flush_group == ip->flush_group);
1247                 r = 1;
1248                 break;
1249         }
1250         return(r);
1251 }
1252
1253 /*
1254  * Wait for a previously queued flush to complete
1255  */
1256 void
1257 hammer_wait_inode(hammer_inode_t ip)
1258 {
1259         while (ip->flush_state != HAMMER_FST_IDLE) {
1260                 if (ip->flush_state == HAMMER_FST_SETUP) {
1261                         hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1262                 } else {
1263                         ip->flags |= HAMMER_INODE_FLUSHW;
1264                         tsleep(&ip->flags, 0, "hmrwin", 0);
1265                 }
1266         }
1267 }
1268
1269 /*
1270  * Wait for records to drain
1271  */
1272 void
1273 hammer_wait_inode_recs(hammer_inode_t ip)
1274 {
1275         while (ip->rsv_recs > hammer_limit_irecs) {
1276                 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1277                 if (ip->rsv_recs > hammer_limit_irecs) {
1278                         ip->flags |= HAMMER_INODE_PARTIALW;
1279                         tsleep(&ip->flags, 0, "hmrwpp", 0);
1280                 }
1281         }
1282 }
1283
1284 /*
1285  * Called by the backend code when a flush has been completed.
1286  * The inode has already been removed from the flush list.
1287  *
1288  * A pipelined flush can occur, in which case we must re-enter the
1289  * inode on the list and re-copy its fields.
1290  */
1291 void
1292 hammer_flush_inode_done(hammer_inode_t ip)
1293 {
1294         hammer_mount_t hmp;
1295         int dorel;
1296
1297         KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
1298
1299         hmp = ip->hmp;
1300
1301         /*
1302          * Merge left-over flags back into the frontend and fix the state.
1303          */
1304         ip->flags |= ip->sync_flags;
1305
1306         /*
1307          * The backend may have adjusted nlinks, so if the adjusted nlinks
1308          * does not match the frontend, set the frontend's DDIRTY flag again.
1309          */
1310         if (ip->ino_data.nlinks != ip->sync_ino_data.nlinks)
1311                 ip->flags |= HAMMER_INODE_DDIRTY;
1312
1313         /*
1314          * Fix up the dirty buffer status.  IO completions will also
1315          * try to clean up rsv_databufs.
1316          */
1317         if (ip->vp && RB_ROOT(&ip->vp->v_rbdirty_tree)) {
1318                 ip->flags |= HAMMER_INODE_BUFS;
1319         } else {
1320                 hmp->rsv_databufs -= ip->rsv_databufs;
1321                 ip->rsv_databufs = 0;
1322         }
1323
1324         /*
1325          * Re-set the XDIRTY flag if some of the inode's in-memory records
1326          * could not be flushed.
1327          */
1328         KKASSERT((RB_EMPTY(&ip->rec_tree) &&
1329                   (ip->flags & HAMMER_INODE_XDIRTY) == 0) ||
1330                  (!RB_EMPTY(&ip->rec_tree) &&
1331                   (ip->flags & HAMMER_INODE_XDIRTY) != 0));
1332
1333         /*
1334          * Do not lose track of inodes which no longer have vnode
1335          * associations, otherwise they may never get flushed again.
1336          */
1337         if ((ip->flags & HAMMER_INODE_MODMASK) && ip->vp == NULL)
1338                 ip->flags |= HAMMER_INODE_REFLUSH;
1339
1340         /*
1341          * Adjust flush_state.  The target state (idle or setup) shouldn't
1342          * be terribly important since we will reflush if we really need
1343          * to do anything. XXX
1344          */
1345         if (TAILQ_EMPTY(&ip->target_list) && RB_EMPTY(&ip->rec_tree)) {
1346                 ip->flush_state = HAMMER_FST_IDLE;
1347                 dorel = 1;
1348         } else {
1349                 ip->flush_state = HAMMER_FST_SETUP;
1350                 dorel = 0;
1351         }
1352
1353         --hmp->count_iqueued;
1354         --hammer_count_iqueued;
1355
1356         /*
1357          * Clean up the vnode ref
1358          */
1359         if (ip->flags & HAMMER_INODE_VHELD) {
1360                 ip->flags &= ~HAMMER_INODE_VHELD;
1361                 vrele(ip->vp);
1362         }
1363
1364         /*
1365          * If the frontend made more changes and requested another flush,
1366          * then try to get it running.
1367          */
1368         if (ip->flags & HAMMER_INODE_REFLUSH) {
1369                 ip->flags &= ~HAMMER_INODE_REFLUSH;
1370                 if (ip->flags & HAMMER_INODE_RESIGNAL) {
1371                         ip->flags &= ~HAMMER_INODE_RESIGNAL;
1372                         hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1373                 } else {
1374                         hammer_flush_inode(ip, 0);
1375                 }
1376         }
1377
1378         /*
1379          * If the inode is now clean drop the space reservation.
1380          */
1381         if ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
1382             (ip->flags & HAMMER_INODE_RSV_INODES)) {
1383                 ip->flags &= ~HAMMER_INODE_RSV_INODES;
1384                 --hmp->rsv_inodes;
1385         }
1386
1387         /*
1388          * Finally, if the frontend is waiting for a flush to complete,
1389          * wake it up.
1390          */
1391         if (ip->flush_state != HAMMER_FST_FLUSH) {
1392                 if (ip->flags & HAMMER_INODE_FLUSHW) {
1393                         ip->flags &= ~HAMMER_INODE_FLUSHW;
1394                         wakeup(&ip->flags);
1395                 }
1396         }
1397         if (dorel)
1398                 hammer_rel_inode(ip, 0);
1399 }
1400
1401 /*
1402  * Called from hammer_sync_inode() to synchronize in-memory records
1403  * to the media.
1404  */
1405 static int
1406 hammer_sync_record_callback(hammer_record_t record, void *data)
1407 {
1408         hammer_cursor_t cursor = data;
1409         hammer_transaction_t trans = cursor->trans;
1410         int error;
1411
1412         /*
1413          * Skip records that do not belong to the current flush.
1414          */
1415         ++hammer_stats_record_iterations;
1416         if (record->flush_state != HAMMER_FST_FLUSH)
1417                 return(0);
1418
1419 #if 1
1420         if (record->flush_group != record->ip->flush_group) {
1421                 kprintf("sync_record %p ip %p bad flush group %d %d\n", record, record->ip, record->flush_group, record->ip->flush_group);
1422                 Debugger("blah2");
1423                 return(0);
1424         }
1425 #endif
1426         KKASSERT(record->flush_group == record->ip->flush_group);
1427
1428         /*
1429          * Interlock the record using the BE flag.  Once BE is set the
1430          * frontend cannot change the state of FE.
1431          *
1432          * NOTE: If FE is set prior to us setting BE we still sync the
1433          * record out, but the flush completion code converts it to 
1434          * a delete-on-disk record instead of destroying it.
1435          */
1436         KKASSERT((record->flags & HAMMER_RECF_INTERLOCK_BE) == 0);
1437         record->flags |= HAMMER_RECF_INTERLOCK_BE;
1438
1439         /*
1440          * The backend may have already disposed of the record.
1441          */
1442         if (record->flags & HAMMER_RECF_DELETED_BE) {
1443                 error = 0;
1444                 goto done;
1445         }
1446
1447         /*
1448          * If the whole inode is being deleted, all on-disk records will
1449          * be deleted very soon.  We can't sync any new records to disk
1450          * because they will be deleted in the same transaction they were
1451          * created in (delete_tid == create_tid), which will assert.
1452          *
1453          * XXX There may be a case with RECORD_ADD with DELETED_FE set
1454          * that we currently panic on.
1455          */
1456         if (record->ip->sync_flags & HAMMER_INODE_DELETING) {
1457                 switch(record->type) {
1458                 case HAMMER_MEM_RECORD_DATA:
1459                         /*
1460                          * We don't have to do anything; if the record was
1461                          * committed the space will have been accounted for
1462                          * in the blockmap.
1463                          */
1464                         /* fall through */
1465                 case HAMMER_MEM_RECORD_GENERAL:
1466                         record->flags |= HAMMER_RECF_DELETED_FE;
1467                         record->flags |= HAMMER_RECF_DELETED_BE;
1468                         error = 0;
1469                         goto done;
1470                 case HAMMER_MEM_RECORD_ADD:
1471                         panic("hammer_sync_record_callback: illegal add "
1472                               "during inode deletion record %p", record);
1473                         break; /* NOT REACHED */
1474                 case HAMMER_MEM_RECORD_INODE:
1475                         panic("hammer_sync_record_callback: attempt to "
1476                               "sync inode record %p?", record);
1477                         break; /* NOT REACHED */
1478                 case HAMMER_MEM_RECORD_DEL:
1479                         /* 
1480                          * Follow through and issue the on-disk deletion
1481                          */
1482                         break;
1483                 }
1484         }
1485
1486         /*
1487          * If DELETED_FE is set, special handling is needed for directory
1488          * entries.  Dependent pieces related to the directory entry may
1489          * have already been synced to disk.  If this occurs we have to
1490          * sync the directory entry and then change the in-memory record
1491          * from an ADD to a DELETE to cover the fact that it's been
1492          * deleted by the frontend.
1493          *
1494          * A directory delete covering record (MEM_RECORD_DEL) can never
1495          * be deleted by the frontend.
1496          *
1497          * Any other record type (aka DATA) can be deleted by the frontend.
1498          * XXX At the moment the flusher must skip it because there may
1499          * be another data record in the flush group for the same block,
1500          * meaning that some frontend data changes can leak into the backend's
1501          * synchronization point.
1502          */
1503         if (record->flags & HAMMER_RECF_DELETED_FE) {
1504                 if (record->type == HAMMER_MEM_RECORD_ADD) {
1505                         record->flags |= HAMMER_RECF_CONVERT_DELETE;
1506                 } else {
1507                         KKASSERT(record->type != HAMMER_MEM_RECORD_DEL);
1508                         record->flags |= HAMMER_RECF_DELETED_BE;
1509                         error = 0;
1510                         goto done;
1511                 }
1512         }
1513
1514         /*
1515          * Assign the create_tid for new records.  Deletions already
1516          * have the record's entire key properly set up.
1517          */
1518         if (record->type != HAMMER_MEM_RECORD_DEL)
1519                 record->leaf.base.create_tid = trans->tid;
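        /*
         * Sync the record to the media, retrying on EDEADLK.  An EDEADLK
         * return from hammer_ip_sync_record_cursor() generally indicates
         * a deadlock-avoidance back-off rather than a hard error; the
         * cursor is torn down, re-initialized from the inode's cache
         * hint, and the operation is retried.
         */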
1520         for (;;) {
1521                 error = hammer_ip_sync_record_cursor(cursor, record);
1522                 if (error != EDEADLK)
1523                         break;
1524                 hammer_done_cursor(cursor);
1525                 error = hammer_init_cursor(trans, cursor, &record->ip->cache[0],
1526                                            record->ip);
1527                 if (error)
1528                         break;
1529         }
1530         record->flags &= ~HAMMER_RECF_CONVERT_DELETE;
1531
1532         if (error) {
1533                 error = -error;
1534                 if (error != -ENOSPC) {
1535                         kprintf("hammer_sync_record_callback: sync failed rec "
1536                                 "%p, error %d\n", record, error);
1537                         Debugger("sync failed rec");
1538                 }
1539         }
1540 done:
1541         hammer_flush_record_done(record, error);
1542         return(error);
1543 }
1544
1545 /*
1546  * XXX error handling
1547  */
1548 int
1549 hammer_sync_inode(hammer_inode_t ip)
1550 {
1551         struct hammer_transaction trans;
1552         struct hammer_cursor cursor;
1553         hammer_record_t depend;
1554         hammer_record_t next;
1555         int error, tmp_error;
1556         u_int64_t nlinks;
1557
1558         if ((ip->sync_flags & HAMMER_INODE_MODMASK) == 0)
1559                 return(0);
1560
1561         hammer_start_transaction_fls(&trans, ip->hmp);
1562         error = hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);
1563         if (error)
1564                 goto done;
1565
1566         /*
1567          * Any directory records referencing this inode which are not in
1568          * our current flush group must be accounted for by adjusting the
1569          * nlink count we synchronize to disk.
1570          *
1571          * Records which are in our flush group can be unlinked from our
1572          * inode now, potentially allowing the inode to be physically
1573          * deleted.
1574          */
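        /*
         * For example, an ADD record (directory entry) that is not part
         * of this flush group has not made it to the media yet, so the
         * media copy of the inode is synced with one fewer link; a DEL
         * record that is not part of this flush group means the on-media
         * directory entry still exists, so one extra link is counted.
         */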
1575         nlinks = ip->ino_data.nlinks;
1576         next = TAILQ_FIRST(&ip->target_list);
1577         while ((depend = next) != NULL) {
1578                 next = TAILQ_NEXT(depend, target_entry);
1579                 if (depend->flush_state == HAMMER_FST_FLUSH &&
1580                     depend->flush_group == ip->hmp->flusher.act) {
1581                         /*
1582                          * If this is an ADD that was deleted by the frontend
1583                          * the frontend nlinks count will have already been
1584                          * decremented, but the backend is going to sync its
1585                          * directory entry and must account for it.  The
1586                          * record will be converted to a delete-on-disk when
1587                          * it gets synced.
1588                          *
1589                          * If the ADD was not deleted by the frontend we
1590                          * can remove the dependency from our target_list.
1591                          */
1592                         if (depend->flags & HAMMER_RECF_DELETED_FE) {
1593                                 ++nlinks;
1594                         } else {
1595                                 TAILQ_REMOVE(&ip->target_list, depend,
1596                                              target_entry);
1597                                 depend->target_ip = NULL;
1598                         }
1599                 } else if ((depend->flags & HAMMER_RECF_DELETED_FE) == 0) {
1600                         /*
1601                          * Not part of our flush group
1602                          */
1603                         KKASSERT((depend->flags & HAMMER_RECF_DELETED_BE) == 0);
1604                         switch(depend->type) {
1605                         case HAMMER_MEM_RECORD_ADD:
1606                                 --nlinks;
1607                                 break;
1608                         case HAMMER_MEM_RECORD_DEL:
1609                                 ++nlinks;
1610                                 break;
1611                         default:
1612                                 break;
1613                         }
1614                 }
1615         }
1616
1617         /*
1618          * Set dirty if we had to modify the link count.
1619          */
1620         if (ip->sync_ino_data.nlinks != nlinks) {
1621                 KKASSERT((int64_t)nlinks >= 0);
1622                 ip->sync_ino_data.nlinks = nlinks;
1623                 ip->sync_flags |= HAMMER_INODE_DDIRTY;
1624         }
1625
1626         /*
1627          * If there is a truncation queued, destroy any data past the (aligned)
1628          * truncation point.  Userland will have dealt with the buffer
1629          * containing the truncation point for us.
1630          *
1631          * We don't flush pending frontend data buffers until after we've
1632          * dealt with the truncation.
1633          *
1634          * Don't bother if the inode is or has been deleted.
1635          */
1636         if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
1637                 /*
1638                  * Interlock trunc_off.  The VOP front-end may continue to
1639                  * make adjustments to it while we are blocked.
1640                  */
1641                 off_t trunc_off;
1642                 off_t aligned_trunc_off;
1643
1644                 trunc_off = ip->sync_trunc_off;
1645                 aligned_trunc_off = (trunc_off + HAMMER_BUFMASK) &
1646                                     ~HAMMER_BUFMASK64;
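                /*
                 * e.g. assuming the usual 16KB HAMMER buffers
                 * (HAMMER_BUFMASK64 == 0x3fff), a sync_trunc_off of
                 * 0x5001 rounds up to an aligned_trunc_off of 0x8000.
                 * The range deletion below then covers everything from
                 * that boundary out to the maximum 64-bit offset, i.e.
                 * to end-of-file.
                 */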
1647
1648                 /*
1649                  * Delete any whole blocks on-media.  The front-end has
1650                  * already cleaned out any partial block and made it
1651                  * pending.  The front-end may have updated trunc_off
1652                  * while we were blocked so we only use sync_trunc_off.
1653                  */
1654                 error = hammer_ip_delete_range(&cursor, ip,
1655                                                 aligned_trunc_off,
1656                                                 0x7FFFFFFFFFFFFFFFLL, 1);
1657                 if (error)
1658                         Debugger("hammer_ip_delete_range errored");
1659
1660                 /*
1661                  * Clear the truncation flag on the backend after we have
1662                  * completed the deletions.  Backend data is now good again
1663                  * (including new records we are about to sync, below).
1664                  */
1665                 ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
1666                 ip->sync_trunc_off = 0x7FFFFFFFFFFFFFFFLL;
1667         } else {
1668                 error = 0;
1669         }
1670
1671         /*
1672          * Now sync related records.  These will typically be directory
1673          * entries or delete-on-disk records.
1674          *
1675          * Not all records will be flushed, but clear XDIRTY anyway.  We
1676          * will set it again in the frontend hammer_flush_inode_done() 
1677          * if records remain.
1678          */
1679         if (error == 0) {
1680                 tmp_error = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
1681                                     hammer_sync_record_callback, &cursor);
1682                 if (tmp_error < 0)
1683                         tmp_error = -tmp_error;
1684                 if (tmp_error)
1685                         error = tmp_error;
1686         }
1687
1688         /*
1689          * If we are deleting the inode the frontend had better not have
1690          * any active references on elements making up the inode.
1691          */
1692         if (error == 0 && ip->sync_ino_data.nlinks == 0 &&
1693             RB_EMPTY(&ip->rec_tree) &&
1694             (ip->sync_flags & HAMMER_INODE_DELETING) &&
1695             (ip->flags & HAMMER_INODE_DELETED) == 0) {
1696                 int count1 = 0;
1697
1698                 ip->flags |= HAMMER_INODE_DELETED;
1699                 error = hammer_ip_delete_range_all(&cursor, ip, &count1);
1700                 if (error == 0) {
1701                         ip->sync_flags &= ~HAMMER_INODE_DELETING;
1702                         ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
1703                         KKASSERT(RB_EMPTY(&ip->rec_tree));
1704
1705                         /*
1706                          * Set delete_tid in both the frontend and backend
1707                          * copy of the inode record.  The DELETED flag handles
1708                          * this, do not set DDIRTY.
1709                          */
1710                         ip->ino_leaf.base.delete_tid = trans.tid;
1711                         ip->sync_ino_leaf.base.delete_tid = trans.tid;
1712
1713                         /*
1714                          * Adjust the inode count in the volume header
1715                          */
1716                         if (ip->flags & HAMMER_INODE_ONDISK) {
1717                                 hammer_modify_volume_field(&trans,
1718                                                            trans.rootvol,
1719                                                            vol0_stat_inodes);
1720                                 --ip->hmp->rootvol->ondisk->vol0_stat_inodes;
1721                                 hammer_modify_volume_done(trans.rootvol);
1722                         }
1723                 } else {
1724                         ip->flags &= ~HAMMER_INODE_DELETED;
1725                         Debugger("hammer_ip_delete_range_all errored");
1726                 }
1727         }
1728
1729         ip->sync_flags &= ~HAMMER_INODE_BUFS;
1730
1731         if (error)
1732                 Debugger("RB_SCAN errored");
1733
1734         /*
1735          * Now update the inode's on-disk inode-data and/or on-disk record.
1736          * DELETED and ONDISK are managed only in ip->flags.
1737          */
1738         switch(ip->flags & (HAMMER_INODE_DELETED | HAMMER_INODE_ONDISK)) {
1739         case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK:
1740                 /*
1741                  * If deleted and on-disk, don't set any additional flags.
1742                  * The delete flag takes care of things.
1743                  *
1744                  * Clear flags which may have been set by the frontend.
1745                  */
1746                 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY|
1747                                     HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
1748                                     HAMMER_INODE_DELETING);
1749                 break;
1750         case HAMMER_INODE_DELETED:
1751                 /*
1752                  * Take care of the case where a deleted inode was never
1753                  * flushed to the disk in the first place.
1754                  *
1755                  * Clear flags which may have been set by the frontend.
1756                  */
1757                 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY|
1758                                     HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
1759                                     HAMMER_INODE_DELETING);
1760                 while (RB_ROOT(&ip->rec_tree)) {
1761                         hammer_record_t record = RB_ROOT(&ip->rec_tree);
1762                         hammer_ref(&record->lock);
1763                         KKASSERT(record->lock.refs == 1);
1764                         record->flags |= HAMMER_RECF_DELETED_FE;
1765                         record->flags |= HAMMER_RECF_DELETED_BE;
1766                         hammer_rel_mem_record(record);
1767                 }
1768                 break;
1769         case HAMMER_INODE_ONDISK:
1770                 /*
1771                  * If already on-disk, do not set any additional flags.
1772                  */
1773                 break;
1774         default:
1775                 /*
1776                  * If not on-disk and not deleted, set both dirty flags
1777                  * to force an initial record to be written.  Also set
1778                  * the create_tid for the inode.
1779                  *
1780                  * Set create_tid in both the frontend and backend
1781                  * copy of the inode record.
1782                  */
1783                 ip->ino_leaf.base.create_tid = trans.tid;
1784                 ip->sync_ino_leaf.base.create_tid = trans.tid;
1785                 ip->sync_flags |= HAMMER_INODE_DDIRTY;
1786                 break;
1787         }
1788
1789         /*
1790          * If DDIRTY is set, write out a new record.  If the inode
1791          * is already on-disk the old record is marked as deleted.
1792          *
1793          * If DELETED is set hammer_update_inode() will delete the existing
1794          * record without writing out a new one.
1795          *
1796          * If *ONLY* the ITIMES flag is set we can update the record in-place.
1797          */
1798         if (ip->flags & HAMMER_INODE_DELETED) {
1799                 error = hammer_update_inode(&cursor, ip);
1800         } else 
1801         if ((ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) ==
1802             HAMMER_INODE_ITIMES) {
1803                 error = hammer_update_itimes(&cursor, ip);
1804         } else
1805         if (ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) {
1806                 error = hammer_update_inode(&cursor, ip);
1807         }
1808         if (error)
1809                 Debugger("hammer_update_itimes/inode errored");
1810 done:
1811         /*
1812          * Save the TID we used to sync the inode with to make sure we
1813          * do not improperly reuse it.
1814          */
1815         hammer_done_cursor(&cursor);
1816         hammer_done_transaction(&trans);
1817         return(error);
1818 }
1819
1820 /*
1821  * This routine is called when the OS is no longer actively referencing
1822  * the inode (but might still be keeping it cached), or when releasing
1823  * the last reference to an inode.
1824  *
1825  * At this point if the inode's nlinks count is zero we want to destroy
1826  * it, which may mean destroying it on-media too.
1827  */
1828 void
1829 hammer_inode_unloadable_check(hammer_inode_t ip, int getvp)
1830 {
1831         struct vnode *vp;
1832
1833         /*
1834          * Set the DELETING flag when the link count drops to 0 and the
1835          * OS no longer has any opens on the inode.
1836          *
1837          * The backend will clear DELETING (a mod flag) and set DELETED
1838          * (a state flag) when it is actually able to perform the
1839          * operation.
1840          */
1841         if (ip->ino_data.nlinks == 0 &&
1842             (ip->flags & (HAMMER_INODE_DELETING|HAMMER_INODE_DELETED)) == 0) {
1843                 ip->flags |= HAMMER_INODE_DELETING;
1844                 ip->flags |= HAMMER_INODE_TRUNCATED;
1845                 ip->trunc_off = 0;
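                /*
                 * With TRUNCATED set and trunc_off at 0, the backend
                 * sync code (hammer_sync_inode()) should delete all of
                 * the inode's on-media data records before the inode
                 * itself is destroyed.
                 */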
1846                 vp = NULL;
1847                 if (getvp) {
1848                         if (hammer_get_vnode(ip, &vp) != 0)
1849                                 return;
1850                 }
1851
1852                 /*
1853                  * Final cleanup
1854                  */
1855                 if (ip->vp) {
1856                         vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
1857                         vnode_pager_setsize(ip->vp, 0);
1858                 }
1859                 if (getvp) {
1860                         vput(vp);
1861                 }
1862         }
1863 }
1864
1865 /*
1866  * Re-test an inode when a dependency has gone away to see if we
1867  * can chain flush it.
1868  */
1869 void
1870 hammer_test_inode(hammer_inode_t ip)
1871 {
1872         if (ip->flags & HAMMER_INODE_REFLUSH) {
1873                 ip->flags &= ~HAMMER_INODE_REFLUSH;
1874                 hammer_ref(&ip->lock);
1875                 if (ip->flags & HAMMER_INODE_RESIGNAL) {
1876                         ip->flags &= ~HAMMER_INODE_RESIGNAL;
1877                         hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1878                 } else {
1879                         hammer_flush_inode(ip, 0);
1880                 }
1881                 hammer_rel_inode(ip, 0);
1882         }
1883 }
1884
1885 /*
1886  * Clear the RECLAIM flag on an inode.  This occurs when the inode is
1887  * reassociated with a vp or just before it gets freed.
1888  *
1889  * Wake up one thread blocked waiting on reclaims to complete.  Note that
1890  * the inode the thread is waiting on behalf of is a different inode than
1891  * the inode we are called with.  This is done to create a pipeline.
1892  */
1893 static void
1894 hammer_inode_wakereclaims(hammer_inode_t ip)
1895 {
1896         struct hammer_reclaim *reclaim;
1897         hammer_mount_t hmp = ip->hmp;
1898
1899         if ((ip->flags & HAMMER_INODE_RECLAIM) == 0)
1900                 return;
1901
1902         --hammer_count_reclaiming;
1903         --hmp->inode_reclaims;
1904         ip->flags &= ~HAMMER_INODE_RECLAIM;
1905
1906         if ((reclaim = TAILQ_FIRST(&hmp->reclaim_list)) != NULL) {
1907                 TAILQ_REMOVE(&hmp->reclaim_list, reclaim, entry);
1908                 reclaim->okydoky = 1;
1909                 wakeup(reclaim);
1910         }
1911 }
1912