1 /*
2  * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.68 2008/06/10 05:06:20 dillon Exp $
35  */
36
37 #include "hammer.h"
38 #include <vm/vm_extern.h>
39 #include <sys/buf.h>
40 #include <sys/buf2.h>
41
42 static int hammer_unload_inode(struct hammer_inode *ip);
43 static void hammer_flush_inode_core(hammer_inode_t ip, int flags);
44 static int hammer_setup_child_callback(hammer_record_t rec, void *data);
45 static int hammer_setup_parent_inodes(hammer_record_t record);
46
47 #ifdef DEBUG_TRUNCATE
48 extern struct hammer_inode *HammerTruncIp;
49 #endif
50
51 /*
52  * The kernel is not actively referencing this vnode but is still holding
53  * it cached.
54  *
55  * This is called from the frontend.
56  */
57 int
58 hammer_vop_inactive(struct vop_inactive_args *ap)
59 {
60         struct hammer_inode *ip = VTOI(ap->a_vp);
61
62         /*
63          * Degenerate case
64          */
65         if (ip == NULL) {
66                 vrecycle(ap->a_vp);
67                 return(0);
68         }
69
70         /*
71          * If the inode no longer has visibility in the filesystem and is
72          * fairly clean, try to recycle it immediately.  This can deadlock
73          * in vfsync() if we aren't careful.
74          * 
75          * Do not queue the inode to the flusher if we still have visibility,
76          * otherwise namespace calls such as chmod will unnecessarily generate
77          * multiple inode updates.
78          */
79         hammer_inode_unloadable_check(ip, 0);
80         if (ip->ino_data.nlinks == 0) {
81                 if (ip->flags & HAMMER_INODE_MODMASK)
82                         hammer_flush_inode(ip, 0);
83                 else
84                         vrecycle(ap->a_vp);
85         }
86         return(0);
87 }
88
89 /*
90  * Release the vnode association.  This is typically (but not always)
91  * the last reference on the inode.
92  *
93  * Once the association is lost we are on our own with regards to
94  * flushing the inode.
95  */
96 int
97 hammer_vop_reclaim(struct vop_reclaim_args *ap)
98 {
99         struct hammer_inode *ip;
100         struct vnode *vp;
101
102         vp = ap->a_vp;
103
104         if ((ip = vp->v_data) != NULL) {
105                 vp->v_data = NULL;
106                 ip->vp = NULL;
107                 if ((ip->flags & HAMMER_INODE_RECLAIM) == 0) {
108                         ++hammer_count_reclaiming;
109                         ++ip->hmp->inode_reclaims;
110                         ip->flags |= HAMMER_INODE_RECLAIM;
111                 }
112                 hammer_rel_inode(ip, 1);
113         }
114         return(0);
115 }
116
117 /*
118  * Return a locked vnode for the specified inode.  The inode must be
119  * referenced but NOT LOCKED on entry and will remain referenced on
120  * return.
121  *
122  * Called from the frontend.
123  */
124 int
125 hammer_get_vnode(struct hammer_inode *ip, struct vnode **vpp)
126 {
127         hammer_mount_t hmp;
128         struct vnode *vp;
129         int error = 0;
130
131         hmp = ip->hmp;
132
133         for (;;) {
134                 if ((vp = ip->vp) == NULL) {
135                         error = getnewvnode(VT_HAMMER, hmp->mp, vpp, 0, 0);
136                         if (error)
137                                 break;
138                         hammer_lock_ex(&ip->lock);
139                         if (ip->vp != NULL) {
140                                 hammer_unlock(&ip->lock);
141                                 vp->v_type = VBAD;
142                                 vx_put(vp);
143                                 continue;
144                         }
145                         hammer_ref(&ip->lock);
146                         vp = *vpp;
147                         ip->vp = vp;
148                         vp->v_type =
149                                 hammer_get_vnode_type(ip->ino_data.obj_type);
150
151                         if (ip->flags & HAMMER_INODE_RECLAIM) {
152                                 --hammer_count_reclaiming;
153                                 --hmp->inode_reclaims;
154                                 ip->flags &= ~HAMMER_INODE_RECLAIM;
155                                 if (hmp->flags & HAMMER_MOUNT_WAITIMAX)
156                                         hammer_inode_wakereclaims(hmp);
157                         }
158
159                         switch(ip->ino_data.obj_type) {
160                         case HAMMER_OBJTYPE_CDEV:
161                         case HAMMER_OBJTYPE_BDEV:
162                                 vp->v_ops = &hmp->mp->mnt_vn_spec_ops;
163                                 addaliasu(vp, ip->ino_data.rmajor,
164                                           ip->ino_data.rminor);
165                                 break;
166                         case HAMMER_OBJTYPE_FIFO:
167                                 vp->v_ops = &hmp->mp->mnt_vn_fifo_ops;
168                                 break;
169                         default:
170                                 break;
171                         }
172
173                         /*
174                          * Only mark as the root vnode if the ip is not
175                          * historical, otherwise the VFS cache will get
176                          * confused.  The other half of the special handling
177                          * is in hammer_vop_nlookupdotdot().
178                          */
179                         if (ip->obj_id == HAMMER_OBJID_ROOT &&
180                             ip->obj_asof == hmp->asof) {
181                                 vp->v_flag |= VROOT;
182                         }
183
184                         vp->v_data = (void *)ip;
185                         /* vnode locked by getnewvnode() */
186                         /* make related vnode dirty if inode dirty? */
187                         hammer_unlock(&ip->lock);
188                         if (vp->v_type == VREG)
189                                 vinitvmio(vp, ip->ino_data.size);
190                         break;
191                 }
192
193                 /*
194                  * loop if the vget fails (aka races), or if the vp
195                  * no longer matches ip->vp.
196                  */
197                 if (vget(vp, LK_EXCLUSIVE) == 0) {
198                         if (vp == ip->vp)
199                                 break;
200                         vput(vp);
201                 }
202         }
203         *vpp = vp;
204         return(error);
205 }
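
/*
 * Illustrative sketch (not part of the original source): a typical
 * frontend path pairs hammer_get_inode() with hammer_get_vnode() and
 * then drops its own inode reference; the vnode association keeps the
 * inode referenced.  'trans' and 'obj_id' below are placeholders.
 */
#if 0
static int
example_lookup_vnode(hammer_transaction_t trans, u_int64_t obj_id,
		     struct vnode **vpp)
{
	struct hammer_inode *ip;
	int error;

	ip = hammer_get_inode(trans, NULL, obj_id, trans->hmp->asof,
			      0, &error);
	if (ip == NULL)
		return (error);
	error = hammer_get_vnode(ip, vpp);	/* *vpp returned locked */
	hammer_rel_inode(ip, 0);		/* vp->v_data still holds ip */
	return (error);
}
#endif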
206
207 /*
208  * Acquire a HAMMER inode.  The returned inode is not locked.  These functions
209  * do not attach or detach the related vnode (use hammer_get_vnode() for
210  * that).
211  *
212  * The flags argument is only applied for newly created inodes, and only
213  * certain flags are inherited.
214  *
215  * Called from the frontend.
216  */
217 struct hammer_inode *
218 hammer_get_inode(hammer_transaction_t trans, struct hammer_node **cache,
219                  u_int64_t obj_id, hammer_tid_t asof, int flags, int *errorp)
220 {
221         hammer_mount_t hmp = trans->hmp;
222         struct hammer_inode_info iinfo;
223         struct hammer_cursor cursor;
224         struct hammer_inode *ip;
225
226         /*
227          * Determine if we already have an inode cached.  If we do then
228          * we are golden.
229          */
230         iinfo.obj_id = obj_id;
231         iinfo.obj_asof = asof;
232 loop:
233         ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
234         if (ip) {
235                 hammer_ref(&ip->lock);
236                 *errorp = 0;
237                 return(ip);
238         }
239
240         /*
241          * Impose a slow-down if HAMMER is heavily backlogged on cleaning
242          * out reclaimed inodes.
243          */
244         if (hmp->inode_reclaims > HAMMER_RECLAIM_MIN &&
245             curthread != hmp->flusher_td) {
246                 hammer_inode_waitreclaims(hmp);
247         }
248
249         /*
250          * Allocate a new inode structure and deal with races later.
251          */
252         ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
253         ++hammer_count_inodes;
254         ++hmp->count_inodes;
255         ip->obj_id = obj_id;
256         ip->obj_asof = iinfo.obj_asof;
257         ip->hmp = hmp;
258         ip->flags = flags & HAMMER_INODE_RO;
259         if (hmp->ronly)
260                 ip->flags |= HAMMER_INODE_RO;
261         ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
262         RB_INIT(&ip->rec_tree);
263         TAILQ_INIT(&ip->target_list);
264
265         /*
266          * Locate the on-disk inode.
267          */
268 retry:
269         hammer_init_cursor(trans, &cursor, cache, NULL);
270         cursor.key_beg.localization = HAMMER_LOCALIZE_INODE;
271         cursor.key_beg.obj_id = ip->obj_id;
272         cursor.key_beg.key = 0;
273         cursor.key_beg.create_tid = 0;
274         cursor.key_beg.delete_tid = 0;
275         cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
276         cursor.key_beg.obj_type = 0;
277         cursor.asof = iinfo.obj_asof;
278         cursor.flags = HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_GET_DATA |
279                        HAMMER_CURSOR_ASOF;
280
281         *errorp = hammer_btree_lookup(&cursor);
282         if (*errorp == EDEADLK) {
283                 hammer_done_cursor(&cursor);
284                 goto retry;
285         }
286
287         /*
288          * On success the B-Tree lookup will hold the appropriate
289          * buffer cache buffers and provide a pointer to the requested
290          * information.  Copy the information to the in-memory inode
291          * and cache the B-Tree node to improve future operations.
292          */
293         if (*errorp == 0) {
294                 ip->ino_leaf = cursor.node->ondisk->elms[cursor.index].leaf;
295                 ip->ino_data = cursor.data->inode;
296                 hammer_cache_node(cursor.node, &ip->cache[0]);
297                 if (cache)
298                         hammer_cache_node(cursor.node, cache);
299         }
300
301         /*
302          * On success, load the inode's record and data and insert the
303          * inode into the in-memory inode RB tree.  It is possible to race
304          * another lookup's insertion of the same inode, so handle that too.
305          *
306          * The cursor's locked node interlocks against others creating and
307          * destroying ip while we were blocked.
308          */
309         if (*errorp == 0) {
310                 hammer_ref(&ip->lock);
311                 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
312                         hammer_uncache_node(&ip->cache[0]);
313                         hammer_uncache_node(&ip->cache[1]);
314                         KKASSERT(ip->lock.refs == 1);
315                         --hammer_count_inodes;
316                         --hmp->count_inodes;
317                         kfree(ip, M_HAMMER);
318                         hammer_done_cursor(&cursor);
319                         goto loop;
320                 }
321                 ip->flags |= HAMMER_INODE_ONDISK;
322         } else {
323                 /*
324                  * Do not panic on read-only accesses which fail, particularly
325                  * historical accesses where the snapshot might not have
326                  * complete connectivity.
327                  */
328                 if ((flags & HAMMER_INODE_RO) == 0) {
329                         kprintf("hammer_get_inode: failed ip %p obj_id %016llx cursor %p error %d\n",
330                                 ip, ip->obj_id, &cursor, *errorp);
331                         Debugger("x");
332                 }
333                 if (ip->flags & HAMMER_INODE_RSV_INODES) {
334                         ip->flags &= ~HAMMER_INODE_RSV_INODES; /* sanity */
335                         --hmp->rsv_inodes;
336                 }
337                 hmp->rsv_databufs -= ip->rsv_databufs;
338                 ip->rsv_databufs = 0;                          /* sanity */
339
340                 --hammer_count_inodes;
341                 --hmp->count_inodes;
342                 kfree(ip, M_HAMMER);
343                 ip = NULL;
344         }
345         hammer_done_cursor(&cursor);
346         return (ip);
347 }
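
/*
 * Illustrative sketch (not part of the original source): historical
 * (as-of) access.  Supplying an explicit as-of TID together with
 * HAMMER_INODE_RO yields a read-only snapshot view of the inode.
 * 'snap_tid' is a placeholder for a real transaction id.
 */
#if 0
static struct hammer_inode *
example_get_historical(hammer_transaction_t trans, u_int64_t obj_id,
		       hammer_tid_t snap_tid, int *errorp)
{
	return (hammer_get_inode(trans, NULL, obj_id, snap_tid,
				 HAMMER_INODE_RO, errorp));
}
#endif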
348
349 /*
350  * Create a new filesystem object, returning the inode in *ipp.  The
351  * returned inode will be referenced.
352  *
353  * The inode is created in-memory.
354  */
355 int
356 hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
357                     struct ucred *cred, hammer_inode_t dip,
358                     struct hammer_inode **ipp)
359 {
360         hammer_mount_t hmp;
361         hammer_inode_t ip;
362         uid_t xuid;
363
364         hmp = trans->hmp;
365         ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
366         ++hammer_count_inodes;
367         ++hmp->count_inodes;
368         ip->obj_id = hammer_alloc_objid(trans, dip);
369         KKASSERT(ip->obj_id != 0);
370         ip->obj_asof = hmp->asof;
371         ip->hmp = hmp;
372         ip->flush_state = HAMMER_FST_IDLE;
373         ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES;
374
375         ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
376         RB_INIT(&ip->rec_tree);
377         TAILQ_INIT(&ip->target_list);
378
379         ip->ino_leaf.atime = trans->time;
380         ip->ino_data.mtime = trans->time;
381         ip->ino_data.size = 0;
382         ip->ino_data.nlinks = 0;
383
384         /*
385          * A nohistory designator on the parent directory is inherited by
386          * the child.
387          */
388         ip->ino_data.uflags = dip->ino_data.uflags &
389                               (SF_NOHISTORY|UF_NOHISTORY|UF_NODUMP);
390
391         ip->ino_leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
392         ip->ino_leaf.base.localization = HAMMER_LOCALIZE_INODE;
393         ip->ino_leaf.base.obj_id = ip->obj_id;
394         ip->ino_leaf.base.key = 0;
395         ip->ino_leaf.base.create_tid = 0;
396         ip->ino_leaf.base.delete_tid = 0;
397         ip->ino_leaf.base.rec_type = HAMMER_RECTYPE_INODE;
398         ip->ino_leaf.base.obj_type = hammer_get_obj_type(vap->va_type);
399
400         ip->ino_data.obj_type = ip->ino_leaf.base.obj_type;
401         ip->ino_data.version = HAMMER_INODE_DATA_VERSION;
402         ip->ino_data.mode = vap->va_mode;
403         ip->ino_data.ctime = trans->time;
404         ip->ino_data.parent_obj_id = (dip) ? dip->ino_leaf.base.obj_id : 0;
405
406         switch(ip->ino_leaf.base.obj_type) {
407         case HAMMER_OBJTYPE_CDEV:
408         case HAMMER_OBJTYPE_BDEV:
409                 ip->ino_data.rmajor = vap->va_rmajor;
410                 ip->ino_data.rminor = vap->va_rminor;
411                 break;
412         default:
413                 break;
414         }
415
416         /*
417          * Calculate default uid/gid and overwrite with information from
418          * the vap.
419          */
420         xuid = hammer_to_unix_xid(&dip->ino_data.uid);
421         xuid = vop_helper_create_uid(hmp->mp, dip->ino_data.mode, xuid, cred,
422                                      &vap->va_mode);
423         ip->ino_data.mode = vap->va_mode;
424
425         if (vap->va_vaflags & VA_UID_UUID_VALID)
426                 ip->ino_data.uid = vap->va_uid_uuid;
427         else if (vap->va_uid != (uid_t)VNOVAL)
428                 hammer_guid_to_uuid(&ip->ino_data.uid, vap->va_uid);
429         else
430                 hammer_guid_to_uuid(&ip->ino_data.uid, xuid);
431
432         if (vap->va_vaflags & VA_GID_UUID_VALID)
433                 ip->ino_data.gid = vap->va_gid_uuid;
434         else if (vap->va_gid != (gid_t)VNOVAL)
435                 hammer_guid_to_uuid(&ip->ino_data.gid, vap->va_gid);
436         else
437                 ip->ino_data.gid = dip->ino_data.gid;
438
439         hammer_ref(&ip->lock);
440         if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
441                 hammer_unref(&ip->lock);
442                 panic("hammer_create_inode: duplicate obj_id %llx", ip->obj_id);
443         }
444         *ipp = ip;
445         return(0);
446 }
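
/*
 * Illustrative sketch (not part of the original source): a create-style
 * VOP allocates the new in-memory inode, adds the directory entry for it
 * (that code lives in hammer_object.c and is elided here), then attaches
 * a vnode and drops its own inode reference.
 */
#if 0
static int
example_create(hammer_transaction_t trans, struct vattr *vap,
	       struct ucred *cred, hammer_inode_t dip, struct vnode **vpp)
{
	struct hammer_inode *nip;
	int error;

	error = hammer_create_inode(trans, vap, cred, dip, &nip);
	if (error)
		return (error);
	/* ... add the directory record referencing nip here ... */
	error = hammer_get_vnode(nip, vpp);
	hammer_rel_inode(nip, 0);
	return (error);
}
#endif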
447
448 /*
449  * Called by hammer_sync_inode().
450  */
451 static int
452 hammer_update_inode(hammer_cursor_t cursor, hammer_inode_t ip)
453 {
454         hammer_transaction_t trans = cursor->trans;
455         hammer_record_t record;
456         int error;
457
458 retry:
459         error = 0;
460
461         /*
462          * If the inode has a presence on-disk then locate it and mark
463          * it deleted, setting DELONDISK.
464          *
465          * The record may or may not be physically deleted, depending on
466          * the retention policy.
467          */
468         if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
469             HAMMER_INODE_ONDISK) {
470                 hammer_normalize_cursor(cursor);
471                 cursor->key_beg.localization = HAMMER_LOCALIZE_INODE;
472                 cursor->key_beg.obj_id = ip->obj_id;
473                 cursor->key_beg.key = 0;
474                 cursor->key_beg.create_tid = 0;
475                 cursor->key_beg.delete_tid = 0;
476                 cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE;
477                 cursor->key_beg.obj_type = 0;
478                 cursor->asof = ip->obj_asof;
479                 cursor->flags &= ~HAMMER_CURSOR_INITMASK;
480                 cursor->flags |= HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_ASOF;
481                 cursor->flags |= HAMMER_CURSOR_BACKEND;
482
483                 error = hammer_btree_lookup(cursor);
484                 if (hammer_debug_inode)
485                         kprintf("IPDEL %p %08x %d", ip, ip->flags, error);
486                 if (error) {
487                         kprintf("error %d\n", error);
488                         Debugger("hammer_update_inode");
489                 }
490
491                 if (error == 0) {
492                         error = hammer_ip_delete_record(cursor, ip, trans->tid);
493                         if (hammer_debug_inode)
494                                 kprintf(" error %d\n", error);
495                         if (error && error != EDEADLK) {
496                                 kprintf("error %d\n", error);
497                                 Debugger("hammer_update_inode2");
498                         }
499                         if (error == 0) {
500                                 ip->flags |= HAMMER_INODE_DELONDISK;
501                         }
502                         if (cursor->node)
503                                 hammer_cache_node(cursor->node, &ip->cache[0]);
504                 }
505                 if (error == EDEADLK) {
506                         hammer_done_cursor(cursor);
507                         error = hammer_init_cursor(trans, cursor,
508                                                    &ip->cache[0], ip);
509                         if (hammer_debug_inode)
510                                 kprintf("IPDED %p %d\n", ip, error);
511                         if (error == 0)
512                                 goto retry;
513                 }
514         }
515
516         /*
517          * Ok, write out the initial record or a new record (after deleting
518          * the old one), unless the DELETED flag is set.  This routine will
519          * clear DELONDISK if it writes out a record.
520          *
521          * Update our inode statistics if this is the first application of
522          * the inode on-disk.
523          */
524         if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) {
525                 /*
526                  * Generate a record and write it to the media
527                  */
528                 record = hammer_alloc_mem_record(ip, 0);
529                 record->type = HAMMER_MEM_RECORD_INODE;
530                 record->flush_state = HAMMER_FST_FLUSH;
531                 record->leaf = ip->sync_ino_leaf;
532                 record->leaf.base.create_tid = trans->tid;
533                 record->leaf.data_len = sizeof(ip->sync_ino_data);
534                 record->data = (void *)&ip->sync_ino_data;
535                 record->flags |= HAMMER_RECF_INTERLOCK_BE;
536                 for (;;) {
537                         error = hammer_ip_sync_record_cursor(cursor, record);
538                         if (hammer_debug_inode)
539                                 kprintf("GENREC %p rec %08x %d\n",      
540                                         ip, record->flags, error);
541                         if (error != EDEADLK)
542                                 break;
543                         hammer_done_cursor(cursor);
544                         error = hammer_init_cursor(trans, cursor,
545                                                    &ip->cache[0], ip);
546                         if (hammer_debug_inode)
547                                 kprintf("GENREC reinit %d\n", error);
548                         if (error)
549                                 break;
550                 }
551                 if (error) {
552                         kprintf("error %d\n", error);
553                         Debugger("hammer_update_inode3");
554                 }
555
556                 /*
557                  * The record isn't managed by the inode's record tree;
558                  * destroy it whether we succeed or fail.
559                  */
560                 record->flags &= ~HAMMER_RECF_INTERLOCK_BE;
561                 record->flags |= HAMMER_RECF_DELETED_FE;
562                 record->flush_state = HAMMER_FST_IDLE;
563                 hammer_rel_mem_record(record);
564
565                 /*
566                  * Finish up.
567                  */
568                 if (error == 0) {
569                         if (hammer_debug_inode)
570                                 kprintf("CLEANDELOND %p %08x\n", ip, ip->flags);
571                         ip->sync_flags &= ~(HAMMER_INODE_DDIRTY |
572                                             HAMMER_INODE_ITIMES);
573                         ip->flags &= ~HAMMER_INODE_DELONDISK;
574
575                         /*
576                          * Root volume count of inodes
577                          */
578                         if ((ip->flags & HAMMER_INODE_ONDISK) == 0) {
579                                 hammer_modify_volume_field(trans,
580                                                            trans->rootvol,
581                                                            vol0_stat_inodes);
582                                 ++ip->hmp->rootvol->ondisk->vol0_stat_inodes;
583                                 hammer_modify_volume_done(trans->rootvol);
584                                 ip->flags |= HAMMER_INODE_ONDISK;
585                                 if (hammer_debug_inode)
586                                         kprintf("NOWONDISK %p\n", ip);
587                         }
588                 }
589         }
590
591         /*
592          * If the inode has been destroyed, clean out any left-over flags
593          * that may have been set by the frontend.
594          */
595         if (error == 0 && (ip->flags & HAMMER_INODE_DELETED)) { 
596                 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY |
597                                     HAMMER_INODE_ITIMES);
598         }
599         return(error);
600 }
601
602 /*
603  * Update only the itimes fields.  This is done non-historically.  The
604  * record is updated in-place on the disk.
605  */
606 static int
607 hammer_update_itimes(hammer_cursor_t cursor, hammer_inode_t ip)
608 {
609         hammer_transaction_t trans = cursor->trans;
610         struct hammer_btree_leaf_elm *leaf;
611         int error;
612
613 retry:
614         error = 0;
615         if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
616             HAMMER_INODE_ONDISK) {
617                 hammer_normalize_cursor(cursor);
618                 cursor->key_beg.localization = HAMMER_LOCALIZE_INODE;
619                 cursor->key_beg.obj_id = ip->obj_id;
620                 cursor->key_beg.key = 0;
621                 cursor->key_beg.create_tid = 0;
622                 cursor->key_beg.delete_tid = 0;
623                 cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE;
624                 cursor->key_beg.obj_type = 0;
625                 cursor->asof = ip->obj_asof;
626                 cursor->flags &= ~HAMMER_CURSOR_INITMASK;
627                 cursor->flags |= HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_ASOF;
628                 cursor->flags |= HAMMER_CURSOR_BACKEND;
629
630                 error = hammer_btree_lookup(cursor);
631                 if (error) {
632                         kprintf("error %d\n", error);
633                         Debugger("hammer_update_itimes1");
634                 }
635                 if (error == 0) {
636                         /*
637                          * Do not generate UNDO records for atime updates.
638                          */
639                         leaf = cursor->leaf;
640                         hammer_modify_node(trans, cursor->node, 
641                                            &leaf->atime, sizeof(leaf->atime));
642                         leaf->atime = ip->sync_ino_leaf.atime;
643                         hammer_modify_node_done(cursor->node);
644                         /*rec->ino_mtime = ip->sync_ino_rec.ino_mtime;*/
645                         ip->sync_flags &= ~HAMMER_INODE_ITIMES;
646                         /* XXX recalculate crc */
647                         hammer_cache_node(cursor->node, &ip->cache[0]);
648                 }
649                 if (error == EDEADLK) {
650                         hammer_done_cursor(cursor);
651                         error = hammer_init_cursor(trans, cursor,
652                                                    &ip->cache[0], ip);
653                         if (error == 0)
654                                 goto retry;
655                 }
656         }
657         return(error);
658 }
659
660 /*
661  * Release a reference on an inode, flush as requested.
662  *
663  * On the last reference we queue the inode to the flusher for its final
664  * disposition.
665  */
666 void
667 hammer_rel_inode(struct hammer_inode *ip, int flush)
668 {
669         hammer_mount_t hmp = ip->hmp;
670
671         /*
672          * Handle disposition when dropping the last ref.
673          */
674         for (;;) {
675                 if (ip->lock.refs == 1) {
676                         /*
677                          * Determine whether on-disk action is needed for
678                          * the inode's final disposition.
679                          */
680                         KKASSERT(ip->vp == NULL);
681                         hammer_inode_unloadable_check(ip, 0);
682                         if (ip->flags & HAMMER_INODE_MODMASK) {
683                                 if (hmp->rsv_inodes > desiredvnodes) {
684                                         hammer_flush_inode(ip,
685                                                            HAMMER_FLUSH_SIGNAL);
686                                 } else {
687                                         hammer_flush_inode(ip, 0);
688                                 }
689                         } else if (ip->lock.refs == 1) {
690                                 hammer_unload_inode(ip);
691                                 break;
692                         }
693                 } else {
694                         if (flush)
695                                 hammer_flush_inode(ip, 0);
696
697                         /*
698                          * The inode still has multiple refs, try to drop
699                          * one ref.
700                          */
701                         KKASSERT(ip->lock.refs >= 1);
702                         if (ip->lock.refs > 1) {
703                                 hammer_unref(&ip->lock);
704                                 break;
705                         }
706                 }
707         }
708 }
709
710 /*
711  * Unload and destroy the specified inode.  Must be called with one remaining
712  * reference.  The reference is disposed of.
713  *
714  * This can only be called in the context of the flusher.
715  */
716 static int
717 hammer_unload_inode(struct hammer_inode *ip)
718 {
719         hammer_mount_t hmp = ip->hmp;
720
721         KASSERT(ip->lock.refs == 1,
722                 ("hammer_unload_inode: %d refs\n", ip->lock.refs));
723         KKASSERT(ip->vp == NULL);
724         KKASSERT(ip->flush_state == HAMMER_FST_IDLE);
725         KKASSERT(ip->cursor_ip_refs == 0);
726         KKASSERT(ip->lock.lockcount == 0);
727         KKASSERT((ip->flags & HAMMER_INODE_MODMASK) == 0);
728
729         KKASSERT(RB_EMPTY(&ip->rec_tree));
730         KKASSERT(TAILQ_EMPTY(&ip->target_list));
731
732         RB_REMOVE(hammer_ino_rb_tree, &hmp->rb_inos_root, ip);
733
734         hammer_uncache_node(&ip->cache[0]);
735         hammer_uncache_node(&ip->cache[1]);
736         if (ip->objid_cache)
737                 hammer_clear_objid(ip);
738         --hammer_count_inodes;
739         --hmp->count_inodes;
740         if (hmp->flags & HAMMER_MOUNT_WAITIMAX)
741                 hammer_inode_wakereclaims(hmp);
742
743         if (ip->flags & HAMMER_INODE_RECLAIM) {
744                 --hammer_count_reclaiming;
745                 --hmp->inode_reclaims;
746                 ip->flags &= ~HAMMER_INODE_RECLAIM;
747         }
748         kfree(ip, M_HAMMER);
749
750         return(0);
751 }
752
753 /*
754  * Called on mount -u when switching from RW to RO or vice-versa.  Adjust
755  * the read-only flag for cached inodes.
756  *
757  * This routine is called from a RB_SCAN().
758  */
759 int
760 hammer_reload_inode(hammer_inode_t ip, void *arg __unused)
761 {
762         hammer_mount_t hmp = ip->hmp;
763
764         if (hmp->ronly || hmp->asof != HAMMER_MAX_TID)
765                 ip->flags |= HAMMER_INODE_RO;
766         else
767                 ip->flags &= ~HAMMER_INODE_RO;
768         return(0);
769 }
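
/*
 * Illustrative sketch (not part of the original source): the mount
 * update path re-scans all cached inodes with something along these
 * lines to refresh their RO flags (the real call site is in the VFS
 * mount code, not shown here).
 */
#if 0
	RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL,
		hammer_reload_inode, NULL);
#endif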
770
771 /*
772  * A transaction has modified an inode, requiring updates as specified by
773  * the passed flags.
774  *
775  * HAMMER_INODE_DDIRTY: Inode data has been updated
776  * HAMMER_INODE_XDIRTY: Dirty in-memory records
777  * HAMMER_INODE_BUFS:   Dirty buffer cache buffers
778  * HAMMER_INODE_DELETED: Inode record/data must be deleted
779  * HAMMER_INODE_ITIMES: mtime/atime has been updated
780  */
781 void
782 hammer_modify_inode(hammer_inode_t ip, int flags)
783 {
784         KKASSERT ((ip->flags & HAMMER_INODE_RO) == 0 ||
785                   (flags & (HAMMER_INODE_DDIRTY |
786                             HAMMER_INODE_XDIRTY | HAMMER_INODE_BUFS |
787                             HAMMER_INODE_DELETED | HAMMER_INODE_ITIMES)) == 0);
788         if ((ip->flags & HAMMER_INODE_RSV_INODES) == 0) {
789                 ip->flags |= HAMMER_INODE_RSV_INODES;
790                 ++ip->hmp->rsv_inodes;
791         }
792
793         ip->flags |= flags;
794 }
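
/*
 * Illustrative sketch (not part of the original source): a chmod-style
 * setattr path updates the in-memory inode data and then marks the
 * inode dirty so the flusher will write out a new inode record.
 * 'new_mode' is a placeholder argument.
 */
#if 0
static void
example_set_mode(hammer_inode_t ip, int new_mode)
{
	ip->ino_data.mode = new_mode;
	hammer_modify_inode(ip, HAMMER_INODE_DDIRTY);
}
#endif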
795
796 /*
797  * Request that an inode be flushed.  This whole mess cannot block and may
798  * recurse.  Once requested HAMMER will attempt to actively flush it until
799  * the flush can be done.
800  *
801  * The inode may already be flushing, or may be in a setup state.  We can
802  * place the inode in a flushing state if it is currently idle and flag it
803  * to reflush if it is currently flushing.
804  */
805 void
806 hammer_flush_inode(hammer_inode_t ip, int flags)
807 {
808         hammer_record_t depend;
809         int r, good;
810
811         /*
812          * Trivial 'nothing to flush' case.  If the inode is in a SETUP
813          * state we have to put it back into an IDLE state so we can
814          * drop the extra ref.
815          */
816         if ((ip->flags & HAMMER_INODE_MODMASK) == 0) {
817                 if (ip->flush_state == HAMMER_FST_SETUP) {
818                         ip->flush_state = HAMMER_FST_IDLE;
819                         hammer_rel_inode(ip, 0);
820                 }
821                 return;
822         }
823
824         /*
825          * Our flush action will depend on the current state.
826          */
827         switch(ip->flush_state) {
828         case HAMMER_FST_IDLE:
829                 /*
830                  * We have no dependencies and can flush immediately.  Some
831                  * of our children may not be flushable so we have to re-test
832                  * with that additional knowledge.
833                  */
834                 hammer_flush_inode_core(ip, flags);
835                 break;
836         case HAMMER_FST_SETUP:
837                 /*
838                  * Recurse upwards through dependencies via target_list
839                  * and start their flusher actions going if possible.
840                  *
841                  * 'good' is our connectivity.  -1 means we have none and
842                  * can't flush, 0 means there weren't any dependencies, and
843                  * 1 means we have good connectivity.
844                  */
845                 good = 0;
846                 TAILQ_FOREACH(depend, &ip->target_list, target_entry) {
847                         r = hammer_setup_parent_inodes(depend);
848                         if (r < 0 && good == 0)
849                                 good = -1;
850                         if (r > 0)
851                                 good = 1;
852                 }
853
854                 /*
855                  * We can continue if good >= 0.  Determine how many records
856                  * under our inode can be flushed (and mark them).
857                  */
858                 if (good >= 0) {
859                         hammer_flush_inode_core(ip, flags);
860                 } else {
861                         ip->flags |= HAMMER_INODE_REFLUSH;
862                         if (flags & HAMMER_FLUSH_SIGNAL) {
863                                 ip->flags |= HAMMER_INODE_RESIGNAL;
864                                 hammer_flusher_async(ip->hmp);
865                         }
866                 }
867                 break;
868         default:
869                 /*
870                  * We are already flushing, flag the inode to reflush
871                  * if needed after it completes its current flush.
872                  */
873                 if ((ip->flags & HAMMER_INODE_REFLUSH) == 0)
874                         ip->flags |= HAMMER_INODE_REFLUSH;
875                 if (flags & HAMMER_FLUSH_SIGNAL) {
876                         ip->flags |= HAMMER_INODE_RESIGNAL;
877                         hammer_flusher_async(ip->hmp);
878                 }
879                 break;
880         }
881 }
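
/*
 * Illustrative sketch (not part of the original source): a caller that
 * wants the flusher to pick the inode up promptly passes
 * HAMMER_FLUSH_SIGNAL; passing 0 only queues the inode for the next
 * flush cycle.
 */
#if 0
static void
example_queue_flush(hammer_inode_t ip)
{
	hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
}
#endif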
882
883 /*
884  * We are asked to recurse upwards and convert the record from SETUP
885  * to FLUSH if possible.  record->ip is a parent of the caller's inode,
886  * and record->target_ip is the caller's inode.
887  *
888  * Return 1 if the record gives us connectivity
889  *
890  * Return 0 if the record is not relevant 
891  *
892  * Return -1 if we can't resolve the dependency and there is no connectivity.
893  */
894 static int
895 hammer_setup_parent_inodes(hammer_record_t record)
896 {
897         hammer_mount_t hmp = record->ip->hmp;
898         hammer_record_t depend;
899         hammer_inode_t ip;
900         int r, good;
901
902         KKASSERT(record->flush_state != HAMMER_FST_IDLE);
903         ip = record->ip;
904
905         /*
906          * If the record is already flushing, is it in our flush group?
907          *
908          * If it is in our flush group but it is a general record or a 
909          * delete-on-disk, it does not improve our connectivity (return 0),
910          * and if the target inode is not trying to destroy itself we can't
911          * allow the operation yet anyway (the second return -1).
912          */
913         if (record->flush_state == HAMMER_FST_FLUSH) {
914                 if (record->flush_group != hmp->flusher_next) {
915                         ip->flags |= HAMMER_INODE_REFLUSH;
916                         return(-1);
917                 }
918                 if (record->type == HAMMER_MEM_RECORD_ADD)
919                         return(1);
920                 /* GENERAL or DEL */
921                 return(0);
922         }
923
924         /*
925          * It must be a setup record.  Try to resolve the setup dependencies
926          * by recursing upwards so we can place ip on the flush list.
927          */
928         KKASSERT(record->flush_state == HAMMER_FST_SETUP);
929
930         good = 0;
931         TAILQ_FOREACH(depend, &ip->target_list, target_entry) {
932                 r = hammer_setup_parent_inodes(depend);
933                 if (r < 0 && good == 0)
934                         good = -1;
935                 if (r > 0)
936                         good = 1;
937         }
938
939         /*
940          * We can't flush ip because it has no connectivity (XXX also check
941          * nlinks for pre-existing connectivity!).  Flag it so any resolution
942          * recurses back down.
943          */
944         if (good < 0) {
945                 ip->flags |= HAMMER_INODE_REFLUSH;
946                 return(good);
947         }
948
949         /*
950          * We are go, place the parent inode in a flushing state so we can
951          * place its record in a flushing state.  Note that the parent
952          * may already be flushing.  The record must be in the same flush
953          * group as the parent.
954          */
955         if (ip->flush_state != HAMMER_FST_FLUSH)
956                 hammer_flush_inode_core(ip, HAMMER_FLUSH_RECURSION);
957         KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
958         KKASSERT(record->flush_state == HAMMER_FST_SETUP);
959
960 #if 0
961         if (record->type == HAMMER_MEM_RECORD_DEL &&
962             (record->target_ip->flags & (HAMMER_INODE_DELETED|HAMMER_INODE_DELONDISK)) == 0) {
963                 /*
964                  * Regardless of flushing state we cannot sync this path if the
965                  * record represents a delete-on-disk but the target inode
966                  * is not ready to sync its own deletion.
967                  *
968                  * XXX need to count effective nlinks to determine whether
969                  * the flush is ok, otherwise removing a hardlink will
970                  * just leave the DEL record to rot.
971                  */
972                 record->target_ip->flags |= HAMMER_INODE_REFLUSH;
973                 return(-1);
974         } else
975 #endif
976         if (ip->flush_group == ip->hmp->flusher_next) {
977                 /*
978                  * This is the record we wanted to synchronize.
979                  */
980                 record->flush_state = HAMMER_FST_FLUSH;
981                 record->flush_group = ip->flush_group;
982                 hammer_ref(&record->lock);
983                 if (record->type == HAMMER_MEM_RECORD_ADD)
984                         return(1);
985
986                 /*
987                  * A general or delete-on-disk record does not contribute
988                  * to our visibility.  We can still flush it, however.
989                  */
990                 return(0);
991         } else {
992                 /*
993                  * We couldn't resolve the dependencies, so request that the
994                  * inode be flushed when the dependencies can be resolved.
995                  */
996                 ip->flags |= HAMMER_INODE_REFLUSH;
997                 return(-1);
998         }
999 }
1000
1001 /*
1002  * This is the core routine placing an inode into the FST_FLUSH state.
1003  */
1004 static void
1005 hammer_flush_inode_core(hammer_inode_t ip, int flags)
1006 {
1007         int go_count;
1008
1009         /*
1010          * Set flush state and prevent the flusher from cycling into
1011          * the next flush group.  Do not place the ip on the list yet.
1012          * Inodes not in the idle state get an extra reference.
1013          */
1014         KKASSERT(ip->flush_state != HAMMER_FST_FLUSH);
1015         if (ip->flush_state == HAMMER_FST_IDLE)
1016                 hammer_ref(&ip->lock);
1017         ip->flush_state = HAMMER_FST_FLUSH;
1018         ip->flush_group = ip->hmp->flusher_next;
1019         ++ip->hmp->flusher_lock;
1020
1021         /*
1022          * We need to be able to vfsync/truncate from the backend.
1023          */
1024         KKASSERT((ip->flags & HAMMER_INODE_VHELD) == 0);
1025         if (ip->vp && (ip->vp->v_flag & VINACTIVE) == 0) {
1026                 ip->flags |= HAMMER_INODE_VHELD;
1027                 vref(ip->vp);
1028         }
1029
1030         /*
1031          * Figure out how many in-memory records we can actually flush
1032          * (not including inode meta-data, buffers, etc).
1033          */
1034         if (flags & HAMMER_FLUSH_RECURSION) {
1035                 go_count = 1;
1036         } else {
1037                 go_count = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
1038                                    hammer_setup_child_callback, NULL);
1039         }
1040
1041         /*
1042          * This is a more involved test that includes go_count.  If we
1043          * can't flush, flag the inode and return.  If go_count is 0 we
1044          * are unable to flush any records in our rec_tree and
1045          * must ignore the XDIRTY flag.
1046          */
1047         if (go_count == 0) {
1048                 if ((ip->flags & HAMMER_INODE_MODMASK_NOXDIRTY) == 0) {
1049                         ip->flags |= HAMMER_INODE_REFLUSH;
1050                         ip->flush_state = HAMMER_FST_SETUP;
1051                         if (ip->flags & HAMMER_INODE_VHELD) {
1052                                 ip->flags &= ~HAMMER_INODE_VHELD;
1053                                 vrele(ip->vp);
1054                         }
1055                         if (flags & HAMMER_FLUSH_SIGNAL) {
1056                                 ip->flags |= HAMMER_INODE_RESIGNAL;
1057                                 hammer_flusher_async(ip->hmp);
1058                         }
1059                         if (--ip->hmp->flusher_lock == 0)
1060                                 wakeup(&ip->hmp->flusher_lock);
1061                         return;
1062                 }
1063         }
1064
1065         /*
1066          * Snapshot the state of the inode for the backend flusher.
1067          *
1068          * The truncation must be retained in the frontend until after
1069          * we've actually performed the record deletion.
1070          *
1071          * NOTE: The DELETING flag is a mod flag, but it is also sticky,
1072          * and stays in ip->flags.  Once set, it stays set until the
1073          * inode is destroyed.
1074          */
1075         ip->sync_flags = (ip->flags & HAMMER_INODE_MODMASK);
1076         ip->sync_trunc_off = ip->trunc_off;
1077         ip->sync_ino_leaf = ip->ino_leaf;
1078         ip->sync_ino_data = ip->ino_data;
1079         ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
1080         ip->flags &= ~HAMMER_INODE_MODMASK;
1081 #ifdef DEBUG_TRUNCATE
1082         if ((ip->sync_flags & HAMMER_INODE_TRUNCATED) && ip == HammerTruncIp)
1083                 kprintf("truncateS %016llx\n", ip->sync_trunc_off);
1084 #endif
1085
1086         /*
1087          * The flusher list inherits our inode and reference.
1088          */
1089         TAILQ_INSERT_TAIL(&ip->hmp->flush_list, ip, flush_entry);
1090         if (--ip->hmp->flusher_lock == 0)
1091                 wakeup(&ip->hmp->flusher_lock);
1092
1093         if (flags & HAMMER_FLUSH_SIGNAL) {
1094                 hammer_flusher_async(ip->hmp);
1095         }
1096 }
1097
1098 /*
1099  * Callback for scan of ip->rec_tree.  Try to include each record in our
1100  * flush.  ip->flush_group has been set but the inode has not yet been
1101  * moved into a flushing state.
1102  *
1103  * If we get stuck on a record we have to set HAMMER_INODE_REFLUSH on
1104  * both inodes.
1105  *
1106  * We return 1 for any record placed or found in FST_FLUSH, which prevents
1107  * the caller from shortcutting the flush.
1108  */
1109 static int
1110 hammer_setup_child_callback(hammer_record_t rec, void *data)
1111 {
1112         hammer_inode_t target_ip;
1113         hammer_inode_t ip;
1114         int r;
1115
1116         /*
1117          * If the record has been deleted by the backend (it's being held
1118          * by the frontend in a race), just ignore it.
1119          */
1120         if (rec->flags & HAMMER_RECF_DELETED_BE)
1121                 return(0);
1122
1123         /*
1124          * If the record is in an idle state it has no dependencies and
1125          * can be flushed.
1126          */
1127         ip = rec->ip;
1128         r = 0;
1129
1130         switch(rec->flush_state) {
1131         case HAMMER_FST_IDLE:
1132                 /*
1133                  * Record has no setup dependency, so we can flush it.
1134                  */
1135                 KKASSERT(rec->target_ip == NULL);
1136                 rec->flush_state = HAMMER_FST_FLUSH;
1137                 rec->flush_group = ip->flush_group;
1138                 hammer_ref(&rec->lock);
1139                 r = 1;
1140                 break;
1141         case HAMMER_FST_SETUP:
1142                 /*
1143                  * Record has a setup dependency.  Try to include the
1144                  * target ip in the flush.
1145                  *
1146                  * We have to be careful here; if we do not do the right
1147                  * thing we can lose track of dirty inodes and the system
1148                  * will lock up trying to allocate buffers.
1149                  */
1150                 target_ip = rec->target_ip;
1151                 KKASSERT(target_ip != NULL);
1152                 KKASSERT(target_ip->flush_state != HAMMER_FST_IDLE);
1153                 if (target_ip->flush_state == HAMMER_FST_FLUSH) {
1154                         /*
1155                          * If the target IP is already flushing in our group
1156                          * we are golden, otherwise make sure the target
1157                          * reflushes.
1158                          */
1159                         if (target_ip->flush_group == ip->flush_group) {
1160                                 rec->flush_state = HAMMER_FST_FLUSH;
1161                                 rec->flush_group = ip->flush_group;
1162                                 hammer_ref(&rec->lock);
1163                                 r = 1;
1164                         } else {
1165                                 target_ip->flags |= HAMMER_INODE_REFLUSH;
1166                         }
1167                 } else if (rec->type == HAMMER_MEM_RECORD_ADD) {
1168                         /*
1169                          * If the target IP is not flushing we can force
1170                          * it to flush, even if it is unable to write out
1171                          * any of its own records we have at least one in
1172                          * hand that we CAN deal with.
1173                          */
1174                         rec->flush_state = HAMMER_FST_FLUSH;
1175                         rec->flush_group = ip->flush_group;
1176                         hammer_ref(&rec->lock);
1177                         hammer_flush_inode_core(target_ip,
1178                                                 HAMMER_FLUSH_RECURSION);
1179                         r = 1;
1180                 } else {
1181                         /*
1182                          * General or delete-on-disk record.
1183                          *
1184                  * XXX this needs help.  If it is a delete-on-disk we could
1185                  * disconnect the target.  If the target has its own
1186                  * dependencies they really need to be flushed.
1187                          *
1188                          * XXX
1189                          */
1190                         rec->flush_state = HAMMER_FST_FLUSH;
1191                         rec->flush_group = ip->flush_group;
1192                         hammer_ref(&rec->lock);
1193                         hammer_flush_inode_core(target_ip,
1194                                                 HAMMER_FLUSH_RECURSION);
1195                         r = 1;
1196                 }
1197                 break;
1198         case HAMMER_FST_FLUSH:
1199                 /* 
1200                  * Record already associated with a flush group.  It had
1201                  * better be ours.
1202                  */
1203                 KKASSERT(rec->flush_group == ip->flush_group);
1204                 r = 1;
1205                 break;
1206         }
1207         return(r);
1208 }
1209
1210 /*
1211  * Wait for a previously queued flush to complete
1212  */
1213 void
1214 hammer_wait_inode(hammer_inode_t ip)
1215 {
1216         while (ip->flush_state != HAMMER_FST_IDLE) {
1217                 if (ip->flush_state == HAMMER_FST_SETUP) {
1218                         hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1219                 } else {
1220                         ip->flags |= HAMMER_INODE_FLUSHW;
1221                         tsleep(&ip->flags, 0, "hmrwin", 0);
1222                 }
1223         }
1224 }
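
/*
 * Illustrative sketch (not part of the original source): a synchronous
 * path (fsync-style, see hammer_vnops.c for the real code) kicks off a
 * signaled flush and then blocks until the inode returns to the idle
 * state.
 */
#if 0
static void
example_sync_inode(hammer_inode_t ip)
{
	hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
	hammer_wait_inode(ip);
}
#endif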
1225
1226 /*
1227  * Called by the backend code when a flush has been completed.
1228  * The inode has already been removed from the flush list.
1229  *
1230  * A pipelined flush can occur, in which case we must re-enter the
1231  * inode on the list and re-copy its fields.
1232  */
1233 void
1234 hammer_flush_inode_done(hammer_inode_t ip)
1235 {
1236         int dorel = 0;
1237
1238         KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
1239
1240         /*
1241          * Merge left-over flags back into the frontend and fix the state.
1242          */
1243         ip->flags |= ip->sync_flags;
1244
1245         /*
1246          * The backend may have adjusted nlinks, so if the adjusted nlinks
1247          * does not match the frontend's, set the frontend's DDIRTY flag again.
1248          */
1249         if (ip->ino_data.nlinks != ip->sync_ino_data.nlinks)
1250                 ip->flags |= HAMMER_INODE_DDIRTY;
1251
1252         /*
1253          * Fix up the dirty buffer status.  IO completions will also
1254          * try to clean up rsv_databufs.
1255          */
1256         if (ip->vp && RB_ROOT(&ip->vp->v_rbdirty_tree)) {
1257                 ip->flags |= HAMMER_INODE_BUFS;
1258         } else {
1259                 ip->hmp->rsv_databufs -= ip->rsv_databufs;
1260                 ip->rsv_databufs = 0;
1261         }
1262
1263         /*
1264          * Re-set the XDIRTY flag if some of the inode's in-memory records
1265          * could not be flushed.
1266          */
1267         KKASSERT((RB_EMPTY(&ip->rec_tree) &&
1268                   (ip->flags & HAMMER_INODE_XDIRTY) == 0) ||
1269                  (!RB_EMPTY(&ip->rec_tree) &&
1270                   (ip->flags & HAMMER_INODE_XDIRTY) != 0));
1271
1272         /*
1273          * Do not lose track of inodes which no longer have vnode
1274          * associations, otherwise they may never get flushed again.
1275          */
1276         if ((ip->flags & HAMMER_INODE_MODMASK) && ip->vp == NULL)
1277                 ip->flags |= HAMMER_INODE_REFLUSH;
1278
1279         /*
1280          * Adjust flush_state.  The target state (idle or setup) shouldn't
1281          * be terribly important since we will reflush if we really need
1282          * to do anything. XXX
1283          */
1284         if (TAILQ_EMPTY(&ip->target_list) && RB_EMPTY(&ip->rec_tree)) {
1285                 ip->flush_state = HAMMER_FST_IDLE;
1286                 dorel = 1;
1287         } else {
1288                 ip->flush_state = HAMMER_FST_SETUP;
1289         }
1290
1291         /*
1292          * Clean up the vnode ref
1293          */
1294         if (ip->flags & HAMMER_INODE_VHELD) {
1295                 ip->flags &= ~HAMMER_INODE_VHELD;
1296                 vrele(ip->vp);
1297         }
1298
1299         /*
1300          * If the frontend made more changes and requested another flush,
1301          * then try to get it running.
1302          */
1303         if (ip->flags & HAMMER_INODE_REFLUSH) {
1304                 ip->flags &= ~HAMMER_INODE_REFLUSH;
1305                 if (ip->flags & HAMMER_INODE_RESIGNAL) {
1306                         ip->flags &= ~HAMMER_INODE_RESIGNAL;
1307                         hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1308                 } else {
1309                         hammer_flush_inode(ip, 0);
1310                 }
1311         }
1312
1313         /*
1314          * If the inode is now clean drop the space reservation.
1315          */
1316         if ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
1317             (ip->flags & HAMMER_INODE_RSV_INODES)) {
1318                 ip->flags &= ~HAMMER_INODE_RSV_INODES;
1319                 --ip->hmp->rsv_inodes;
1320         }
1321
1322         /*
1323          * Finally, if the frontend is waiting for a flush to complete,
1324          * wake it up.
1325          */
1326         if (ip->flush_state != HAMMER_FST_FLUSH) {
1327                 if (ip->flags & HAMMER_INODE_FLUSHW) {
1328                         ip->flags &= ~HAMMER_INODE_FLUSHW;
1329                         wakeup(&ip->flags);
1330                 }
1331         }
1332         if (dorel)
1333                 hammer_rel_inode(ip, 0);
1334 }
1335
1336 /*
1337  * Called from hammer_sync_inode() to synchronize in-memory records
1338  * to the media.
1339  */
1340 static int
1341 hammer_sync_record_callback(hammer_record_t record, void *data)
1342 {
1343         hammer_cursor_t cursor = data;
1344         hammer_transaction_t trans = cursor->trans;
1345         int error;
1346
1347         /*
1348          * Skip records that do not belong to the current flush.
1349          */
1350         ++hammer_stats_record_iterations;
1351         if (record->flush_state != HAMMER_FST_FLUSH)
1352                 return(0);
1353
1354 #if 1
1355         if (record->flush_group != record->ip->flush_group) {
1356                 kprintf("sync_record %p ip %p bad flush group %d %d\n", record, record->ip, record->flush_group ,record->ip->flush_group);
1357                 Debugger("blah2");
1358                 return(0);
1359         }
1360 #endif
1361         KKASSERT(record->flush_group == record->ip->flush_group);
1362
1363         /*
1364          * Interlock the record using the BE flag.  Once BE is set the
1365          * frontend cannot change the state of FE.
1366          *
1367          * NOTE: If FE is set prior to us setting BE we still sync the
1368          * record out, but the flush completion code converts it to 
1369          * a delete-on-disk record instead of destroying it.
1370          */
1371         KKASSERT((record->flags & HAMMER_RECF_INTERLOCK_BE) == 0);
1372         record->flags |= HAMMER_RECF_INTERLOCK_BE;
1373
1374         /*
1375          * The backend may have already disposed of the record.
1376          */
1377         if (record->flags & HAMMER_RECF_DELETED_BE) {
1378                 error = 0;
1379                 goto done;
1380         }
1381
1382         /*
1383          * If the whole inode is being deleted, all on-disk records will
1384          * be deleted very soon, so we can't sync any new records to disk
1385          * because they would be deleted in the same transaction they were
1386          * created in (delete_tid == create_tid), which will assert.
1387          *
1388          * XXX There may be a case with RECORD_ADD with DELETED_FE set
1389          * that we currently panic on.
1390          */
1391         if (record->ip->sync_flags & HAMMER_INODE_DELETING) {
1392                 switch(record->type) {
1393                 case HAMMER_MEM_RECORD_DATA:
1394                         /*
1395                          * We don't have to do anything, if the record was
1396                          * committed the space will have been accounted for
1397                          * in the blockmap.
1398                          */
1399                         /* fall through */
1400                 case HAMMER_MEM_RECORD_GENERAL:
1401                         record->flags |= HAMMER_RECF_DELETED_FE;
1402                         record->flags |= HAMMER_RECF_DELETED_BE;
1403                         error = 0;
1404                         goto done;
1405                 case HAMMER_MEM_RECORD_ADD:
1406                         panic("hammer_sync_record_callback: illegal add "
1407                               "during inode deletion record %p", record);
1408                         break; /* NOT REACHED */
1409                 case HAMMER_MEM_RECORD_INODE:
1410                         panic("hammer_sync_record_callback: attempt to "
1411                               "sync inode record %p?", record);
1412                         break; /* NOT REACHED */
1413                 case HAMMER_MEM_RECORD_DEL:
1414                         /* 
1415                          * Follow through and issue the on-disk deletion
1416                          */
1417                         break;
1418                 }
1419         }
1420
1421         /*
1422          * If DELETED_FE is set we may have already sent dependent pieces
1423          * to the disk and we must flush the record as if it hadn't been
1424          * deleted.  This creates a bit of a mess because we have to
1425          * have ip_sync_record convert the record to MEM_RECORD_DEL before
1426          * it inserts the B-Tree record.  Otherwise the media sync might
1427          * be visible to the frontend.
1428          */
1429         if (record->flags & HAMMER_RECF_DELETED_FE) {
1430                 if (record->type == HAMMER_MEM_RECORD_ADD) {
1431                         record->flags |= HAMMER_RECF_CONVERT_DELETE;
1432                 } else {
1433                         KKASSERT(record->type != HAMMER_MEM_RECORD_DEL);
1434                         return(0);
1435                 }
1436         }
1437
1438         /*
1439          * Assign the create_tid for new records.  Deletions already
1440          * have the record's entire key properly set up.
1441          */
1442         if (record->type != HAMMER_MEM_RECORD_DEL)
1443                 record->leaf.base.create_tid = trans->tid;
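             /*
              * Attempt the media update.  If the cursor deadlocks against
              * another operation, tear the cursor down, re-initialize it,
              * and retry.
              */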
1444         for (;;) {
1445                 error = hammer_ip_sync_record_cursor(cursor, record);
1446                 if (error != EDEADLK)
1447                         break;
1448                 hammer_done_cursor(cursor);
1449                 error = hammer_init_cursor(trans, cursor, &record->ip->cache[0],
1450                                            record->ip);
1451                 if (error)
1452                         break;
1453         }
1454         record->flags &= ~HAMMER_RECF_CONVERT_DELETE;
1455
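             /*
              * On failure return a negative error code; hammer_sync_inode()
              * converts negative RB_SCAN returns back into positive errnos.
              */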
1456         if (error) {
1457                 error = -error;
1458                 if (error != -ENOSPC) {
1459                         kprintf("hammer_sync_record_callback: sync failed rec "
1460                                 "%p, error %d\n", record, error);
1461                         Debugger("sync failed rec");
1462                 }
1463         }
1464 done:
1465         hammer_flush_record_done(record, error);
1466         return(error);
1467 }
1468
1469 /*
1470  * XXX error handling
1471  */
1472 int
1473 hammer_sync_inode(hammer_inode_t ip)
1474 {
1475         struct hammer_transaction trans;
1476         struct hammer_cursor cursor;
1477         hammer_record_t depend;
1478         hammer_record_t next;
1479         int error, tmp_error;
1480         u_int64_t nlinks;
1481
1482         if ((ip->sync_flags & HAMMER_INODE_MODMASK) == 0)
1483                 return(0);
1484
1485         hammer_start_transaction_fls(&trans, ip->hmp);
1486         error = hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);
1487         if (error)
1488                 goto done;
1489
1490         /*
1491          * Any directory records referencing this inode which are not in
1492          * our current flush group must adjust our nlink count for the
1493          * purposes of synchronization to disk.
1494          *
1495          * Records which are in our flush group can be unlinked from our
1496          * inode now, potentially allowing the inode to be physically
1497          * deleted.
1498          */
1499         nlinks = ip->ino_data.nlinks;
1500         next = TAILQ_FIRST(&ip->target_list);
1501         while ((depend = next) != NULL) {
1502                 next = TAILQ_NEXT(depend, target_entry);
1503                 if (depend->flush_state == HAMMER_FST_FLUSH &&
1504                     depend->flush_group == ip->hmp->flusher_act) {
1505                         /*
1506                          * If this is an ADD that was deleted by the frontend
1507                          * the frontend nlinks count will have already been
1508                          * decremented, but the backend is going to sync its
1509                          * directory entry and must account for it.  The
1510                          * record will be converted to a delete-on-disk when
1511                          * it gets synced.
1512                          *
1513                          * If the ADD was not deleted by the frontend we
1514                          * can remove the dependency from our target_list.
1515                          */
1516                         if (depend->flags & HAMMER_RECF_DELETED_FE) {
1517                                 ++nlinks;
1518                         } else {
1519                                 TAILQ_REMOVE(&ip->target_list, depend,
1520                                              target_entry);
1521                                 depend->target_ip = NULL;
1522                         }
1523                 } else if ((depend->flags & HAMMER_RECF_DELETED_FE) == 0) {
1524                         /*
1525                          * Not part of our flush group
1526                          */
1527                         KKASSERT((depend->flags & HAMMER_RECF_DELETED_BE) == 0);
1528                         switch(depend->type) {
1529                         case HAMMER_MEM_RECORD_ADD:
1530                                 --nlinks;
1531                                 break;
1532                         case HAMMER_MEM_RECORD_DEL:
1533                                 ++nlinks;
1534                                 break;
1535                         default:
1536                                 break;
1537                         }
1538                 }
1539         }
1540
1541         /*
1542          * Set dirty if we had to modify the link count.
1543          */
1544         if (ip->sync_ino_data.nlinks != nlinks) {
1545                 KKASSERT((int64_t)nlinks >= 0);
1546                 ip->sync_ino_data.nlinks = nlinks;
1547                 ip->sync_flags |= HAMMER_INODE_DDIRTY;
1548         }
1549
1550 #if 0
1551         /*
1552          * XXX DISABLED FOR NOW.  With the new reservation support
1553          * we cannot resync pending data without confusing the hell
1554          * out of the in-memory record tree.
1555          */
1556         /*
1557          * Queue up as many dirty buffers as we can then set a flag to
1558          * cause any further BIOs to go to the alternative queue.
1559          */
1560         if (ip->flags & HAMMER_INODE_VHELD)
1561                 error = vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL);
1562         ip->flags |= HAMMER_INODE_WRITE_ALT;
1563
1564         /*
1565          * The buffer cache may contain dirty buffers beyond the inode
1566          * state we copied from the frontend to the backend.  Because
1567          * we are syncing our buffer cache on the backend, resync
1568          * the truncation point and the file size so we don't wipe out
1569          * any data.
1570          *
1571          * Syncing the buffer cache on the frontend has serious problems
1572          * because it prevents us from passively queueing dirty inodes
1573          * to the backend (the BIO's could stall indefinitely).
1574          */
1575         if (ip->flags & HAMMER_INODE_TRUNCATED) {
1576                 ip->sync_trunc_off = ip->trunc_off;
1577                 ip->sync_flags |= HAMMER_INODE_TRUNCATED;
1578         }
1579         if (ip->sync_ino_data.size != ip->ino_data.size) {
1580                 ip->sync_ino_data.size = ip->ino_data.size;
1581                 ip->sync_flags |= HAMMER_INODE_DDIRTY;
1582         }
1583 #endif
1584
1585         /*
1586          * If there is a truncation queued, destroy any data past the (aligned)
1587          * truncation point.  Userland will have dealt with the buffer
1588          * containing the truncation point for us.
1589          *
1590          * We don't flush pending frontend data buffers until after we've
1591          * dealt with the truncation.
1592          *
1593          * Don't bother if the inode is or has been deleted.
1594          */
1595         if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
1596                 /*
1597                  * Interlock trunc_off.  The VOP front-end may continue to
1598                  * make adjustments to it while we are blocked.
1599                  */
1600                 off_t trunc_off;
1601                 off_t aligned_trunc_off;
1602
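                     /*
                      * Round the truncation offset up to the next
                      * HAMMER_BUFSIZE boundary; only whole buffers beyond
                      * that point are deleted on-media below.
                      */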
1603                 trunc_off = ip->sync_trunc_off;
1604                 aligned_trunc_off = (trunc_off + HAMMER_BUFMASK) &
1605                                     ~HAMMER_BUFMASK64;
1606
1607                 /*
1608                  * Delete any whole blocks on-media.  The front-end has
1609                  * already cleaned out any partial block and made it
1610                  * pending.  The front-end may have updated trunc_off
1611                  * while we were blocked so we only use sync_trunc_off.
1612                  */
1613                 error = hammer_ip_delete_range(&cursor, ip,
1614                                                 aligned_trunc_off,
1615                                                 0x7FFFFFFFFFFFFFFFLL, 1);
1616                 if (error)
1617                         Debugger("hammer_ip_delete_range errored");
1618
1619                 /*
1620                  * Clear the truncation flag on the backend after we have
1621                  * completed the deletions.  Backend data is now good again
1622                  * (including new records we are about to sync, below).
1623                  */
1624                 ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
1625                 ip->sync_trunc_off = 0x7FFFFFFFFFFFFFFFLL;
1626         } else {
1627                 error = 0;
1628         }
1629
1630         /*
1631          * Now sync related records.  These will typically be directory
1632          * entries or delete-on-disk records.
1633          *
1634          * Not all records will be flushed, but clear XDIRTY anyway.  We
1635          * will set it again in the frontend hammer_flush_inode_done() 
1636          * if records remain.
1637          */
1638         if (error == 0) {
1639                 tmp_error = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
1640                                     hammer_sync_record_callback, &cursor);
1641                 if (tmp_error < 0)
1642                         tmp_error = -tmp_error;
1643                 if (tmp_error)
1644                         error = tmp_error;
1645         }
1646
1647         /*
1648          * If we are deleting the inode the frontend had better not have
1649          * any active references on elements making up the inode.
1650          */
1651         if (error == 0 && ip->sync_ino_data.nlinks == 0 &&
1652             RB_EMPTY(&ip->rec_tree) &&
1653             (ip->sync_flags & HAMMER_INODE_DELETING) &&
1654             (ip->flags & HAMMER_INODE_DELETED) == 0) {
1655                 int count1 = 0;
1656
1657                 ip->flags |= HAMMER_INODE_DELETED;
1658                 error = hammer_ip_delete_range_all(&cursor, ip, &count1);
1659                 if (error == 0) {
1660                         ip->sync_flags &= ~HAMMER_INODE_DELETING;
1661                         ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
1662                         KKASSERT(RB_EMPTY(&ip->rec_tree));
1663
1664                         /*
1665                          * Set delete_tid in both the frontend and backend
1666                          * copy of the inode record.  The DELETED flag handles
1667                          * this, do not set RDIRTY.
1668                          */
1669                         ip->ino_leaf.base.delete_tid = trans.tid;
1670                         ip->sync_ino_leaf.base.delete_tid = trans.tid;
1671
1672                         /*
1673                          * Adjust the inode count in the volume header
1674                          */
1675                         if (ip->flags & HAMMER_INODE_ONDISK) {
1676                                 hammer_modify_volume_field(&trans,
1677                                                            trans.rootvol,
1678                                                            vol0_stat_inodes);
1679                                 --ip->hmp->rootvol->ondisk->vol0_stat_inodes;
1680                                 hammer_modify_volume_done(trans.rootvol);
1681                         }
1682                 } else {
1683                         ip->flags &= ~HAMMER_INODE_DELETED;
1684                         Debugger("hammer_ip_delete_range_all errored");
1685                 }
1686         }
1687
1688         ip->sync_flags &= ~HAMMER_INODE_BUFS;
1689
1690         if (error)
1691                 Debugger("RB_SCAN errored");
1692
1693         /*
1694          * Now update the inode's on-disk inode-data and/or on-disk record.
1695          * DELETED and ONDISK are managed only in ip->flags.
1696          */
1697         switch(ip->flags & (HAMMER_INODE_DELETED | HAMMER_INODE_ONDISK)) {
1698         case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK:
1699                 /*
1700                  * If deleted and on-disk, don't set any additional flags.
1701                  * The delete flag takes care of things.
1702                  *
1703                  * Clear flags which may have been set by the frontend.
1704                  */
1705                 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY|
1706                                     HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
1707                                     HAMMER_INODE_DELETING);
1708                 break;
1709         case HAMMER_INODE_DELETED:
1710                 /*
1711                  * Take care of the case where a deleted inode was never
1712                  * flushed to the disk in the first place.
1713                  *
1714                  * Clear flags which may have been set by the frontend.
1715                  */
1716                 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY|
1717                                     HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
1718                                     HAMMER_INODE_DELETING);
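                     /*
                      * Toss any remaining in-memory records.  Marking them
                      * deleted on both the frontend and backend allows
                      * hammer_rel_mem_record() to free them.
                      */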
1719                 while (RB_ROOT(&ip->rec_tree)) {
1720                         hammer_record_t record = RB_ROOT(&ip->rec_tree);
1721                         hammer_ref(&record->lock);
1722                         KKASSERT(record->lock.refs == 1);
1723                         record->flags |= HAMMER_RECF_DELETED_FE;
1724                         record->flags |= HAMMER_RECF_DELETED_BE;
1725                         hammer_rel_mem_record(record);
1726                 }
1727                 break;
1728         case HAMMER_INODE_ONDISK:
1729                 /*
1730                  * If already on-disk, do not set any additional flags.
1731                  */
1732                 break;
1733         default:
1734                 /*
1735                  * If not on-disk and not deleted, set both dirty flags
1736                  * to force an initial record to be written.  Also set
1737                  * the create_tid for the inode.
1738                  *
1739                  * Set create_tid in both the frontend and backend
1740                  * copy of the inode record.
1741                  */
1742                 ip->ino_leaf.base.create_tid = trans.tid;
1743                 ip->sync_ino_leaf.base.create_tid = trans.tid;
1744                 ip->sync_flags |= HAMMER_INODE_DDIRTY;
1745                 break;
1746         }
1747
1748         /*
1749          * If RDIRTY or DDIRTY is set, write out a new record.  If the inode
1750          * is already on-disk the old record is marked as deleted.
1751          *
1752          * If DELETED is set hammer_update_inode() will delete the existing
1753          * record without writing out a new one.
1754          *
1755          * If *ONLY* the ITIMES flag is set we can update the record in-place.
1756          */
1757         if (ip->flags & HAMMER_INODE_DELETED) {
1758                 error = hammer_update_inode(&cursor, ip);
1759         } else 
1760         if ((ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) ==
1761             HAMMER_INODE_ITIMES) {
1762                 error = hammer_update_itimes(&cursor, ip);
1763         } else
1764         if (ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) {
1765                 error = hammer_update_inode(&cursor, ip);
1766         }
1767         if (error)
1768                 Debugger("hammer_update_itimes/inode errored");
1769 done:
1770         /*
1771          * Save the TID we used to sync the inode with to make sure we
1772          * do not improperly reuse it.
1773          */
1774         hammer_done_cursor(&cursor);
1775         hammer_done_transaction(&trans);
1776         return(error);
1777 }
1778
1779 /*
1780  * This routine is called when the OS is no longer actively referencing
1781  * the inode (but might still be keeping it cached), or when releasing
1782  * the last reference to an inode.
1783  *
1784  * At this point if the inode's nlinks count is zero we want to destroy
1785  * it, which may mean destroying it on-media too.
1786  */
1787 void
1788 hammer_inode_unloadable_check(hammer_inode_t ip, int getvp)
1789 {
1790         struct vnode *vp;
1791
1792         /*
1793          * Set the DELETING flag when the link count drops to 0 and the
1794          * OS no longer has any opens on the inode.
1795          *
1796          * The backend will clear DELETING (a mod flag) and set DELETED
1797          * (a state flag) when it is actually able to perform the
1798          * operation.
1799          */
1800         if (ip->ino_data.nlinks == 0 &&
1801             (ip->flags & (HAMMER_INODE_DELETING|HAMMER_INODE_DELETED)) == 0) {
1802                 ip->flags |= HAMMER_INODE_DELETING;
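                     /*
                      * Truncate to offset 0 so the backend destroys all of
                      * the inode's on-media data as part of the deletion.
                      */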
1803                 ip->flags |= HAMMER_INODE_TRUNCATED;
1804                 ip->trunc_off = 0;
1805                 vp = NULL;
1806                 if (getvp) {
1807                         if (hammer_get_vnode(ip, &vp) != 0)
1808                                 return;
1809                 }
1810
1811                 /*
1812                  * Final cleanup
1813                  */
1814                 if (ip->vp) {
1815                         vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
1816                         vnode_pager_setsize(ip->vp, 0);
1817                 }
1818                 if (getvp) {
1819                         vput(vp);
1820                 }
1821         }
1822 }
1823
1824 /*
1825  * Re-test an inode when a dependency has gone away to see if we
1826  * can chain flush it.
1827  */
1828 void
1829 hammer_test_inode(hammer_inode_t ip)
1830 {
1831         if (ip->flags & HAMMER_INODE_REFLUSH) {
1832                 ip->flags &= ~HAMMER_INODE_REFLUSH;
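                     /*
                      * Hold a temporary reference across the flush so the
                      * inode cannot be reclaimed out from under us.
                      */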
1833                 hammer_ref(&ip->lock);
1834                 if (ip->flags & HAMMER_INODE_RESIGNAL) {
1835                         ip->flags &= ~HAMMER_INODE_RESIGNAL;
1836                         hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1837                 } else {
1838                         hammer_flush_inode(ip, 0);
1839                 }
1840                 hammer_rel_inode(ip, 0);
1841         }
1842 }
1843
1844 /*
1845  * When a HAMMER inode is reclaimed it may have to be queued to the backend
1846  * for its final sync to disk.  Programs like blogbench can cause the backlog
1847  * to grow indefinitely.  Put a cap on the number of inodes we allow to be
1848  * in this state by giving the flusher time to drain.
1849  */
1850 void
1851 hammer_inode_waitreclaims(hammer_mount_t hmp)
1852 {
1853         int count;
1854         int delay;
1855         int minpt;
1856         int maxpt;
1857
1858         while (hmp->inode_reclaims > HAMMER_RECLAIM_MIN) {
1859                 count = hmp->count_inodes;
1860                 minpt = count * HAMMER_RECLAIM_SLOPCT / 100;
1861                 maxpt = count * HAMMER_RECLAIM_MAXPCT / 100;
1862
1863                 if (hmp->inode_reclaims < minpt)
1864                         break;
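                     /*
                      * Between minpt and maxpt the sleep scales linearly
                      * with the backlog, approaching one second (hz ticks)
                      * as the backlog approaches maxpt.
                      */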
1865                 if (hmp->inode_reclaims < maxpt) {
1866                         delay = (hmp->inode_reclaims - minpt) * hz /
1867                                 (maxpt - minpt);
1868                         if (delay == 0)
1869                                 delay = 1;
1870                         hammer_flusher_async(hmp);
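                             /*
                              * Nothing ever wakes us up on &count (a local
                              * variable), so this is purely a timed delay
                              * giving the flusher a chance to catch up.
                              */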
1871                         tsleep(&count, 0, "hmitik", delay);
1872                         break;
1873                 }
1874                 hmp->flags |= HAMMER_MOUNT_WAITIMAX;
1875                 hammer_flusher_async(hmp);
1876                 tsleep(&hmp->inode_reclaims, 0, "hmimax", hz / 10);
1877         }
1878 }
1879
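     /*
      * Wake up any thread blocked in hammer_inode_waitreclaims() once the
      * reclaim backlog has drained below the maximum threshold.
      */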
1880 void
1881 hammer_inode_wakereclaims(hammer_mount_t hmp)
1882 {
1883         int maxpt;
1884
1885         if ((hmp->flags & HAMMER_MOUNT_WAITIMAX) == 0)
1886                 return;
1887         maxpt = hmp->count_inodes * HAMMER_RECLAIM_MAXPCT / 100;
1888         if (hmp->inode_reclaims <= HAMMER_RECLAIM_MIN ||
1889             hmp->inode_reclaims < maxpt) {
1890                 hmp->flags &= ~HAMMER_MOUNT_WAITIMAX;
1891                 wakeup(&hmp->inode_reclaims);
1892         }
1893 }
1894