HAMMER 56C/Many: Performance tuning - MEDIA STRUCTURES CHANGED!
[dragonfly.git] / sys / vfs / hammer / hammer_inode.c
1/*
2 * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.78 2008/06/20 05:38:26 dillon Exp $
35 */
36
37#include "hammer.h"
38#include <vm/vm_extern.h>
39#include <sys/buf.h>
40#include <sys/buf2.h>
41
42static int hammer_unload_inode(struct hammer_inode *ip);
43static void hammer_flush_inode_core(hammer_inode_t ip, int flags);
44static int hammer_setup_child_callback(hammer_record_t rec, void *data);
45static int hammer_setup_parent_inodes(hammer_inode_t ip);
46static int hammer_setup_parent_inodes_helper(hammer_record_t record);
47static void hammer_inode_wakereclaims(hammer_inode_t ip);
48
49#ifdef DEBUG_TRUNCATE
50extern struct hammer_inode *HammerTruncIp;
51#endif
52
53/*
54 * The kernel is not actively referencing this vnode but is still holding
55 * it cached.
56 *
57 * This is called from the frontend.
58 */
59int
60hammer_vop_inactive(struct vop_inactive_args *ap)
61{
62 struct hammer_inode *ip = VTOI(ap->a_vp);
63
64 /*
65 * Degenerate case
66 */
67 if (ip == NULL) {
68 vrecycle(ap->a_vp);
69 return(0);
70 }
71
72 /*
73 * If the inode no longer has visibility in the filesystem try to
74 * recycle it immediately, even if the inode is dirty. Recycling
75 * it quickly allows the system to reclaim buffer cache and VM
76 * resources which can matter a lot in a heavily loaded system.
77 *
78 * This can deadlock in vfsync() if we aren't careful.
79 *
80 * Do not queue the inode to the flusher if we still have visibility,
81 * otherwise namespace calls such as chmod will unnecessarily generate
82 * multiple inode updates.
83 */
84 hammer_inode_unloadable_check(ip, 0);
85 if (ip->ino_data.nlinks == 0) {
86 if (ip->flags & HAMMER_INODE_MODMASK)
87 hammer_flush_inode(ip, 0);
88 vrecycle(ap->a_vp);
89 }
90 return(0);
91}
92
93/*
94 * Release the vnode association. This is typically (but not always)
95 * the last reference on the inode.
96 *
97 * Once the association is lost we are on our own with regards to
98 * flushing the inode.
99 */
100int
101hammer_vop_reclaim(struct vop_reclaim_args *ap)
102{
103 struct hammer_inode *ip;
104 hammer_mount_t hmp;
105 struct vnode *vp;
106
107 vp = ap->a_vp;
108
109 if ((ip = vp->v_data) != NULL) {
110 hmp = ip->hmp;
111 vp->v_data = NULL;
112 ip->vp = NULL;
113
114 if ((ip->flags & HAMMER_INODE_RECLAIM) == 0) {
115 ++hammer_count_reclaiming;
116 ++hmp->inode_reclaims;
117 ip->flags |= HAMMER_INODE_RECLAIM;
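			/*
			 * Once the reclaim backlog exceeds
			 * HAMMER_RECLAIM_FLUSH, kick the flusher for every
			 * 256 inodes entering the reclaim state so dirty,
			 * detached inodes get written out and can be freed.
			 */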
118 if (hmp->inode_reclaims > HAMMER_RECLAIM_FLUSH &&
119 (hmp->inode_reclaims & 255) == 0) {
120 hammer_flusher_async(hmp);
121 }
122 }
123 hammer_rel_inode(ip, 1);
124 }
125 return(0);
126}
127
128/*
129 * Return a locked vnode for the specified inode. The inode must be
130 * referenced but NOT LOCKED on entry and will remain referenced on
131 * return.
132 *
133 * Called from the frontend.
134 */
135int
136hammer_get_vnode(struct hammer_inode *ip, struct vnode **vpp)
137{
138 hammer_mount_t hmp;
139 struct vnode *vp;
140 int error = 0;
141
142 hmp = ip->hmp;
143
144 for (;;) {
145 if ((vp = ip->vp) == NULL) {
146 error = getnewvnode(VT_HAMMER, hmp->mp, vpp, 0, 0);
147 if (error)
148 break;
149 hammer_lock_ex(&ip->lock);
150 if (ip->vp != NULL) {
151 hammer_unlock(&ip->lock);
152 vp->v_type = VBAD;
153 vx_put(vp);
154 continue;
155 }
156 hammer_ref(&ip->lock);
157 vp = *vpp;
158 ip->vp = vp;
159 vp->v_type =
160 hammer_get_vnode_type(ip->ino_data.obj_type);
161
162 hammer_inode_wakereclaims(ip);
163
164 switch(ip->ino_data.obj_type) {
165 case HAMMER_OBJTYPE_CDEV:
166 case HAMMER_OBJTYPE_BDEV:
167 vp->v_ops = &hmp->mp->mnt_vn_spec_ops;
168 addaliasu(vp, ip->ino_data.rmajor,
169 ip->ino_data.rminor);
170 break;
171 case HAMMER_OBJTYPE_FIFO:
172 vp->v_ops = &hmp->mp->mnt_vn_fifo_ops;
173 break;
174 default:
175 break;
176 }
177
178 /*
179 * Only mark as the root vnode if the ip is not
180 * historical, otherwise the VFS cache will get
181 * confused. The other half of the special handling
182 * is in hammer_vop_nlookupdotdot().
183 */
184 if (ip->obj_id == HAMMER_OBJID_ROOT &&
185 ip->obj_asof == hmp->asof) {
186 vp->v_flag |= VROOT;
187 }
188
189 vp->v_data = (void *)ip;
190 /* vnode locked by getnewvnode() */
191 /* make related vnode dirty if inode dirty? */
192 hammer_unlock(&ip->lock);
193 if (vp->v_type == VREG)
194 vinitvmio(vp, ip->ino_data.size);
195 break;
196 }
197
198 /*
199 * loop if the vget fails (aka races), or if the vp
200 * no longer matches ip->vp.
201 */
202 if (vget(vp, LK_EXCLUSIVE) == 0) {
203 if (vp == ip->vp)
204 break;
205 vput(vp);
206 }
207 }
208 *vpp = vp;
209 return(error);
210}
211
212/*
213 * Acquire a HAMMER inode. The returned inode is not locked. These functions
214 * do not attach or detach the related vnode (use hammer_get_vnode() for
215 * that).
216 *
217 * The flags argument is only applied for newly created inodes, and only
218 * certain flags are inherited.
219 *
220 * Called from the frontend.
221 */
222struct hammer_inode *
223hammer_get_inode(hammer_transaction_t trans, hammer_inode_t dip,
224 u_int64_t obj_id, hammer_tid_t asof, int flags, int *errorp)
225{
226 hammer_mount_t hmp = trans->hmp;
227 struct hammer_inode_info iinfo;
228 struct hammer_cursor cursor;
229 struct hammer_inode *ip;
230
231 /*
232 * Determine if we already have an inode cached. If we do then
233 * we are golden.
234 */
235 iinfo.obj_id = obj_id;
236 iinfo.obj_asof = asof;
237loop:
238 ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
239 if (ip) {
240 hammer_ref(&ip->lock);
241 *errorp = 0;
242 return(ip);
243 }
244
245 /*
246 * Allocate a new inode structure and deal with races later.
247 */
248 ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
249 ++hammer_count_inodes;
250 ++hmp->count_inodes;
251 ip->obj_id = obj_id;
252 ip->obj_asof = iinfo.obj_asof;
253 ip->hmp = hmp;
254 ip->flags = flags & HAMMER_INODE_RO;
255 ip->cache[0].ip = ip;
256 ip->cache[1].ip = ip;
257 if (hmp->ronly)
258 ip->flags |= HAMMER_INODE_RO;
259 ip->sync_trunc_off = ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
260 RB_INIT(&ip->rec_tree);
261 TAILQ_INIT(&ip->target_list);
262
263 /*
264 * Locate the on-disk inode.
265 */
266retry:
267 hammer_init_cursor(trans, &cursor, (dip ? &dip->cache[0] : NULL), NULL);
268 cursor.key_beg.localization = HAMMER_LOCALIZE_INODE;
269 cursor.key_beg.obj_id = ip->obj_id;
270 cursor.key_beg.key = 0;
271 cursor.key_beg.create_tid = 0;
272 cursor.key_beg.delete_tid = 0;
273 cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
274 cursor.key_beg.obj_type = 0;
275 cursor.asof = iinfo.obj_asof;
276 cursor.flags = HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_GET_DATA |
277 HAMMER_CURSOR_ASOF;
278
279 *errorp = hammer_btree_lookup(&cursor);
280 if (*errorp == EDEADLK) {
281 hammer_done_cursor(&cursor);
282 goto retry;
283 }
284
285 /*
286 * On success the B-Tree lookup will hold the appropriate
287 * buffer cache buffers and provide a pointer to the requested
288 * information. Copy the information to the in-memory inode
289 * and cache the B-Tree node to improve future operations.
290 */
291 if (*errorp == 0) {
292 ip->ino_leaf = cursor.node->ondisk->elms[cursor.index].leaf;
293 ip->ino_data = cursor.data->inode;
294
295 /*
296 * cache[0] tries to cache the location of the object inode.
297 * The assumption is that it is near the directory inode.
298 *
299 * cache[1] tries to cache the location of the object data.
300 * The assumption is that it is near the directory data.
301 */
302 hammer_cache_node(&ip->cache[0], cursor.node);
303 if (dip && dip->cache[1].node)
304 hammer_cache_node(&ip->cache[1], dip->cache[1].node);
305
306 /*
307 * The file should not contain any data past the file size
308 * stored in the inode. Setting sync_trunc_off to the
309 * file size instead of max reduces B-Tree lookup overheads
310 * on append by allowing the flusher to avoid checking for
311 * record overwrites.
312 */
313 ip->sync_trunc_off = ip->ino_data.size;
314 }
315
316 /*
317 * The inode is placed on the red-black tree and will be synced to
318 * the media when flushed or by the filesystem sync. If this races
319 * another instantiation/lookup the insertion will fail.
320 */
321 if (*errorp == 0) {
322 hammer_ref(&ip->lock);
323 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
324 hammer_uncache_node(&ip->cache[0]);
325 hammer_uncache_node(&ip->cache[1]);
326 KKASSERT(ip->lock.refs == 1);
327 --hammer_count_inodes;
328 --hmp->count_inodes;
329 kfree(ip, M_HAMMER);
330 hammer_done_cursor(&cursor);
331 goto loop;
332 }
333 ip->flags |= HAMMER_INODE_ONDISK;
334 } else {
335 /*
336 * Do not panic on read-only accesses which fail, particularly
337 * historical accesses where the snapshot might not have
338 * complete connectivity.
339 */
340 if ((flags & HAMMER_INODE_RO) == 0) {
341 kprintf("hammer_get_inode: failed ip %p obj_id %016llx cursor %p error %d\n",
342 ip, ip->obj_id, &cursor, *errorp);
343 Debugger("x");
344 }
345 if (ip->flags & HAMMER_INODE_RSV_INODES) {
346 ip->flags &= ~HAMMER_INODE_RSV_INODES; /* sanity */
347 --hmp->rsv_inodes;
348 }
349 hmp->rsv_databufs -= ip->rsv_databufs;
350 ip->rsv_databufs = 0; /* sanity */
351
352 --hammer_count_inodes;
353 --hmp->count_inodes;
354 kfree(ip, M_HAMMER);
355 ip = NULL;
356 }
357 hammer_done_cursor(&cursor);
358 return (ip);
359}
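
/*
 * Illustrative usage sketch, not taken from the HAMMER sources: a typical
 * frontend lookup acquires the in-memory inode first and then attaches a
 * vnode to it.  Only functions defined in this file are used; the caller
 * is assumed to have already set up a transaction and to hold a reference
 * on the directory inode "dip".
 */
#if 0
static int
example_lookup(hammer_transaction_t trans, hammer_inode_t dip,
	       u_int64_t obj_id, struct vnode **vpp)
{
	struct hammer_inode *ip;
	int error;

	ip = hammer_get_inode(trans, dip, obj_id, trans->hmp->asof, 0, &error);
	if (ip == NULL)
		return (error);
	error = hammer_get_vnode(ip, vpp);	/* *vpp is returned locked */
	hammer_rel_inode(ip, 0);		/* vp association holds its own ref */
	return (error);
}
#endif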
360
361/*
362 * Create a new filesystem object, returning the inode in *ipp. The
363 * returned inode will be referenced.
364 *
365 * The inode is created in-memory.
366 */
367int
368hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
369 struct ucred *cred, hammer_inode_t dip,
370 struct hammer_inode **ipp)
371{
372 hammer_mount_t hmp;
373 hammer_inode_t ip;
374 uid_t xuid;
375
376 hmp = trans->hmp;
377 ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
378 ++hammer_count_inodes;
379 ++hmp->count_inodes;
380 ip->obj_id = hammer_alloc_objid(trans, dip);
381 KKASSERT(ip->obj_id != 0);
382 ip->obj_asof = hmp->asof;
383 ip->hmp = hmp;
384 ip->flush_state = HAMMER_FST_IDLE;
385 ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES;
386 ip->cache[0].ip = ip;
387 ip->cache[1].ip = ip;
388
389 ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
390 RB_INIT(&ip->rec_tree);
391 TAILQ_INIT(&ip->target_list);
392
393 ip->ino_data.atime = trans->time;
394 ip->ino_data.mtime = trans->time;
395 ip->ino_data.size = 0;
396 ip->ino_data.nlinks = 0;
397
398 /*
399 * A nohistory designator on the parent directory is inherited by
400 * the child.
401 */
402 ip->ino_data.uflags = dip->ino_data.uflags &
403 (SF_NOHISTORY|UF_NOHISTORY|UF_NODUMP);
404
405 ip->ino_leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
406 ip->ino_leaf.base.localization = HAMMER_LOCALIZE_INODE;
407 ip->ino_leaf.base.obj_id = ip->obj_id;
408 ip->ino_leaf.base.key = 0;
409 ip->ino_leaf.base.create_tid = 0;
410 ip->ino_leaf.base.delete_tid = 0;
411 ip->ino_leaf.base.rec_type = HAMMER_RECTYPE_INODE;
412 ip->ino_leaf.base.obj_type = hammer_get_obj_type(vap->va_type);
413
414 ip->ino_data.obj_type = ip->ino_leaf.base.obj_type;
415 ip->ino_data.version = HAMMER_INODE_DATA_VERSION;
416 ip->ino_data.mode = vap->va_mode;
417 ip->ino_data.ctime = trans->time;
418 ip->ino_data.parent_obj_id = (dip) ? dip->ino_leaf.base.obj_id : 0;
419
420 switch(ip->ino_leaf.base.obj_type) {
421 case HAMMER_OBJTYPE_CDEV:
422 case HAMMER_OBJTYPE_BDEV:
423 ip->ino_data.rmajor = vap->va_rmajor;
424 ip->ino_data.rminor = vap->va_rminor;
425 break;
426 default:
427 break;
428 }
429
430 /*
431 * Calculate default uid/gid and overwrite with information from
432 * the vap.
433 */
434 xuid = hammer_to_unix_xid(&dip->ino_data.uid);
435 xuid = vop_helper_create_uid(hmp->mp, dip->ino_data.mode, xuid, cred,
436 &vap->va_mode);
437 ip->ino_data.mode = vap->va_mode;
438
439 if (vap->va_vaflags & VA_UID_UUID_VALID)
440 ip->ino_data.uid = vap->va_uid_uuid;
441 else if (vap->va_uid != (uid_t)VNOVAL)
442 hammer_guid_to_uuid(&ip->ino_data.uid, vap->va_uid);
443 else
444 hammer_guid_to_uuid(&ip->ino_data.uid, xuid);
445
446 if (vap->va_vaflags & VA_GID_UUID_VALID)
447 ip->ino_data.gid = vap->va_gid_uuid;
448 else if (vap->va_gid != (gid_t)VNOVAL)
449 hammer_guid_to_uuid(&ip->ino_data.gid, vap->va_gid);
450 else
451 ip->ino_data.gid = dip->ino_data.gid;
452
453 hammer_ref(&ip->lock);
454 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
455 hammer_unref(&ip->lock);
456 panic("hammer_create_inode: duplicate obj_id %llx", ip->obj_id);
457 }
458 *ipp = ip;
459 return(0);
460}
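
/*
 * Illustrative usage sketch, not taken from the HAMMER sources: a
 * create-style VOP builds the in-memory inode, adds a directory entry
 * referencing it (the directory-record code lives outside this file),
 * and only then attaches a vnode.  "trans", "vap", "cred", "dip", "nip"
 * and "vp" are assumed to be set up or declared by the caller.
 */
#if 0
	error = hammer_create_inode(&trans, vap, cred, dip, &nip);
	if (error == 0) {
		/* ...add a directory entry referencing nip (not shown)... */
		error = hammer_get_vnode(nip, &vp);
		hammer_rel_inode(nip, 0);
	}
#endif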
461
462/*
463 * Called by hammer_sync_inode().
464 */
465static int
466hammer_update_inode(hammer_cursor_t cursor, hammer_inode_t ip)
467{
468 hammer_transaction_t trans = cursor->trans;
469 hammer_record_t record;
470 int error;
471
472retry:
473 error = 0;
474
475 /*
476 * If the inode has a presence on-disk then locate it and mark
477 * it deleted, setting DELONDISK.
478 *
479 * The record may or may not be physically deleted, depending on
480 * the retention policy.
481 */
482 if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
483 HAMMER_INODE_ONDISK) {
484 hammer_normalize_cursor(cursor);
485 cursor->key_beg.localization = HAMMER_LOCALIZE_INODE;
486 cursor->key_beg.obj_id = ip->obj_id;
487 cursor->key_beg.key = 0;
488 cursor->key_beg.create_tid = 0;
489 cursor->key_beg.delete_tid = 0;
490 cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE;
491 cursor->key_beg.obj_type = 0;
492 cursor->asof = ip->obj_asof;
493 cursor->flags &= ~HAMMER_CURSOR_INITMASK;
494 cursor->flags |= HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_ASOF;
495 cursor->flags |= HAMMER_CURSOR_BACKEND;
496
497 error = hammer_btree_lookup(cursor);
498 if (hammer_debug_inode)
499 kprintf("IPDEL %p %08x %d", ip, ip->flags, error);
500 if (error) {
501 kprintf("error %d\n", error);
502 Debugger("hammer_update_inode");
503 }
504
505 if (error == 0) {
506 error = hammer_ip_delete_record(cursor, ip, trans->tid);
507 if (hammer_debug_inode)
508 kprintf(" error %d\n", error);
509 if (error && error != EDEADLK) {
510 kprintf("error %d\n", error);
511 Debugger("hammer_update_inode2");
512 }
513 if (error == 0) {
514 ip->flags |= HAMMER_INODE_DELONDISK;
515 }
516 if (cursor->node)
517 hammer_cache_node(&ip->cache[0], cursor->node);
518 }
519 if (error == EDEADLK) {
520 hammer_done_cursor(cursor);
521 error = hammer_init_cursor(trans, cursor,
522 &ip->cache[0], ip);
523 if (hammer_debug_inode)
524 kprintf("IPDED %p %d\n", ip, error);
525 if (error == 0)
526 goto retry;
527 }
528 }
529
530 /*
531 * Ok, write out the initial record or a new record (after deleting
532 * the old one), unless the DELETED flag is set. This routine will
533 * clear DELONDISK if it writes out a record.
534 *
535 * Update our inode statistics if this is the first application of
536 * the inode on-disk.
537 */
538 if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) {
539 /*
540 * Generate a record and write it to the media
541 */
542 record = hammer_alloc_mem_record(ip, 0);
543 record->type = HAMMER_MEM_RECORD_INODE;
544 record->flush_state = HAMMER_FST_FLUSH;
545 record->leaf = ip->sync_ino_leaf;
546 record->leaf.base.create_tid = trans->tid;
547 record->leaf.data_len = sizeof(ip->sync_ino_data);
548 record->data = (void *)&ip->sync_ino_data;
549 record->flags |= HAMMER_RECF_INTERLOCK_BE;
550 for (;;) {
551 error = hammer_ip_sync_record_cursor(cursor, record);
552 if (hammer_debug_inode)
553 kprintf("GENREC %p rec %08x %d\n",
554 ip, record->flags, error);
555 if (error != EDEADLK)
556 break;
557 hammer_done_cursor(cursor);
558 error = hammer_init_cursor(trans, cursor,
559 &ip->cache[0], ip);
560 if (hammer_debug_inode)
561 kprintf("GENREC reinit %d\n", error);
562 if (error)
563 break;
564 }
565 if (error) {
566 kprintf("error %d\n", error);
567 Debugger("hammer_update_inode3");
568 }
569
570 /*
571 * The record isn't managed by the inode's record tree,
572 * destroy it whether we succeed or fail.
573 */
574 record->flags &= ~HAMMER_RECF_INTERLOCK_BE;
575 record->flags |= HAMMER_RECF_DELETED_FE;
576 record->flush_state = HAMMER_FST_IDLE;
577 hammer_rel_mem_record(record);
578
579 /*
580 * Finish up.
581 */
582 if (error == 0) {
583 if (hammer_debug_inode)
584 kprintf("CLEANDELOND %p %08x\n", ip, ip->flags);
585 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY |
586 HAMMER_INODE_ITIMES);
587 ip->flags &= ~HAMMER_INODE_DELONDISK;
588
589 /*
590 * Root volume count of inodes
591 */
592 if ((ip->flags & HAMMER_INODE_ONDISK) == 0) {
593 hammer_modify_volume_field(trans,
594 trans->rootvol,
595 vol0_stat_inodes);
596 ++ip->hmp->rootvol->ondisk->vol0_stat_inodes;
597 hammer_modify_volume_done(trans->rootvol);
598 ip->flags |= HAMMER_INODE_ONDISK;
599 if (hammer_debug_inode)
600 kprintf("NOWONDISK %p\n", ip);
601 }
602 }
603 }
604
605 /*
606 * If the inode has been destroyed, clean out any left-over flags
607 * that may have been set by the frontend.
608 */
609 if (error == 0 && (ip->flags & HAMMER_INODE_DELETED)) {
610 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY |
611 HAMMER_INODE_ITIMES);
612 }
613 return(error);
614}
615
616/*
617 * Update only the itimes fields. This is done non-historically. The
618 * record is updated in-place on the disk.
619 */
620static int
621hammer_update_itimes(hammer_cursor_t cursor, hammer_inode_t ip)
622{
623 hammer_transaction_t trans = cursor->trans;
624 struct hammer_btree_leaf_elm *leaf;
625 int error;
626
627retry:
628 error = 0;
629 if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
630 HAMMER_INODE_ONDISK) {
631 hammer_normalize_cursor(cursor);
632 cursor->key_beg.localization = HAMMER_LOCALIZE_INODE;
633 cursor->key_beg.obj_id = ip->obj_id;
634 cursor->key_beg.key = 0;
635 cursor->key_beg.create_tid = 0;
636 cursor->key_beg.delete_tid = 0;
637 cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE;
638 cursor->key_beg.obj_type = 0;
639 cursor->asof = ip->obj_asof;
640 cursor->flags &= ~HAMMER_CURSOR_INITMASK;
641 cursor->flags |= HAMMER_CURSOR_ASOF;
642 cursor->flags |= HAMMER_CURSOR_GET_LEAF;
643 cursor->flags |= HAMMER_CURSOR_GET_DATA;
644 cursor->flags |= HAMMER_CURSOR_BACKEND;
645
646 error = hammer_btree_lookup(cursor);
647 if (error) {
648 kprintf("error %d\n", error);
649 Debugger("hammer_update_itimes1");
650 }
651 if (error == 0) {
652 /*
653 * atime/mtime updates can be done in place, but
654 * they are nasty because we also have to update the
655 * data_crc in the B-Tree leaf, which means we
656 * ALSO have to generate UNDO records.
657 */
658 hammer_modify_buffer(trans, cursor->data_buffer,
659 HAMMER_ITIMES_BASE(&cursor->data->inode),
660 HAMMER_ITIMES_BYTES);
661 cursor->data->inode.atime = ip->sync_ino_data.atime;
662 cursor->data->inode.mtime = ip->sync_ino_data.mtime;
663 hammer_modify_buffer_done(cursor->data_buffer);
664
665 leaf = cursor->leaf;
666 hammer_modify_node(trans, cursor->node,
667 &leaf->data_crc,
668 sizeof(leaf->data_crc));
669 leaf->data_crc = crc32(cursor->data, leaf->data_len);
670 hammer_modify_node_done(cursor->node);
671
672 ip->sync_flags &= ~HAMMER_INODE_ITIMES;
673 /* XXX recalculate crc */
674 hammer_cache_node(&ip->cache[0], cursor->node);
675 }
676 if (error == EDEADLK) {
677 hammer_done_cursor(cursor);
678 error = hammer_init_cursor(trans, cursor,
679 &ip->cache[0], ip);
680 if (error == 0)
681 goto retry;
682 }
683 }
684 return(error);
685}
686
687/*
688 * Release a reference on an inode, flush as requested.
689 *
690 * On the last reference we queue the inode to the flusher for its final
691 * disposition.
692 */
693void
694hammer_rel_inode(struct hammer_inode *ip, int flush)
695{
696 hammer_mount_t hmp = ip->hmp;
697
698 /*
699 * Handle disposition when dropping the last ref.
700 */
701 for (;;) {
702 if (ip->lock.refs == 1) {
703 /*
704 * Determine whether on-disk action is needed for
705 * the inode's final disposition.
706 */
707 KKASSERT(ip->vp == NULL);
708 hammer_inode_unloadable_check(ip, 0);
709 if (ip->flags & HAMMER_INODE_MODMASK) {
710 if (hmp->rsv_inodes > desiredvnodes) {
711 hammer_flush_inode(ip,
712 HAMMER_FLUSH_SIGNAL);
713 } else {
714 hammer_flush_inode(ip, 0);
715 }
716 } else if (ip->lock.refs == 1) {
717 hammer_unload_inode(ip);
718 break;
719 }
720 } else {
721 if (flush)
722 hammer_flush_inode(ip, 0);
723
724 /*
725 * The inode still has multiple refs, try to drop
726 * one ref.
727 */
728 KKASSERT(ip->lock.refs >= 1);
729 if (ip->lock.refs > 1) {
730 hammer_unref(&ip->lock);
731 break;
732 }
733 }
734 }
735}
736
737/*
738 * Unload and destroy the specified inode. Must be called with one remaining
739 * reference. The reference is disposed of.
740 *
741 * This can only be called in the context of the flusher.
742 */
743static int
744hammer_unload_inode(struct hammer_inode *ip)
745{
746 hammer_mount_t hmp = ip->hmp;
747
748 KASSERT(ip->lock.refs == 1,
749 ("hammer_unload_inode: %d refs\n", ip->lock.refs));
750 KKASSERT(ip->vp == NULL);
751 KKASSERT(ip->flush_state == HAMMER_FST_IDLE);
752 KKASSERT(ip->cursor_ip_refs == 0);
753 KKASSERT(ip->lock.lockcount == 0);
754 KKASSERT((ip->flags & HAMMER_INODE_MODMASK) == 0);
755
756 KKASSERT(RB_EMPTY(&ip->rec_tree));
757 KKASSERT(TAILQ_EMPTY(&ip->target_list));
758
759 RB_REMOVE(hammer_ino_rb_tree, &hmp->rb_inos_root, ip);
760
761 hammer_uncache_node(&ip->cache[0]);
762 hammer_uncache_node(&ip->cache[1]);
763 if (ip->objid_cache)
764 hammer_clear_objid(ip);
765 --hammer_count_inodes;
766 --hmp->count_inodes;
767
768 hammer_inode_wakereclaims(ip);
769 kfree(ip, M_HAMMER);
770
771 return(0);
772}
773
774/*
775 * Called on mount -u when switching from RW to RO or vice-versa. Adjust
776 * the read-only flag for cached inodes.
777 *
778 * This routine is called from a RB_SCAN().
779 */
780int
781hammer_reload_inode(hammer_inode_t ip, void *arg __unused)
782{
783 hammer_mount_t hmp = ip->hmp;
784
785 if (hmp->ronly || hmp->asof != HAMMER_MAX_TID)
786 ip->flags |= HAMMER_INODE_RO;
787 else
788 ip->flags &= ~HAMMER_INODE_RO;
789 return(0);
790}
791
792/*
793 * A transaction has modified an inode, requiring updates as specified by
794 * the passed flags.
795 *
796 * HAMMER_INODE_DDIRTY: Inode data has been updated
797 * HAMMER_INODE_XDIRTY: Dirty in-memory records
798 * HAMMER_INODE_BUFS: Dirty buffer cache buffers
799 * HAMMER_INODE_DELETED: Inode record/data must be deleted
800 * HAMMER_INODE_ITIMES: mtime/atime has been updated
801 */
802void
803hammer_modify_inode(hammer_inode_t ip, int flags)
804{
805 KKASSERT ((ip->flags & HAMMER_INODE_RO) == 0 ||
806 (flags & (HAMMER_INODE_DDIRTY |
807 HAMMER_INODE_XDIRTY | HAMMER_INODE_BUFS |
808 HAMMER_INODE_DELETED | HAMMER_INODE_ITIMES)) == 0);
809 if ((ip->flags & HAMMER_INODE_RSV_INODES) == 0) {
810 ip->flags |= HAMMER_INODE_RSV_INODES;
811 ++ip->hmp->rsv_inodes;
812 }
813
814 ip->flags |= flags;
815}
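
/*
 * Illustrative usage sketch, not taken from the HAMMER sources: a frontend
 * operation that changes inode meta-data (a chmod-like path, for example)
 * updates the in-memory copy and then marks the inode dirty so the flusher
 * eventually writes out a new inode record.  "new_mode" is a hypothetical
 * caller-supplied value.
 */
#if 0
	ip->ino_data.mode = new_mode;
	hammer_modify_inode(ip, HAMMER_INODE_DDIRTY);
#endif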
816
817/*
818 * Request that an inode be flushed. This whole mess cannot block and may
819 * recurse (if not synchronous). Once requested HAMMER will attempt to
820 * actively flush the inode until the flush can be done.
821 *
822 * The inode may already be flushing, or may be in a setup state. We can
823 * place the inode in a flushing state if it is currently idle and flag it
824 * to reflush if it is currently flushing.
825 *
826 * If the HAMMER_FLUSH_SYNCHRONOUS flag is specified we will attempt to
827 * flush the inode synchronously using the caller's context.
828 */
829void
830hammer_flush_inode(hammer_inode_t ip, int flags)
831{
832 int good;
833
834 /*
835 * Trivial 'nothing to flush' case. If the inode is in a SETUP
836 * state we have to put it back into an IDLE state so we can
837 * drop the extra ref.
838 */
839 if ((ip->flags & HAMMER_INODE_MODMASK) == 0) {
840 if (ip->flush_state == HAMMER_FST_SETUP) {
841 ip->flush_state = HAMMER_FST_IDLE;
842 hammer_rel_inode(ip, 0);
843 }
844 return;
845 }
846
847 /*
848 * Our flush action will depend on the current state.
849 */
850 switch(ip->flush_state) {
851 case HAMMER_FST_IDLE:
852 /*
853 * We have no dependencies and can flush immediately. Some of
854 * our children may not be flushable so we have to re-test
855 * with that additional knowledge.
856 */
857 hammer_flush_inode_core(ip, flags);
858 break;
859 case HAMMER_FST_SETUP:
860 /*
861 * Recurse upwards through dependencies via target_list
862 * and start their flusher actions going if possible.
863 *
864 * 'good' is our connectivity. -1 means we have none and
865 * can't flush, 0 means there weren't any dependencies, and
866 * 1 means we have good connectivity.
867 */
868 good = hammer_setup_parent_inodes(ip);
869
870 /*
871 * We can continue if good >= 0. Determine how many records
872 * under our inode can be flushed (and mark them).
873 */
874 if (good >= 0) {
875 hammer_flush_inode_core(ip, flags);
876 } else {
877 ip->flags |= HAMMER_INODE_REFLUSH;
878 if (flags & HAMMER_FLUSH_SIGNAL) {
879 ip->flags |= HAMMER_INODE_RESIGNAL;
880 hammer_flusher_async(ip->hmp);
881 }
882 }
883 break;
884 default:
885 /*
886 * We are already flushing, flag the inode to reflush
887 * if needed after it completes its current flush.
888 */
889 if ((ip->flags & HAMMER_INODE_REFLUSH) == 0)
890 ip->flags |= HAMMER_INODE_REFLUSH;
891 if (flags & HAMMER_FLUSH_SIGNAL) {
892 ip->flags |= HAMMER_INODE_RESIGNAL;
893 hammer_flusher_async(ip->hmp);
894 }
895 break;
896 }
897}
898
899/*
900 * Scan ip->target_list, which is a list of records owned by PARENT inodes
901 * that reference our ip (record->target_ip == ip).
902 *
903 * XXX This is a huge mess of recursive code, but not one bit of it blocks
904 * so for now do not ref/deref the structures. Note that if we use the
905 * ref/rel code later, the rel CAN block.
906 */
907static int
908hammer_setup_parent_inodes(hammer_inode_t ip)
909{
910 hammer_record_t depend;
911#if 0
912 hammer_record_t next;
913 hammer_inode_t pip;
914#endif
915 int good;
916 int r;
917
918 good = 0;
919 TAILQ_FOREACH(depend, &ip->target_list, target_entry) {
920 r = hammer_setup_parent_inodes_helper(depend);
921 KKASSERT(depend->target_ip == ip);
922 if (r < 0 && good == 0)
923 good = -1;
924 if (r > 0)
925 good = 1;
926 }
927 return(good);
928
929#if 0
930retry:
931 good = 0;
932 next = TAILQ_FIRST(&ip->target_list);
933 if (next) {
934 hammer_ref(&next->lock);
935 hammer_ref(&next->ip->lock);
936 }
937 while ((depend = next) != NULL) {
938 if (depend->target_ip == NULL) {
939 pip = depend->ip;
940 hammer_rel_mem_record(depend);
941 hammer_rel_inode(pip, 0);
942 goto retry;
943 }
944 KKASSERT(depend->target_ip == ip);
945 next = TAILQ_NEXT(depend, target_entry);
946 if (next) {
947 hammer_ref(&next->lock);
948 hammer_ref(&next->ip->lock);
949 }
950 r = hammer_setup_parent_inodes_helper(depend);
951 if (r < 0 && good == 0)
952 good = -1;
953 if (r > 0)
954 good = 1;
955 pip = depend->ip;
956 hammer_rel_mem_record(depend);
957 hammer_rel_inode(pip, 0);
958 }
959 return(good);
960#endif
961}
962
963/*
964 * This helper function takes a record representing the dependency between
965 * the parent inode and child inode.
966 *
967 * record->ip = parent inode
968 * record->target_ip = child inode
969 *
970 * We are asked to recurse upwards and convert the record from SETUP
971 * to FLUSH if possible.
972 *
973 * Return 1 if the record gives us connectivity
974 *
975 * Return 0 if the record is not relevant
976 *
977 * Return -1 if we can't resolve the dependency and there is no connectivity.
978 */
979static int
980hammer_setup_parent_inodes_helper(hammer_record_t record)
981{
982 hammer_mount_t hmp;
983 hammer_inode_t pip;
984 int good;
985
986 KKASSERT(record->flush_state != HAMMER_FST_IDLE);
987 pip = record->ip;
988 hmp = pip->hmp;
989
990 /*
991 * If the record is already flushing, is it in our flush group?
992 *
993 * If it is in our flush group but it is a general record or a
994 * delete-on-disk, it does not improve our connectivity (return 0),
995 * and if the target inode is not trying to destroy itself we can't
996 * allow the operation yet anyway (the second return -1).
997 */
998 if (record->flush_state == HAMMER_FST_FLUSH) {
999 if (record->flush_group != hmp->flusher.next) {
1000 pip->flags |= HAMMER_INODE_REFLUSH;
1001 return(-1);
1002 }
1003 if (record->type == HAMMER_MEM_RECORD_ADD)
1004 return(1);
1005 /* GENERAL or DEL */
1006 return(0);
1007 }
1008
1009 /*
1010 * It must be a setup record. Try to resolve the setup dependencies
1011 * by recursing upwards so we can place ip on the flush list.
1012 */
1013 KKASSERT(record->flush_state == HAMMER_FST_SETUP);
1014
1015 good = hammer_setup_parent_inodes(pip);
1016
1017 /*
1018 * We can't flush ip because it has no connectivity (XXX also check
1019 * nlinks for pre-existing connectivity!). Flag it so any resolution
1020 * recurses back down.
1021 */
1022 if (good < 0) {
1023 pip->flags |= HAMMER_INODE_REFLUSH;
1024 return(good);
1025 }
1026
1027 /*
1028 * We are go, place the parent inode in a flushing state so we can
1029 * place its record in a flushing state. Note that the parent
1030 * may already be flushing. The record must be in the same flush
1031 * group as the parent.
1032 */
1033 if (pip->flush_state != HAMMER_FST_FLUSH)
1034 hammer_flush_inode_core(pip, HAMMER_FLUSH_RECURSION);
1035 KKASSERT(pip->flush_state == HAMMER_FST_FLUSH);
1036 KKASSERT(record->flush_state == HAMMER_FST_SETUP);
1037
1038#if 0
1039 if (record->type == HAMMER_MEM_RECORD_DEL &&
1040 (record->target_ip->flags & (HAMMER_INODE_DELETED|HAMMER_INODE_DELONDISK)) == 0) {
1041 /*
1042 * Regardless of flushing state we cannot sync this path if the
1043 * record represents a delete-on-disk but the target inode
1044 * is not ready to sync its own deletion.
1045 *
1046 * XXX need to count effective nlinks to determine whether
1047 * the flush is ok, otherwise removing a hardlink will
1048 * just leave the DEL record to rot.
1049 */
1050 record->target_ip->flags |= HAMMER_INODE_REFLUSH;
1051 return(-1);
1052 } else
1053#endif
1054 if (pip->flush_group == pip->hmp->flusher.next) {
1055 /*
1056 * This is the record we wanted to synchronize. If the
1057 * record went into a flush state while we blocked it
1058 * had better be in the correct flush group.
1059 */
1060 if (record->flush_state != HAMMER_FST_FLUSH) {
1061 record->flush_state = HAMMER_FST_FLUSH;
1062 record->flush_group = pip->flush_group;
1063 hammer_ref(&record->lock);
1064 } else {
1065 KKASSERT(record->flush_group == pip->flush_group);
1066 }
1067 if (record->type == HAMMER_MEM_RECORD_ADD)
1068 return(1);
1069
1070 /*
1071 * A general or delete-on-disk record does not contribute
1072 * to our visibility. We can still flush it, however.
1073 */
1074 return(0);
1075 } else {
1076 /*
1077 * We couldn't resolve the dependencies, so request that the
1078 * inode be flushed when the dependencies can be resolved.
1079 */
1080 pip->flags |= HAMMER_INODE_REFLUSH;
1081 return(-1);
1082 }
1083}
1084
1085/*
1086 * This is the core routine placing an inode into the FST_FLUSH state.
1087 */
1088static void
1089hammer_flush_inode_core(hammer_inode_t ip, int flags)
1090{
1091 int go_count;
1092
1093 /*
1094 * Set flush state and prevent the flusher from cycling into
1095 * the next flush group. Do not place the ip on the list yet.
1096 * Inodes not in the idle state get an extra reference.
1097 */
1098 KKASSERT(ip->flush_state != HAMMER_FST_FLUSH);
1099 if (ip->flush_state == HAMMER_FST_IDLE)
1100 hammer_ref(&ip->lock);
1101 ip->flush_state = HAMMER_FST_FLUSH;
1102 ip->flush_group = ip->hmp->flusher.next;
1103 ++ip->hmp->flusher.group_lock;
1104 ++ip->hmp->count_iqueued;
1105 ++hammer_count_iqueued;
1106
1107 /*
1108 * We need to be able to vfsync/truncate from the backend.
1109 */
1110 KKASSERT((ip->flags & HAMMER_INODE_VHELD) == 0);
1111 if (ip->vp && (ip->vp->v_flag & VINACTIVE) == 0) {
1112 ip->flags |= HAMMER_INODE_VHELD;
1113 vref(ip->vp);
1114 }
1115
1116 /*
1117 * Figure out how many in-memory records we can actually flush
1118 * (not including inode meta-data, buffers, etc).
1119 */
1120 if (flags & HAMMER_FLUSH_RECURSION) {
1121 go_count = 1;
1122 } else {
1123 go_count = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
1124 hammer_setup_child_callback, NULL);
1125 }
1126
1127 /*
1128 * This is a more involved test that includes go_count. If we
1129 * can't flush, flag the inode and return. If go_count is 0 we
1130 * are unable to flush any records in our rec_tree and
1131 * must ignore the XDIRTY flag.
1132 */
1133 if (go_count == 0) {
1134 if ((ip->flags & HAMMER_INODE_MODMASK_NOXDIRTY) == 0) {
1135 ip->flags |= HAMMER_INODE_REFLUSH;
1136
1137 --ip->hmp->count_iqueued;
1138 --hammer_count_iqueued;
1139
1140 ip->flush_state = HAMMER_FST_SETUP;
1141 if (ip->flags & HAMMER_INODE_VHELD) {
1142 ip->flags &= ~HAMMER_INODE_VHELD;
1143 vrele(ip->vp);
1144 }
1145 if (flags & HAMMER_FLUSH_SIGNAL) {
1146 ip->flags |= HAMMER_INODE_RESIGNAL;
1147 hammer_flusher_async(ip->hmp);
1148 }
1149 if (--ip->hmp->flusher.group_lock == 0)
1150 wakeup(&ip->hmp->flusher.group_lock);
1151 return;
1152 }
1153 }
1154
1155 /*
1156 * Snapshot the state of the inode for the backend flusher.
1157 *
1158 * The truncation must be retained in the frontend until after
1159 * we've actually performed the record deletion.
1160 *
1161 * We continue to retain sync_trunc_off even when all truncations
1162 * have been resolved as an optimization to determine if we can
1163 * skip the B-Tree lookup for overwrite deletions.
1164 *
1165 * NOTE: The DELETING flag is a mod flag, but it is also sticky,
1166 * and stays in ip->flags. Once set, it stays set until the
1167 * inode is destroyed.
1168 */
1169 ip->sync_flags = (ip->flags & HAMMER_INODE_MODMASK);
1170 if (ip->sync_flags & HAMMER_INODE_TRUNCATED)
1171 ip->sync_trunc_off = ip->trunc_off;
1172 ip->sync_ino_leaf = ip->ino_leaf;
1173 ip->sync_ino_data = ip->ino_data;
1174 ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
1175 ip->flags &= ~HAMMER_INODE_MODMASK;
1176#ifdef DEBUG_TRUNCATE
1177 if ((ip->sync_flags & HAMMER_INODE_TRUNCATED) && ip == HammerTruncIp)
1178 kprintf("truncateS %016llx\n", ip->sync_trunc_off);
1179#endif
1180
1181 /*
1182 * The flusher list inherits our inode and reference.
1183 */
1184 TAILQ_INSERT_TAIL(&ip->hmp->flush_list, ip, flush_entry);
1185 if (--ip->hmp->flusher.group_lock == 0)
1186 wakeup(&ip->hmp->flusher.group_lock);
1187
1188 if (flags & HAMMER_FLUSH_SIGNAL) {
1189 hammer_flusher_async(ip->hmp);
1190 }
1191}
1192
1193/*
1194 * Callback for scan of ip->rec_tree. Try to include each record in our
1195 * flush. ip->flush_group has been set but the inode has not yet been
1196 * moved into a flushing state.
1197 *
1198 * If we get stuck on a record we have to set HAMMER_INODE_REFLUSH on
1199 * both inodes.
1200 *
1201 * We return 1 for any record placed or found in FST_FLUSH, which prevents
1202 * the caller from shortcutting the flush.
1203 */
1204static int
1205hammer_setup_child_callback(hammer_record_t rec, void *data)
1206{
1207 hammer_inode_t target_ip;
1208 hammer_inode_t ip;
1209 int r;
1210
1211 /*
1212 * Deleted records are ignored. Note that the flush detects deleted
1213 * front-end records at multiple points to deal with races. This is
1214 * just the first line of defense. The only time DELETED_FE cannot
1215 * be set is when HAMMER_RECF_INTERLOCK_BE is set.
1216 *
1217 * Don't get confused between record deletion and, say, directory
1218 * entry deletion. The deletion of a directory entry that is on
1219 * the media has nothing to do with the record deletion flags.
1220 */
1221 if (rec->flags & (HAMMER_RECF_DELETED_FE|HAMMER_RECF_DELETED_BE))
1222 return(0);
1223
1224 /*
1225 * If the record is in an idle state it has no dependencies and
1226 * can be flushed.
1227 */
1228 ip = rec->ip;
1229 r = 0;
1230
1231 switch(rec->flush_state) {
1232 case HAMMER_FST_IDLE:
1233 /*
1234 * Record has no setup dependency, we can flush it.
1235 */
1236 KKASSERT(rec->target_ip == NULL);
1237 rec->flush_state = HAMMER_FST_FLUSH;
1238 rec->flush_group = ip->flush_group;
1239 hammer_ref(&rec->lock);
1240 r = 1;
1241 break;
1242 case HAMMER_FST_SETUP:
1243 /*
1244 * Record has a setup dependency. Try to include the
1245 * target ip in the flush.
1246 *
1247 * We have to be careful here, if we do not do the right
1248 * thing we can lose track of dirty inodes and the system
1249 * will lock up trying to allocate buffers.
1250 */
1251 target_ip = rec->target_ip;
1252 KKASSERT(target_ip != NULL);
1253 KKASSERT(target_ip->flush_state != HAMMER_FST_IDLE);
1254 if (target_ip->flush_state == HAMMER_FST_FLUSH) {
1255 /*
1256 * If the target IP is already flushing in our group
1257 * we are golden, otherwise make sure the target
1258 * reflushes.
1259 */
1260 if (target_ip->flush_group == ip->flush_group) {
1261 rec->flush_state = HAMMER_FST_FLUSH;
1262 rec->flush_group = ip->flush_group;
1263 hammer_ref(&rec->lock);
1264 r = 1;
1265 } else {
1266 target_ip->flags |= HAMMER_INODE_REFLUSH;
1267 }
1268 } else if (rec->type == HAMMER_MEM_RECORD_ADD) {
1269 /*
1270 * If the target IP is not flushing we can force
1271 * it to flush. Even if it is unable to write out
1272 * any of its own records, we have at least one in
1273 * hand that we CAN deal with.
1274 */
1275 rec->flush_state = HAMMER_FST_FLUSH;
1276 rec->flush_group = ip->flush_group;
1277 hammer_ref(&rec->lock);
1278 hammer_flush_inode_core(target_ip,
1279 HAMMER_FLUSH_RECURSION);
1280 r = 1;
1281 } else {
1282 /*
1283 * General or delete-on-disk record.
1284 *
1285 * XXX this needs help. If a delete-on-disk we could
1286 * disconnect the target. If the target has its own
1287 * dependencies they really need to be flushed.
1288 *
1289 * XXX
1290 */
1291 rec->flush_state = HAMMER_FST_FLUSH;
1292 rec->flush_group = ip->flush_group;
1293 hammer_ref(&rec->lock);
1294 hammer_flush_inode_core(target_ip,
1295 HAMMER_FLUSH_RECURSION);
1296 r = 1;
1297 }
1298 break;
1299 case HAMMER_FST_FLUSH:
1300 /*
1301 * Record already associated with a flush group. It had
1302 * better be ours.
1303 */
1304 KKASSERT(rec->flush_group == ip->flush_group);
1305 r = 1;
1306 break;
1307 }
1308 return(r);
1309}
1310
1311/*
1312 * Wait for a previously queued flush to complete
1313 */
1314void
1315hammer_wait_inode(hammer_inode_t ip)
1316{
1317 while (ip->flush_state != HAMMER_FST_IDLE) {
1318 if (ip->flush_state == HAMMER_FST_SETUP) {
1319 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1320 } else {
1321 ip->flags |= HAMMER_INODE_FLUSHW;
1322 tsleep(&ip->flags, 0, "hmrwin", 0);
1323 }
1324 }
1325}
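
/*
 * Illustrative usage sketch, not taken from the HAMMER sources: a
 * synchronous flush from the frontend (an fsync-like path, for example)
 * signals the flusher and then waits for the inode to return to the
 * idle state.
 */
#if 0
	hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
	hammer_wait_inode(ip);
#endif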
1326
1327/*
1328 * Called by the backend code when a flush has been completed.
1329 * The inode has already been removed from the flush list.
1330 *
1331 * A pipelined flush can occur, in which case we must re-enter the
1332 * inode on the list and re-copy its fields.
1333 */
1334void
1335hammer_flush_inode_done(hammer_inode_t ip)
1336{
1337 hammer_mount_t hmp;
1338 int dorel;
1339
1340 KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
1341
1342 hmp = ip->hmp;
1343
1344 /*
1345 * Merge left-over flags back into the frontend and fix the state.
1346 */
1347 ip->flags |= ip->sync_flags;
1348
1349 /*
1350 * The backend may have adjusted nlinks, so if the adjusted nlinks
1351 * does not match the frontend, set the frontend's DDIRTY flag again.
1352 */
1353 if (ip->ino_data.nlinks != ip->sync_ino_data.nlinks)
1354 ip->flags |= HAMMER_INODE_DDIRTY;
1355
1356 /*
1357 * Fix up the dirty buffer status. IO completions will also
1358 * try to clean up rsv_databufs.
1359 */
1360 if (ip->vp && RB_ROOT(&ip->vp->v_rbdirty_tree)) {
1361 ip->flags |= HAMMER_INODE_BUFS;
1362 } else {
1363 hmp->rsv_databufs -= ip->rsv_databufs;
1364 ip->rsv_databufs = 0;
1365 }
1366
1367 /*
1368 * Re-set the XDIRTY flag if some of the inode's in-memory records
1369 * could not be flushed.
1370 */
1371 KKASSERT((RB_EMPTY(&ip->rec_tree) &&
1372 (ip->flags & HAMMER_INODE_XDIRTY) == 0) ||
1373 (!RB_EMPTY(&ip->rec_tree) &&
1374 (ip->flags & HAMMER_INODE_XDIRTY) != 0));
1375
1376 /*
1377 * Do not lose track of inodes which no longer have vnode
1378 * associations, otherwise they may never get flushed again.
1379 */
1380 if ((ip->flags & HAMMER_INODE_MODMASK) && ip->vp == NULL)
1381 ip->flags |= HAMMER_INODE_REFLUSH;
1382
1383 /*
1384 * Adjust flush_state. The target state (idle or setup) shouldn't
1385 * be terribly important since we will reflush if we really need
1386 * to do anything. XXX
1387 */
1388 if (TAILQ_EMPTY(&ip->target_list) && RB_EMPTY(&ip->rec_tree)) {
1389 ip->flush_state = HAMMER_FST_IDLE;
1390 dorel = 1;
1391 } else {
1392 ip->flush_state = HAMMER_FST_SETUP;
1393 dorel = 0;
1394 }
1395
1396 --hmp->count_iqueued;
1397 --hammer_count_iqueued;
1398
1399 /*
1400 * Clean up the vnode ref
1401 */
1402 if (ip->flags & HAMMER_INODE_VHELD) {
1403 ip->flags &= ~HAMMER_INODE_VHELD;
1404 vrele(ip->vp);
1405 }
1406
1407 /*
1408 * If the frontend made more changes and requested another flush,
1409 * then try to get it running.
1410 */
1411 if (ip->flags & HAMMER_INODE_REFLUSH) {
1412 ip->flags &= ~HAMMER_INODE_REFLUSH;
1413 if (ip->flags & HAMMER_INODE_RESIGNAL) {
1414 ip->flags &= ~HAMMER_INODE_RESIGNAL;
1415 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1416 } else {
1417 hammer_flush_inode(ip, 0);
1418 }
1419 }
1420
1421 /*
1422 * If the inode is now clean drop the space reservation.
1423 */
1424 if ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
1425 (ip->flags & HAMMER_INODE_RSV_INODES)) {
1426 ip->flags &= ~HAMMER_INODE_RSV_INODES;
1427 --hmp->rsv_inodes;
1428 }
1429
1430 /*
1431 * Finally, if the frontend is waiting for a flush to complete,
1432 * wake it up.
1433 */
1434 if (ip->flush_state != HAMMER_FST_FLUSH) {
1435 if (ip->flags & HAMMER_INODE_FLUSHW) {
1436 ip->flags &= ~HAMMER_INODE_FLUSHW;
1437 wakeup(&ip->flags);
1438 }
1439 }
1440 if (dorel)
1441 hammer_rel_inode(ip, 0);
1442}
1443
1444/*
1445 * Called from hammer_sync_inode() to synchronize in-memory records
1446 * to the media.
1447 */
1448static int
1449hammer_sync_record_callback(hammer_record_t record, void *data)
1450{
1451 hammer_cursor_t cursor = data;
1452 hammer_transaction_t trans = cursor->trans;
1453 int error;
1454
1455 /*
1456 * Skip records that do not belong to the current flush.
1457 */
1458 ++hammer_stats_record_iterations;
1459 if (record->flush_state != HAMMER_FST_FLUSH)
1460 return(0);
1461
1462#if 1
1463 if (record->flush_group != record->ip->flush_group) {
1464 kprintf("sync_record %p ip %p bad flush group %d %d\n", record, record->ip, record->flush_group, record->ip->flush_group);
1465 Debugger("blah2");
1466 return(0);
1467 }
1468#endif
1469 KKASSERT(record->flush_group == record->ip->flush_group);
1470
1471 /*
1472 * Interlock the record using the BE flag. Once BE is set the
1473 * frontend cannot change the state of FE.
1474 *
1475 * NOTE: If FE is set prior to us setting BE we still sync the
1476 * record out, but the flush completion code converts it to
1477 * a delete-on-disk record instead of destroying it.
1478 */
1479 KKASSERT((record->flags & HAMMER_RECF_INTERLOCK_BE) == 0);
1480 record->flags |= HAMMER_RECF_INTERLOCK_BE;
1481
1482 /*
1483 * The backend may have already disposed of the record.
1484 */
1485 if (record->flags & HAMMER_RECF_DELETED_BE) {
1486 error = 0;
1487 goto done;
1488 }
1489
1490 /*
1491 * If the whole inode is being deleted, all on-disk records will
1492 * be deleted very soon; we can't sync any new records to disk
1493 * because they will be deleted in the same transaction they were
1494 * created in (delete_tid == create_tid), which will assert.
1495 *
1496 * XXX There may be a case with RECORD_ADD with DELETED_FE set
1497 * that we currently panic on.
1498 */
1499 if (record->ip->sync_flags & HAMMER_INODE_DELETING) {
1500 switch(record->type) {
1501 case HAMMER_MEM_RECORD_DATA:
1502 /*
1503 * We don't have to do anything, if the record was
1504 * committed the space will have been accounted for
1505 * in the blockmap.
1506 */
1507 /* fall through */
1508 case HAMMER_MEM_RECORD_GENERAL:
1509 record->flags |= HAMMER_RECF_DELETED_FE;
1510 record->flags |= HAMMER_RECF_DELETED_BE;
1511 error = 0;
1512 goto done;
1513 case HAMMER_MEM_RECORD_ADD:
1514 panic("hammer_sync_record_callback: illegal add "
1515 "during inode deletion record %p", record);
1516 break; /* NOT REACHED */
1517 case HAMMER_MEM_RECORD_INODE:
1518 panic("hammer_sync_record_callback: attempt to "
1519 "sync inode record %p?", record);
1520 break; /* NOT REACHED */
1521 case HAMMER_MEM_RECORD_DEL:
1522 /*
1523 * Follow through and issue the on-disk deletion
1524 */
1525 break;
1526 }
1527 }
1528
1529 /*
1530 * If DELETED_FE is set special handling is needed for directory
1531 * entries. Dependent pieces related to the directory entry may
1532 * have already been synced to disk. If this occurs we have to
1533 * sync the directory entry and then change the in-memory record
1534 * from an ADD to a DELETE to cover the fact that it's been
1535 * deleted by the frontend.
1536 *
1537 * A directory delete covering record (MEM_RECORD_DEL) can never
1538 * be deleted by the frontend.
1539 *
1540 * Any other record type (aka DATA) can be deleted by the frontend.
1541 * XXX At the moment the flusher must skip it because there may
1542 * be another data record in the flush group for the same block,
1543 * meaning that some frontend data changes can leak into the backend's
1544 * synchronization point.
1545 */
1546 if (record->flags & HAMMER_RECF_DELETED_FE) {
1547 if (record->type == HAMMER_MEM_RECORD_ADD) {
1548 record->flags |= HAMMER_RECF_CONVERT_DELETE;
1549 } else {
1550 KKASSERT(record->type != HAMMER_MEM_RECORD_DEL);
1551 record->flags |= HAMMER_RECF_DELETED_BE;
1552 error = 0;
1553 goto done;
1554 }
1555 }
1556
1557 /*
1558 * Assign the create_tid for new records. Deletions already
1559 * have the record's entire key properly set up.
1560 */
1561 if (record->type != HAMMER_MEM_RECORD_DEL)
1562 record->leaf.base.create_tid = trans->tid;
1563 for (;;) {
1564 error = hammer_ip_sync_record_cursor(cursor, record);
1565 if (error != EDEADLK)
1566 break;
1567 hammer_done_cursor(cursor);
1568 error = hammer_init_cursor(trans, cursor, &record->ip->cache[0],
1569 record->ip);
1570 if (error)
1571 break;
1572 }
1573 record->flags &= ~HAMMER_RECF_CONVERT_DELETE;
1574
1575 if (error) {
1576 error = -error;
1577 if (error != -ENOSPC) {
1578 kprintf("hammer_sync_record_callback: sync failed rec "
1579 "%p, error %d\n", record, error);
1580 Debugger("sync failed rec");
1581 }
1582 }
1583done:
1584 hammer_flush_record_done(record, error);
1585 return(error);
1586}
1587
1588/*
1589 * XXX error handling
1590 */
1591int
1592hammer_sync_inode(hammer_inode_t ip)
1593{
1594 struct hammer_transaction trans;
1595 struct hammer_cursor cursor;
1596 hammer_node_t tmp_node;
1597 hammer_record_t depend;
1598 hammer_record_t next;
1599 int error, tmp_error;
1600 u_int64_t nlinks;
1601
1602 if ((ip->sync_flags & HAMMER_INODE_MODMASK) == 0)
1603 return(0);
1604
1605 hammer_start_transaction_fls(&trans, ip->hmp);
1606 error = hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);
1607 if (error)
1608 goto done;
1609
1610 /*
1611 * Any directory records referencing this inode which are not in
1612 * our current flush group must adjust our nlink count for the
1613 * purposes of synchronization to disk.
1614 *
1615 * Records which are in our flush group can be unlinked from our
1616 * inode now, potentially allowing the inode to be physically
1617 * deleted.
1618 *
1619 * This cannot block.
1620 */
1621 nlinks = ip->ino_data.nlinks;
1622 next = TAILQ_FIRST(&ip->target_list);
1623 while ((depend = next) != NULL) {
1624 next = TAILQ_NEXT(depend, target_entry);
1625 if (depend->flush_state == HAMMER_FST_FLUSH &&
1626 depend->flush_group == ip->hmp->flusher.act) {
1627 /*
1628 * If this is an ADD that was deleted by the frontend
1629 * the frontend nlinks count will have already been
1630 * decremented, but the backend is going to sync its
1631 * directory entry and must account for it. The
1632 * record will be converted to a delete-on-disk when
1633 * it gets synced.
1634 *
1635 * If the ADD was not deleted by the frontend we
1636 * can remove the dependency from our target_list.
1637 */
1638 if (depend->flags & HAMMER_RECF_DELETED_FE) {
1639 ++nlinks;
1640 } else {
1641 TAILQ_REMOVE(&ip->target_list, depend,
1642 target_entry);
1643 depend->target_ip = NULL;
1644 }
1645 } else if ((depend->flags & HAMMER_RECF_DELETED_FE) == 0) {
1646 /*
1647 * Not part of our flush group
1648 */
1649 KKASSERT((depend->flags & HAMMER_RECF_DELETED_BE) == 0);
1650 switch(depend->type) {
1651 case HAMMER_MEM_RECORD_ADD:
1652 --nlinks;
1653 break;
1654 case HAMMER_MEM_RECORD_DEL:
1655 ++nlinks;
1656 break;
1657 default:
1658 break;
1659 }
1660 }
1661 }
1662
1663 /*
1664 * Set dirty if we had to modify the link count.
1665 */
1666 if (ip->sync_ino_data.nlinks != nlinks) {
1667 KKASSERT((int64_t)nlinks >= 0);
1668 ip->sync_ino_data.nlinks = nlinks;
1669 ip->sync_flags |= HAMMER_INODE_DDIRTY;
1670 }
1671
1672 /*
1673 * If there is a truncation queued, destroy any data past the (aligned)
1674 * truncation point. Userland will have dealt with the buffer
1675 * containing the truncation point for us.
1676 *
1677 * We don't flush pending frontend data buffers until after we've
1678 * dealt with the truncation.
1679 */
1680 if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
1681 /*
1682 * Interlock trunc_off. The VOP front-end may continue to
1683 * make adjustments to it while we are blocked.
1684 */
1685 off_t trunc_off;
1686 off_t aligned_trunc_off;
1687 int blkmask;
1688
1689 trunc_off = ip->sync_trunc_off;
1690 blkmask = hammer_blocksize(trunc_off) - 1;
1691 aligned_trunc_off = (trunc_off + blkmask) & ~(int64_t)blkmask;
1692
1693 /*
1694 * Delete any whole blocks on-media. The front-end has
1695 * already cleaned out any partial block and made it
1696 * pending. The front-end may have updated trunc_off
1697 * while we were blocked so we only use sync_trunc_off.
1698 */
1699 error = hammer_ip_delete_range(&cursor, ip,
1700 aligned_trunc_off,
1701 0x7FFFFFFFFFFFFFFFLL, 1);
1702 if (error)
1703 Debugger("hammer_ip_delete_range errored");
1704
1705 /*
1706 * Clear the truncation flag on the backend after we have
1707 * completed the deletions. Backend data is now good again
1708 * (including new records we are about to sync, below).
1709 *
1710 * Leave sync_trunc_off intact. As we write additional
1711 * records the backend will update sync_trunc_off. This
1712 * tells the backend whether it can skip the overwrite
1713 * test. This should work properly even when the backend
1714 * writes full blocks where the truncation point straddles
1715 * the block because the comparison is against the base
1716 * offset of the record.
1717 */
1718 ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
1719 /* ip->sync_trunc_off = 0x7FFFFFFFFFFFFFFFLL; */
1720 } else {
1721 error = 0;
1722 }
1723
1724 /*
1725 * Now sync related records. These will typically be directory
1726 * entries or delete-on-disk records.
1727 *
1728 * Not all records will be flushed, but clear XDIRTY anyway. We
1729 * will set it again in the frontend hammer_flush_inode_done()
1730 * if records remain.
1731 */
1732 if (error == 0) {
1733 tmp_error = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
1734 hammer_sync_record_callback, &cursor);
1735 if (tmp_error < 0)
1736 tmp_error = -tmp_error;
1737 if (tmp_error)
1738 error = tmp_error;
1739 }
1740 hammer_cache_node(&ip->cache[1], cursor.node);
1741
1742 /*
1743 * Re-seek for inode update.
1744 */
1745 if (error == 0) {
1746 tmp_node = hammer_ref_node_safe(ip->hmp, &ip->cache[0], &error);
1747 if (tmp_node) {
1748 hammer_cursor_seek(&cursor, tmp_node, 0);
1749 hammer_rel_node(tmp_node);
1750 }
1751 error = 0;
1752 }
1753
1754 /*
1755 * If we are deleting the inode the frontend had better not have
1756 * any active references on elements making up the inode.
1757 */
1758 if (error == 0 && ip->sync_ino_data.nlinks == 0 &&
1759 RB_EMPTY(&ip->rec_tree) &&
1760 (ip->sync_flags & HAMMER_INODE_DELETING) &&
1761 (ip->flags & HAMMER_INODE_DELETED) == 0) {
1762 int count1 = 0;
1763
1764 ip->flags |= HAMMER_INODE_DELETED;
1765 error = hammer_ip_delete_range_all(&cursor, ip, &count1);
1766 if (error == 0) {
1767 ip->sync_flags &= ~HAMMER_INODE_DELETING;
1768 ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
1769 KKASSERT(RB_EMPTY(&ip->rec_tree));
1770
1771 /*
1772 * Set delete_tid in both the frontend and backend
1773 * copy of the inode record. The DELETED flag handles
1774 * this, do not set RDIRTY.
1775 */
1776 ip->ino_leaf.base.delete_tid = trans.tid;
1777 ip->sync_ino_leaf.base.delete_tid = trans.tid;
1778
1779 /*
1780 * Adjust the inode count in the volume header
1781 */
1782 if (ip->flags & HAMMER_INODE_ONDISK) {
1783 hammer_modify_volume_field(&trans,
1784 trans.rootvol,
1785 vol0_stat_inodes);
1786 --ip->hmp->rootvol->ondisk->vol0_stat_inodes;
1787 hammer_modify_volume_done(trans.rootvol);
1788 }
1789 } else {
1790 ip->flags &= ~HAMMER_INODE_DELETED;
1791 Debugger("hammer_ip_delete_range_all errored");
1792 }
1793 }
1794
1795 ip->sync_flags &= ~HAMMER_INODE_BUFS;
1796
1797 if (error)
1798 Debugger("RB_SCAN errored");
1799
1800 /*
1801 * Now update the inode's on-disk inode-data and/or on-disk record.
1802 * DELETED and ONDISK are managed only in ip->flags.
1803 */
1804 switch(ip->flags & (HAMMER_INODE_DELETED | HAMMER_INODE_ONDISK)) {
1805 case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK:
1806 /*
1807 * If deleted and on-disk, don't set any additional flags.
1808 * The delete flag takes care of things.
1809 *
1810 * Clear flags which may have been set by the frontend.
1811 */
1812 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY|
1813 HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
1814 HAMMER_INODE_DELETING);
1815 break;
1816 case HAMMER_INODE_DELETED:
1817 /*
1818 * Take care of the case where a deleted inode was never
1819 * flushed to the disk in the first place.
1820 *
1821 * Clear flags which may have been set by the frontend.
1822 */
1823 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY|
1824 HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
1825 HAMMER_INODE_DELETING);
1826 while (RB_ROOT(&ip->rec_tree)) {
1827 hammer_record_t record = RB_ROOT(&ip->rec_tree);
1828 hammer_ref(&record->lock);
1829 KKASSERT(record->lock.refs == 1);
1830 record->flags |= HAMMER_RECF_DELETED_FE;
1831 record->flags |= HAMMER_RECF_DELETED_BE;
1832 hammer_rel_mem_record(record);
1833 }
1834 break;
1835 case HAMMER_INODE_ONDISK:
1836 /*
1837 * If already on-disk, do not set any additional flags.
1838 */
1839 break;
1840 default:
1841 /*
1842 * If not on-disk and not deleted, set both dirty flags
1843 * to force an initial record to be written. Also set
1844 * the create_tid for the inode.
1845 *
1846 * Set create_tid in both the frontend and backend
1847 * copy of the inode record.
1848 */
1849 ip->ino_leaf.base.create_tid = trans.tid;
1850 ip->sync_ino_leaf.base.create_tid = trans.tid;
1851 ip->sync_flags |= HAMMER_INODE_DDIRTY;
1852 break;
1853 }
1854
1855 /*
1856 * If RDIRTY or DDIRTY is set, write out a new record. If the inode
1857 * is already on-disk the old record is marked as deleted.
1858 *
1859 * If DELETED is set hammer_update_inode() will delete the existing
1860 * record without writing out a new one.
1861 *
1862 * If *ONLY* the ITIMES flag is set we can update the record in-place.
1863 */
1864 if (ip->flags & HAMMER_INODE_DELETED) {
1865 error = hammer_update_inode(&cursor, ip);
1866 } else
1867 if ((ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) ==
1868 HAMMER_INODE_ITIMES) {
1869 error = hammer_update_itimes(&cursor, ip);
1870 } else
1871 if (ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) {
1872 error = hammer_update_inode(&cursor, ip);
1873 }
1874 if (error)
1875 Debugger("hammer_update_itimes/inode errored");
1876done:
1877 /*
1878 * Save the TID we used to sync the inode with to make sure we
1879 * do not improperly reuse it.
1880 */
1881 hammer_done_cursor(&cursor);
1882 hammer_done_transaction(&trans);
1883 return(error);
1884}
1885
1886/*
1887 * This routine is called when the OS is no longer actively referencing
1888 * the inode (but might still be keeping it cached), or when releasing
1889 * the last reference to an inode.
1890 *
1891 * At this point if the inode's nlinks count is zero we want to destroy
1892 * it, which may mean destroying it on-media too.
1893 */
1894void
1895hammer_inode_unloadable_check(hammer_inode_t ip, int getvp)
1896{
1897 struct vnode *vp;
1898
1899 /*
1900 * Set the DELETING flag when the link count drops to 0 and the
1901 * OS no longer has any opens on the inode.
1902 *
1903 * The backend will clear DELETING (a mod flag) and set DELETED
1904 * (a state flag) when it is actually able to perform the
1905 * operation.
1906 */
1907 if (ip->ino_data.nlinks == 0 &&
1908 (ip->flags & (HAMMER_INODE_DELETING|HAMMER_INODE_DELETED)) == 0) {
1909 ip->flags |= HAMMER_INODE_DELETING;
1910 ip->flags |= HAMMER_INODE_TRUNCATED;
1911 ip->trunc_off = 0;
1912 vp = NULL;
1913 if (getvp) {
1914 if (hammer_get_vnode(ip, &vp) != 0)
1915 return;
1916 }
1917
1918 /*
1919 * Final cleanup
1920 */
1921 if (ip->vp) {
1922 vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
1923 vnode_pager_setsize(ip->vp, 0);
1924 }
1925 if (getvp) {
1926 vput(vp);
1927 }
1928 }
1929}
1930
1931/*
1932 * Re-test an inode when a dependency has gone away to see if we
1933 * can chain flush it.
1934 */
1935void
1936hammer_test_inode(hammer_inode_t ip)
1937{
1938 if (ip->flags & HAMMER_INODE_REFLUSH) {
1939 ip->flags &= ~HAMMER_INODE_REFLUSH;
1940 hammer_ref(&ip->lock);
1941 if (ip->flags & HAMMER_INODE_RESIGNAL) {
1942 ip->flags &= ~HAMMER_INODE_RESIGNAL;
1943 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1944 } else {
1945 hammer_flush_inode(ip, 0);
1946 }
1947 hammer_rel_inode(ip, 0);
1948 }
1949}
1950
1951/*
1952 * Clear the RECLAIM flag on an inode. This occurs when the inode is
1953 * reassociated with a vp or just before it gets freed.
1954 *
1955 * Wake up one thread blocked waiting on reclaims to complete. Note that
1956 * the inode the thread is waiting on behalf of is a different inode than
1957 * the inode we are called with. This is to create a pipeline.
1958 */
1959static void
1960hammer_inode_wakereclaims(hammer_inode_t ip)
1961{
1962 struct hammer_reclaim *reclaim;
1963 hammer_mount_t hmp = ip->hmp;
1964
1965 if ((ip->flags & HAMMER_INODE_RECLAIM) == 0)
1966 return;
1967
1968 --hammer_count_reclaiming;
1969 --hmp->inode_reclaims;
1970 ip->flags &= ~HAMMER_INODE_RECLAIM;
1971
1972 if ((reclaim = TAILQ_FIRST(&hmp->reclaim_list)) != NULL) {
1973 TAILQ_REMOVE(&hmp->reclaim_list, reclaim, entry);
1974 reclaim->okydoky = 1;
1975 wakeup(reclaim);
1976 }
1977}
1978
1979/*
1980 * Set up our reclaim pipeline. We only let so many detached (and dirty)
1981 * inodes build up before we start blocking.
1982 *
1983 * When we block we don't care *which* inode has finished reclaiming,
1984 * as long as one does. This is somewhat heuristic... we also put a
1985 * cap on how long we are willing to wait.
1986 */
1987void
1988hammer_inode_waitreclaims(hammer_mount_t hmp)
1989{
1990 struct hammer_reclaim reclaim;
1991 int delay;
1992
1993 if (hmp->inode_reclaims > HAMMER_RECLAIM_WAIT) {
1994 reclaim.okydoky = 0;
1995 TAILQ_INSERT_TAIL(&hmp->reclaim_list,
1996 &reclaim, entry);
1997 } else {
1998 reclaim.okydoky = 1;
1999 }
2000
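	/*
	 * The sleep below grows linearly with the reclaim backlog.  For
	 * example, a backlog of 2 * HAMMER_RECLAIM_WAIT detached inodes
	 * yields a delay of hz ticks (roughly one second), unless
	 * hammer_inode_wakereclaims() wakes us sooner.
	 */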
2001 if (reclaim.okydoky == 0) {
2002 delay = (hmp->inode_reclaims - HAMMER_RECLAIM_WAIT) * hz /
2003 HAMMER_RECLAIM_WAIT;
2004 if (delay >= 0)
2005 tsleep(&reclaim, 0, "hmrrcm", delay + 1);
2006 if (reclaim.okydoky == 0)
2007 TAILQ_REMOVE(&hmp->reclaim_list, &reclaim, entry);
2008 }
2009}
2010