2 * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * TRANSACTION AND FLUSH HANDLING
39 * Deceptively simple but actually fairly difficult to implement properly is
40 * how I would describe it.
42 * The biggest problem is that each PFS may belong to a cluster so its
43 * media modify_tid and mirror_tid fields are in a completely different
44 * domain than the topology related to the super-root. Most of the code
45 * operates using modify_xid and delete_xid which are local identifiers.
47 * The second biggest problem is that we really want to allow flushes to run
48 * concurrently with new front-end operations, which means that the in-memory
49 * topology of hammer2_chain structures can represent both current state and
50 * snapshot-for-flush state.
53 #include <sys/cdefs.h>
54 #include <sys/param.h>
55 #include <sys/systm.h>
56 #include <sys/types.h>
65 * Recursively flush the specified chain. The chain is locked and
66 * referenced by the caller and will remain so on return. The chain
67 * will remain referenced throughout but can temporarily lose its
68 * lock during the recursion to avoid unnecessarily stalling user
71 struct hammer2_flush_info {
72 hammer2_chain_t *parent;
73 hammer2_trans_t *trans;
78 struct h2_flush_deferral_list flush_list;
79 hammer2_xid_t sync_xid; /* memory synchronization point */
82 typedef struct hammer2_flush_info hammer2_flush_info_t;
84 static void hammer2_flush_core(hammer2_flush_info_t *info,
85 hammer2_chain_t **chainp, int deleting);
86 static int hammer2_flush_pass1(hammer2_chain_t *child, void *data);
87 static int hammer2_flush_pass2(hammer2_chain_t *child, void *data);
88 static int hammer2_flush_pass3(hammer2_chain_t *child, void *data);
89 static int hammer2_flush_pass4(hammer2_chain_t *child, void *data);
90 static int hammer2_flush_pass5(hammer2_chain_t *child, void *data);
91 static void hammer2_rollup_stats(hammer2_chain_t *parent,
92 hammer2_chain_t *child, int how);
96 * Can we ignore a chain for the purposes of flushing modifications
99 * This code is now degenerate. We used to have to distinguish between
100 * deleted chains and deleted chains associated with inodes that were
101 * still open. This mechanic has been fixed so the function is now
106 h2ignore_deleted(hammer2_flush_info_t *info, hammer2_chain_t *chain)
108 return (chain->delete_xid <= info->sync_xid);
114 hammer2_updatestats(hammer2_flush_info_t *info, hammer2_blockref_t *bref,
119 if (bref->type != 0) {
120 bytes = 1 << (bref->data_off & HAMMER2_OFF_MASK_RADIX);
121 if (bref->type == HAMMER2_BREF_TYPE_INODE)
122 info->inode_count += how;
124 info->data_count -= bytes;
126 info->data_count += bytes;
132 * For now use a global transaction manager. What we ultimately want to do
133 * is give each non-overlapping hmp/pmp group its own transaction manager.
135 * Transactions govern XID tracking on the physical media (the hmp), but they
136 * also govern TID tracking which is per-PFS and thus might cross multiple
137 * hmp's. So we can't just stuff tmanage into hammer2_mount or
140 static hammer2_trans_manage_t tmanage;
143 hammer2_trans_manage_init(void)
145 lockinit(&tmanage.translk, "h2trans", 0, 0);
146 TAILQ_INIT(&tmanage.transq);
147 tmanage.flush_xid = 1;
148 tmanage.alloc_xid = tmanage.flush_xid + 1;
152 hammer2_trans_newxid(hammer2_pfsmount_t *pmp __unused)
157 xid = atomic_fetchadd_int(&tmanage.alloc_xid, 1);
165 * Transaction support functions for writing to the filesystem.
167 * Initializing a new transaction allocates a transaction ID. Typically
168 * passed a pmp (hmp passed as NULL), indicating a cluster transaction. Can
169 * be passed a NULL pmp and non-NULL hmp to indicate a transaction on a single
170 * media target. The latter mode is used by the recovery code.
172 * TWO TRANSACTION IDs can run concurrently, where one is a flush and the
173 * other is a set of any number of concurrent filesystem operations. We
174 * can either have <running_fs_ops> + <waiting_flush> + <blocked_fs_ops>
175 * or we can have <running_flush> + <concurrent_fs_ops>.
177 * During a flush, new fs_ops are only blocked until the fs_ops prior to
178 * the flush complete. The new fs_ops can then run concurrent with the flush.
180 * Buffer-cache transactions operate as fs_ops but never block. A
181 * buffer-cache flush will run either before or after the current pending
182 * flush depending on its state.
185 hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp, int flags)
187 hammer2_trans_manage_t *tman;
188 hammer2_trans_t *head;
192 bzero(trans, sizeof(*trans));
194 trans->flags = flags;
195 trans->td = curthread;
197 lockmgr(&tman->translk, LK_EXCLUSIVE);
199 if (flags & HAMMER2_TRANS_ISFLUSH) {
201 * If multiple flushes are trying to run we have to
202 * wait until it is our turn. All flushes are serialized.
204 * We queue ourselves and then wait to become the head
205 * of the queue, allowing all prior flushes to complete.
207 * Multiple normal transactions can share the current
208 * transaction id but a flush transaction needs its own
209 * unique TID for proper block table update accounting.
213 pmp->flush_tid = pmp->alloc_tid;
214 tman->flush_xid = hammer2_trans_newxid(pmp);
215 trans->sync_xid = tman->flush_xid;
217 TAILQ_INSERT_TAIL(&tman->transq, trans, entry);
218 if (TAILQ_FIRST(&tman->transq) != trans) {
220 while (trans->blocked) {
221 lksleep(&trans->sync_xid, &tman->translk,
225 } else if (tman->flushcnt == 0) {
227 * No flushes are pending, we can go. Use prior flush_xid + 1.
229 * WARNING! Also see hammer2_chain_setsubmod()
231 TAILQ_INSERT_TAIL(&tman->transq, trans, entry);
232 trans->sync_xid = tman->flush_xid + 1;
234 /* XXX improve/optimize inode allocation */
235 } else if (trans->flags & HAMMER2_TRANS_BUFCACHE) {
237 * A buffer cache transaction is requested while a flush
238 * is in progress. The flush's PREFLUSH flag must be set
241 * The buffer cache flush takes on the main flush's
244 TAILQ_FOREACH(head, &tman->transq, entry) {
245 if (head->flags & HAMMER2_TRANS_ISFLUSH)
249 KKASSERT(head->flags & HAMMER2_TRANS_PREFLUSH);
250 trans->flags |= HAMMER2_TRANS_PREFLUSH;
251 TAILQ_INSERT_AFTER(&tman->transq, head, trans, entry);
252 trans->sync_xid = head->sync_xid;
253 trans->flags |= HAMMER2_TRANS_CONCURRENT;
254 /* not allowed to block */
257 * A normal transaction is requested while a flush is in
258 * progress. We insert after the current flush and may
261 * WARNING! Also see hammer2_chain_setsubmod()
263 TAILQ_FOREACH(head, &tman->transq, entry) {
264 if (head->flags & HAMMER2_TRANS_ISFLUSH)
268 TAILQ_INSERT_AFTER(&tman->transq, head, trans, entry);
269 trans->sync_xid = head->sync_xid + 1;
270 trans->flags |= HAMMER2_TRANS_CONCURRENT;
273 * XXX for now we must block new transactions, synchronous
274 * flush mode is on by default.
276 * If synchronous flush mode is enabled concurrent
277 * frontend transactions during the flush are not
278 * allowed (except we don't have a choice for buffer
281 if (hammer2_synchronous_flush > 0 ||
282 TAILQ_FIRST(&tman->transq) != head) {
284 while (trans->blocked) {
285 lksleep(&trans->sync_xid,
291 if (flags & HAMMER2_TRANS_NEWINODE) {
294 * Super-root transaction, all new inodes have an
295 * inode number of 1. Normal pfs inode cache
296 * semantics are not used.
298 trans->inode_tid = 1;
303 if (pmp->inode_tid < HAMMER2_INODE_START)
304 pmp->inode_tid = HAMMER2_INODE_START;
305 trans->inode_tid = pmp->inode_tid++;
309 lockmgr(&tman->translk, LK_RELEASE);
313 hammer2_trans_done(hammer2_trans_t *trans)
315 hammer2_trans_manage_t *tman;
316 hammer2_trans_t *head;
317 hammer2_trans_t *scan;
324 lockmgr(&tman->translk, LK_EXCLUSIVE);
325 TAILQ_REMOVE(&tman->transq, trans, entry);
326 head = TAILQ_FIRST(&tman->transq);
329 * Adjust flushcnt if this was a flush, clear TRANS_CONCURRENT
330 * up through the next flush. (If the head is a flush then we
331 * stop there, unlike the unblock code following this section).
333 if (trans->flags & HAMMER2_TRANS_ISFLUSH) {
336 while (scan && (scan->flags & HAMMER2_TRANS_ISFLUSH) == 0) {
337 atomic_clear_int(&scan->flags,
338 HAMMER2_TRANS_CONCURRENT);
339 scan = TAILQ_NEXT(scan, entry);
344 * Unblock the head of the queue and any additional transactions
345 * up to the next flush. The head can be a flush and it will be
346 * unblocked along with the non-flush transactions following it
347 * (which are allowed to run concurrently with it).
349 * In synchronous flush mode we stop if the head transaction is
352 if (head && head->blocked) {
354 wakeup(&head->sync_xid);
356 if (hammer2_synchronous_flush > 0)
359 scan = TAILQ_NEXT(head, entry);
360 while (scan && (scan->flags & HAMMER2_TRANS_ISFLUSH) == 0) {
363 wakeup(&scan->sync_xid);
365 scan = TAILQ_NEXT(scan, entry);
368 lockmgr(&tman->translk, LK_RELEASE);
372 * Flush the chain and all modified sub-chains through the specified
373 * synchronization point, propagating parent chain modifications and
374 * mirror_tid updates back up as needed. Since we are recursing downward
375 * we do not have to deal with the complexities of multi-homed chains (chains
376 * with multiple parents).
378 * Caller must have interlocked against any non-flush-related modifying
379 * operations in progress whos modify_xid values are less than or equal
380 * to the passed sync_xid.
382 * Caller must have already vetted synchronization points to ensure they
383 * are properly flushed. Only snapshots and cluster flushes can create
384 * these sorts of synchronization points.
386 * This routine can be called from several places but the most important
389 * chain is locked on call and will remain locked on return. If a flush
390 * occured, the chain's FLUSH_CREATE and/or FLUSH_DELETE bit will be set
391 * indicating that its parent (which is not part of the flush) should be
392 * updated. The chain may be replaced by the call if it was modified.
395 hammer2_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp)
397 hammer2_chain_t *chain = *chainp;
398 hammer2_chain_t *scan;
399 hammer2_chain_core_t *core;
400 hammer2_flush_info_t info;
404 * Execute the recursive flush and handle deferrals.
406 * Chains can be ridiculously long (thousands deep), so to
407 * avoid blowing out the kernel stack the recursive flush has a
408 * depth limit. Elements at the limit are placed on a list
409 * for re-execution after the stack has been popped.
411 bzero(&info, sizeof(info));
412 TAILQ_INIT(&info.flush_list);
414 info.sync_xid = trans->sync_xid;
415 info.cache_index = -1;
420 * Extra ref needed because flush_core expects it when replacing
423 hammer2_chain_ref(chain);
428 * Unwind deep recursions which had been deferred. This
429 * can leave the FLUSH_* bits set for these chains, which
430 * will be handled when we [re]flush chain after the unwind.
432 while ((scan = TAILQ_FIRST(&info.flush_list)) != NULL) {
433 KKASSERT(scan->flags & HAMMER2_CHAIN_DEFERRED);
434 TAILQ_REMOVE(&info.flush_list, scan, flush_node);
435 atomic_clear_int(&scan->flags, HAMMER2_CHAIN_DEFERRED);
438 * Now that we've popped back up we can do a secondary
439 * recursion on the deferred elements.
441 * NOTE: hammer2_flush() may replace scan.
443 if (hammer2_debug & 0x0040)
444 kprintf("deferred flush %p\n", scan);
445 hammer2_chain_lock(scan, HAMMER2_RESOLVE_MAYBE);
446 hammer2_chain_drop(scan); /* ref from deferral */
447 hammer2_flush(trans, &scan);
448 hammer2_chain_unlock(scan);
454 info.diddeferral = 0;
455 hammer2_flush_core(&info, &chain, 0);
458 * Only loop if deep recursions have been deferred.
460 if (TAILQ_EMPTY(&info.flush_list))
463 if (++loops % 1000 == 0) {
464 kprintf("hammer2_flush: excessive loops on %p\n",
466 if (hammer2_debug & 0x100000)
470 hammer2_chain_drop(chain);
475 * This is the core of the chain flushing code. The chain is locked by the
476 * caller and must also have an extra ref on it by the caller, and remains
477 * locked and will have an extra ref on return. Upon return, the caller can
478 * test the FLUSH_CREATE and FLUSH_DELETE bits to determine what action must
479 * be taken on the parent.
481 * (1) Determine if this node is a candidate for the flush, return if it is
482 * not. fchain and vchain are always candidates for the flush.
484 * (2) If we recurse too deep the chain is entered onto the deferral list and
485 * the current flush stack is aborted until after the deferral list is
488 * (3) Recursively flush live children (rbtree). This can create deferrals.
489 * A successful flush clears the MODIFIED bit in the children.
491 * (4) Recursively flush deleted children (dbtree). Deletions may be
492 * considered 'live' if the delete_tid is beyond the flush_tid. If
493 * considered 'dead' the recursion is still needed in order to clean
494 * up the chain. This can create deferrals.
496 * A successful flush clears the MODIFIED bit in the children.
498 * (5) Calculate block table updates on chain based on the children scans
499 * in (3) and (4) by testing the FLUSH_CREATE and FLUSH_DELETE bits,
500 * modifying chain if necessary to perform the block table updates.
501 * Deletions must be removed from dbtree when removed from the
502 * chain's block table.
504 * If 'chain' itself is marked DELETED but treated as live, the block
505 * table update(s) must be propagated to all contemporary chains. In
506 * fact, all contemporary chains must be locked and updated uninterrupted
507 * to avoid lookup races. Once MODIFIED and FLUSH_CREATE is cleared,
508 * a chain can be unloaded from memory with the expectation that it can
509 * be reloaded later via the block table at any time.
511 * WARNING ON BREF MODIFY_TID/MIRROR_TID
513 * blockref.modify_tid and blockref.mirror_tid are consistent only within a
514 * PFS. This is why we cannot cache sync_tid in the transaction structure.
515 * Instead we access it from the pmp.
518 hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t **chainp,
521 hammer2_chain_t *chain = *chainp;
522 hammer2_chain_t *saved_parent;
523 hammer2_mount_t *hmp;
524 hammer2_chain_core_t *core;
530 diddeferral = info->diddeferral;
533 * (1) Check if we even have any work to do.
535 * This bit of code is capable of short-cutting entire sub-trees
536 * if they have not been touched or if they have already been
539 if (/*(chain->flags & HAMMER2_CHAIN_MODIFIED) == 0 &&*/
540 (chain->update_xlo >= info->sync_xid || /* already synced */
541 chain->update_xlo >= chain->update_xhi)) { /* old/unchanged */
542 /* update_xlo/_xhi already filters chain out, do not update */
543 /* don't update bref.mirror_tid, pass2 is not called */
548 * mirror_tid should not be forward-indexed
550 KKASSERT(chain->bref.mirror_tid <= chain->pmp->flush_tid);
553 * Ignore chains modified beyond the current flush point. These
554 * will be treated as if they did not exist. Subchains with lower
555 * modify_xid's will still be accessible via other parents.
557 * Do not update bref.mirror_tid here, it will interfere with
558 * synchronization. e.g. inode flush tid 1, concurrent D-D tid 2,
559 * then later on inode flush tid 2. If we were to set mirror_tid
560 * to 1 during inode flush tid 1 the blockrefs would only be partially
561 * updated (and likely panic).
563 * We must update chain->update_xlo here to prevent re-entry in this
566 * (vchain and fchain are exceptions since they cannot be duplicated)
568 if (chain->modify_xid > info->sync_xid &&
569 chain != &hmp->fchain && chain != &hmp->vchain) {
570 /* do not update bref.mirror_tid, pass2 ignores chain */
571 /* chain->update_xlo = info->sync_xid; */
576 * (2) Recurse downward and check recursion depth.
577 * (3) Flush live children
578 * (4) Flush deleted children
580 * We adjust update_xlo if not deferring chain to prevent re-entry
581 * in this flush cycle, but it must be set AFTER the flush in case
582 * a deeper flush hits the chain. Otherwise the deeper flush cannot
583 * complete. We re-check the condition after finishing the flushes.
585 * update_xhi was already checked and prevents initial recursions on
586 * subtrees which have not been modified.
588 saved_parent = info->parent;
589 saved_domodify = info->domodify;
590 info->parent = chain;
593 if (chain->flags & HAMMER2_CHAIN_DEFERRED) {
595 } else if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT) {
596 if ((chain->flags & HAMMER2_CHAIN_DEFERRED) == 0) {
597 hammer2_chain_ref(chain);
598 TAILQ_INSERT_TAIL(&info->flush_list,
600 atomic_set_int(&chain->flags,
601 HAMMER2_CHAIN_DEFERRED);
605 hammer2_chain_t *scan;
608 * The flush is queue-agnostic when running pass1, but order
609 * is important to catch any races where an existing
610 * flush-visible child is moved from rbtree->dbtree/dbq.
612 * New children added by concurrent operations are not visible
613 * to the flush anyway so we don't care about those races.
614 * However, the flush itself can move a child from dbq to
615 * dbtree (rare in pass1 but it is possible).
617 * pass1 can handle re-execution of a child.
619 spin_lock(&core->cst.spin);
620 KKASSERT(core->good == 0x1234 && core->sharecnt > 0);
621 RB_SCAN(hammer2_chain_tree, &core->rbtree,
622 NULL, hammer2_flush_pass1, info);
623 RB_SCAN(hammer2_chain_tree, &core->dbtree,
624 NULL, hammer2_flush_pass1, info);
625 scan = TAILQ_FIRST(&core->dbq);
627 KKASSERT(scan->flags & HAMMER2_CHAIN_ONDBQ);
628 hammer2_flush_pass1(scan, info);
629 if (scan->flags & HAMMER2_CHAIN_ONDBQ)
630 scan = TAILQ_NEXT(scan, db_entry);
632 scan = TAILQ_FIRST(&core->dbq);
634 spin_unlock(&core->cst.spin);
638 * Stop if deferred, do not update update_xlo.
640 if (info->diddeferral) {
645 * If a block table update is needed place the parent in a modified
646 * state, which might delete-duplicate it.
648 * - To prevent loops and other confusion, we synchronize update_xlo
649 * for the original chain.
651 * - The original parent will not be used by the flush so we can
652 * clear its MODIFIED bit.
654 if (info->domodify) {
655 hammer2_chain_modify(info->trans, &info->parent, 0);
656 if (info->parent != chain) {
661 * NOTE: bref.mirror_tid cannot be updated
662 * unless MODIFIED is cleared or already
665 chain->inode_reason += 0x10000000;
666 info->parent->inode_reason += 0x100;
667 KKASSERT(info->parent->core == chain->core);
668 if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
669 atomic_clear_int(&chain->flags,
670 HAMMER2_CHAIN_MODIFIED);
671 hammer2_pfs_memory_wakeup(chain->pmp);
672 hammer2_chain_drop(chain);
675 if (chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) {
676 atomic_clear_int(&chain->flags,
677 HAMMER2_CHAIN_FLUSH_CREATE);
678 hammer2_chain_drop(chain);
680 if (info->parent->flags & HAMMER2_CHAIN_FLUSH_DELETE) {
681 atomic_clear_int(&info->parent->flags,
682 HAMMER2_CHAIN_FLUSH_DELETE);
683 hammer2_chain_drop(info->parent);
686 if (chain->update_xlo < info->sync_xid)
687 chain->update_xlo = info->sync_xid;
688 KKASSERT(info->parent->update_xlo < info->sync_xid);
689 hammer2_chain_drop(chain);
690 hammer2_chain_ref(info->parent);
692 chain = info->parent;
696 * If a blocktable update is needed determine if this is the last
697 * parent requiring modification (check all parents using the core).
699 * Set bit 1 (0x02) of domodify if this is the last parent,
700 * which will cause scan2 to clear FLUSH_CREATE and FLUSH_DELETE.
703 hammer2_chain_t *scan;
705 spin_lock(&core->cst.spin);
706 TAILQ_FOREACH(scan, &core->ownerq, core_entry) {
708 * Ignore the current parent being processed (we do
709 * not adjust update_xlo until after the fixup).
715 * Ignore chains which have already been updated
716 * Ignore unmodified chains (lo >= hi).
718 if ((scan->flags & HAMMER2_CHAIN_MODIFIED) == 0 &&
719 (scan->update_xlo >= info->sync_xid ||
720 scan->update_xlo >= scan->update_xhi)) {
725 * Cannot exhaust all parents if one is not visible
726 * to the flush. The root chains are special-cased
727 * because they cannot really be delete-duplicated.
729 if (scan != &scan->hmp->fchain &&
730 scan != &scan->hmp->vchain &&
731 scan->modify_xid > info->sync_xid) {
736 * Fail if update_xlo has not been synchronized to
737 * at least our sync_xid on any modified parent chain.
739 if (scan->update_xlo < info->sync_xid)
742 spin_unlock(&core->cst.spin);
748 * (5) Calculate block table updates or child cleanups.
749 * (this whole operation has to be atomic)
751 * domodify 0x01 - block table updates
752 * 0x02 - child cleanups
754 * pass2 - Process deletions from dbtree and dbq.
755 * pass3 - Process insertions from rbtree, dbtree, and dbq.
756 * pass4 - Cleanup child flags on the last parent and
757 * Adjust queues on the live parent (deletions).
758 * pass5 - Cleanup child flags on the last parent and
759 * Adjust queues on the live parent (insertions).
761 * Queue adjustments had to be separated into deletions and
762 * insertions because both can occur on dbtree.
764 if (info->domodify) {
765 hammer2_chain_t *scan;
767 spin_lock(&core->cst.spin);
769 while ((info->domodify & 1) && info->parent) {
770 /* PASS2 - Deletions */
771 RB_SCAN(hammer2_chain_tree, &core->rbtree,
772 NULL, hammer2_flush_pass2, info);
773 RB_SCAN(hammer2_chain_tree, &core->dbtree,
774 NULL, hammer2_flush_pass2, info);
775 scan = TAILQ_FIRST(&core->dbq);
776 TAILQ_FOREACH(scan, &core->dbq, db_entry) {
777 KKASSERT(scan->flags & HAMMER2_CHAIN_ONDBQ);
778 hammer2_flush_pass2(scan, info);
781 /* PASS3 - Insertions */
782 RB_SCAN(hammer2_chain_tree, &core->rbtree,
783 NULL, hammer2_flush_pass3, info);
784 RB_SCAN(hammer2_chain_tree, &core->dbtree,
785 NULL, hammer2_flush_pass3, info);
786 TAILQ_FOREACH(scan, &core->dbq, db_entry) {
787 KKASSERT(scan->flags & HAMMER2_CHAIN_ONDBQ);
788 hammer2_flush_pass3(scan, info);
790 info->parent = TAILQ_NEXT(info->parent, core_entry);
792 kprintf("FLUSH SPECIAL UPDATE (%p) %p.%d %08x\n",
794 info->parent->bref.type,
795 info->parent->flags);
797 info->parent = chain;
799 /* PASS4 - Cleanup */
800 RB_SCAN(hammer2_chain_tree, &core->rbtree,
801 NULL, hammer2_flush_pass4, info);
802 scan = TAILQ_FIRST(&core->dbq);
804 KKASSERT(scan->flags & HAMMER2_CHAIN_ONDBQ);
805 hammer2_flush_pass4(scan, info);
806 if (scan->flags & HAMMER2_CHAIN_ONDBQ)
807 scan = TAILQ_NEXT(scan, db_entry);
809 scan = TAILQ_FIRST(&core->dbq);
811 RB_SCAN(hammer2_chain_tree, &core->dbtree,
812 NULL, hammer2_flush_pass4, info);
814 /* PASS5 - Cleanup */
815 RB_SCAN(hammer2_chain_tree, &core->rbtree,
816 NULL, hammer2_flush_pass5, info);
817 scan = TAILQ_FIRST(&core->dbq);
819 KKASSERT(scan->flags & HAMMER2_CHAIN_ONDBQ);
820 hammer2_flush_pass5(scan, info);
821 if (scan->flags & HAMMER2_CHAIN_ONDBQ)
822 scan = TAILQ_NEXT(scan, db_entry);
824 scan = TAILQ_FIRST(&core->dbq);
826 RB_SCAN(hammer2_chain_tree, &core->dbtree,
827 NULL, hammer2_flush_pass5, info);
829 spin_unlock(&core->cst.spin);
833 * Synchronize update_xlo to prevent reentrant block updates of this
836 chain->update_xlo = info->sync_xid;
839 * Skip the flush if the chain was not placed in a modified state
840 * or was not already in a modified state.
842 if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0)
846 * FLUSH THE CHAIN (on the way back up the recursion)
848 * Chain is now deterministically being flushed and not being deferred.
849 * We've finished running the recursion and the blockref update.
851 * update bref.mirror_tid. update_xlo has already been updated.
853 chain->bref.mirror_tid = chain->pmp->flush_tid;
856 * Dispose of the modified bit. FLUSH_CREATE should already be
859 KKASSERT((chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) ||
860 chain == &hmp->vchain);
861 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
862 hammer2_pfs_memory_wakeup(chain->pmp);
864 if ((chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) ||
865 chain == &hmp->vchain ||
866 chain == &hmp->fchain) {
868 * Drop the ref from the MODIFIED bit we cleared,
871 hammer2_chain_drop(chain);
874 * Drop the ref from the MODIFIED bit we cleared and
875 * set a ref for the FLUSH_CREATE bit we are setting.
878 atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSH_CREATE);
882 * Skip the actual flush operation if the chain has been deleted
883 * in our flus hview. There will be no block table entry that
886 if (h2ignore_deleted(info, chain))
892 * A DELETED node that reaches this point must be flushed for
893 * synchronization point consistency.
895 * Update bref.mirror_tid, clear MODIFIED, and set MOVED.
897 * The caller will update the parent's reference to this chain
898 * by testing MOVED as long as the modification was in-bounds.
900 * MOVED is never set on the volume root as there is no parent
903 if (hammer2_debug & 0x1000) {
904 kprintf("Flush %p.%d %016jx/%d sync_xid=%08x data=%016jx\n",
905 chain, chain->bref.type,
906 chain->bref.key, chain->bref.keybits,
907 info->sync_xid, chain->bref.data_off);
909 if (hammer2_debug & 0x2000) {
910 Debugger("Flush hell");
914 * If this is part of a recursive flush we can go ahead and write
915 * out the buffer cache buffer and pass a new bref back up the chain
918 * Volume headers are NOT flushed here as they require special
921 switch(chain->bref.type) {
922 case HAMMER2_BREF_TYPE_FREEMAP:
923 KKASSERT(hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED);
924 hmp->voldata.freemap_tid = hmp->fchain.bref.mirror_tid;
926 case HAMMER2_BREF_TYPE_VOLUME:
928 * The free block table is flushed by hammer2_vfs_sync()
929 * before it flushes vchain. We must still hold fchain
930 * locked while copying voldata to volsync, however.
932 hammer2_voldata_lock(hmp);
933 hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS);
935 if ((hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) ||
936 hmp->voldata.freemap_tid < info->trans->sync_tid) {
937 /* this will modify vchain as a side effect */
938 hammer2_chain_t *tmp = &hmp->fchain;
939 hammer2_chain_flush(info->trans, &tmp);
940 KKASSERT(tmp == &hmp->fchain);
945 * There is no parent to our root vchain and fchain to
946 * synchronize the bref to, their updated mirror_tid's
947 * must be synchronized to the volume header.
949 hmp->voldata.mirror_tid = chain->bref.mirror_tid;
950 hmp->voldata.freemap_tid = hmp->fchain.bref.mirror_tid;
953 * The volume header is flushed manually by the syncer, not
954 * here. All we do here is adjust the crc's.
956 KKASSERT(chain->data != NULL);
957 KKASSERT(chain->dio == NULL);
959 hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]=
961 (char *)&hmp->voldata +
962 HAMMER2_VOLUME_ICRC1_OFF,
963 HAMMER2_VOLUME_ICRC1_SIZE);
964 hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]=
966 (char *)&hmp->voldata +
967 HAMMER2_VOLUME_ICRC0_OFF,
968 HAMMER2_VOLUME_ICRC0_SIZE);
969 hmp->voldata.icrc_volheader =
971 (char *)&hmp->voldata +
972 HAMMER2_VOLUME_ICRCVH_OFF,
973 HAMMER2_VOLUME_ICRCVH_SIZE);
974 hmp->volsync = hmp->voldata;
975 atomic_set_int(&chain->flags, HAMMER2_CHAIN_VOLUMESYNC);
976 hammer2_chain_unlock(&hmp->fchain);
977 hammer2_voldata_unlock(hmp);
979 case HAMMER2_BREF_TYPE_DATA:
981 * Data elements have already been flushed via the logical
982 * file buffer cache. Their hash was set in the bref by
983 * the vop_write code.
985 * Make sure any device buffer(s) have been flushed out here.
986 * (there aren't usually any to flush).
990 case HAMMER2_BREF_TYPE_INDIRECT:
992 * Indirect blocks may be in an INITIAL state. Use the
993 * chain_lock() call to ensure that the buffer has been
994 * instantiated (even though it is already locked the buffer
995 * might not have been instantiated).
997 * Only write the buffer out if it is dirty, it is possible
998 * the operating system had already written out the buffer.
1000 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
1001 KKASSERT(chain->dio != NULL);
1004 hammer2_io_bqrelse(&chain->dio);
1005 hammer2_chain_unlock(chain);
1008 case HAMMER2_BREF_TYPE_INDIRECT:
1009 case HAMMER2_BREF_TYPE_FREEMAP_NODE:
1010 case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
1011 KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0);
1013 case HAMMER2_BREF_TYPE_INODE:
1014 if (chain->data->ipdata.op_flags & HAMMER2_OPFLAG_PFSROOT) {
1015 /* might not be mounted as a PFS */
1017 /* can't be mounted as a PFS */
1018 KKASSERT((chain->flags & HAMMER2_CHAIN_PFSROOT) == 0);
1020 KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0);
1023 KKASSERT(chain->flags & HAMMER2_CHAIN_EMBEDDED);
1024 panic("hammer2_flush_core: unsupported embedded bref %d",
1030 * Final cleanup after flush
1033 KKASSERT(chain->refs > 1);
1034 info->domodify = saved_domodify;
1035 info->parent = saved_parent;
1038 KKASSERT(chain->bref.mirror_tid <= chain->pmp->flush_tid);
1042 * Flush helper pass1 (recursive)
1044 * Flushes the children of the caller's chain (info->parent), restricted
1045 * by sync_tid. Set info->domodify if the child's blockref must propagate
1046 * back up to the parent.
1048 * Ripouts can move child from rbtree to dbtree or dbq but the caller's
1049 * flush scan order prevents any chains from being lost. A child can be
1050 * executes more than once (update_xlo is used to prevent infinite recursions).
1052 * WARNING! If we do not call hammer2_flush_core() we must update
1053 * bref.mirror_tid ourselves to indicate that the flush has
1054 * processed the child.
1056 * WARNING! parent->core spinlock is held on entry and return.
1058 * WARNING! Flushes do not cross PFS boundaries. Specifically, a flush must
1059 * not cross a pfs-root boundary.
1062 hammer2_flush_pass1(hammer2_chain_t *child, void *data)
1064 hammer2_flush_info_t *info = data;
1065 hammer2_trans_t *trans = info->trans;
1066 hammer2_chain_t *parent = info->parent;
1069 * Child modified in a later transactions, nothing to flush in this
1072 * Remember that modifications generally delete-duplicate so if the
1073 * sub-tree is dirty another child will get us there. But not this
1076 * (child can never be fchain or vchain so a special check isn't
1079 if (child->modify_xid > trans->sync_xid) {
1080 KKASSERT(child->delete_xid >= child->modify_xid);
1081 /*child->update_xlo = info->sync_xid;*/
1082 /* do not update mirror_tid, pass2 will ignore chain */
1087 * We must ref the child before unlocking the spinlock.
1089 * The caller has added a ref to the parent so we can temporarily
1090 * unlock it in order to lock the child.
1092 hammer2_chain_ref(child);
1093 spin_unlock(&parent->core->cst.spin);
1095 hammer2_chain_unlock(parent);
1096 hammer2_chain_lock(child, HAMMER2_RESOLVE_MAYBE);
1099 * Never recurse across a mounted PFS boundary.
1101 * Recurse and collect deferral data. We only recursively sync
1102 * (basically) if update_xlo has not been updated, indicating that
1103 * the child has not already been processed.
1105 if ((child->flags & HAMMER2_CHAIN_PFSBOUNDARY) == 0 ||
1106 child->pmp == NULL) {
1107 if ((child->flags & HAMMER2_CHAIN_MODIFIED) ||
1108 (child->update_xlo < info->sync_xid &&
1109 child->update_xlo < child->update_xhi)) {
1111 hammer2_flush_core(info, &child, 0); /* XXX deleting */
1117 * Determine if domodify should be set. Do not otherwise adjust
1118 * the child or pass2 will get confused.
1121 * - child is flagged as possibly needing block table insertion.
1122 * - child not deleted or deletion is beyond transaction id
1123 * - child created beyond parent synchronization point
1124 * - parent not deleted as-of this transaction
1126 if ((child->flags & HAMMER2_CHAIN_FLUSH_CREATE) &&
1127 child->delete_xid > trans->sync_xid &&
1128 child->modify_xid > parent->update_xlo &&
1129 parent->delete_xid > trans->sync_xid) {
1135 * - child is flagged as possibly needing block table removal.
1136 * - child deleted before or during this transaction
1137 * - child created prior or during parent synchronization point
1138 * - parent not yet synchronized to child deletion
1139 * - parent not deleted as-of this transaction
1141 if ((child->flags & HAMMER2_CHAIN_FLUSH_DELETE) &&
1142 child->delete_xid <= trans->sync_xid &&
1143 child->modify_xid <= parent->update_xlo &&
1144 child->delete_xid > parent->update_xlo &&
1145 parent->delete_xid > trans->sync_xid) {
1150 * Relock to continue the loop
1152 hammer2_chain_unlock(child);
1153 hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE);
1154 hammer2_chain_drop(child);
1155 KKASSERT(info->parent == parent);
1157 spin_lock(&parent->core->cst.spin);
1162 * PASS2 - BLOCKTABLE DELETIONS
1165 hammer2_flush_pass2(hammer2_chain_t *child, void *data)
1167 hammer2_flush_info_t *info = data;
1168 hammer2_chain_t *parent = info->parent;
1169 hammer2_mount_t *hmp = child->hmp;
1170 hammer2_trans_t *trans = info->trans;
1171 hammer2_blockref_t *base;
1175 * Prefilter - Ignore children not flagged as needing a parent
1176 * blocktable update.
1178 if ((child->flags & HAMMER2_CHAIN_FLUSH_DELETE) == 0)
1182 * Prefilter - Ignore children created after our flush_tid (not
1183 * visible to our flush).
1185 if (child->modify_xid > trans->sync_xid) {
1186 KKASSERT(child->delete_xid >= child->modify_xid);
1191 * Prefilter - Don't bother updating the blockrefs for a deleted
1192 * parent (from the flush's perspective). Otherwise,
1193 * we need to be COUNTEDBREFS synchronized for the
1194 * hammer2_base_*() functions.
1196 * NOTE: This test must match the similar one in flush_core.
1198 if (h2ignore_deleted(info, parent))
1202 * Calculate blockmap pointer
1204 switch(parent->bref.type) {
1205 case HAMMER2_BREF_TYPE_INODE:
1207 * Access the inode's block array. However, there is no
1208 * block array if the inode is flagged DIRECTDATA. The
1209 * DIRECTDATA case typicaly only occurs when a hardlink has
1210 * been shifted up the tree and the original inode gets
1211 * replaced with an OBJTYPE_HARDLINK placeholding inode.
1214 (parent->data->ipdata.op_flags &
1215 HAMMER2_OPFLAG_DIRECTDATA) == 0) {
1216 base = &parent->data->ipdata.u.blockset.blockref[0];
1220 count = HAMMER2_SET_COUNT;
1222 case HAMMER2_BREF_TYPE_INDIRECT:
1223 case HAMMER2_BREF_TYPE_FREEMAP_NODE:
1225 base = &parent->data->npdata[0];
1228 count = parent->bytes / sizeof(hammer2_blockref_t);
1230 case HAMMER2_BREF_TYPE_VOLUME:
1231 base = &hmp->voldata.sroot_blockset.blockref[0];
1232 count = HAMMER2_SET_COUNT;
1234 case HAMMER2_BREF_TYPE_FREEMAP:
1235 base = &parent->data->npdata[0];
1236 count = HAMMER2_SET_COUNT;
1241 panic("hammer2_flush_pass2: unrecognized blockref type: %d",
1247 * - child is flagged for removal
1248 * - child deleted before or during this transaction
1249 * - child created prior or during parent synchronization point
1250 * - parent not yet synchronized to child's deletion
1252 if (child->delete_xid <= trans->sync_xid &&
1253 child->modify_xid <= parent->update_xlo &&
1254 child->delete_xid > parent->update_xlo) {
1255 /* can't assert BMAPPED because state adjustment may occur
1256 * before we are done, and BMAPPED only applies to the live
1258 *KKASSERT(child->flags & HAMMER2_CHAIN_BMAPPED);*/
1260 hammer2_rollup_stats(parent, child, -1);
1261 hammer2_base_delete(trans, parent, base, count,
1262 &info->cache_index, child);
1270 * PASS3 - BLOCKTABLE INSERTIONS
1273 hammer2_flush_pass3(hammer2_chain_t *child, void *data)
1275 hammer2_flush_info_t *info = data;
1276 hammer2_chain_t *parent = info->parent;
1277 hammer2_mount_t *hmp = child->hmp;
1278 hammer2_trans_t *trans = info->trans;
1279 hammer2_blockref_t *base;
1283 * Prefilter - Ignore children not flagged as needing a parent
1284 * blocktable update.
1286 if ((child->flags & HAMMER2_CHAIN_FLUSH_CREATE) == 0)
1290 * Prefilter - Ignore children created after our flush_tid (not
1291 * visible to our flush).
1293 if (child->modify_xid > trans->sync_xid) {
1294 KKASSERT(child->delete_xid >= child->modify_xid);
1299 * Prefilter - Don't bother updating the blockrefs for a deleted
1300 * parent (from the flush's perspective). Otherwise,
1301 * we need to be COUNTEDBREFS synchronized for the
1302 * hammer2_base_*() functions.
1304 * NOTE: This test must match the similar one in flush_core.
1306 if (h2ignore_deleted(info, parent))
1310 * Calculate blockmap pointer
1312 switch(parent->bref.type) {
1313 case HAMMER2_BREF_TYPE_INODE:
1315 * Access the inode's block array. However, there is no
1316 * block array if the inode is flagged DIRECTDATA. The
1317 * DIRECTDATA case typicaly only occurs when a hardlink has
1318 * been shifted up the tree and the original inode gets
1319 * replaced with an OBJTYPE_HARDLINK placeholding inode.
1322 (parent->data->ipdata.op_flags &
1323 HAMMER2_OPFLAG_DIRECTDATA) == 0) {
1324 base = &parent->data->ipdata.u.blockset.blockref[0];
1328 count = HAMMER2_SET_COUNT;
1330 case HAMMER2_BREF_TYPE_INDIRECT:
1331 case HAMMER2_BREF_TYPE_FREEMAP_NODE:
1333 base = &parent->data->npdata[0];
1336 count = parent->bytes / sizeof(hammer2_blockref_t);
1338 case HAMMER2_BREF_TYPE_VOLUME:
1339 base = &hmp->voldata.sroot_blockset.blockref[0];
1340 count = HAMMER2_SET_COUNT;
1342 case HAMMER2_BREF_TYPE_FREEMAP:
1343 base = &parent->data->npdata[0];
1344 count = HAMMER2_SET_COUNT;
1349 panic("hammer2_flush_pass3: "
1350 "unrecognized blockref type: %d",
1356 * - child is flagged as possibly needing block table insertion.
1357 * - child not deleted or deletion is beyond transaction id
1358 * - child created beyond parent synchronization point
1360 if (child->delete_xid > trans->sync_xid &&
1361 child->modify_xid > parent->update_xlo) {
1363 hammer2_rollup_stats(parent, child, 1);
1364 hammer2_base_insert(trans, parent, base, count,
1365 &info->cache_index, child);
1373 * PASS4 - CLEANUP CHILDREN (non-recursive, but CAN be re-entrant)
1375 * Adjust queues and set or clear BMAPPED appropriately if processing
1376 * the live parent. pass4 handles deletions, pass5 handles insertions.
1377 * Separate passes are required because both deletions and insertions can
1380 * Cleanup FLUSH_CREATE/FLUSH_DELETE on the last parent.
1383 hammer2_flush_pass4(hammer2_chain_t *child, void *data)
1385 hammer2_flush_info_t *info = data;
1386 hammer2_chain_t *parent = info->parent;
1387 hammer2_chain_core_t *above = child->above;
1388 hammer2_trans_t *trans = info->trans;
1391 * Prefilter - Ignore children created after our flush_tid (not
1392 * visible to our flush).
1394 if (child->modify_xid > trans->sync_xid) {
1395 KKASSERT(child->delete_xid >= child->modify_xid);
1400 * Ref and lock child for operation, spinlock must be temporarily
1401 * Make sure child is referenced before we unlock.
1403 hammer2_chain_ref(child);
1404 spin_unlock(&above->cst.spin);
1405 hammer2_chain_lock(child, HAMMER2_RESOLVE_NEVER);
1406 KKASSERT(child->above == above);
1407 KKASSERT(parent->core == above);
1410 * Adjust BMAPPED state and rbtree/queue only when we hit the
1411 * actual live parent.
1413 if ((parent->flags & HAMMER2_CHAIN_DELETED) == 0) {
1414 spin_lock(&above->cst.spin);
1417 * Deleting from blockmap, move child out of dbtree
1418 * and clear BMAPPED. Child should not be on RBTREE.
1420 if (child->delete_xid <= trans->sync_xid &&
1421 child->modify_xid <= parent->update_xlo &&
1422 child->delete_xid > parent->update_xlo &&
1423 (child->flags & HAMMER2_CHAIN_BMAPPED)) {
1424 KKASSERT(child->flags & HAMMER2_CHAIN_ONDBTREE);
1425 RB_REMOVE(hammer2_chain_tree, &above->dbtree, child);
1426 atomic_clear_int(&child->flags, HAMMER2_CHAIN_ONDBTREE);
1427 atomic_clear_int(&child->flags, HAMMER2_CHAIN_BMAPPED);
1431 * Not on any list, place child on DBQ
1433 if ((child->flags & (HAMMER2_CHAIN_ONRBTREE |
1434 HAMMER2_CHAIN_ONDBTREE |
1435 HAMMER2_CHAIN_ONDBQ)) == 0) {
1436 KKASSERT((child->flags & HAMMER2_CHAIN_BMAPPED) == 0);
1437 TAILQ_INSERT_TAIL(&above->dbq, child, db_entry);
1438 atomic_set_int(&child->flags, HAMMER2_CHAIN_ONDBQ);
1440 spin_unlock(&above->cst.spin);
1444 * Unlock the child. This can wind up dropping the child's
1445 * last ref, removing it from the parent's RB tree, and deallocating
1446 * the structure. The RB_SCAN() our caller is doing handles the
1449 hammer2_chain_unlock(child);
1450 hammer2_chain_drop(child);
1451 spin_lock(&above->cst.spin);
1454 * The parent may have been delete-duplicated.
1460 hammer2_flush_pass5(hammer2_chain_t *child, void *data)
1462 hammer2_flush_info_t *info = data;
1463 hammer2_chain_t *parent = info->parent;
1464 hammer2_chain_t *xchain;
1465 hammer2_chain_core_t *above = child->above;
1466 hammer2_trans_t *trans = info->trans;
1469 * Prefilter - Ignore children created after our flush_tid (not
1470 * visible to our flush).
1472 if (child->modify_xid > trans->sync_xid) {
1473 KKASSERT(child->delete_xid >= child->modify_xid);
1478 * Ref and lock child for operation, spinlock must be temporarily
1479 * Make sure child is referenced before we unlock.
1481 hammer2_chain_ref(child);
1482 spin_unlock(&above->cst.spin);
1483 hammer2_chain_lock(child, HAMMER2_RESOLVE_NEVER);
1484 KKASSERT(child->above == above);
1485 KKASSERT(parent->core == above);
1488 * Adjust BMAPPED state and rbtree/queue only when we hit the
1489 * actual live parent.
1491 if ((parent->flags & HAMMER2_CHAIN_DELETED) == 0) {
1492 spin_lock(&above->cst.spin);
1495 * Inserting into blockmap, place child in rbtree or dbtree.
1497 if (child->delete_xid > trans->sync_xid &&
1498 child->modify_xid > parent->update_xlo &&
1499 (child->flags & HAMMER2_CHAIN_BMAPPED) == 0) {
1500 if (child->flags & HAMMER2_CHAIN_ONDBQ) {
1501 TAILQ_REMOVE(&above->dbq, child, db_entry);
1502 atomic_clear_int(&child->flags,
1503 HAMMER2_CHAIN_ONDBQ);
1505 if ((child->flags & HAMMER2_CHAIN_DELETED) == 0 &&
1506 (child->flags & HAMMER2_CHAIN_ONRBTREE) == 0) {
1507 KKASSERT((child->flags &
1508 (HAMMER2_CHAIN_ONDBTREE |
1509 HAMMER2_CHAIN_ONDBQ)) == 0);
1510 xchain = RB_INSERT(hammer2_chain_tree,
1511 &above->rbtree, child);
1512 KKASSERT(xchain == NULL);
1513 atomic_set_int(&child->flags,
1514 HAMMER2_CHAIN_ONRBTREE);
1516 if ((child->flags & HAMMER2_CHAIN_DELETED) &&
1517 (child->flags & HAMMER2_CHAIN_ONDBTREE) == 0) {
1518 KKASSERT((child->flags &
1519 (HAMMER2_CHAIN_ONRBTREE |
1520 HAMMER2_CHAIN_ONDBQ)) == 0);
1521 xchain = RB_INSERT(hammer2_chain_tree,
1522 &above->dbtree, child);
1523 KKASSERT(xchain == NULL);
1524 atomic_set_int(&child->flags,
1525 HAMMER2_CHAIN_ONDBTREE);
1527 atomic_set_int(&child->flags, HAMMER2_CHAIN_BMAPPED);
1528 KKASSERT(child->flags &
1529 (HAMMER2_CHAIN_ONRBTREE |
1530 HAMMER2_CHAIN_ONDBTREE |
1531 HAMMER2_CHAIN_ONDBQ));
1535 * Not on any list, place child on DBQ
1537 if ((child->flags & (HAMMER2_CHAIN_ONRBTREE |
1538 HAMMER2_CHAIN_ONDBTREE |
1539 HAMMER2_CHAIN_ONDBQ)) == 0) {
1540 KKASSERT((child->flags & HAMMER2_CHAIN_BMAPPED) == 0);
1541 TAILQ_INSERT_TAIL(&above->dbq, child, db_entry);
1542 atomic_set_int(&child->flags, HAMMER2_CHAIN_ONDBQ);
1544 spin_unlock(&above->cst.spin);
1548 * Cleanup flags on last parent iterated for flush.
1550 if (info->domodify & 2) {
1551 if (child->flags & HAMMER2_CHAIN_FLUSH_CREATE) {
1552 atomic_clear_int(&child->flags,
1553 HAMMER2_CHAIN_FLUSH_CREATE);
1554 hammer2_chain_drop(child);
1556 if ((child->flags & HAMMER2_CHAIN_FLUSH_DELETE) &&
1557 child->delete_xid <= trans->sync_xid) {
1558 KKASSERT((parent->flags & HAMMER2_CHAIN_DELETED) ||
1559 (child->flags & HAMMER2_CHAIN_ONDBTREE) == 0);
1560 /* XXX delete-duplicate chain insertion mech wrong */
1561 KKASSERT((parent->flags & HAMMER2_CHAIN_DELETED) ||
1562 (child->flags & HAMMER2_CHAIN_BMAPPED) == 0);
1563 atomic_clear_int(&child->flags,
1564 HAMMER2_CHAIN_FLUSH_DELETE);
1565 hammer2_chain_drop(child);
1570 * Unlock the child. This can wind up dropping the child's
1571 * last ref, removing it from the parent's RB tree, and deallocating
1572 * the structure. The RB_SCAN() our caller is doing handles the
1575 hammer2_chain_unlock(child);
1576 hammer2_chain_drop(child);
1577 spin_lock(&above->cst.spin);
1580 * The parent may have been delete-duplicated.
1586 hammer2_rollup_stats(hammer2_chain_t *parent, hammer2_chain_t *child, int how)
1589 hammer2_chain_t *grandp;
1592 parent->data_count += child->data_count;
1593 parent->inode_count += child->inode_count;
1594 child->data_count = 0;
1595 child->inode_count = 0;
1597 parent->data_count -= child->bytes;
1598 if (child->bref.type == HAMMER2_BREF_TYPE_INODE) {
1599 parent->inode_count -= 1;
1601 /* XXX child->data may be NULL atm */
1602 parent->data_count -= child->data->ipdata.data_count;
1603 parent->inode_count -= child->data->ipdata.inode_count;
1606 } else if (how > 0) {
1607 parent->data_count += child->bytes;
1608 if (child->bref.type == HAMMER2_BREF_TYPE_INODE) {
1609 parent->inode_count += 1;
1611 /* XXX child->data may be NULL atm */
1612 parent->data_count += child->data->ipdata.data_count;
1613 parent->inode_count += child->data->ipdata.inode_count;
1617 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) {
1618 parent->data->ipdata.data_count += parent->data_count;
1619 parent->data->ipdata.inode_count += parent->inode_count;
1621 for (grandp = parent->above->first_parent;
1623 grandp = grandp->next_parent) {
1624 grandp->data_count += parent->data_count;
1625 grandp->inode_count += parent->inode_count;
1628 parent->data_count = 0;
1629 parent->inode_count = 0;