2 * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * TRANSACTION AND FLUSH HANDLING
39 * Deceptively simple but actually fairly difficult to implement properly is
40 * how I would describe it.
42 * The biggest problem is that each PFS may belong to a cluster so its
43 * media modify_tid and mirror_tid fields are in a completely different
44 * domain than the topology related to the super-root. Most of the code
45 * operates using modify_xid and delete_xid which are local identifiers.
47 * The second biggest problem is that we really want to allow flushes to run
48 * concurrently with new front-end operations, which means that the in-memory
49 * topology of hammer2_chain structures can represent both current state and
50 * snapshot-for-flush state.
53 #include <sys/cdefs.h>
54 #include <sys/param.h>
55 #include <sys/systm.h>
56 #include <sys/types.h>
65 * Recursively flush the specified chain. The chain is locked and
66 * referenced by the caller and will remain so on return. The chain
67 * will remain referenced throughout but can temporarily lose its
68 * lock during the recursion to avoid unnecessarily stalling user
71 struct hammer2_flush_info {
72 hammer2_chain_t *parent;
73 hammer2_trans_t *trans;
78 struct h2_flush_deferral_list flush_list;
79 hammer2_xid_t sync_xid; /* memory synchronization point */
82 typedef struct hammer2_flush_info hammer2_flush_info_t;
84 static void hammer2_flush_core(hammer2_flush_info_t *info,
85 hammer2_chain_t **chainp, int deleting);
86 static int hammer2_flush_pass1(hammer2_chain_t *child, void *data);
87 static int hammer2_flush_pass2(hammer2_chain_t *child, void *data);
88 static int hammer2_flush_pass3(hammer2_chain_t *child, void *data);
89 static int hammer2_flush_pass4(hammer2_chain_t *child, void *data);
90 static int hammer2_flush_pass5(hammer2_chain_t *child, void *data);
91 static void hammer2_rollup_stats(hammer2_chain_t *parent,
92 hammer2_chain_t *child, int how);
96 * Can we ignore a chain for the purposes of flushing modifications
99 * This code is now degenerate. We used to have to distinguish between
100 * deleted chains and deleted chains associated with inodes that were
101 * still open. This mechanic has been fixed so the function is now
106 h2ignore_deleted(hammer2_flush_info_t *info, hammer2_chain_t *chain)
108 return (chain->delete_xid <= info->sync_xid);
114 hammer2_updatestats(hammer2_flush_info_t *info, hammer2_blockref_t *bref,
119 if (bref->type != 0) {
120 bytes = 1 << (bref->data_off & HAMMER2_OFF_MASK_RADIX);
121 if (bref->type == HAMMER2_BREF_TYPE_INODE)
122 info->inode_count += how;
124 info->data_count -= bytes;
126 info->data_count += bytes;
132 * For now use a global transaction manager. What we ultimately want to do
133 * is give each non-overlapping hmp/pmp group its own transaction manager.
135 * Transactions govern XID tracking on the physical media (the hmp), but they
136 * also govern TID tracking which is per-PFS and thus might cross multiple
137 * hmp's. So we can't just stuff tmanage into hammer2_mount or
140 static hammer2_trans_manage_t tmanage;
143 hammer2_trans_manage_init(void)
145 lockinit(&tmanage.translk, "h2trans", 0, 0);
146 TAILQ_INIT(&tmanage.transq);
147 tmanage.flush_xid = 1;
148 tmanage.alloc_xid = tmanage.flush_xid + 1;
152 hammer2_trans_newxid(hammer2_pfsmount_t *pmp __unused)
157 xid = atomic_fetchadd_int(&tmanage.alloc_xid, 1);
165 * Transaction support functions for writing to the filesystem.
167 * Initializing a new transaction allocates a transaction ID. Typically
168 * passed a pmp (hmp passed as NULL), indicating a cluster transaction. Can
169 * be passed a NULL pmp and non-NULL hmp to indicate a transaction on a single
170 * media target. The latter mode is used by the recovery code.
172 * TWO TRANSACTION IDs can run concurrently, where one is a flush and the
173 * other is a set of any number of concurrent filesystem operations. We
174 * can either have <running_fs_ops> + <waiting_flush> + <blocked_fs_ops>
175 * or we can have <running_flush> + <concurrent_fs_ops>.
177 * During a flush, new fs_ops are only blocked until the fs_ops prior to
178 * the flush complete. The new fs_ops can then run concurrent with the flush.
180 * Buffer-cache transactions operate as fs_ops but never block. A
181 * buffer-cache flush will run either before or after the current pending
182 * flush depending on its state.
185 hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp, int flags)
187 hammer2_trans_manage_t *tman;
188 hammer2_trans_t *head;
192 bzero(trans, sizeof(*trans));
194 trans->flags = flags;
195 trans->td = curthread;
197 lockmgr(&tman->translk, LK_EXCLUSIVE);
199 if (flags & HAMMER2_TRANS_ISFLUSH) {
201 * If multiple flushes are trying to run we have to
202 * wait until it is our turn. All flushes are serialized.
204 * We queue ourselves and then wait to become the head
205 * of the queue, allowing all prior flushes to complete.
207 * Multiple normal transactions can share the current
208 * transaction id but a flush transaction needs its own
209 * unique TID for proper block table update accounting.
213 pmp->flush_tid = pmp->alloc_tid;
214 tman->flush_xid = hammer2_trans_newxid(pmp);
215 trans->sync_xid = tman->flush_xid;
217 TAILQ_INSERT_TAIL(&tman->transq, trans, entry);
218 if (TAILQ_FIRST(&tman->transq) != trans) {
220 while (trans->blocked) {
221 lksleep(&trans->sync_xid, &tman->translk,
225 } else if (tman->flushcnt == 0) {
227 * No flushes are pending, we can go. Use prior flush_xid + 1.
229 * WARNING! Also see hammer2_chain_setsubmod()
231 TAILQ_INSERT_TAIL(&tman->transq, trans, entry);
232 trans->sync_xid = tman->flush_xid + 1;
234 /* XXX improve/optimize inode allocation */
235 } else if (trans->flags & HAMMER2_TRANS_BUFCACHE) {
237 * A buffer cache transaction is requested while a flush
238 * is in progress. The flush's PREFLUSH flag must be set
241 * The buffer cache flush takes on the main flush's
244 TAILQ_FOREACH(head, &tman->transq, entry) {
245 if (head->flags & HAMMER2_TRANS_ISFLUSH)
249 KKASSERT(head->flags & HAMMER2_TRANS_PREFLUSH);
250 trans->flags |= HAMMER2_TRANS_PREFLUSH;
251 TAILQ_INSERT_AFTER(&tman->transq, head, trans, entry);
252 trans->sync_xid = head->sync_xid;
253 trans->flags |= HAMMER2_TRANS_CONCURRENT;
254 /* not allowed to block */
257 * A normal transaction is requested while a flush is in
258 * progress. We insert after the current flush and may
261 * WARNING! Also see hammer2_chain_setsubmod()
263 TAILQ_FOREACH(head, &tman->transq, entry) {
264 if (head->flags & HAMMER2_TRANS_ISFLUSH)
268 TAILQ_INSERT_AFTER(&tman->transq, head, trans, entry);
269 trans->sync_xid = head->sync_xid + 1;
270 trans->flags |= HAMMER2_TRANS_CONCURRENT;
273 * XXX for now we must block new transactions, synchronous
274 * flush mode is on by default.
276 * If synchronous flush mode is enabled concurrent
277 * frontend transactions during the flush are not
278 * allowed (except we don't have a choice for buffer
281 if (hammer2_synchronous_flush > 0 ||
282 TAILQ_FIRST(&tman->transq) != head) {
284 while (trans->blocked) {
285 lksleep(&trans->sync_xid,
291 if (flags & HAMMER2_TRANS_NEWINODE) {
294 * Super-root transaction, all new inodes have an
295 * inode number of 1. Normal pfs inode cache
296 * semantics are not used.
298 trans->inode_tid = 1;
303 if (pmp->inode_tid < HAMMER2_INODE_START)
304 pmp->inode_tid = HAMMER2_INODE_START;
305 trans->inode_tid = pmp->inode_tid++;
309 lockmgr(&tman->translk, LK_RELEASE);
313 * This may only be called while in a flush transaction. It's a bit of a
314 * hack but after flushing a PFS we need to flush each volume root as part
315 * of the same transaction.
318 hammer2_trans_spmp(hammer2_trans_t *trans, hammer2_pfsmount_t *spmp)
321 spmp->flush_tid = spmp->alloc_tid;
328 hammer2_trans_done(hammer2_trans_t *trans)
330 hammer2_trans_manage_t *tman;
331 hammer2_trans_t *head;
332 hammer2_trans_t *scan;
339 lockmgr(&tman->translk, LK_EXCLUSIVE);
340 TAILQ_REMOVE(&tman->transq, trans, entry);
341 head = TAILQ_FIRST(&tman->transq);
344 * Adjust flushcnt if this was a flush, clear TRANS_CONCURRENT
345 * up through the next flush. (If the head is a flush then we
346 * stop there, unlike the unblock code following this section).
348 if (trans->flags & HAMMER2_TRANS_ISFLUSH) {
351 while (scan && (scan->flags & HAMMER2_TRANS_ISFLUSH) == 0) {
352 atomic_clear_int(&scan->flags,
353 HAMMER2_TRANS_CONCURRENT);
354 scan = TAILQ_NEXT(scan, entry);
359 * Unblock the head of the queue and any additional transactions
360 * up to the next flush. The head can be a flush and it will be
361 * unblocked along with the non-flush transactions following it
362 * (which are allowed to run concurrently with it).
364 * In synchronous flush mode we stop if the head transaction is
367 if (head && head->blocked) {
369 wakeup(&head->sync_xid);
371 if (hammer2_synchronous_flush > 0)
374 scan = TAILQ_NEXT(head, entry);
375 while (scan && (scan->flags & HAMMER2_TRANS_ISFLUSH) == 0) {
378 wakeup(&scan->sync_xid);
380 scan = TAILQ_NEXT(scan, entry);
383 lockmgr(&tman->translk, LK_RELEASE);
387 * Flush the chain and all modified sub-chains through the specified
388 * synchronization point, propagating parent chain modifications and
389 * mirror_tid updates back up as needed. Since we are recursing downward
390 * we do not have to deal with the complexities of multi-homed chains (chains
391 * with multiple parents).
393 * Caller must have interlocked against any non-flush-related modifying
394 * operations in progress whos modify_xid values are less than or equal
395 * to the passed sync_xid.
397 * Caller must have already vetted synchronization points to ensure they
398 * are properly flushed. Only snapshots and cluster flushes can create
399 * these sorts of synchronization points.
401 * This routine can be called from several places but the most important
404 * chain is locked on call and will remain locked on return. If a flush
405 * occured, the chain's FLUSH_CREATE and/or FLUSH_DELETE bit will be set
406 * indicating that its parent (which is not part of the flush) should be
407 * updated. The chain may be replaced by the call if it was modified.
410 hammer2_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp)
412 hammer2_chain_t *chain = *chainp;
413 hammer2_chain_t *scan;
414 hammer2_chain_core_t *core;
415 hammer2_flush_info_t info;
419 * Execute the recursive flush and handle deferrals.
421 * Chains can be ridiculously long (thousands deep), so to
422 * avoid blowing out the kernel stack the recursive flush has a
423 * depth limit. Elements at the limit are placed on a list
424 * for re-execution after the stack has been popped.
426 bzero(&info, sizeof(info));
427 TAILQ_INIT(&info.flush_list);
429 info.sync_xid = trans->sync_xid;
430 info.cache_index = -1;
435 * Extra ref needed because flush_core expects it when replacing
438 hammer2_chain_ref(chain);
443 * Unwind deep recursions which had been deferred. This
444 * can leave the FLUSH_* bits set for these chains, which
445 * will be handled when we [re]flush chain after the unwind.
447 while ((scan = TAILQ_FIRST(&info.flush_list)) != NULL) {
448 KKASSERT(scan->flags & HAMMER2_CHAIN_DEFERRED);
449 TAILQ_REMOVE(&info.flush_list, scan, flush_node);
450 atomic_clear_int(&scan->flags, HAMMER2_CHAIN_DEFERRED);
453 * Now that we've popped back up we can do a secondary
454 * recursion on the deferred elements.
456 * NOTE: hammer2_flush() may replace scan.
458 if (hammer2_debug & 0x0040)
459 kprintf("deferred flush %p\n", scan);
460 hammer2_chain_lock(scan, HAMMER2_RESOLVE_MAYBE);
461 hammer2_chain_drop(scan); /* ref from deferral */
462 hammer2_flush(trans, &scan);
463 hammer2_chain_unlock(scan);
469 info.diddeferral = 0;
470 hammer2_flush_core(&info, &chain, 0);
473 * Only loop if deep recursions have been deferred.
475 if (TAILQ_EMPTY(&info.flush_list))
478 if (++loops % 1000 == 0) {
479 kprintf("hammer2_flush: excessive loops on %p\n",
481 if (hammer2_debug & 0x100000)
485 hammer2_chain_drop(chain);
490 * This is the core of the chain flushing code. The chain is locked by the
491 * caller and must also have an extra ref on it by the caller, and remains
492 * locked and will have an extra ref on return. Upon return, the caller can
493 * test the FLUSH_CREATE and FLUSH_DELETE bits to determine what action must
494 * be taken on the parent.
496 * (1) Determine if this node is a candidate for the flush, return if it is
497 * not. fchain and vchain are always candidates for the flush.
499 * (2) If we recurse too deep the chain is entered onto the deferral list and
500 * the current flush stack is aborted until after the deferral list is
503 * (3) Recursively flush live children (rbtree). This can create deferrals.
504 * A successful flush clears the MODIFIED bit in the children.
506 * (4) Recursively flush deleted children (dbtree). Deletions may be
507 * considered 'live' if the delete_tid is beyond the flush_tid. If
508 * considered 'dead' the recursion is still needed in order to clean
509 * up the chain. This can create deferrals.
511 * A successful flush clears the MODIFIED bit in the children.
513 * (5) Calculate block table updates on chain based on the children scans
514 * in (3) and (4) by testing the FLUSH_CREATE and FLUSH_DELETE bits,
515 * modifying chain if necessary to perform the block table updates.
516 * Deletions must be removed from dbtree when removed from the
517 * chain's block table.
519 * If 'chain' itself is marked DELETED but treated as live, the block
520 * table update(s) must be propagated to all contemporary chains. In
521 * fact, all contemporary chains must be locked and updated uninterrupted
522 * to avoid lookup races. Once MODIFIED and FLUSH_CREATE is cleared,
523 * a chain can be unloaded from memory with the expectation that it can
524 * be reloaded later via the block table at any time.
526 * WARNING ON BREF MODIFY_TID/MIRROR_TID
528 * blockref.modify_tid and blockref.mirror_tid are consistent only within a
529 * PFS. This is why we cannot cache sync_tid in the transaction structure.
530 * Instead we access it from the pmp.
533 hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t **chainp,
536 hammer2_chain_t *chain = *chainp;
537 hammer2_chain_t *saved_parent;
538 hammer2_mount_t *hmp;
539 hammer2_pfsmount_t *pmp;
540 hammer2_chain_core_t *core;
547 diddeferral = info->diddeferral;
550 * (1) Check if we even have any work to do.
552 * This bit of code is capable of short-cutting entire sub-trees
553 * if they have not been touched or if they have already been
556 if (/*(chain->flags & HAMMER2_CHAIN_MODIFIED) == 0 &&*/
557 (chain->update_xlo >= info->sync_xid || /* already synced */
558 chain->update_xlo >= chain->update_xhi)) { /* old/unchanged */
559 /* update_xlo/_xhi already filters chain out, do not update */
560 /* don't update bref.mirror_tid, pass2 is not called */
565 * mirror_tid should not be forward-indexed
567 KKASSERT(chain->bref.mirror_tid <= pmp->flush_tid);
570 * Ignore chains modified beyond the current flush point. These
571 * will be treated as if they did not exist. Subchains with lower
572 * modify_xid's will still be accessible via other parents.
574 * Do not update bref.mirror_tid here, it will interfere with
575 * synchronization. e.g. inode flush tid 1, concurrent D-D tid 2,
576 * then later on inode flush tid 2. If we were to set mirror_tid
577 * to 1 during inode flush tid 1 the blockrefs would only be partially
578 * updated (and likely panic).
580 * We must update chain->update_xlo here to prevent re-entry in this
583 * (vchain and fchain are exceptions since they cannot be duplicated)
585 if (chain->modify_xid > info->sync_xid &&
586 chain != &hmp->fchain && chain != &hmp->vchain) {
587 /* do not update bref.mirror_tid, pass2 ignores chain */
588 /* chain->update_xlo = info->sync_xid; */
593 * (2) Recurse downward and check recursion depth.
594 * (3) Flush live children
595 * (4) Flush deleted children
597 * We adjust update_xlo if not deferring chain to prevent re-entry
598 * in this flush cycle, but it must be set AFTER the flush in case
599 * a deeper flush hits the chain. Otherwise the deeper flush cannot
600 * complete. We re-check the condition after finishing the flushes.
602 * update_xhi was already checked and prevents initial recursions on
603 * subtrees which have not been modified.
605 saved_parent = info->parent;
606 saved_domodify = info->domodify;
607 info->parent = chain;
610 if (chain->flags & HAMMER2_CHAIN_DEFERRED) {
612 } else if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT) {
613 if ((chain->flags & HAMMER2_CHAIN_DEFERRED) == 0) {
614 hammer2_chain_ref(chain);
615 TAILQ_INSERT_TAIL(&info->flush_list,
617 atomic_set_int(&chain->flags,
618 HAMMER2_CHAIN_DEFERRED);
622 hammer2_chain_t *scan;
625 * The flush is queue-agnostic when running pass1, but order
626 * is important to catch any races where an existing
627 * flush-visible child is moved from rbtree->dbtree/dbq.
629 * New children added by concurrent operations are not visible
630 * to the flush anyway so we don't care about those races.
631 * However, the flush itself can move a child from dbq to
632 * dbtree (rare in pass1 but it is possible).
634 * pass1 can handle re-execution of a child.
636 spin_lock(&core->cst.spin);
637 KKASSERT(core->good == 0x1234 && core->sharecnt > 0);
638 RB_SCAN(hammer2_chain_tree, &core->rbtree,
639 NULL, hammer2_flush_pass1, info);
640 RB_SCAN(hammer2_chain_tree, &core->dbtree,
641 NULL, hammer2_flush_pass1, info);
642 scan = TAILQ_FIRST(&core->dbq);
644 KKASSERT(scan->flags & HAMMER2_CHAIN_ONDBQ);
645 hammer2_flush_pass1(scan, info);
646 if (scan->flags & HAMMER2_CHAIN_ONDBQ)
647 scan = TAILQ_NEXT(scan, db_entry);
649 scan = TAILQ_FIRST(&core->dbq);
651 spin_unlock(&core->cst.spin);
655 * Stop if deferred, do not update update_xlo.
657 if (info->diddeferral) {
662 * If a block table update is needed place the parent in a modified
663 * state, which might delete-duplicate it.
665 * - To prevent loops and other confusion, we synchronize update_xlo
666 * for the original chain.
668 * - The original parent will not be used by the flush so we can
669 * clear its MODIFIED bit.
671 if (info->domodify) {
672 hammer2_chain_modify(info->trans, &info->parent, 0);
673 if (info->parent != chain) {
678 * NOTE: bref.mirror_tid cannot be updated
679 * unless MODIFIED is cleared or already
682 chain->inode_reason += 0x10000000;
683 info->parent->inode_reason += 0x100;
684 KKASSERT(info->parent->core == chain->core);
685 if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
686 atomic_clear_int(&chain->flags,
687 HAMMER2_CHAIN_MODIFIED);
688 hammer2_pfs_memory_wakeup(pmp);
689 hammer2_chain_drop(chain);
692 if (chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) {
693 atomic_clear_int(&chain->flags,
694 HAMMER2_CHAIN_FLUSH_CREATE);
695 hammer2_chain_drop(chain);
697 if (info->parent->flags & HAMMER2_CHAIN_FLUSH_DELETE) {
698 atomic_clear_int(&info->parent->flags,
699 HAMMER2_CHAIN_FLUSH_DELETE);
700 hammer2_chain_drop(info->parent);
703 if (chain->update_xlo < info->sync_xid)
704 chain->update_xlo = info->sync_xid;
705 KKASSERT(info->parent->update_xlo < info->sync_xid);
706 hammer2_chain_drop(chain);
707 hammer2_chain_ref(info->parent);
709 chain = info->parent;
713 * If a blocktable update is needed determine if this is the last
714 * parent requiring modification (check all parents using the core).
716 * Set bit 1 (0x02) of domodify if this is the last parent,
717 * which will cause scan2 to clear FLUSH_CREATE and FLUSH_DELETE.
720 hammer2_chain_t *scan;
722 spin_lock(&core->cst.spin);
723 TAILQ_FOREACH(scan, &core->ownerq, core_entry) {
725 * Ignore the current parent being processed (we do
726 * not adjust update_xlo until after the fixup).
732 * Ignore chains which have already been updated
733 * Ignore unmodified chains (lo >= hi).
735 if ((scan->flags & HAMMER2_CHAIN_MODIFIED) == 0 &&
736 (scan->update_xlo >= info->sync_xid ||
737 scan->update_xlo >= scan->update_xhi)) {
742 * Cannot exhaust all parents if one is not visible
743 * to the flush. The root chains are special-cased
744 * because they cannot really be delete-duplicated.
746 if (scan != &scan->hmp->fchain &&
747 scan != &scan->hmp->vchain &&
748 scan->modify_xid > info->sync_xid) {
753 * Fail if update_xlo has not been synchronized to
754 * at least our sync_xid on any modified parent chain.
756 if (scan->update_xlo < info->sync_xid)
759 spin_unlock(&core->cst.spin);
765 * (5) Calculate block table updates or child cleanups.
766 * (this whole operation has to be atomic)
768 * domodify 0x01 - block table updates
769 * 0x02 - child cleanups
771 * pass2 - Process deletions from dbtree and dbq.
772 * pass3 - Process insertions from rbtree, dbtree, and dbq.
773 * pass4 - Cleanup child flags on the last parent and
774 * Adjust queues on the live parent (deletions).
775 * pass5 - Cleanup child flags on the last parent and
776 * Adjust queues on the live parent (insertions).
778 * Queue adjustments had to be separated into deletions and
779 * insertions because both can occur on dbtree.
781 if (info->domodify) {
782 hammer2_chain_t *scan;
784 spin_lock(&core->cst.spin);
786 while ((info->domodify & 1) && info->parent) {
787 /* PASS2 - Deletions */
788 RB_SCAN(hammer2_chain_tree, &core->rbtree,
789 NULL, hammer2_flush_pass2, info);
790 RB_SCAN(hammer2_chain_tree, &core->dbtree,
791 NULL, hammer2_flush_pass2, info);
792 scan = TAILQ_FIRST(&core->dbq);
793 TAILQ_FOREACH(scan, &core->dbq, db_entry) {
794 KKASSERT(scan->flags & HAMMER2_CHAIN_ONDBQ);
795 hammer2_flush_pass2(scan, info);
798 /* PASS3 - Insertions */
799 RB_SCAN(hammer2_chain_tree, &core->rbtree,
800 NULL, hammer2_flush_pass3, info);
801 RB_SCAN(hammer2_chain_tree, &core->dbtree,
802 NULL, hammer2_flush_pass3, info);
803 TAILQ_FOREACH(scan, &core->dbq, db_entry) {
804 KKASSERT(scan->flags & HAMMER2_CHAIN_ONDBQ);
805 hammer2_flush_pass3(scan, info);
807 info->parent = TAILQ_NEXT(info->parent, core_entry);
809 kprintf("FLUSH SPECIAL UPDATE (%p) %p.%d %08x\n",
811 info->parent->bref.type,
812 info->parent->flags);
814 info->parent = chain;
816 /* PASS4 - Cleanup */
817 RB_SCAN(hammer2_chain_tree, &core->rbtree,
818 NULL, hammer2_flush_pass4, info);
819 scan = TAILQ_FIRST(&core->dbq);
821 KKASSERT(scan->flags & HAMMER2_CHAIN_ONDBQ);
822 hammer2_flush_pass4(scan, info);
823 if (scan->flags & HAMMER2_CHAIN_ONDBQ)
824 scan = TAILQ_NEXT(scan, db_entry);
826 scan = TAILQ_FIRST(&core->dbq);
828 RB_SCAN(hammer2_chain_tree, &core->dbtree,
829 NULL, hammer2_flush_pass4, info);
831 /* PASS5 - Cleanup */
832 RB_SCAN(hammer2_chain_tree, &core->rbtree,
833 NULL, hammer2_flush_pass5, info);
834 scan = TAILQ_FIRST(&core->dbq);
836 KKASSERT(scan->flags & HAMMER2_CHAIN_ONDBQ);
837 hammer2_flush_pass5(scan, info);
838 if (scan->flags & HAMMER2_CHAIN_ONDBQ)
839 scan = TAILQ_NEXT(scan, db_entry);
841 scan = TAILQ_FIRST(&core->dbq);
843 RB_SCAN(hammer2_chain_tree, &core->dbtree,
844 NULL, hammer2_flush_pass5, info);
846 spin_unlock(&core->cst.spin);
850 * Synchronize update_xlo to prevent reentrant block updates of this
853 chain->update_xlo = info->sync_xid;
856 * Skip the flush if the chain was not placed in a modified state
857 * or was not already in a modified state.
859 if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0)
863 * FLUSH THE CHAIN (on the way back up the recursion)
865 * Chain is now deterministically being flushed and not being deferred.
866 * We've finished running the recursion and the blockref update.
868 * update bref.mirror_tid. update_xlo has already been updated.
870 chain->bref.mirror_tid = pmp->flush_tid;
873 * Dispose of the modified bit. FLUSH_CREATE should already be
876 KKASSERT((chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) ||
877 chain == &hmp->vchain);
878 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
879 hammer2_pfs_memory_wakeup(pmp);
881 if ((chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) ||
882 chain == &hmp->vchain ||
883 chain == &hmp->fchain) {
885 * Drop the ref from the MODIFIED bit we cleared,
888 hammer2_chain_drop(chain);
891 * Drop the ref from the MODIFIED bit we cleared and
892 * set a ref for the FLUSH_CREATE bit we are setting.
895 atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSH_CREATE);
899 * Skip the actual flush operation if the chain has been deleted
900 * in our flus hview. There will be no block table entry that
903 if (h2ignore_deleted(info, chain))
909 * A DELETED node that reaches this point must be flushed for
910 * synchronization point consistency.
912 * Update bref.mirror_tid, clear MODIFIED, and set MOVED.
914 * The caller will update the parent's reference to this chain
915 * by testing MOVED as long as the modification was in-bounds.
917 * MOVED is never set on the volume root as there is no parent
920 if (hammer2_debug & 0x1000) {
921 kprintf("Flush %p.%d %016jx/%d sync_xid=%08x data=%016jx\n",
922 chain, chain->bref.type,
923 chain->bref.key, chain->bref.keybits,
924 info->sync_xid, chain->bref.data_off);
926 if (hammer2_debug & 0x2000) {
927 Debugger("Flush hell");
931 * If this is part of a recursive flush we can go ahead and write
932 * out the buffer cache buffer and pass a new bref back up the chain
935 * Volume headers are NOT flushed here as they require special
938 switch(chain->bref.type) {
939 case HAMMER2_BREF_TYPE_FREEMAP:
940 KKASSERT(hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED);
941 hmp->voldata.freemap_tid = hmp->fchain.bref.mirror_tid;
943 case HAMMER2_BREF_TYPE_VOLUME:
945 * The free block table is flushed by hammer2_vfs_sync()
946 * before it flushes vchain. We must still hold fchain
947 * locked while copying voldata to volsync, however.
949 hammer2_voldata_lock(hmp);
950 hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS);
952 if ((hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) ||
953 hmp->voldata.freemap_tid < info->trans->sync_tid) {
954 /* this will modify vchain as a side effect */
955 hammer2_chain_t *tmp = &hmp->fchain;
956 hammer2_chain_flush(info->trans, &tmp);
957 KKASSERT(tmp == &hmp->fchain);
962 * There is no parent to our root vchain and fchain to
963 * synchronize the bref to, their updated mirror_tid's
964 * must be synchronized to the volume header.
966 hmp->voldata.mirror_tid = chain->bref.mirror_tid;
967 hmp->voldata.freemap_tid = hmp->fchain.bref.mirror_tid;
968 kprintf("mirror_tid %08jx\n", (intmax_t)chain->bref.mirror_tid);
971 * The volume header is flushed manually by the syncer, not
972 * here. All we do here is adjust the crc's.
974 KKASSERT(chain->data != NULL);
975 KKASSERT(chain->dio == NULL);
977 hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]=
979 (char *)&hmp->voldata +
980 HAMMER2_VOLUME_ICRC1_OFF,
981 HAMMER2_VOLUME_ICRC1_SIZE);
982 hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]=
984 (char *)&hmp->voldata +
985 HAMMER2_VOLUME_ICRC0_OFF,
986 HAMMER2_VOLUME_ICRC0_SIZE);
987 hmp->voldata.icrc_volheader =
989 (char *)&hmp->voldata +
990 HAMMER2_VOLUME_ICRCVH_OFF,
991 HAMMER2_VOLUME_ICRCVH_SIZE);
992 hmp->volsync = hmp->voldata;
993 atomic_set_int(&chain->flags, HAMMER2_CHAIN_VOLUMESYNC);
994 hammer2_chain_unlock(&hmp->fchain);
995 hammer2_voldata_unlock(hmp);
997 case HAMMER2_BREF_TYPE_DATA:
999 * Data elements have already been flushed via the logical
1000 * file buffer cache. Their hash was set in the bref by
1001 * the vop_write code.
1003 * Make sure any device buffer(s) have been flushed out here.
1004 * (there aren't usually any to flush).
1008 case HAMMER2_BREF_TYPE_INDIRECT:
1010 * Indirect blocks may be in an INITIAL state. Use the
1011 * chain_lock() call to ensure that the buffer has been
1012 * instantiated (even though it is already locked the buffer
1013 * might not have been instantiated).
1015 * Only write the buffer out if it is dirty, it is possible
1016 * the operating system had already written out the buffer.
1018 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
1019 KKASSERT(chain->dio != NULL);
1022 hammer2_io_bqrelse(&chain->dio);
1023 hammer2_chain_unlock(chain);
1026 case HAMMER2_BREF_TYPE_INDIRECT:
1027 case HAMMER2_BREF_TYPE_FREEMAP_NODE:
1028 case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
1029 KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0);
1031 case HAMMER2_BREF_TYPE_INODE:
1032 if (chain->data->ipdata.op_flags & HAMMER2_OPFLAG_PFSROOT) {
1034 * non-NULL pmp if mounted as a PFS. We must sync
1035 * fields cached in the pmp.
1037 hammer2_inode_data_t *ipdata;
1039 ipdata = &chain->data->ipdata;
1040 ipdata->pfs_inum = pmp->inode_tid;
1042 /* can't be mounted as a PFS */
1043 KKASSERT((chain->flags & HAMMER2_CHAIN_PFSROOT) == 0);
1045 KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0);
1048 KKASSERT(chain->flags & HAMMER2_CHAIN_EMBEDDED);
1049 panic("hammer2_flush_core: unsupported embedded bref %d",
1055 * Final cleanup after flush
1058 KKASSERT(chain->refs > 1);
1059 info->domodify = saved_domodify;
1060 info->parent = saved_parent;
1063 KKASSERT(chain->bref.mirror_tid <= chain->pmp->flush_tid);
1067 * Flush helper pass1 (recursive)
1069 * Flushes the children of the caller's chain (info->parent), restricted
1070 * by sync_tid. Set info->domodify if the child's blockref must propagate
1071 * back up to the parent.
1073 * Ripouts can move child from rbtree to dbtree or dbq but the caller's
1074 * flush scan order prevents any chains from being lost. A child can be
1075 * executes more than once (update_xlo is used to prevent infinite recursions).
1077 * WARNING! If we do not call hammer2_flush_core() we must update
1078 * bref.mirror_tid ourselves to indicate that the flush has
1079 * processed the child.
1081 * WARNING! parent->core spinlock is held on entry and return.
1083 * WARNING! Flushes do not cross PFS boundaries. Specifically, a flush must
1084 * not cross a pfs-root boundary.
1087 hammer2_flush_pass1(hammer2_chain_t *child, void *data)
1089 hammer2_flush_info_t *info = data;
1090 hammer2_trans_t *trans = info->trans;
1091 hammer2_chain_t *parent = info->parent;
1094 * Child modified in a later transactions, nothing to flush in this
1097 * Remember that modifications generally delete-duplicate so if the
1098 * sub-tree is dirty another child will get us there. But not this
1101 * (child can never be fchain or vchain so a special check isn't
1104 if (child->modify_xid > trans->sync_xid) {
1105 KKASSERT(child->delete_xid >= child->modify_xid);
1106 /*child->update_xlo = info->sync_xid;*/
1107 /* do not update mirror_tid, pass2 will ignore chain */
1112 * We must ref the child before unlocking the spinlock.
1114 * The caller has added a ref to the parent so we can temporarily
1115 * unlock it in order to lock the child.
1117 hammer2_chain_ref(child);
1118 spin_unlock(&parent->core->cst.spin);
1120 hammer2_chain_unlock(parent);
1121 hammer2_chain_lock(child, HAMMER2_RESOLVE_MAYBE);
1124 * Never recurse across a mounted PFS boundary.
1126 * Recurse and collect deferral data. We only recursively sync
1127 * (basically) if update_xlo has not been updated, indicating that
1128 * the child has not already been processed.
1130 if ((child->flags & HAMMER2_CHAIN_PFSBOUNDARY) == 0 ||
1131 child->pmp == NULL) {
1132 if ((child->flags & HAMMER2_CHAIN_MODIFIED) ||
1133 (child->update_xlo < info->sync_xid &&
1134 child->update_xlo < child->update_xhi)) {
1136 hammer2_flush_core(info, &child, 0); /* XXX deleting */
1142 * Determine if domodify should be set. Do not otherwise adjust
1143 * the child or pass2 will get confused.
1146 * - child is flagged as possibly needing block table insertion.
1147 * - child not deleted or deletion is beyond transaction id
1148 * - child created beyond parent synchronization point
1149 * - parent not deleted as-of this transaction
1151 if ((child->flags & HAMMER2_CHAIN_FLUSH_CREATE) &&
1152 child->delete_xid > trans->sync_xid &&
1153 child->modify_xid > parent->update_xlo &&
1154 parent->delete_xid > trans->sync_xid) {
1160 * - child is flagged as possibly needing block table removal.
1161 * - child deleted before or during this transaction
1162 * - child created prior or during parent synchronization point
1163 * - parent not yet synchronized to child deletion
1164 * - parent not deleted as-of this transaction
1166 if ((child->flags & HAMMER2_CHAIN_FLUSH_DELETE) &&
1167 child->delete_xid <= trans->sync_xid &&
1168 child->modify_xid <= parent->update_xlo &&
1169 child->delete_xid > parent->update_xlo &&
1170 parent->delete_xid > trans->sync_xid) {
1175 * Relock to continue the loop
1177 hammer2_chain_unlock(child);
1178 hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE);
1179 hammer2_chain_drop(child);
1180 KKASSERT(info->parent == parent);
1182 spin_lock(&parent->core->cst.spin);
1187 * PASS2 - BLOCKTABLE DELETIONS
1190 hammer2_flush_pass2(hammer2_chain_t *child, void *data)
1192 hammer2_flush_info_t *info = data;
1193 hammer2_chain_t *parent = info->parent;
1194 hammer2_mount_t *hmp = child->hmp;
1195 hammer2_trans_t *trans = info->trans;
1196 hammer2_blockref_t *base;
1200 * Prefilter - Ignore children not flagged as needing a parent
1201 * blocktable update.
1203 if ((child->flags & HAMMER2_CHAIN_FLUSH_DELETE) == 0)
1207 * Prefilter - Ignore children created after our flush_tid (not
1208 * visible to our flush).
1210 if (child->modify_xid > trans->sync_xid) {
1211 KKASSERT(child->delete_xid >= child->modify_xid);
1216 * Prefilter - Don't bother updating the blockrefs for a deleted
1217 * parent (from the flush's perspective). Otherwise,
1218 * we need to be COUNTEDBREFS synchronized for the
1219 * hammer2_base_*() functions.
1221 * NOTE: This test must match the similar one in flush_core.
1223 if (h2ignore_deleted(info, parent))
1227 * Calculate blockmap pointer
1229 switch(parent->bref.type) {
1230 case HAMMER2_BREF_TYPE_INODE:
1232 * Access the inode's block array. However, there is no
1233 * block array if the inode is flagged DIRECTDATA. The
1234 * DIRECTDATA case typicaly only occurs when a hardlink has
1235 * been shifted up the tree and the original inode gets
1236 * replaced with an OBJTYPE_HARDLINK placeholding inode.
1239 (parent->data->ipdata.op_flags &
1240 HAMMER2_OPFLAG_DIRECTDATA) == 0) {
1241 base = &parent->data->ipdata.u.blockset.blockref[0];
1245 count = HAMMER2_SET_COUNT;
1247 case HAMMER2_BREF_TYPE_INDIRECT:
1248 case HAMMER2_BREF_TYPE_FREEMAP_NODE:
1250 base = &parent->data->npdata[0];
1253 count = parent->bytes / sizeof(hammer2_blockref_t);
1255 case HAMMER2_BREF_TYPE_VOLUME:
1256 base = &hmp->voldata.sroot_blockset.blockref[0];
1257 count = HAMMER2_SET_COUNT;
1259 case HAMMER2_BREF_TYPE_FREEMAP:
1260 base = &parent->data->npdata[0];
1261 count = HAMMER2_SET_COUNT;
1266 panic("hammer2_flush_pass2: unrecognized blockref type: %d",
1272 * - child is flagged for removal
1273 * - child deleted before or during this transaction
1274 * - child created prior or during parent synchronization point
1275 * - parent not yet synchronized to child's deletion
1277 if (child->delete_xid <= trans->sync_xid &&
1278 child->modify_xid <= parent->update_xlo &&
1279 child->delete_xid > parent->update_xlo) {
1280 /* can't assert BMAPPED because state adjustment may occur
1281 * before we are done, and BMAPPED only applies to the live
1283 *KKASSERT(child->flags & HAMMER2_CHAIN_BMAPPED);*/
1285 hammer2_rollup_stats(parent, child, -1);
1286 hammer2_base_delete(trans, parent, base, count,
1287 &info->cache_index, child);
1295 * PASS3 - BLOCKTABLE INSERTIONS
1298 hammer2_flush_pass3(hammer2_chain_t *child, void *data)
1300 hammer2_flush_info_t *info = data;
1301 hammer2_chain_t *parent = info->parent;
1302 hammer2_mount_t *hmp = child->hmp;
1303 hammer2_trans_t *trans = info->trans;
1304 hammer2_blockref_t *base;
1308 * Prefilter - Ignore children not flagged as needing a parent
1309 * blocktable update.
1311 if ((child->flags & HAMMER2_CHAIN_FLUSH_CREATE) == 0)
1315 * Prefilter - Ignore children created after our flush_tid (not
1316 * visible to our flush).
1318 if (child->modify_xid > trans->sync_xid) {
1319 KKASSERT(child->delete_xid >= child->modify_xid);
1324 * Prefilter - Don't bother updating the blockrefs for a deleted
1325 * parent (from the flush's perspective). Otherwise,
1326 * we need to be COUNTEDBREFS synchronized for the
1327 * hammer2_base_*() functions.
1329 * NOTE: This test must match the similar one in flush_core.
1331 if (h2ignore_deleted(info, parent))
1335 * Calculate blockmap pointer
1337 switch(parent->bref.type) {
1338 case HAMMER2_BREF_TYPE_INODE:
1340 * Access the inode's block array. However, there is no
1341 * block array if the inode is flagged DIRECTDATA. The
1342 * DIRECTDATA case typicaly only occurs when a hardlink has
1343 * been shifted up the tree and the original inode gets
1344 * replaced with an OBJTYPE_HARDLINK placeholding inode.
1347 (parent->data->ipdata.op_flags &
1348 HAMMER2_OPFLAG_DIRECTDATA) == 0) {
1349 base = &parent->data->ipdata.u.blockset.blockref[0];
1353 count = HAMMER2_SET_COUNT;
1355 case HAMMER2_BREF_TYPE_INDIRECT:
1356 case HAMMER2_BREF_TYPE_FREEMAP_NODE:
1358 base = &parent->data->npdata[0];
1361 count = parent->bytes / sizeof(hammer2_blockref_t);
1363 case HAMMER2_BREF_TYPE_VOLUME:
1364 base = &hmp->voldata.sroot_blockset.blockref[0];
1365 count = HAMMER2_SET_COUNT;
1367 case HAMMER2_BREF_TYPE_FREEMAP:
1368 base = &parent->data->npdata[0];
1369 count = HAMMER2_SET_COUNT;
1374 panic("hammer2_flush_pass3: "
1375 "unrecognized blockref type: %d",
1381 * - child is flagged as possibly needing block table insertion.
1382 * - child not deleted or deletion is beyond transaction id
1383 * - child created beyond parent synchronization point
1385 if (child->delete_xid > trans->sync_xid &&
1386 child->modify_xid > parent->update_xlo) {
1388 hammer2_rollup_stats(parent, child, 1);
1389 hammer2_base_insert(trans, parent, base, count,
1390 &info->cache_index, child);
1398 * PASS4 - CLEANUP CHILDREN (non-recursive, but CAN be re-entrant)
1400 * Adjust queues and set or clear BMAPPED appropriately if processing
1401 * the live parent. pass4 handles deletions, pass5 handles insertions.
1402 * Separate passes are required because both deletions and insertions can
1405 * Cleanup FLUSH_CREATE/FLUSH_DELETE on the last parent.
1408 hammer2_flush_pass4(hammer2_chain_t *child, void *data)
1410 hammer2_flush_info_t *info = data;
1411 hammer2_chain_t *parent = info->parent;
1412 hammer2_chain_core_t *above = child->above;
1413 hammer2_trans_t *trans = info->trans;
1416 * Prefilter - Ignore children created after our flush_tid (not
1417 * visible to our flush).
1419 if (child->modify_xid > trans->sync_xid) {
1420 KKASSERT(child->delete_xid >= child->modify_xid);
1425 * Ref and lock child for operation, spinlock must be temporarily
1426 * Make sure child is referenced before we unlock.
1428 hammer2_chain_ref(child);
1429 spin_unlock(&above->cst.spin);
1430 hammer2_chain_lock(child, HAMMER2_RESOLVE_NEVER);
1431 KKASSERT(child->above == above);
1432 KKASSERT(parent->core == above);
1435 * Adjust BMAPPED state and rbtree/queue only when we hit the
1436 * actual live parent.
1438 if ((parent->flags & HAMMER2_CHAIN_DELETED) == 0) {
1439 spin_lock(&above->cst.spin);
1442 * Deleting from blockmap, move child out of dbtree
1443 * and clear BMAPPED. Child should not be on RBTREE.
1445 if (child->delete_xid <= trans->sync_xid &&
1446 child->modify_xid <= parent->update_xlo &&
1447 child->delete_xid > parent->update_xlo &&
1448 (child->flags & HAMMER2_CHAIN_BMAPPED)) {
1449 KKASSERT(child->flags & HAMMER2_CHAIN_ONDBTREE);
1450 RB_REMOVE(hammer2_chain_tree, &above->dbtree, child);
1451 atomic_clear_int(&child->flags, HAMMER2_CHAIN_ONDBTREE);
1452 atomic_clear_int(&child->flags, HAMMER2_CHAIN_BMAPPED);
1456 * Not on any list, place child on DBQ
1458 if ((child->flags & (HAMMER2_CHAIN_ONRBTREE |
1459 HAMMER2_CHAIN_ONDBTREE |
1460 HAMMER2_CHAIN_ONDBQ)) == 0) {
1461 KKASSERT((child->flags & HAMMER2_CHAIN_BMAPPED) == 0);
1462 TAILQ_INSERT_TAIL(&above->dbq, child, db_entry);
1463 atomic_set_int(&child->flags, HAMMER2_CHAIN_ONDBQ);
1465 spin_unlock(&above->cst.spin);
1469 * Unlock the child. This can wind up dropping the child's
1470 * last ref, removing it from the parent's RB tree, and deallocating
1471 * the structure. The RB_SCAN() our caller is doing handles the
1474 hammer2_chain_unlock(child);
1475 hammer2_chain_drop(child);
1476 spin_lock(&above->cst.spin);
1479 * The parent may have been delete-duplicated.
1485 hammer2_flush_pass5(hammer2_chain_t *child, void *data)
1487 hammer2_flush_info_t *info = data;
1488 hammer2_chain_t *parent = info->parent;
1489 hammer2_chain_t *xchain;
1490 hammer2_chain_core_t *above = child->above;
1491 hammer2_trans_t *trans = info->trans;
1494 * Prefilter - Ignore children created after our flush_tid (not
1495 * visible to our flush).
1497 if (child->modify_xid > trans->sync_xid) {
1498 KKASSERT(child->delete_xid >= child->modify_xid);
1503 * Ref and lock child for operation, spinlock must be temporarily
1504 * Make sure child is referenced before we unlock.
1506 hammer2_chain_ref(child);
1507 spin_unlock(&above->cst.spin);
1508 hammer2_chain_lock(child, HAMMER2_RESOLVE_NEVER);
1509 KKASSERT(child->above == above);
1510 KKASSERT(parent->core == above);
1513 * Adjust BMAPPED state and rbtree/queue only when we hit the
1514 * actual live parent.
1516 if ((parent->flags & HAMMER2_CHAIN_DELETED) == 0) {
1517 spin_lock(&above->cst.spin);
1520 * Inserting into blockmap, place child in rbtree or dbtree.
1522 if (child->delete_xid > trans->sync_xid &&
1523 child->modify_xid > parent->update_xlo &&
1524 (child->flags & HAMMER2_CHAIN_BMAPPED) == 0) {
1525 if (child->flags & HAMMER2_CHAIN_ONDBQ) {
1526 TAILQ_REMOVE(&above->dbq, child, db_entry);
1527 atomic_clear_int(&child->flags,
1528 HAMMER2_CHAIN_ONDBQ);
1530 if ((child->flags & HAMMER2_CHAIN_DELETED) == 0 &&
1531 (child->flags & HAMMER2_CHAIN_ONRBTREE) == 0) {
1532 KKASSERT((child->flags &
1533 (HAMMER2_CHAIN_ONDBTREE |
1534 HAMMER2_CHAIN_ONDBQ)) == 0);
1535 xchain = RB_INSERT(hammer2_chain_tree,
1536 &above->rbtree, child);
1537 KKASSERT(xchain == NULL);
1538 atomic_set_int(&child->flags,
1539 HAMMER2_CHAIN_ONRBTREE);
1541 if ((child->flags & HAMMER2_CHAIN_DELETED) &&
1542 (child->flags & HAMMER2_CHAIN_ONDBTREE) == 0) {
1543 KKASSERT((child->flags &
1544 (HAMMER2_CHAIN_ONRBTREE |
1545 HAMMER2_CHAIN_ONDBQ)) == 0);
1546 xchain = RB_INSERT(hammer2_chain_tree,
1547 &above->dbtree, child);
1548 KKASSERT(xchain == NULL);
1549 atomic_set_int(&child->flags,
1550 HAMMER2_CHAIN_ONDBTREE);
1552 atomic_set_int(&child->flags, HAMMER2_CHAIN_BMAPPED);
1553 KKASSERT(child->flags &
1554 (HAMMER2_CHAIN_ONRBTREE |
1555 HAMMER2_CHAIN_ONDBTREE |
1556 HAMMER2_CHAIN_ONDBQ));
1560 * Not on any list, place child on DBQ
1562 if ((child->flags & (HAMMER2_CHAIN_ONRBTREE |
1563 HAMMER2_CHAIN_ONDBTREE |
1564 HAMMER2_CHAIN_ONDBQ)) == 0) {
1565 KKASSERT((child->flags & HAMMER2_CHAIN_BMAPPED) == 0);
1566 TAILQ_INSERT_TAIL(&above->dbq, child, db_entry);
1567 atomic_set_int(&child->flags, HAMMER2_CHAIN_ONDBQ);
1569 spin_unlock(&above->cst.spin);
1573 * Cleanup flags on last parent iterated for flush.
1575 if (info->domodify & 2) {
1576 if (child->flags & HAMMER2_CHAIN_FLUSH_CREATE) {
1577 atomic_clear_int(&child->flags,
1578 HAMMER2_CHAIN_FLUSH_CREATE);
1579 hammer2_chain_drop(child);
1581 if ((child->flags & HAMMER2_CHAIN_FLUSH_DELETE) &&
1582 child->delete_xid <= trans->sync_xid) {
1583 KKASSERT((parent->flags & HAMMER2_CHAIN_DELETED) ||
1584 (child->flags & HAMMER2_CHAIN_ONDBTREE) == 0);
1585 /* XXX delete-duplicate chain insertion mech wrong */
1586 KKASSERT((parent->flags & HAMMER2_CHAIN_DELETED) ||
1587 (child->flags & HAMMER2_CHAIN_BMAPPED) == 0);
1588 atomic_clear_int(&child->flags,
1589 HAMMER2_CHAIN_FLUSH_DELETE);
1590 hammer2_chain_drop(child);
1595 * Unlock the child. This can wind up dropping the child's
1596 * last ref, removing it from the parent's RB tree, and deallocating
1597 * the structure. The RB_SCAN() our caller is doing handles the
1600 hammer2_chain_unlock(child);
1601 hammer2_chain_drop(child);
1602 spin_lock(&above->cst.spin);
1605 * The parent may have been delete-duplicated.
1611 hammer2_rollup_stats(hammer2_chain_t *parent, hammer2_chain_t *child, int how)
1614 hammer2_chain_t *grandp;
1617 parent->data_count += child->data_count;
1618 parent->inode_count += child->inode_count;
1619 child->data_count = 0;
1620 child->inode_count = 0;
1622 parent->data_count -= child->bytes;
1623 if (child->bref.type == HAMMER2_BREF_TYPE_INODE) {
1624 parent->inode_count -= 1;
1626 /* XXX child->data may be NULL atm */
1627 parent->data_count -= child->data->ipdata.data_count;
1628 parent->inode_count -= child->data->ipdata.inode_count;
1631 } else if (how > 0) {
1632 parent->data_count += child->bytes;
1633 if (child->bref.type == HAMMER2_BREF_TYPE_INODE) {
1634 parent->inode_count += 1;
1636 /* XXX child->data may be NULL atm */
1637 parent->data_count += child->data->ipdata.data_count;
1638 parent->inode_count += child->data->ipdata.inode_count;
1642 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) {
1643 parent->data->ipdata.data_count += parent->data_count;
1644 parent->data->ipdata.inode_count += parent->inode_count;
1646 for (grandp = parent->above->first_parent;
1648 grandp = grandp->next_parent) {
1649 grandp->data_count += parent->data_count;
1650 grandp->inode_count += parent->inode_count;
1653 parent->data_count = 0;
1654 parent->inode_count = 0;