From 8138a154be31c3db1d8bd046ca7b003a6c79c01c Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Tue, 18 Feb 2014 23:20:34 -0800 Subject: [PATCH] hammer2 - Refactor flush mechanics * Greatly simplify and reduce special cases in the flush code * Remove the multi-layer rbtree and replace with two discrete rbtree's (rbtree and dbtree) representing the live state and one linked list (dbq) representing set-aside deleted chains that are not part of the live state. * Cleanup some debugging junk, add more debugging junk. * Separate flushing state flags and TIDs into their own fields instead of trying to use the live state flags and bref TIDs. * Simplify transaction TID tracking. --- sys/vfs/hammer2/donew | 5 - sys/vfs/hammer2/donew2 | 5 - sys/vfs/hammer2/dossd | 11 - sys/vfs/hammer2/dossd2 | 11 - sys/vfs/hammer2/dotest | 11 - sys/vfs/hammer2/hammer2.h | 113 +- sys/vfs/hammer2/hammer2_ccms.c | 2 +- sys/vfs/hammer2/hammer2_ccms.h | 2 +- sys/vfs/hammer2/hammer2_chain.c | 926 ++++++++-------- sys/vfs/hammer2/hammer2_disk.h | 2 +- sys/vfs/hammer2/hammer2_flush.c | 1662 +++++++++++++---------------- sys/vfs/hammer2/hammer2_freemap.c | 20 +- sys/vfs/hammer2/hammer2_inode.c | 8 +- sys/vfs/hammer2/hammer2_io.c | 2 +- sys/vfs/hammer2/hammer2_ioctl.c | 2 +- sys/vfs/hammer2/hammer2_ioctl.h | 2 +- sys/vfs/hammer2/hammer2_mount.h | 2 +- sys/vfs/hammer2/hammer2_msgops.c | 2 +- sys/vfs/hammer2/hammer2_subr.c | 2 +- sys/vfs/hammer2/hammer2_vfsops.c | 97 +- sys/vfs/hammer2/hammer2_vnops.c | 18 +- sys/vfs/hammer2/mkvntest | 10 - 22 files changed, 1339 insertions(+), 1576 deletions(-) delete mode 100755 sys/vfs/hammer2/donew delete mode 100755 sys/vfs/hammer2/donew2 delete mode 100755 sys/vfs/hammer2/dossd delete mode 100755 sys/vfs/hammer2/dossd2 delete mode 100755 sys/vfs/hammer2/dotest delete mode 100755 sys/vfs/hammer2/mkvntest diff --git a/sys/vfs/hammer2/donew b/sys/vfs/hammer2/donew deleted file mode 100755 index db573eac82..0000000000 --- a/sys/vfs/hammer2/donew +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/csh -# - -umount /mnt -newfs_hammer2 -L ROOT /dev/da0s1d diff --git a/sys/vfs/hammer2/donew2 b/sys/vfs/hammer2/donew2 deleted file mode 100755 index d98c5a2e1b..0000000000 --- a/sys/vfs/hammer2/donew2 +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/csh -# - -umount /mnt -newfs_hammer2 -L ROOT /dev/da0s1b diff --git a/sys/vfs/hammer2/dossd b/sys/vfs/hammer2/dossd deleted file mode 100755 index 946bf7b7b6..0000000000 --- a/sys/vfs/hammer2/dossd +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/csh -# - -umount /mnt >& /dev/null -kldunload hammer2.ko >& /dev/null -kldstat | fgrep hammer2.ko >& /dev/null -if ( $status > 0 ) then - kldload /usr/obj/usr/src/sys/vfs/hammer2/hammer2.ko -endif -mount_hammer2 /dev/da0s1d@ROOT /mnt -sysctl vfs.hammer2.debug=0 diff --git a/sys/vfs/hammer2/dossd2 b/sys/vfs/hammer2/dossd2 deleted file mode 100755 index 124869cb50..0000000000 --- a/sys/vfs/hammer2/dossd2 +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/csh -# - -umount /mnt >& /dev/null -kldunload hammer2.ko >& /dev/null -kldstat | fgrep hammer2.ko >& /dev/null -if ( $status > 0 ) then - kldload /usr/obj/usr/src/sys/vfs/hammer2/hammer2.ko -endif -mount_hammer2 /dev/da0s1b@ROOT /mnt -sysctl vfs.hammer2.debug=0 diff --git a/sys/vfs/hammer2/dotest b/sys/vfs/hammer2/dotest deleted file mode 100755 index 803de0cbfc..0000000000 --- a/sys/vfs/hammer2/dotest +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/csh -# - -# ./mkvntest -umount /mnt >& /dev/null -kldunload hammer2.ko >& /dev/null -kldstat | fgrep hammer2.ko >& /dev/null -if ( $status > 0 ) then - kldload 
/usr/obj/usr/src/sys/vfs/hammer2/hammer2.ko -endif -mount_hammer2 /dev/vn0@ROOT /mnt diff --git a/sys/vfs/hammer2/hammer2.h b/sys/vfs/hammer2/hammer2.h index bbacbc746b..aa352c4204 100644 --- a/sys/vfs/hammer2/hammer2.h +++ b/sys/vfs/hammer2/hammer2.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. + * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -91,14 +91,29 @@ struct hammer2_msg; * boundaries, renames, resizing, or any move of the chain to elsewhere * in the topology is accomplished via the DELETE-DUPLICATE mechanism. * - * DELETE-DUPLICATE allows HAMMER2 to track work across flush synchronization - * points without stalling the filesystem or corrupting the flush - * sychronization point. When necessary a chain will be marked DELETED - * and a new, duplicate chain will be allocated. + * Deletions and delete-duplicates: * - * This mechanism necessarily requires that we be able to overload chains - * at any given layer in the topology. Overloading is accomplished via a - * RBTREE recursion through chain->rbtree. + * Any movement of chains within the topology utilize a delete-duplicate + * operation instead of a simple rename. That is, the chain must be + * deleted from its original location and then duplicated to the new + * location. A new chain structure is allocated while the old is + * deleted. Deleted chains are removed from the above chain_core's + * rbtree but remain linked via the shadow topology for flush + * synchronization purposes. + * + * delete_bmap is allocated and a bit set if the chain was originally + * loaded via the blockmap. + * + * Flush synchronization: + * + * Flushes must synchronize chains up through the root. To do this + * the in-memory topology would normally have to be frozen during the + * flush. To avoid freezing the topology and to allow concurrent + * foreground / flush activity, any new modifications made while a + * flush is in progress retains the original chain in a shadow topology + * that is only visible to the flush code. Only one flush can be + * running at a time so the shadow hierarchy can be implemented with + * just a few link fields in our in-memory data structures. 
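As a rough illustration of the live/shadow split described above, the sketch below models the refactored core: one RB tree for the live chains, one for bmapped deletions, and a simple list for the remaining set-aside deletions. It assumes only the BSD <sys/tree.h> and <sys/queue.h> macros; the model_* names and the field selection are inventions of this sketch, not the in-kernel definitions.

#include <sys/tree.h>
#include <sys/queue.h>
#include <stdint.h>

struct model_chain;
RB_HEAD(model_tree, model_chain);
TAILQ_HEAD(model_list, model_chain);

struct model_core {
	struct model_tree	rbtree;		/* live chains (foreground view) */
	struct model_tree	dbtree;		/* deletions still in the parent blockmap */
	struct model_list	dbq;		/* deletions never entered in the blockmap */
};

struct model_chain {
	RB_ENTRY(model_chain)	 rbnode;	/* linkage when on rbtree or dbtree */
	TAILQ_ENTRY(model_chain) db_entry;	/* linkage when on dbq */
	uint64_t		 key;
	uint64_t		 modify_tid;	/* creation point (flush filter) */
	uint64_t		 delete_tid;	/* deletion point (flush filter) */
};

int
main(void)
{
	struct model_core core;

	/* a chain sits on at most one of the three containers at a time */
	RB_INIT(&core.rbtree);
	RB_INIT(&core.dbtree);
	TAILQ_INIT(&core.dbq);
	return ((RB_EMPTY(&core.rbtree) && TAILQ_EMPTY(&core.dbq)) ? 0 : 1);
}

Only the flush ever walks dbtree/dbq, so foreground lookups stay confined to rbtree.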
* * Advantages: * @@ -123,29 +138,22 @@ struct hammer2_msg; RB_HEAD(hammer2_chain_tree, hammer2_chain); TAILQ_HEAD(h2_flush_deferral_list, hammer2_chain); TAILQ_HEAD(h2_core_list, hammer2_chain); -TAILQ_HEAD(h2_layer_list, hammer2_chain_layer); - -struct hammer2_chain_layer { - int good; - TAILQ_ENTRY(hammer2_chain_layer) entry; - struct hammer2_chain_tree rbtree; - int refs; /* prevent destruction */ -}; -typedef struct hammer2_chain_layer hammer2_chain_layer_t; +#define CHAIN_CORE_DELETE_BMAP_ENTRIES \ + (HAMMER2_PBUFSIZE / sizeof(hammer2_blockref_t) / sizeof(uint32_t)) struct hammer2_chain_core { int good; struct ccms_cst cst; - struct h2_core_list ownerq; /* all chains sharing this core */ - struct h2_layer_list layerq; + struct h2_core_list ownerq; /* all chains sharing this core */ + struct hammer2_chain_tree rbtree; /* live chains */ + struct hammer2_chain_tree dbtree; /* bmapped deletions */ + struct h2_core_list dbq; /* other deletions */ int live_zero; /* blockref array opt */ - hammer2_tid_t update_lo; /* check update against parent */ - hammer2_tid_t update_hi; /* check update against parent */ - u_int chain_count; /* total chains in layers */ u_int sharecnt; u_int flags; u_int live_count; /* live (not deleted) chains in tree */ + u_int chain_count; /* live + deleted chains under core */ int generation; /* generation number (inserts only) */ }; @@ -186,9 +194,9 @@ typedef struct hammer2_io hammer2_io_t; * Primary chain structure keeps track of the topology in-memory. */ struct hammer2_chain { - RB_ENTRY(hammer2_chain) rbnode; /* node */ TAILQ_ENTRY(hammer2_chain) core_entry; /* contemporary chains */ - hammer2_chain_layer_t *inlayer; + RB_ENTRY(hammer2_chain) rbnode; /* live chain(s) */ + TAILQ_ENTRY(hammer2_chain) db_entry; /* non bmapped deletions */ hammer2_blockref_t bref; hammer2_chain_core_t *core; hammer2_chain_core_t *above; @@ -196,8 +204,23 @@ struct hammer2_chain { struct hammer2_mount *hmp; struct hammer2_pfsmount *pmp; /* can be NULL */ - hammer2_tid_t modify_tid; /* snapshot/flush filter */ - hammer2_tid_t delete_tid; + hammer2_blockref_t dsrc; /* DEBUG */ + int ninserts; /* DEBUG */ + int nremoves; /* DEBUG */ + hammer2_tid_t dsrc_dupfromat; /* DEBUG */ + uint32_t dsrc_dupfromflags; /* DEBUG */ + int dsrc_reason; /* DEBUG */ + int dsrc_ninserts; /* DEBUG */ + uint32_t dsrc_flags; /* DEBUG */ + hammer2_tid_t dsrc_modify; /* DEBUG */ + hammer2_tid_t dsrc_delete; /* DEBUG */ + hammer2_tid_t dsrc_update_lo; /* DEBUG */ + struct hammer2_chain *dsrc_original; /* DEBUG */ + + hammer2_tid_t modify_tid; /* flush filter */ + hammer2_tid_t delete_tid; /* flush filter */ + hammer2_tid_t update_lo; /* flush propagation */ + hammer2_tid_t update_hi; /* setsubmod propagation */ hammer2_key_t data_count; /* delta's to apply */ hammer2_key_t inode_count; /* delta's to apply */ hammer2_io_t *dio; /* physical data buffer */ @@ -224,32 +247,28 @@ RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); * related buffer. The data is assumed to be all-zeros. It * is primarily used for indirect blocks. * - * MOVED - A modified chain becomes MOVED after it flushes. A chain - * can also become MOVED if it is moved within the topology - * (even if not modified). - * * MODIFIED- The chain's media data has been modified. 
*/ #define HAMMER2_CHAIN_MODIFIED 0x00000001 /* dirty chain data */ #define HAMMER2_CHAIN_ALLOCATED 0x00000002 /* kmalloc'd chain */ -#define HAMMER2_CHAIN_UNUSED0004 0x00000004 +#define HAMMER2_CHAIN_FLUSH_TEMPORARY 0x00000004 #define HAMMER2_CHAIN_FORCECOW 0x00000008 /* force copy-on-wr */ #define HAMMER2_CHAIN_DELETED 0x00000010 /* deleted chain */ #define HAMMER2_CHAIN_INITIAL 0x00000020 /* initial create */ -#define HAMMER2_CHAIN_FLUSHED 0x00000040 /* blktable updated */ -#define HAMMER2_CHAIN_MOVED 0x00000080 /* bref changed */ +#define HAMMER2_CHAIN_FLUSH_CREATE 0x00000040 /* needs flush blkadd */ +#define HAMMER2_CHAIN_FLUSH_DELETE 0x00000080 /* needs flush blkdel */ #define HAMMER2_CHAIN_IOFLUSH 0x00000100 /* bawrite on put */ #define HAMMER2_CHAIN_DEFERRED 0x00000200 /* on a deferral list */ #define HAMMER2_CHAIN_UNLINKED 0x00000400 /* delete on reclaim */ #define HAMMER2_CHAIN_VOLUMESYNC 0x00000800 /* needs volume sync */ -#define HAMMER2_CHAIN_UNUSED01000 0x00001000 +#define HAMMER2_CHAIN_ONDBQ 0x00001000 /* !bmapped deletes */ #define HAMMER2_CHAIN_MOUNTED 0x00002000 /* PFS is mounted */ #define HAMMER2_CHAIN_ONRBTREE 0x00004000 /* on parent RB tree */ #define HAMMER2_CHAIN_SNAPSHOT 0x00008000 /* snapshot special */ #define HAMMER2_CHAIN_EMBEDDED 0x00010000 /* embedded data */ #define HAMMER2_CHAIN_RELEASE 0x00020000 /* don't keep around */ -#define HAMMER2_CHAIN_UNUSED40000 0x00040000 -#define HAMMER2_CHAIN_UNUSED80000 0x00080000 +#define HAMMER2_CHAIN_BMAPPED 0x00040000 /* in parent blkmap */ +#define HAMMER2_CHAIN_ONDBTREE 0x00080000 /* bmapped deletes */ #define HAMMER2_CHAIN_DUPLICATED 0x00100000 /* fwd delete-dup */ #define HAMMER2_CHAIN_PFSROOT 0x00200000 /* in pfs->cluster */ @@ -472,8 +491,8 @@ struct hammer2_trans { TAILQ_ENTRY(hammer2_trans) entry; struct hammer2_pfsmount *pmp; /* might be NULL */ struct hammer2_mount *hmp_single; /* if single-targetted */ - hammer2_tid_t sync_tid; - hammer2_tid_t real_tid; + hammer2_tid_t orig_tid; + hammer2_tid_t sync_tid; /* effective transaction id */ hammer2_tid_t inode_tid; thread_t td; /* pointer */ int flags; @@ -485,7 +504,7 @@ struct hammer2_trans { typedef struct hammer2_trans hammer2_trans_t; #define HAMMER2_TRANS_ISFLUSH 0x0001 /* formal flush */ -#define HAMMER2_TRANS_UNUSED0002 0x0002 +#define HAMMER2_TRANS_CONCURRENT 0x0002 /* concurrent w/flush */ #define HAMMER2_TRANS_BUFCACHE 0x0004 /* from bioq strategy write */ #define HAMMER2_TRANS_NEWINODE 0x0008 /* caller allocating inode */ #define HAMMER2_TRANS_ISALLOCATING 0x0010 /* in allocator */ @@ -644,6 +663,7 @@ extern int hammer2_debug; extern int hammer2_cluster_enable; extern int hammer2_hardlink_enable; extern int hammer2_flush_pipe; +extern int hammer2_synchronous_flush; extern long hammer2_limit_dirty_chains; extern long hammer2_iod_file_read; extern long hammer2_iod_meta_read; @@ -786,8 +806,8 @@ void hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip, int nradix, int flags); void hammer2_chain_unlock(hammer2_chain_t *chain); void hammer2_chain_wait(hammer2_chain_t *chain); -hammer2_chain_t *hammer2_chain_get(hammer2_chain_t *parent, - hammer2_blockref_t *bref, int generation); +hammer2_chain_t *hammer2_chain_get(hammer2_chain_t *parent, int generation, + hammer2_blockref_t *bref); hammer2_chain_t *hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags); void hammer2_chain_lookup_done(hammer2_chain_t *parent); hammer2_chain_t *hammer2_chain_lookup(hammer2_chain_t **parentp, @@ -818,7 +838,7 @@ void hammer2_chain_delete(hammer2_trans_t 
*trans, hammer2_chain_t *chain, int flags); void hammer2_chain_delete_duplicate(hammer2_trans_t *trans, hammer2_chain_t **chainp, int flags); -void hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp); +void hammer2_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp); void hammer2_chain_commit(hammer2_trans_t *trans, hammer2_chain_t *chain); void hammer2_chain_setsubmod(hammer2_trans_t *trans, hammer2_chain_t *chain); @@ -827,13 +847,12 @@ void hammer2_chain_memory_inc(hammer2_pfsmount_t *pmp); void hammer2_chain_memory_wakeup(hammer2_pfsmount_t *pmp); void hammer2_chain_countbrefs(hammer2_chain_t *chain, hammer2_blockref_t *base, int count); -void hammer2_chain_layer_check_locked(hammer2_mount_t *hmp, - hammer2_chain_core_t *core); int hammer2_base_find(hammer2_chain_t *chain, hammer2_blockref_t *base, int count, int *cache_indexp, hammer2_key_t *key_nextp, - hammer2_key_t key_beg, hammer2_key_t key_end); + hammer2_key_t key_beg, hammer2_key_t key_end, + int delete_filter); void hammer2_base_delete(hammer2_trans_t *trans, hammer2_chain_t *chain, hammer2_blockref_t *base, int count, int *cache_indexp, hammer2_chain_t *child); @@ -880,11 +899,11 @@ void hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize, void hammer2_io_bawrite(hammer2_io_t **diop); void hammer2_io_bdwrite(hammer2_io_t **diop); int hammer2_io_bwrite(hammer2_io_t **diop); +int hammer2_io_isdirty(hammer2_io_t *dio); void hammer2_io_setdirty(hammer2_io_t *dio); void hammer2_io_setinval(hammer2_io_t *dio, u_int bytes); void hammer2_io_brelse(hammer2_io_t **diop); void hammer2_io_bqrelse(hammer2_io_t **diop); -int hammer2_io_isdirty(hammer2_io_t *dio); /* * hammer2_msgops.c @@ -898,7 +917,7 @@ int hammer2_msg_adhoc_input(kdmsg_msg_t *msg); void hammer2_clusterctl_wakeup(kdmsg_iocom_t *iocom); void hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index); void hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp); -void hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp); +void hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp, char pfx); void hammer2_bioq_sync(hammer2_pfsmount_t *pmp); int hammer2_vfs_sync(struct mount *mp, int waitflags); void hammer2_lwinprog_ref(hammer2_pfsmount_t *pmp); diff --git a/sys/vfs/hammer2/hammer2_ccms.c b/sys/vfs/hammer2/hammer2_ccms.c index ae500cafd3..47e9edb440 100644 --- a/sys/vfs/hammer2/hammer2_ccms.c +++ b/sys/vfs/hammer2/hammer2_ccms.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006,2012-2013 The DragonFly Project. All rights reserved. + * Copyright (c) 2006,2012-2014 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon diff --git a/sys/vfs/hammer2/hammer2_ccms.h b/sys/vfs/hammer2/hammer2_ccms.h index c13ac27ac8..cbbb7f7af7 100644 --- a/sys/vfs/hammer2/hammer2_ccms.h +++ b/sys/vfs/hammer2/hammer2_ccms.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006,2012 The DragonFly Project. All rights reserved. + * Copyright (c) 2006,2014 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon diff --git a/sys/vfs/hammer2/hammer2_chain.c b/sys/vfs/hammer2/hammer2_chain.c index ac0cc08e17..6aee4bb0aa 100644 --- a/sys/vfs/hammer2/hammer2_chain.c +++ b/sys/vfs/hammer2/hammer2_chain.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. + * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. 
* * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -93,19 +93,14 @@ static hammer2_chain_t *hammer2_combined_find( int *cache_indexp, hammer2_key_t *key_nextp, hammer2_key_t key_beg, hammer2_key_t key_end, hammer2_blockref_t **bresp); -static void hammer2_chain_assert_not_present(hammer2_chain_core_t *above, - hammer2_chain_t *chain); /* - * Basic RBTree for chains. Chains cannot overlap within any given - * core->rbtree without recursing through chain->rbtree. We effectively - * guarantee this by checking the full range rather than just the first - * key element. By matching on the full range callers can detect when - * recursrion through chain->rbtree is needed. + * Basic RBTree for chains (core->rbtree and core->dbtree). Chains cannot + * overlap in the RB trees. Deleted chains are moved from rbtree to either + * dbtree or to dbq. * - * NOTE: This also means the a delete-duplicate on the same key will - * overload by placing the deleted element in the new element's - * chain->rbtree (when doing a direct replacement). + * Chains in delete-duplicate sequences can always iterate through core_entry + * to locate the live version of the chain. */ RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); @@ -117,6 +112,10 @@ hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2) hammer2_key_t c2_beg; hammer2_key_t c2_end; + /* + * Compare chains. Overlaps are not supposed to happen and catch + * any software issues early we count overlaps as a match. + */ c1_beg = chain1->bref.key; c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1; c2_beg = chain2->bref.key; @@ -144,40 +143,33 @@ hammer2_isclusterable(hammer2_chain_t *chain) } /* - * Recursively set the update_hi flag up to the root starting at chain's - * parent->core. update_hi is not set in chain's core. + * Recursively set update_hi starting at chain up through to the root. * * This controls top-down visibility for flushes. The child has just one * 'above' core, but the core itself can be multi-homed with parents iterated - * via core->ownerq. + * via core->ownerq. The last parent is the 'live' parent (all others had to + * have been delete-duplicated). We always propagate upward through the live + * parent. * * This function is not used during a flush (except when the flush is * allocating which requires the live tree). The flush keeps track of its * recursion itself. * - * XXX needs to be optimized to use roll-up TIDs. update_hi is only really - * compared against bref.mirror_tid which itself is only updated by a flush. + * XXX SMP races */ void hammer2_chain_setsubmod(hammer2_trans_t *trans, hammer2_chain_t *chain) { hammer2_chain_core_t *above; + if (chain->update_hi < trans->sync_tid) + chain->update_hi = trans->sync_tid; + while ((above = chain->above) != NULL) { spin_lock(&above->cst.spin); - /* XXX optimize */ - if (above->update_hi < trans->sync_tid) - above->update_hi = trans->sync_tid; chain = TAILQ_LAST(&above->ownerq, h2_core_list); -#if 0 - TAILQ_FOREACH_REVERSE(chain, &above->ownerq, - h2_core_list, core_entry) { - if (trans->sync_tid >= chain->modify_tid && - trans->sync_tid <= chain->delete_tid) { - break; - } - } -#endif + if (chain->update_hi < trans->sync_tid) + chain->update_hi = trans->sync_tid; spin_unlock(&above->cst.spin); } } @@ -236,12 +228,18 @@ hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_pfsmount_t *pmp, chain->delete_tid = HAMMER2_MAX_TID; /* - * Set modify_tid if a transaction is creating the chain. 
When - * loading a chain from backing store trans is passed as NULL and - * modify_tid is left set to 0. + * Set modify_tid if a transaction is creating the inode. + * Enforce update_lo = 0 so nearby transactions do not think + * it has been flushed when it hasn't. + * + * NOTE: When loading a chain from backing store or creating a + * snapshot, trans will be NULL and the caller is responsible + * for setting these fields. */ - if (trans) + if (trans) { chain->modify_tid = trans->sync_tid; + chain->update_lo = 0; + } return (chain); } @@ -271,14 +269,12 @@ hammer2_chain_core_alloc(hammer2_trans_t *trans, */ core = kmalloc(sizeof(*core), nchain->hmp->mchain, M_WAITOK | M_ZERO); - TAILQ_INIT(&core->layerq); TAILQ_INIT(&core->ownerq); + TAILQ_INIT(&core->dbq); + RB_INIT(&core->rbtree); /* live chains */ + RB_INIT(&core->dbtree); /* deleted original (bmapped) chains */ core->sharecnt = 1; core->good = 0x1234; - if (trans) - core->update_hi = trans->sync_tid; - else - core->update_hi = nchain->bref.mirror_tid; nchain->core = core; ccms_cst_init(&core->cst, nchain); TAILQ_INSERT_TAIL(&core->ownerq, nchain, core_entry); @@ -328,21 +324,20 @@ hammer2_chain_core_alloc(hammer2_trans_t *trans, spin_lock(&core->cst.spin); nchain->core = core; -#if 0 - if (core->update_hi < trans->sync_tid) - core->update_hi = trans->sync_tid; -#endif - /* * Maintain ordering for refactor test so we don't skip over * a snapshot. Also, during flushes, delete-duplications - * for block-table updates can occur on blocks already - * deleted (delete-duplicated by a later transaction). We - * must insert nchain after ochain but before the later - * transaction's copy. + * for block-table updates can occur on ochains already + * deleted (delete-duplicated by a later transaction), or + * on forward-indexed ochains. We must properly insert + * nchain relative to ochain. */ - TAILQ_INSERT_AFTER(&core->ownerq, ochain, nchain, core_entry); - + if (trans && trans->sync_tid < ochain->modify_tid) { + TAILQ_INSERT_BEFORE(ochain, nchain, core_entry); + } else { + TAILQ_INSERT_AFTER(&core->ownerq, ochain, + nchain, core_entry); + } spin_unlock(&core->cst.spin); } } @@ -357,8 +352,11 @@ hammer2_chain_ref(hammer2_chain_t *chain) } /* - * Insert the chain in the core rbtree at the first layer - * which accepts it (for now we don't sort layers by the transaction tid) + * Insert the chain in the core rbtree. + * + * Normal insertions are placed in the live rbtree. Insertion of a deleted + * chain is a special case used by the flush code that is placed on the + * unstaged deleted list to avoid confusing the live view. */ #define HAMMER2_CHAIN_INSERT_SPIN 0x0001 #define HAMMER2_CHAIN_INSERT_LIVE 0x0002 @@ -366,46 +364,16 @@ hammer2_chain_ref(hammer2_chain_t *chain) static int -hammer2_chain_insert(hammer2_chain_core_t *above, hammer2_chain_layer_t *layer, - hammer2_chain_t *chain, int flags, int generation) +hammer2_chain_insert(hammer2_chain_core_t *above, + hammer2_chain_t *ochain, hammer2_chain_t *nchain, + int flags, int generation) { hammer2_chain_t *xchain; - hammer2_chain_layer_t *nlayer; int error = 0; if (flags & HAMMER2_CHAIN_INSERT_SPIN) spin_lock(&above->cst.spin); - /* - * Special case, place the chain in the next most-recent layer as the - * specified layer, inserting a layer inbetween if necessary. 
- */ - if (layer) { - KKASSERT((flags & HAMMER2_CHAIN_INSERT_RACE) == 0); - nlayer = TAILQ_PREV(layer, h2_layer_list, entry); - if (nlayer && RB_INSERT(hammer2_chain_tree, - &nlayer->rbtree, chain) == NULL) { - layer = nlayer; - goto done; - } - - spin_unlock(&above->cst.spin); - KKASSERT((flags & HAMMER2_CHAIN_INSERT_LIVE) == 0); - nlayer = kmalloc(sizeof(*nlayer), chain->hmp->mchain, - M_WAITOK | M_ZERO); - RB_INIT(&nlayer->rbtree); - nlayer->good = 0xABCD; - spin_lock(&above->cst.spin); - - if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) - hammer2_chain_assert_not_present(above, chain); - - TAILQ_INSERT_BEFORE(layer, nlayer, entry); - RB_INSERT(hammer2_chain_tree, &nlayer->rbtree, chain); - layer = nlayer; - goto done; - } - /* * Interlocked by spinlock, check for race */ @@ -416,54 +384,50 @@ hammer2_chain_insert(hammer2_chain_core_t *above, hammer2_chain_layer_t *layer, } /* - * Try to insert, allocate a new layer if a nominal collision - * occurs (a collision is different from a SMP race). + * Insert nchain */ - layer = TAILQ_FIRST(&above->layerq); - xchain = NULL; - - if (layer == NULL || - (xchain = RB_INSERT(hammer2_chain_tree, - &layer->rbtree, chain)) != NULL) { - - /* - * Allocate a new layer to resolve the issue. - */ - spin_unlock(&above->cst.spin); - layer = kmalloc(sizeof(*layer), chain->hmp->mchain, - M_WAITOK | M_ZERO); - RB_INIT(&layer->rbtree); - layer->good = 0xABCD; - spin_lock(&above->cst.spin); + if (nchain->flags & HAMMER2_CHAIN_DELETED) { + if (ochain && (ochain->flags & HAMMER2_CHAIN_BMAPPED)) { + if (ochain->flags & HAMMER2_CHAIN_ONDBTREE) { + RB_REMOVE(hammer2_chain_tree, + &above->dbtree, ochain); + atomic_clear_int(&ochain->flags, + HAMMER2_CHAIN_ONDBTREE); + TAILQ_INSERT_TAIL(&above->dbq, + ochain, db_entry); + atomic_set_int(&ochain->flags, + HAMMER2_CHAIN_ONDBQ); + } + /* clear BMAPPED (DBTREE, sometimes RBTREE) */ + atomic_clear_int(&ochain->flags, HAMMER2_CHAIN_BMAPPED); - if ((flags & HAMMER2_CHAIN_INSERT_RACE) && - above->generation != generation) { - spin_unlock(&above->cst.spin); - kfree(layer, chain->hmp->mchain); - spin_lock(&above->cst.spin); - error = EAGAIN; - goto failed; + xchain = RB_INSERT(hammer2_chain_tree, + &above->dbtree, nchain); + KKASSERT(xchain == NULL); + atomic_set_int(&nchain->flags, + HAMMER2_CHAIN_ONDBTREE | + HAMMER2_CHAIN_BMAPPED); + } else { + TAILQ_INSERT_TAIL(&above->dbq, nchain, db_entry); + atomic_set_int(&nchain->flags, HAMMER2_CHAIN_ONDBQ); } - hammer2_chain_assert_not_present(above, chain); - - TAILQ_INSERT_HEAD(&above->layerq, layer, entry); - RB_INSERT(hammer2_chain_tree, &layer->rbtree, chain); + } else { + xchain = RB_INSERT(hammer2_chain_tree, &above->rbtree, nchain); + KASSERT(xchain == NULL, + ("hammer2_chain_insert: collision %p", nchain)); + atomic_set_int(&nchain->flags, HAMMER2_CHAIN_ONRBTREE); } -done: - chain->above = above; - chain->inlayer = layer; + + nchain->above = above; ++above->chain_count; ++above->generation; - atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); /* * We have to keep track of the effective live-view blockref count * so the create code knows when to push an indirect block. 
*/ - if ((flags & HAMMER2_CHAIN_INSERT_LIVE) && - (chain->flags & HAMMER2_CHAIN_DELETED) == 0) { + if (flags & HAMMER2_CHAIN_INSERT_LIVE) atomic_add_int(&above->live_count, 1); - } failed: if (flags & HAMMER2_CHAIN_INSERT_SPIN) spin_unlock(&above->cst.spin); @@ -490,7 +454,9 @@ hammer2_chain_drop(hammer2_chain_t *chain) if (hammer2_debug & 0x200000) Debugger("drop"); - if (chain->flags & HAMMER2_CHAIN_MOVED) + if (chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) + ++need; + if (chain->flags & HAMMER2_CHAIN_FLUSH_DELETE) ++need; if (chain->flags & HAMMER2_CHAIN_MODIFIED) ++need; @@ -554,7 +520,6 @@ hammer2_chain_lastdrop(hammer2_chain_t *chain, struct h2_core_list *delayq) hammer2_mount_t *hmp; hammer2_chain_core_t *above; hammer2_chain_core_t *core; - hammer2_chain_layer_t *layer; hammer2_chain_t *rdrop1; hammer2_chain_t *rdrop2; @@ -574,7 +539,7 @@ hammer2_chain_lastdrop(hammer2_chain_t *chain, struct h2_core_list *delayq) * have their deleted flag set) short-cut recursive flush * dependencies and can be freed here. Any flushes which run * through stale children due to the flush synchronization - * point should have the MOVED bit set in the chain and not + * point should have a FLUSH_* bit set in the chain and not * reach lastdrop at this time. * * NOTE: We return (chain) on failure to retry. @@ -626,7 +591,6 @@ hammer2_chain_lastdrop(hammer2_chain_t *chain, struct h2_core_list *delayq) pmp = chain->pmp; /* can be NULL */ rdrop1 = NULL; rdrop2 = NULL; - layer = NULL; /* * Spinlock the parent and try to drop the last ref on chain. @@ -647,21 +611,31 @@ hammer2_chain_lastdrop(hammer2_chain_t *chain, struct h2_core_list *delayq) /* * 1->0 transition successful, remove chain from its - * above core. Track layer for removal/freeing. + * above core. */ - KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); - layer = chain->inlayer; - RB_REMOVE(hammer2_chain_tree, &layer->rbtree, chain); + switch (chain->flags & (HAMMER2_CHAIN_ONRBTREE | + HAMMER2_CHAIN_ONDBTREE | + HAMMER2_CHAIN_ONDBQ)) { + case HAMMER2_CHAIN_ONRBTREE: + RB_REMOVE(hammer2_chain_tree, &above->rbtree, chain); + atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); + break; + case HAMMER2_CHAIN_ONDBTREE: + RB_REMOVE(hammer2_chain_tree, &above->dbtree, chain); + atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONDBTREE); + break; + case HAMMER2_CHAIN_ONDBQ: + TAILQ_REMOVE(&above->dbq, chain, db_entry); + atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONDBQ); + break; + default: + panic("hammer2_chain_lastdrop: chain %p badflags %08x", + chain, chain->flags); + break; + } + --above->chain_count; - atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); chain->above = NULL; - chain->inlayer = NULL; - - if (RB_EMPTY(&layer->rbtree) && layer->refs == 0) { - TAILQ_REMOVE(&above->layerq, layer, entry); - } else { - layer = NULL; - } /* * If our chain was the last chain in the parent's core the @@ -708,22 +682,13 @@ hammer2_chain_lastdrop(hammer2_chain_t *chain, struct h2_core_list *delayq) if (atomic_fetchadd_int(&core->sharecnt, -1) == 1) { /* * On the 1->0 transition of core we can destroy - * it. Any remaining layers should no longer be - * referenced or visibile to other threads. + * it. 
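As the drop path above shows, MODIFIED, FLUSH_CREATE and FLUSH_DELETE each pin one reference on the chain, so its refcount cannot legitimately fall below the number of such flags currently set. A tiny sketch of that accounting; the flag values are taken from the header diff, while flag_held_refs() and the H2MODEL_* names are illustrative only.

#include <stdio.h>

#define H2MODEL_MODIFIED	0x00000001	/* dirty chain data */
#define H2MODEL_FLUSH_CREATE	0x00000040	/* needs flush blkadd */
#define H2MODEL_FLUSH_DELETE	0x00000080	/* needs flush blkdel */

/*
 * Count the references implicitly held by flush-related flags, mirroring
 * the ++need accounting in hammer2_chain_drop().
 */
static unsigned int
flag_held_refs(unsigned int flags)
{
	unsigned int need = 0;

	if (flags & H2MODEL_FLUSH_CREATE)
		++need;
	if (flags & H2MODEL_FLUSH_DELETE)
		++need;
	if (flags & H2MODEL_MODIFIED)
		++need;
	return need;
}

int
main(void)
{
	/* a modified chain queued for a blockref add holds two flag refs */
	printf("%u\n", flag_held_refs(H2MODEL_MODIFIED | H2MODEL_FLUSH_CREATE));
	return 0;
}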
*/ KKASSERT(TAILQ_EMPTY(&core->ownerq)); - if (layer) { - layer->good = 0xEF00; - kfree(layer, hmp->mchain); - } - while ((layer = TAILQ_FIRST(&core->layerq)) != NULL) { - KKASSERT(layer->refs == 0 && - RB_EMPTY(&layer->rbtree)); - TAILQ_REMOVE(&core->layerq, layer, entry); - layer->good = 0xEF01; - kfree(layer, hmp->mchain); - } - /* layer now NULL */ + KKASSERT(RB_EMPTY(&core->rbtree) && + RB_EMPTY(&core->dbtree) && + TAILQ_EMPTY(&core->dbq) && + core->chain_count == 0); KKASSERT(core->cst.count == 0); KKASSERT(core->cst.upgrade == 0); core->good = 0x5678; @@ -735,20 +700,13 @@ hammer2_chain_lastdrop(hammer2_chain_t *chain, struct h2_core_list *delayq) /* * All spin locks are gone, finish freeing stuff. */ - KKASSERT((chain->flags & (HAMMER2_CHAIN_MOVED | + KKASSERT((chain->flags & (HAMMER2_CHAIN_FLUSH_CREATE | + HAMMER2_CHAIN_FLUSH_DELETE | HAMMER2_CHAIN_MODIFIED)) == 0); hammer2_chain_drop_data(chain, 1); KKASSERT(chain->dio == NULL); - /* - * Free saved empty layer and return chained drop. - */ - if (layer) { - layer->good = 0xEF02; - kfree(layer, hmp->mchain); - } - /* * Once chain resources are gone we can use the now dead chain * structure to placehold what might otherwise require a recursive @@ -945,6 +903,17 @@ hammer2_chain_lock(hammer2_chain_t *chain, int how) return (error); } +#if 0 + /* + * No need for this, always require that hammer2_chain_modify() + * be called before any modifying operations. + */ + if ((chain->flags & HAMMER2_CHAIN_MODIFIED) && + !hammer2_io_isdirty(chain->dio)) { + hammer2_io_setdirty(chain->dio); + } +#endif + /* * We can clear the INITIAL state now, we've resolved the buffer * to zeros and marked it dirty with hammer2_io_new(). @@ -1301,6 +1270,8 @@ hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip, /* * Relocate the block, even if making it smaller (because different * block sizes may be in different regions). + * + * (data blocks only, we aren't copying the storage here). */ hammer2_freemap_alloc(trans, chain, nbytes); chain->bytes = nbytes; @@ -1313,17 +1284,6 @@ hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip, */ KKASSERT(chain->dio == NULL); -#if 0 - /* - * Make sure the chain is marked MOVED and propagate the update - * to the root for flush. - */ - if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) { - hammer2_chain_ref(chain); - atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED); - } - hammer2_chain_setsubmod(trans, chain); -#endif *chainp = chain; } @@ -1402,15 +1362,6 @@ hammer2_chain_modify(hammer2_trans_t *trans, hammer2_chain_t **chainp, hammer2_chain_delete_duplicate(trans, chainp, 0); chain = *chainp; } - - /* - * Fall through if fchain or vchain, clearing the CHAIN_FLUSHED - * flag. Basically other chains are delete-duplicated and so - * the duplicated chains of course will not have the FLUSHED - * flag set, but fchain and vchain are special-cased and the - * flag must be cleared when changing modify_tid. - */ - atomic_clear_int(&chain->flags, HAMMER2_CHAIN_FLUSHED); } /* @@ -1424,20 +1375,20 @@ hammer2_chain_modify(hammer2_trans_t *trans, hammer2_chain_t **chainp, } /* - * Otherwise do initial-chain handling + * Otherwise do initial-chain handling. Set MODIFIED to indicate + * that the chain has been modified. Set FLUSH_CREATE to flush + * the new blockref (the D-D set FLUSH_DELETE on the old chain to + * delete the old blockref). 
*/ if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) { atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); hammer2_chain_ref(chain); hammer2_chain_memory_inc(chain->pmp); } -#if 0 - /* shouldn't be needed */ - if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) { + if ((chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) == 0) { + atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSH_CREATE); hammer2_chain_ref(chain); - atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED); } -#endif /* * The modification or re-modification requires an allocation and @@ -1622,44 +1573,12 @@ struct hammer2_chain_find_info { static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data); static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data); -/* - * DEBUGGING - Assert that the chain will not collide. - */ -static -void -hammer2_chain_assert_not_present(hammer2_chain_core_t *core, - hammer2_chain_t *chain) -{ - struct hammer2_chain_find_info info; - hammer2_chain_layer_t *layer; - - if (chain->flags & HAMMER2_CHAIN_DELETED) - return; - - info.best = NULL; - info.key_beg = chain->bref.key; - info.key_end = chain->bref.key + - ((hammer2_key_t)1 << chain->bref.keybits) - 1; - info.key_next = HAMMER2_MAX_KEY; - - TAILQ_FOREACH(layer, &core->layerq, entry) { - KKASSERT(layer->good == 0xABCD); - RB_SCAN(hammer2_chain_tree, &layer->rbtree, - hammer2_chain_find_cmp, hammer2_chain_find_callback, - &info); - } - if (info.best && (info.best->flags & HAMMER2_CHAIN_DELETED) == 0) - panic("hammer2_chain_assert_not_present: %p/%p\n", - chain, info.best); -} - static hammer2_chain_t * hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp, hammer2_key_t key_beg, hammer2_key_t key_end) { struct hammer2_chain_find_info info; - hammer2_chain_layer_t *layer; info.best = NULL; info.key_beg = key_beg; @@ -1667,12 +1586,9 @@ hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp, info.key_next = *key_nextp; KKASSERT(parent->core->good == 0x1234); - TAILQ_FOREACH(layer, &parent->core->layerq, entry) { - KKASSERT(layer->good == 0xABCD); - RB_SCAN(hammer2_chain_tree, &layer->rbtree, - hammer2_chain_find_cmp, hammer2_chain_find_callback, - &info); - } + RB_SCAN(hammer2_chain_tree, &parent->core->rbtree, + hammer2_chain_find_cmp, hammer2_chain_find_callback, + &info); *key_nextp = info.key_next; #if 0 kprintf("chain_find %p %016jx:%016jx next=%016jx\n", @@ -1682,6 +1598,36 @@ hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp, return (info.best); } +/* + * Find a deleted chain covering a block table entry. Be careful to deal + * with the race condition where the block table has been updated but the + * chain has not yet been removed from dbtree (due to multiple parents having + * to be updated). + */ +static +hammer2_chain_t * +hammer2_chain_find_deleted(hammer2_chain_t *parent, + hammer2_key_t key_beg, hammer2_key_t key_end) +{ + struct hammer2_chain_find_info info; + hammer2_chain_t *child; + + info.best = NULL; + info.key_beg = key_beg; + info.key_end = key_end; + info.key_next = 0; + + KKASSERT(parent->core->good == 0x1234); + RB_SCAN(hammer2_chain_tree, &parent->core->dbtree, + hammer2_chain_find_cmp, hammer2_chain_find_callback, + &info); + if ((child = info.best) != NULL) { + if (child->delete_tid <= parent->update_lo) + child = NULL; + } + return child; +} + static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data) @@ -1791,8 +1737,8 @@ hammer2_chain_find_callback(hammer2_chain_t *child, void *data) * need the parent's bref array to find our block. 
*/ hammer2_chain_t * -hammer2_chain_get(hammer2_chain_t *parent, hammer2_blockref_t *bref, - int generation) +hammer2_chain_get(hammer2_chain_t *parent, int generation, + hammer2_blockref_t *bref) { hammer2_mount_t *hmp = parent->hmp; hammer2_chain_core_t *above = parent->core; @@ -1806,7 +1752,14 @@ hammer2_chain_get(hammer2_chain_t *parent, hammer2_blockref_t *bref, chain = hammer2_chain_alloc(hmp, parent->pmp, NULL, bref); hammer2_chain_core_alloc(NULL, chain, NULL); /* ref'd chain returned */ + + /* + * Set modify_tid and update_lo to the chain's synchronization + * point from the media. + */ chain->modify_tid = chain->bref.mirror_tid; + chain->update_lo = chain->bref.mirror_tid; + atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED); /* * Link the chain into its parent. A spinlock is required to safely @@ -1820,7 +1773,9 @@ hammer2_chain_get(hammer2_chain_t *parent, hammer2_blockref_t *bref, HAMMER2_CHAIN_INSERT_RACE, generation); if (error) { - KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); + KKASSERT((chain->flags & (HAMMER2_CHAIN_ONRBTREE | + HAMMER2_CHAIN_ONDBTREE | + HAMMER2_CHAIN_ONDBQ)) == 0); kprintf("chain %p get race\n", chain); hammer2_chain_drop(chain); chain = NULL; @@ -2093,7 +2048,8 @@ again: spin_lock(&above->cst.spin); chain = hammer2_combined_find(parent, base, count, cache_indexp, key_nextp, - key_beg, key_end, &bref); + key_beg, key_end, + &bref); generation = above->generation; /* @@ -2114,7 +2070,8 @@ again: if (chain == NULL) { bcopy = *bref; spin_unlock(&above->cst.spin); - chain = hammer2_chain_get(parent, &bcopy, generation); + chain = hammer2_chain_get(parent, generation, + &bcopy); if (chain == NULL) { kprintf("retry lookup parent %p keys %016jx:%016jx\n", parent, key_beg, key_end); @@ -2398,7 +2355,8 @@ again: spin_lock(&above->cst.spin); chain = hammer2_combined_find(parent, base, count, cache_indexp, &next_key, - key, HAMMER2_MAX_KEY, &bref); + key, HAMMER2_MAX_KEY, + &bref); generation = above->generation; /* @@ -2416,7 +2374,7 @@ again: if (chain == NULL) { bcopy = *bref; spin_unlock(&above->cst.spin); - chain = hammer2_chain_get(parent, &bcopy, generation); + chain = hammer2_chain_get(parent, generation, &bcopy); if (chain == NULL) { kprintf("retry scan parent %p keys %016jx\n", parent, key); @@ -2605,11 +2563,9 @@ hammer2_chain_create(hammer2_trans_t *trans, hammer2_chain_t **parentp, * * Do NOT mess with the current state of the INITIAL flag. */ - KKASSERT(chain->modify_tid > parent->bref.mirror_tid); KKASSERT(chain->modify_tid == trans->sync_tid); chain->bref.key = key; chain->bref.keybits = keybits; - /* chain->modify_tid = chain->bref.mirror_tid; */ KKASSERT(chain->above == NULL); } @@ -2693,14 +2649,11 @@ again: } /* - * Link the chain into its parent. Later on we will have to set - * the MOVED bit in situations where we don't mark the new chain - * as being modified. + * Link the chain into its parent. */ if (chain->above != NULL) panic("hammer2: hammer2_chain_create: chain already connected"); KKASSERT(chain->above == NULL); - KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0); hammer2_chain_insert(above, NULL, chain, HAMMER2_CHAIN_INSERT_SPIN | HAMMER2_CHAIN_INSERT_LIVE, @@ -2708,7 +2661,8 @@ again: if (allocated) { /* - * Mark the newly created chain modified. + * Mark the newly created chain modified. This will cause + * FLUSH_CREATE to be set. * * Device buffers are not instantiated for DATA elements * as these are handled by logical buffers. 
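hammer2_chain_find_deleted() above has to tolerate the window where the parent's block table has already been synchronized past a deletion but the deleted chain is still parked on dbtree because other parents still reference it. The filter reduces to one comparison against the parent's update_lo, sketched below with simplified types; the h2model_* names and the demo main() are assumptions of the sketch, not kernel code.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

typedef uint64_t h2model_tid_t;

struct h2model_chain {
	h2model_tid_t update_lo;	/* media synchronized through this tid */
	h2model_tid_t delete_tid;	/* when the chain was deleted */
};

/*
 * A deleted child found on dbtree only masks the parent's blockref while
 * its deletion has not yet reached the parent's block table, i.e. while
 * delete_tid > parent->update_lo.  Otherwise the lookup ignores it.
 */
static bool
deletion_still_masks(const struct h2model_chain *parent,
		     const struct h2model_chain *deleted_child)
{
	return (deleted_child->delete_tid > parent->update_lo);
}

int
main(void)
{
	struct h2model_chain parent = { .update_lo = 10 };
	struct h2model_chain child = { .delete_tid = 12 };

	/* a deletion at tid 12 is not yet in a table synced through tid 10 */
	printf("%d\n", deletion_still_masks(&parent, &child));
	return 0;
}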
@@ -2741,13 +2695,14 @@ again: } } else { /* - * When reconnecting a chain we must set MOVED and setsubmod - * so the flush recognizes that it must update the bref in - * the parent. + * When reconnecting a chain we must set FLUSH_CREATE and + * setsubmod so the flush recognizes that it must update + * the bref in the parent. */ - if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) { + if ((chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) == 0) { hammer2_chain_ref(chain); - atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED); + atomic_set_int(&chain->flags, + HAMMER2_CHAIN_FLUSH_CREATE); } } hammer2_chain_setsubmod(trans, chain); @@ -2819,16 +2774,15 @@ hammer2_chain_duplicate(hammer2_trans_t *trans, hammer2_chain_t **parentp, hmp = ochain->hmp; KKASSERT(snapshot == 1 || (ochain->flags & HAMMER2_CHAIN_DELETED)); - atomic_set_int(&ochain->flags, HAMMER2_CHAIN_FORCECOW); - /* * Now create a duplicate of the chain structure, associating * it with the same core, making it the same size, pointing it * to the same bref (the same media block). * - * Give the duplicate the same modify_tid that we previously - * ensured was sufficiently advanced to trigger a block table - * insertion on flush. + * Give nchain the same modify_tid that we previously ensured was + * sufficiently advanced to trigger a block table insertion on flush. + * + * nchain copies ochain's data and must inherit ochain->update_lo. * * NOTE: bref.mirror_tid duplicated by virtue of bref copy in * hammer2_chain_alloc() @@ -2846,11 +2800,14 @@ hammer2_chain_duplicate(hammer2_trans_t *trans, hammer2_chain_t **parentp, (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX); nchain->bytes = bytes; nchain->modify_tid = ochain->modify_tid; + nchain->update_lo = ochain->update_lo; nchain->inode_reason = ochain->inode_reason + 0x100000; - if (ochain->flags & HAMMER2_CHAIN_INITIAL) - atomic_set_int(&nchain->flags, HAMMER2_CHAIN_INITIAL); - if (ochain->flags & HAMMER2_CHAIN_UNLINKED) - atomic_set_int(&nchain->flags, HAMMER2_CHAIN_UNLINKED); + atomic_set_int(&nchain->flags, + ochain->flags & (HAMMER2_CHAIN_INITIAL | + HAMMER2_CHAIN_FORCECOW | + HAMMER2_CHAIN_UNLINKED)); + if (ochain->modify_tid == trans->sync_tid) + atomic_set_int(&ochain->flags, HAMMER2_CHAIN_FORCECOW); /* * Switch from ochain to nchain @@ -2885,7 +2842,8 @@ hammer2_chain_duplicate(hammer2_trans_t *trans, hammer2_chain_t **parentp, /* * If parent is not NULL the duplicated chain will be entered under - * the parent and the MOVED bit set. + * the parent and the FLUSH_CREATE bit set to tell flush to update + * the blockref. * * Having both chains locked is extremely important for atomicy. */ @@ -2900,28 +2858,85 @@ hammer2_chain_duplicate(hammer2_trans_t *trans, hammer2_chain_t **parentp, nchain->bref.type, nchain->bytes); parent = NULL; - if ((nchain->flags & HAMMER2_CHAIN_MOVED) == 0) { - hammer2_chain_ref(nchain); - atomic_set_int(&nchain->flags, HAMMER2_CHAIN_MOVED); - } + KKASSERT(nchain->flags & HAMMER2_CHAIN_FLUSH_CREATE); hammer2_chain_setsubmod(trans, nchain); } -#if 0 + *chainp = nchain; +} + +/* + * Helper function for deleting chains. + * + * The chain is removed from the live view (the RBTREE). + * + * If appropriate, the chain is added to the shadow topology and FLUSH_DELETE + * is set for flusher visbility. The caller is responsible for calling + * setsubmod on chain, so we do not adjust update_hi here. 
+ */ +static void +_hammer2_chain_delete_helper(hammer2_trans_t *trans, + hammer2_chain_core_t *above, + hammer2_chain_t *chain) +{ + hammer2_mount_t *hmp; + hammer2_chain_t *xchain; + + KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); + KKASSERT(trans->sync_tid >= chain->modify_tid); + KKASSERT((chain->flags & (HAMMER2_CHAIN_DELETED | + HAMMER2_CHAIN_ONDBQ | + HAMMER2_CHAIN_ONDBTREE | + HAMMER2_CHAIN_FLUSH_DELETE)) == 0); + /* - * Unconditionally set MOVED to force the parent blockrefs to - * update, and adjust update_hi below nchain so nchain's - * blockrefs are updated with the new attachment. + * Flag as deleted, reduce live_count and bump the above core's + * generation. */ - if (nchain->core->update_hi < trans->sync_tid) { - spin_lock(&nchain->core->cst.spin); - if (nchain->core->update_hi < trans->sync_tid) - nchain->core->update_hi = trans->sync_tid; - spin_unlock(&nchain->core->cst.spin); - } -#endif + chain->delete_tid = trans->sync_tid; + atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); + atomic_add_int(&above->live_count, -1); + ++above->generation; + hmp = chain->hmp; - *chainp = nchain; + /* + * Remove from live tree + */ + RB_REMOVE(hammer2_chain_tree, &above->rbtree, chain); + atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); + + if (chain->flags & HAMMER2_CHAIN_BMAPPED) { + /* + * If the chain was originally bmapped we must place on the + * deleted tree and set FLUSH_DELETE (+ref) to prevent + * destruction of the chain until the flush can reconcile + * the parent's block table. + * + * NOTE! DBTREE is only representitive of the live view, + * the flush must check both DBTREE and DBQ. + */ + xchain = RB_INSERT(hammer2_chain_tree, &above->dbtree, chain); + KKASSERT(xchain == NULL); + atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONDBTREE); + + atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSH_DELETE); + hammer2_chain_ref(chain); + } else { + /* + * If the chain no longer (and never had) an actual blockmap + * entry we must place it on the dbq list and set FLUSH_DELETE + * (+ref) to prevent destruction of the chain until the flush + * can reconcile the parent's block table. + * + * NOTE! DBTREE is only representitive of the live view, + * the flush must check both DBTREE and DBQ. + */ + TAILQ_INSERT_TAIL(&above->dbq, chain, db_entry); + atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONDBQ); + + atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSH_DELETE); + hammer2_chain_ref(chain); + } } /* @@ -2930,11 +2945,9 @@ hammer2_chain_duplicate(hammer2_trans_t *trans, hammer2_chain_t **parentp, * with a duplicate. Atomicy is at the very-fine spin-lock level in * order to ensure that lookups do not race us. * - * If the old chain is already marked deleted the new chain will also be - * marked deleted. This case can occur when an inode is removed from the - * filesystem but programs still have an open descriptor to it, and during - * flushes when the flush needs to operate on a chain that is deleted in - * the live view but still alive in the flush view. + * The flush code will sometimes call this function with a deleted chain. + * In this situation the old chain's memory is reallocated without + * duplicating it. * * The new chain will be marked modified for the current transaction. */ @@ -2969,8 +2982,21 @@ hammer2_chain_delete_duplicate(hammer2_trans_t *trans, hammer2_chain_t **chainp, * In the case where nchain inherits ochains core, nchain is * effectively locked due to ochain being locked (and sharing the * core), until we can give nchain its own official ock. 
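The helper above routes a freshly deleted chain purely on whether it was ever entered in the parent's block table: bmapped deletions go to dbtree so the flush can remove the media blockref, everything else goes to the dbq list, and both get FLUSH_DELETE (plus a ref) until the flush reconciles the parent. A compact sketch of that routing; the flag values are copied from the header diff, while delete_route() and the M_* names are illustrative, and the real helper also sets delete_tid, drops live_count and takes the reference.

#include <stdio.h>

#define M_DELETED	0x00000010	/* chain has been deleted */
#define M_BMAPPED	0x00040000	/* chain present in the parent blockmap */
#define M_ONDBQ		0x00001000	/* parked on the dbq list */
#define M_ONDBTREE	0x00080000	/* parked on dbtree */
#define M_FLUSH_DELETE	0x00000080	/* flush must reconcile the block table */

/*
 * Mirror the two branches of _hammer2_chain_delete_helper(): choose the
 * shadow container based on the BMAPPED state and mark the chain for the
 * flusher in both cases.
 */
static unsigned int
delete_route(unsigned int flags)
{
	flags |= M_DELETED | M_FLUSH_DELETE;
	if (flags & M_BMAPPED)
		flags |= M_ONDBTREE;	/* flush must delete a media blockref */
	else
		flags |= M_ONDBQ;	/* deletion never reached the media */
	return flags;
}

int
main(void)
{
	printf("%08x\n", delete_route(M_BMAPPED));	/* lands on dbtree */
	printf("%08x\n", delete_route(0));		/* lands on dbq */
	return 0;
}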
+ * + * WARNING! Flusher concurrency can create two cases. The first is + * that the flusher might be working on a chain that has + * been deleted in the live view but is live in the flusher's + * view. In the second case the flusher may be duplicating + * a forward-transacted chain. In both situations nchain + * must be marked deleted. + * + * WARNING! hammer2_chain_core_alloc() also acts on these issues. */ nchain = hammer2_chain_alloc(hmp, ochain->pmp, trans, &ochain->bref); + if ((ochain->flags & HAMMER2_CHAIN_DELETED) || + (ochain->modify_tid > trans->sync_tid)) { + atomic_set_int(&nchain->flags, HAMMER2_CHAIN_DELETED); + } if (flags & HAMMER2_DELDUP_RECORE) hammer2_chain_core_alloc(trans, nchain, NULL); else @@ -2982,17 +3008,17 @@ hammer2_chain_delete_duplicate(hammer2_trans_t *trans, hammer2_chain_t **chainp, nchain->bytes = bytes; /* - * Duplicate inherits ochain's live state including its modification + * nchain inherits ochain's live state including its modification * state. This function disposes of the original. Because we are * doing this in-place under the same parent the block array * inserted/deleted state does not change. * - * The caller isn't expected to make further modifications of ochain - * but set the FORCECOW bit anyway, just in case it does. If ochain - * was previously marked FORCECOW we also flag nchain FORCECOW - * (used during hardlink splits). FORCECOW forces a reallocation - * of the block when we modify the chain a little later, it does - * not force another delete-duplicate. + * nchain copies ochain's data and must inherit ochain->update_lo. + * + * If ochain was previously marked FORCECOW we also flag nchain + * FORCECOW (used during hardlink splits). FORCECOW forces a + * reallocation of the block when we modify the chain a little later, + * it does not force another delete-duplicate. * * NOTE: bref.mirror_tid duplicated by virtue of bref copy in * hammer2_chain_alloc() @@ -3003,106 +3029,73 @@ hammer2_chain_delete_duplicate(hammer2_trans_t *trans, hammer2_chain_t **chainp, ochain->flags & (HAMMER2_CHAIN_INITIAL | HAMMER2_CHAIN_FORCECOW | HAMMER2_CHAIN_UNLINKED)); - atomic_set_int(&ochain->flags, HAMMER2_CHAIN_FORCECOW); + if (ochain->modify_tid == trans->sync_tid) + atomic_set_int(&ochain->flags, HAMMER2_CHAIN_FORCECOW); nchain->inode_reason = ochain->inode_reason + 0x1000; + nchain->update_lo = ochain->update_lo; + nchain->dsrc = ochain->bref; /* DEBUG */ + nchain->dsrc_dupfromat = trans->sync_tid; /* DEBUG */ + nchain->dsrc_dupfromflags = trans->flags; /* DEBUG */ + nchain->dsrc_reason = ochain->inode_reason; /* DEBUG */ + nchain->dsrc_ninserts = ochain->ninserts; /* DEBUG */ + nchain->dsrc_flags = ochain->flags; /* DEBUG */ + nchain->dsrc_modify = ochain->modify_tid; /* DEBUG */ + nchain->dsrc_delete = ochain->delete_tid; /* DEBUG */ + nchain->dsrc_update_lo = ochain->update_lo; /* DEBUG */ + nchain->dsrc_original = ochain; /* DEBUG */ /* * Lock nchain so both chains are now locked (extremely important - * for atomicy). Mark ochain deleted and reinsert into the topology - * and insert nchain all in one go. - * - * If the ochain is already deleted it is left alone and nchain - * is inserted into the topology as a deleted chain. This is - * important because it allows ongoing operations to be executed - * on a deleted inode which still has open descriptors. - * - * The deleted case can also occur when a flush delete-duplicates - * a node which is being concurrently modified by ongoing operations - * in a later transaction. 
This creates a problem because the flush - * is intended to update blockrefs which then propagate, allowing - * the original covering in-memory chains to be freed up. In this - * situation the flush code does NOT free the original covering - * chains and will re-apply them to successive copies. + * for atomicy). The shared core allows us to unlock ochain without + * actually unlocking ochain. */ hammer2_chain_lock(nchain, HAMMER2_RESOLVE_NEVER); /* extra ref still present from original allocation */ - KKASSERT(ochain->flags & HAMMER2_CHAIN_ONRBTREE); + KKASSERT(ochain->flags & (HAMMER2_CHAIN_ONRBTREE | + HAMMER2_CHAIN_ONDBTREE | + HAMMER2_CHAIN_ONDBQ)); spin_lock(&above->cst.spin); - KKASSERT(ochain->flags & HAMMER2_CHAIN_ONRBTREE); - /* - * Ultimately nchain->modify_tid will be set to trans->sync_tid, - * but we can't do that here because we want to call - * hammer2_chain_modify() to reallocate the block (if necessary). - */ nchain->modify_tid = ochain->modify_tid; + nchain->delete_tid = HAMMER2_MAX_TID; - if (ochain->flags & HAMMER2_CHAIN_DELETED) { + if (nchain->flags & HAMMER2_CHAIN_DELETED) { /* - * ochain was deleted + * Special case, used by the flush code only in two cases: + * + * (1) The flush must operate on a chain that is visible to + * the flush but deleted in the live view. + * + * (2) The flush must operate on a forward-indexed chain in + * the live view, which typically + * + * In these situations nchain will be marked deleted and + * insert before ochain. nchain must inherit certain features + * of ochain such as the BMAPPED state. */ - atomic_set_int(&nchain->flags, HAMMER2_CHAIN_DELETED); - if (ochain->delete_tid > trans->sync_tid) { - /* - * delete-duplicate a chain deleted in a later - * transaction. Only allowed on chains created - * before or during the current transaction (flush - * code should filter out chains created after the - * current transaction). - * - * To make this work is a bit of a hack. We convert - * ochain's delete_tid to the current sync_tid and - * create a nchain which sets up ochains original - * delete_tid. - * - * This effectively forces ochain to flush as a - * deletion and nchain as a creation. Thus MOVED - * must be set in ochain (it should already be - * set since it's original delete_tid could not - * have been flushed yet). Since ochain's delete_tid - * has been moved down to sync_tid, a re-flush at - * sync_tid won't try to delete-duplicate ochain - * again. - */ - KKASSERT(ochain->modify_tid <= trans->sync_tid); + KKASSERT(trans->flags & HAMMER2_TRANS_ISFLUSH); + KKASSERT(ochain->modify_tid < trans->sync_tid); + KKASSERT(ochain->delete_tid > trans->sync_tid); + atomic_set_int(&nchain->flags, HAMMER2_CHAIN_FLUSH_TEMPORARY); + hammer2_chain_insert(above, ochain, nchain, 0, 0); + + if ((ochain->flags & HAMMER2_CHAIN_DELETED) && + ochain->modify_tid < trans->sync_tid) { nchain->delete_tid = ochain->delete_tid; ochain->delete_tid = trans->sync_tid; - KKASSERT(ochain->flags & HAMMER2_CHAIN_MOVED); - } else if (ochain->delete_tid == trans->sync_tid) { - /* - * ochain was deleted in the current transaction - */ - nchain->delete_tid = trans->sync_tid; - } else { - /* - * ochain was deleted in a prior transaction. - * create and delete nchain in the current - * transaction. - * - * (delete_tid might represent a deleted inode - * which still has an open descriptor). 
- */ - nchain->delete_tid = trans->sync_tid; + } else if (ochain->modify_tid > trans->sync_tid) { + nchain->delete_tid = ochain->modify_tid; } - hammer2_chain_insert(above, ochain->inlayer, nchain, 0, 0); } else { /* - * ochain was not deleted, delete it in the current - * transaction. + * Normal case, delete-duplicate deletes ochain and nchain + * is the new live chain. */ - KKASSERT(trans->sync_tid >= ochain->modify_tid); - ochain->delete_tid = trans->sync_tid; - atomic_set_int(&ochain->flags, HAMMER2_CHAIN_DELETED); - atomic_add_int(&above->live_count, -1); - hammer2_chain_insert(above, NULL, nchain, + _hammer2_chain_delete_helper(trans, above, ochain); + hammer2_chain_insert(above, ochain, nchain, HAMMER2_CHAIN_INSERT_LIVE, 0); } - - if ((ochain->flags & HAMMER2_CHAIN_MOVED) == 0) { - hammer2_chain_ref(ochain); - atomic_set_int(&ochain->flags, HAMMER2_CHAIN_MOVED); - } spin_unlock(&above->cst.spin); /* @@ -3110,6 +3103,7 @@ hammer2_chain_delete_duplicate(hammer2_trans_t *trans, hammer2_chain_t **chainp, * a buffer cache buffer, so we need to release it so nchain can * potentially obtain it. */ + hammer2_chain_setsubmod(trans, ochain); hammer2_chain_unlock(ochain); /* @@ -3135,22 +3129,24 @@ hammer2_chain_delete_duplicate(hammer2_trans_t *trans, hammer2_chain_t **chainp, hammer2_chain_drop(nchain); /* - * Unconditionally set MOVED to force the parent blockrefs to + * Unconditionally set FLUSH_CREATE to force the parent blockrefs to * update as the chain_modify() above won't necessarily do it. - * - * Adjust update_hi below nchain so nchain's blockrefs are updated - * with the new attachment. */ - if ((nchain->flags & HAMMER2_CHAIN_MOVED) == 0) { - atomic_set_int(&nchain->flags, HAMMER2_CHAIN_MOVED); + if ((nchain->flags & HAMMER2_CHAIN_FLUSH_CREATE) == 0) { + atomic_set_int(&nchain->flags, HAMMER2_CHAIN_FLUSH_CREATE); hammer2_chain_ref(nchain); } -#if 0 - if (nchain->core->update_hi < trans->sync_tid) { - spin_lock(&nchain->core->cst.spin); - if (nchain->core->update_hi < trans->sync_tid) - nchain->core->update_hi = trans->sync_tid; - spin_unlock(&nchain->core->cst.spin); + + /* + * If nchain is in a DELETED state we must set FLUSH_DELETE + */ + if (nchain->flags & HAMMER2_CHAIN_DELETED) + KKASSERT((nchain->flags & HAMMER2_CHAIN_FLUSH_DELETE) == 0); +#if 1 + if ((nchain->flags & HAMMER2_CHAIN_FLUSH_DELETE) == 0 && + (nchain->flags & HAMMER2_CHAIN_DELETED)) { + atomic_set_int(&nchain->flags, HAMMER2_CHAIN_FLUSH_DELETE); + hammer2_chain_ref(nchain); } #endif hammer2_chain_setsubmod(trans, nchain); @@ -3432,9 +3428,6 @@ hammer2_chain_create_indirect(hammer2_trans_t *trans, hammer2_chain_t *parent, * We have to mark it modified to allocate its block, but use * OPTDATA to allow it to remain in the INITIAL state. Otherwise * it won't be acted upon by the flush code. - * - * XXX leave the node unmodified, depend on the update_hi - * flush to assign and modify parent blocks. */ hammer2_chain_modify(trans, &ichain, HAMMER2_MODIFY_OPTDATA); @@ -3509,7 +3502,7 @@ hammer2_chain_create_indirect(hammer2_trans_t *trans, hammer2_chain_t *parent, */ bcopy = *bref; spin_unlock(&above->cst.spin); - chain = hammer2_chain_get(parent, &bcopy, generation); + chain = hammer2_chain_get(parent, generation, &bcopy); if (chain == NULL) { reason = 1; spin_lock(&above->cst.spin); @@ -3580,9 +3573,9 @@ next_key_spinlocked: * cleared out some entries in the parent. We calculated a good * insertion index in the loop above (ichain->index). 
* - * We don't have to set MOVED here because we mark ichain modified - * down below (so the normal modified -> flush -> set-moved sequence - * applies). + * We don't have to set FLUSH_CREATE here because we mark ichain + * modified down below (so the normal modified -> flush -> set-moved + * sequence applies). * * The insertion shouldn't race as this is a completely new block * and the parent is locked. @@ -3599,19 +3592,8 @@ next_key_spinlocked: * also allocate the physical block in ichain for our caller, * and assign ichain->data to a pre-zero'd space (because there * is not prior data to copy into it). - * - * We have to set update_hi in ichain's flags manually so the - * flusher knows it has to recurse through it to get to all of - * our moved blocks, then call setsubmod() to set the bit - * recursively. */ /*hammer2_chain_modify(trans, &ichain, HAMMER2_MODIFY_OPTDATA);*/ - if (ichain->core->update_hi < trans->sync_tid) { - spin_lock(&ichain->core->cst.spin); - if (ichain->core->update_hi < trans->sync_tid) - ichain->core->update_hi = trans->sync_tid; - spin_unlock(&ichain->core->cst.spin); - } hammer2_chain_setsubmod(trans, ichain); /* @@ -3685,7 +3667,8 @@ hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp, } chain = hammer2_combined_find(parent, base, count, &cache_index, &key_next, - key_beg, key_end, &bref); + key_beg, key_end, + &bref); /* * Exhausted search @@ -3797,7 +3780,8 @@ hammer2_chain_indkey_normal(hammer2_chain_t *parent, hammer2_key_t *keyp, } chain = hammer2_combined_find(parent, base, count, &cache_index, &key_next, - key_beg, key_end, &bref); + key_beg, key_end, + &bref); /* * Exhausted search @@ -3916,7 +3900,7 @@ hammer2_chain_indkey_normal(hammer2_chain_t *parent, hammer2_key_t *keyp, } /* - * Sets CHAIN_DELETED and CHAIN_MOVED in the chain being deleted and + * Sets CHAIN_DELETED and CHAIN_FLUSH_DELETE in the chain being deleted and * set chain->delete_tid. The chain is not actually marked possibly-free * in the freemap until the deletion is completely flushed out (because * a flush which doesn't cover the entire deletion is flushing the deleted @@ -3963,61 +3947,17 @@ hammer2_chain_delete(hammer2_trans_t *trans, hammer2_chain_t *chain, int flags) * We need the spinlock on the core whos RBTREE contains chain * to protect against races. */ - KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); spin_lock(&chain->above->cst.spin); - - KKASSERT(trans->sync_tid >= chain->modify_tid); - chain->delete_tid = trans->sync_tid; - atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); - atomic_add_int(&chain->above->live_count, -1); - ++chain->above->generation; - - /* - * We must set MOVED along with DELETED for the flush code to - * recognize the operation and properly disconnect the chain - * in-memory. - */ - if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) { - hammer2_chain_ref(chain); - atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED); - } + _hammer2_chain_delete_helper(trans, chain->above, chain); spin_unlock(&chain->above->cst.spin); hammer2_chain_setsubmod(trans, chain); } -/* - * Called with the core spinlock held to check for freeable layers. - * Used by the flush code. Layers can wind up not being freed due - * to the temporary layer->refs count. This function frees up any - * layers that were missed. 
- */ -void -hammer2_chain_layer_check_locked(hammer2_mount_t *hmp, - hammer2_chain_core_t *core) -{ - hammer2_chain_layer_t *layer; - hammer2_chain_layer_t *tmp; - - tmp = TAILQ_FIRST(&core->layerq); - while ((layer = tmp) != NULL) { - tmp = TAILQ_NEXT(tmp, entry); - if (layer->refs == 0 && RB_EMPTY(&layer->rbtree)) { - TAILQ_REMOVE(&core->layerq, layer, entry); - if (tmp) - ++tmp->refs; - spin_unlock(&core->cst.spin); - kfree(layer, hmp->mchain); - spin_lock(&core->cst.spin); - if (tmp) - --tmp->refs; - } - } -} - /* * Returns the index of the nearest element in the blockref array >= elm. - * Returns (count) if no element could be found. + * Returns (count) if no element could be found. If delete_filter is non-zero + * the scan filters out any blockrefs which match deleted chains on dbtree. * * Sets *key_nextp to the next key for loop purposes but does not modify * it if the next key would be higher than the current value of *key_nextp. @@ -4030,12 +3970,13 @@ hammer2_chain_layer_check_locked(hammer2_mount_t *hmp, * The spin lock on the related chain must be held. */ int -hammer2_base_find(hammer2_chain_t *chain, +hammer2_base_find(hammer2_chain_t *parent, hammer2_blockref_t *base, int count, int *cache_indexp, hammer2_key_t *key_nextp, - hammer2_key_t key_beg, hammer2_key_t key_end) + hammer2_key_t key_beg, hammer2_key_t key_end, + int delete_filter) { - hammer2_chain_core_t *core = chain->core; + hammer2_chain_core_t *core = parent->core; hammer2_blockref_t *scan; hammer2_key_t scan_end; int i; @@ -4045,7 +3986,7 @@ hammer2_base_find(hammer2_chain_t *chain, * Require the live chain's already have their core's counted * so we can optimize operations. */ - KKASSERT((chain->flags & HAMMER2_CHAIN_DUPLICATED) || + KKASSERT((parent->flags & HAMMER2_CHAIN_DUPLICATED) || core->flags & HAMMER2_CORE_COUNTEDBREFS); /* @@ -4063,7 +4004,7 @@ hammer2_base_find(hammer2_chain_t *chain, */ i = *cache_indexp; cpu_ccfence(); - if (chain->flags & HAMMER2_CHAIN_DUPLICATED) + if (parent->flags & HAMMER2_CHAIN_DUPLICATED) limit = count; else limit = core->live_zero; @@ -4090,12 +4031,21 @@ hammer2_base_find(hammer2_chain_t *chain, */ while (i < count) { if (scan->type != 0) { - if (scan->key > key_beg) - break; scan_end = scan->key + ((hammer2_key_t)1 << scan->keybits) - 1; - if (scan_end >= key_beg) - break; + if (scan->key > key_beg || scan_end >= key_beg) { + /* + * Check to see if the entry is covered by + * a deleted chain and ignore the entry if + * it is and delete_filter != 0. + */ + if (delete_filter == 0) + break; + if (hammer2_chain_find_deleted( + parent, scan->key, scan_end) == NULL) { + break; + } + } } if (i >= limit) return (count); @@ -4124,11 +4074,15 @@ hammer2_base_find(hammer2_chain_t *chain, * both cases, or sets it to NULL if the search exhausted. Only returns * a non-NULL chain if the search matched from the in-memory chain. * + * When no in-memory chain has been found and a non-NULL bref is returned + * in *bresp. + * * Must be called with above's spinlock held. Spinlock remains held * through the operation. * * The returned chain is not locked or referenced. Use the returned bref - * to determine if the search exhausted or not. + * to determine if the search exhausted or not. Iterate if the base find + * is chosen but matches a deleted chain. */ static hammer2_chain_t * hammer2_combined_find(hammer2_chain_t *parent, @@ -4141,9 +4095,12 @@ hammer2_combined_find(hammer2_chain_t *parent, hammer2_chain_t *chain; int i; + /* + * Lookup in block array and in rbtree. 
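The delete_filter behaviour described for hammer2_base_find() can be shown with a simplified scan. The sketch below is hypothetical and user-space only: it drops the cached index, live_zero limit and key_next bookkeeping of the real function and keeps just the part the new parameter adds, namely skipping blockrefs whose key range is shadowed by a deleted in-memory chain.

    #include <stdint.h>
    #include <stddef.h>

    struct sim_blockref {
        uint64_t key;
        uint8_t  keybits;
        uint8_t  type;          /* 0 == empty slot */
    };

    /* returns non-zero if [key, key_end] is covered by a deleted chain */
    typedef int (*sim_deleted_cover_fn)(void *arg, uint64_t key,
                                        uint64_t key_end);

    static int
    sim_base_find(const struct sim_blockref *base, int count,
                  uint64_t key_beg, int delete_filter,
                  sim_deleted_cover_fn covered, void *arg)
    {
        int i;

        for (i = 0; i < count; ++i) {
            const struct sim_blockref *scan = &base[i];
            uint64_t scan_end;

            if (scan->type == 0)
                continue;
            scan_end = scan->key + (((uint64_t)1 << scan->keybits) - 1);
            if (scan->key > key_beg || scan_end >= key_beg) {
                /* candidate; optionally ignore entries shadowed by
                 * deleted chains on dbtree */
                if (delete_filter == 0)
                    return i;
                if (!covered(arg, scan->key, scan_end))
                    return i;
            }
        }
        return count;           /* search exhausted */
    }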
+ */ *key_nextp = key_end + 1; i = hammer2_base_find(parent, base, count, cache_indexp, - key_nextp, key_beg, key_end); + key_nextp, key_beg, key_end, 1); chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end); /* @@ -4151,11 +4108,11 @@ hammer2_combined_find(hammer2_chain_t *parent, */ if (i == count && chain == NULL) { *bresp = NULL; - return(chain); /* NULL */ + return(NULL); } /* - * Only chain matched + * Only chain matched. */ if (i == count) { bref = &chain->bref; @@ -4171,38 +4128,27 @@ hammer2_combined_find(hammer2_chain_t *parent, } /* - * Both in-memory and blockref match. Select the nearer element. - * If both are flush with the left-hand side they are considered - * to be the same distance. + * Both in-memory and blockref matched, select the nearer element. * - * When both are the same distance away select the chain if it is - * live or if it's delete_tid is greater than the parent's - * synchronized bref.mirror_tid (a single test suffices for both - * conditions), otherwise select the element. - * - * (It is possible for an old deletion to linger after a rename-over - * and flush, which would make the media copy the correct choice). - */ - - /* - * Either both are flush with the left-hand side or they are the - * same distance away. Select the chain if it is not deleted - * or it has a higher delete_tid, else select the media. + * If both are flush with the left-hand side or both are the + * same distance away, select the chain. In this situation the + * chain must have been loaded from the matching blockmap. */ if ((chain->bref.key <= key_beg && base[i].key <= key_beg) || chain->bref.key == base[i].key) { - if (chain->delete_tid > base[i].mirror_tid) { - bref = &chain->bref; - } else { - KKASSERT(chain->flags & HAMMER2_CHAIN_DELETED); - bref = &base[i]; - chain = NULL; + KKASSERT(chain->bref.key == base[i].key); + if ((chain->flags & HAMMER2_CHAIN_BMAPPED) == 0) { + kprintf("chain not bmapped %p.%d %08x\n", chain, chain->bref.type, chain->flags); + kprintf("in chain mod/del %016jx %016jx\n", chain->modify_tid, chain->delete_tid); + kprintf("and updlo/hi %016jx %016jx\n", chain->update_lo, chain->update_hi); } + KKASSERT(chain->flags & HAMMER2_CHAIN_BMAPPED); + bref = &chain->bref; goto found; } /* - * Select the nearer key. + * Select the nearer key */ if (chain->bref.key < base[i].key) { bref = &chain->bref; @@ -4251,17 +4197,21 @@ hammer2_base_delete(hammer2_trans_t *trans, hammer2_chain_t *parent, */ key_next = 0; /* max range */ i = hammer2_base_find(parent, base, count, cache_indexp, - &key_next, elm->key, elm->key); + &key_next, elm->key, elm->key, 0); if (i == count || base[i].type == 0 || base[i].key != elm->key || base[i].keybits != elm->keybits) { - panic("delete base %p element not found at %d/%d elm %p\n", - base, i, count, elm); + spin_unlock(&core->cst.spin); + panic("delete base %p element not found at %d/%d elm %p\n" + "child ino_reason=%08x\n", + base, i, count, elm, + child->inode_reason); return; } bzero(&base[i], sizeof(*base)); - base[i].mirror_tid = (intptr_t)parent; /* debug */ - base[i].modify_tid = (intptr_t)child; /* debug */ - base[i].check.debug.sync_tid = trans->sync_tid; /* debug */ + base[i].mirror_tid = (intptr_t)parent; /* MEDIA DEBUG */ + base[i].modify_tid = (intptr_t)child; /* MEDIA DEBUG */ + base[i].check.debug.sync_tid = trans->sync_tid; /* MEDIA DEBUG */ + ++parent->nremoves; /* DEBUG */ /* * We can only optimize core->live_zero for live chains. 
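The selection rule hammer2_combined_find() applies when both an in-memory chain and a media blockref match can be reduced to a small decision function. The following is a hypothetical sketch of just that rule, with the refs/locks and the BMAPPED assertion of the real code left out.

    #include <stdint.h>

    enum sim_pick { SIM_PICK_NONE, SIM_PICK_CHAIN, SIM_PICK_MEDIA };

    static enum sim_pick
    sim_combined_pick(int have_chain, uint64_t chain_key,
                      int have_media, uint64_t media_key,
                      uint64_t key_beg)
    {
        if (!have_chain && !have_media)
            return SIM_PICK_NONE;       /* search exhausted */
        if (!have_media)
            return SIM_PICK_CHAIN;      /* only the chain matched */
        if (!have_chain)
            return SIM_PICK_MEDIA;      /* only the blockref matched */

        /*
         * Both matched.  If both are flush with the left edge of the
         * search range, or share the same key, the in-memory chain wins;
         * in that situation it must have been loaded from this very
         * blockmap entry.
         */
        if ((chain_key <= key_beg && media_key <= key_beg) ||
            chain_key == media_key)
            return SIM_PICK_CHAIN;

        /* otherwise the nearer (smaller) key wins */
        return (chain_key < media_key) ? SIM_PICK_CHAIN : SIM_PICK_MEDIA;
    }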
@@ -4308,7 +4258,7 @@ hammer2_base_insert(hammer2_trans_t *trans __unused, hammer2_chain_t *parent, */ key_next = 0; /* max range */ i = hammer2_base_find(parent, base, count, cache_indexp, - &key_next, elm->key, elm->key); + &key_next, elm->key, elm->key, 0); /* * Shortcut fill optimization, typical ordered insertion(s) may not @@ -4323,14 +4273,23 @@ hammer2_base_insert(hammer2_trans_t *trans __unused, hammer2_chain_t *parent, if ((parent->flags & HAMMER2_CHAIN_DUPLICATED) == 0) { i = core->live_zero++; base[i] = *elm; + ++parent->ninserts; /* DEBUG */ return; } } xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1; if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) { - panic("insert base %p overlapping elements at %d elm %p\n", - base, i, elm); + if (child->flags & HAMMER2_CHAIN_FLUSH_TEMPORARY) { + kprintf("child %p special replace\n", child); + base[i] = *elm; + return; + } else { + spin_unlock(&core->cst.spin); + panic("insert base %p overlapping " + "elements at %d elm %p\n", + base, i, elm); + } } /* @@ -4348,6 +4307,7 @@ hammer2_base_insert(hammer2_trans_t *trans __unused, hammer2_chain_t *parent, (i - j - 1) * sizeof(*base)); base[i - 1] = *elm; } + ++parent->ninserts; /* DEBUG */ goto validate; } ++k; @@ -4365,6 +4325,7 @@ hammer2_base_insert(hammer2_trans_t *trans __unused, hammer2_chain_t *parent, core->live_zero = k + 1; } u = 2; + ++parent->ninserts; /* DEBUG */ goto validate; } } @@ -4576,6 +4537,7 @@ hammer2_chain_memory_wakeup(hammer2_pfsmount_t *pmp) break; } } + if (waiting & HAMMER2_DIRTYCHAIN_WAITING) wakeup(&pmp->inmem_dirty_chains); } diff --git a/sys/vfs/hammer2/hammer2_disk.h b/sys/vfs/hammer2/hammer2_disk.h index ec62c81773..558f0ab74d 100644 --- a/sys/vfs/hammer2/hammer2_disk.h +++ b/sys/vfs/hammer2/hammer2_disk.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved. + * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon diff --git a/sys/vfs/hammer2/hammer2_flush.c b/sys/vfs/hammer2/hammer2_flush.c index f07e8c2c00..7091aae73e 100644 --- a/sys/vfs/hammer2/hammer2_flush.c +++ b/sys/vfs/hammer2/hammer2_flush.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. + * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. 
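The live_zero shortcut used by hammer2_base_insert() above amounts to an O(1) append for ordered insertions, with a shift-insert fallback. The sketch below is a hypothetical, simplified model: it assumes there are no empty interior slots below live_zero (the real code also reuses holes and asserts on overlapping keys), and it omits the spinlock and the DUPLICATED-parent handling.

    #include <stdint.h>
    #include <string.h>

    struct sim_blockref {
        uint64_t key;
        uint8_t  keybits;
        uint8_t  type;              /* 0 == empty slot */
    };

    struct sim_core {
        int live_zero;              /* first never-used slot at the end */
    };

    static int
    sim_base_insert(struct sim_core *core, struct sim_blockref *base,
                    int count, const struct sim_blockref *elm)
    {
        int i = core->live_zero;
        int j;

        /* fast path: element sorts after everything currently live */
        if (i < count && (i == 0 || base[i - 1].key < elm->key)) {
            base[i] = *elm;
            core->live_zero = i + 1;
            return 0;
        }
        if (core->live_zero >= count)
            return -1;              /* array full (simplified) */

        /* slow path: shift the tail up one slot, insert in sorted order */
        for (j = 0; j < core->live_zero && base[j].key < elm->key; ++j)
            ;
        memmove(&base[j + 1], &base[j],
                (size_t)(core->live_zero - j) * sizeof(*base));
        base[j] = *elm;
        core->live_zero++;
        return 0;
    }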
* * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -56,7 +56,6 @@ struct hammer2_flush_info { hammer2_trans_t *trans; int depth; int diddeferral; - int pass; int cache_index; int domodify; struct h2_flush_deferral_list flush_list; @@ -65,15 +64,16 @@ struct hammer2_flush_info { typedef struct hammer2_flush_info hammer2_flush_info_t; -static void hammer2_chain_flush_core(hammer2_flush_info_t *info, - hammer2_chain_t **chainp); -static int hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data); -static int hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data); -static void hammer2_flush_core_update(hammer2_chain_core_t *core, - hammer2_flush_info_t *info); +static void hammer2_flush_core(hammer2_flush_info_t *info, + hammer2_chain_t **chainp, int deleting); +static int hammer2_flush_pass1(hammer2_chain_t *child, void *data); +static int hammer2_flush_pass2(hammer2_chain_t *child, void *data); +static int hammer2_flush_pass3(hammer2_chain_t *child, void *data); +static int hammer2_flush_pass4(hammer2_chain_t *child, void *data); static void hammer2_rollup_stats(hammer2_chain_t *parent, hammer2_chain_t *child, int how); + /* * Can we ignore a chain for the purposes of flushing modifications * to the media? @@ -130,11 +130,6 @@ hammer2_updatestats(hammer2_flush_info_t *info, hammer2_blockref_t *bref, * buffer-cache flush will run either before or after the current pending * flush depending on its state. * - * sync_tid vs real_tid. For flush transactions ONLY, the flush operation - * actually uses two transaction ids, one for the flush operation itself, - * and for any freemap allocations made as a side-effect. real_tid - * is fixed at , sync_tid is adjusted dynamically as-needed. - * * NOTE: The sync_tid for a flush's freemap allocation will match the * sync_tid of the following transaction(s). * The freemap topology will be out-of-step by one transaction id @@ -171,13 +166,14 @@ hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp, * We queue ourselves and then wait to become the head * of the queue, allowing all prior flushes to complete. * - * A unique transaction id is required to avoid confusion - * when updating the block tables. + * Multiple normal transactions can share the current + * transaction id but a flush transaction needs its own + * unique TID for proper block table update accounting. 
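The TID rules stated in that comment, and implemented by the trans_init code that follows, can be summarized in a small model. The sketch below is hypothetical and heavily simplified: the cur_flush_tid field and the simple flushcnt test stand in for the real transaction queue (transq) and its head/ordering logic, and no locking or blocking is shown.

    #include <stdint.h>

    struct sim_mount {
        uint64_t alloc_tid;         /* monotonically increasing */
        uint64_t cur_flush_tid;     /* TID of the running flush, if any */
        int      flushcnt;
    };

    struct sim_trans {
        uint64_t sync_tid;
        int      is_flush;
        int      concurrent;        /* started while a flush was running */
    };

    static void
    sim_trans_init(struct sim_mount *mp, struct sim_trans *trans, int is_flush)
    {
        trans->is_flush = is_flush;
        trans->concurrent = 0;

        if (is_flush) {
            /* a flush burns its own unique TID */
            ++mp->flushcnt;
            trans->sync_tid = ++mp->alloc_tid;
            mp->cur_flush_tid = trans->sync_tid;
            ++mp->alloc_tid;        /* room for concurrent transactions */
        } else if (mp->flushcnt == 0) {
            /* no flush running: normal transactions may share the TID */
            trans->sync_tid = mp->alloc_tid;
        } else {
            /* flush running: run logically after it */
            trans->sync_tid = mp->cur_flush_tid + 1;
            trans->concurrent = 1;
        }
    }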
*/ ++hmp->flushcnt; ++hmp->voldata.alloc_tid; trans->sync_tid = hmp->voldata.alloc_tid; - trans->real_tid = trans->sync_tid; + trans->orig_tid = trans->sync_tid; ++hmp->voldata.alloc_tid; TAILQ_INSERT_TAIL(&hmp->transq, trans, entry); if (TAILQ_FIRST(&hmp->transq) != trans) { @@ -193,7 +189,7 @@ hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp, */ TAILQ_INSERT_TAIL(&hmp->transq, trans, entry); trans->sync_tid = hmp->voldata.alloc_tid; - trans->real_tid = trans->sync_tid; + trans->orig_tid = trans->sync_tid; /* XXX improve/optimize inode allocation */ } else { @@ -210,11 +206,19 @@ hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp, } KKASSERT(head); TAILQ_INSERT_AFTER(&hmp->transq, head, trans, entry); - trans->sync_tid = head->real_tid + 1; - trans->real_tid = trans->sync_tid; + trans->sync_tid = head->orig_tid + 1; + trans->orig_tid = trans->sync_tid; + trans->flags |= HAMMER2_TRANS_CONCURRENT; if ((trans->flags & HAMMER2_TRANS_BUFCACHE) == 0) { - if (TAILQ_FIRST(&hmp->transq) != head) { + /* + * If synchronous flush mode is enabled concurrent + * frontend transactions during the flush are not + * allowed (except we don't have a choice for buffer + * cache ops). + */ + if (hammer2_synchronous_flush > 0 || + TAILQ_FIRST(&hmp->transq) != head) { trans->blocked = 1; while (trans->blocked) { lksleep(&trans->sync_tid, @@ -245,23 +249,44 @@ hammer2_trans_done(hammer2_trans_t *trans) hmp = trans->hmp_single; /* - * Remove and adjust flushcnt + * Remove. */ hammer2_voldata_lock(hmp); TAILQ_REMOVE(&hmp->transq, trans, entry); - if (trans->flags & HAMMER2_TRANS_ISFLUSH) + head = TAILQ_FIRST(&hmp->transq); + + /* + * Adjust flushcnt if this was a flush, clear TRANS_CONCURRENT + * up through the next flush. (If the head is a flush then we + * stop there, unlike the unblock code following this section). + */ + if (trans->flags & HAMMER2_TRANS_ISFLUSH) { --hmp->flushcnt; + scan = head; + while (scan && (scan->flags & HAMMER2_TRANS_ISFLUSH) == 0) { + atomic_clear_int(&scan->flags, + HAMMER2_TRANS_CONCURRENT); + scan = TAILQ_NEXT(head, entry); + } + } /* * Unblock the head of the queue and any additional transactions - * up to the next flush. + * up to the next flush. The head can be a flush and it will be + * unblocked along with the non-flush transactions following it + * (which are allowed to run concurrently with it). + * + * In synchronous flush mode we stop if the head transaction is + * a flush. */ - head = TAILQ_FIRST(&hmp->transq); if (head && head->blocked) { head->blocked = 0; wakeup(&head->sync_tid); - scan = TAILQ_NEXT(head, entry); + if (hammer2_synchronous_flush > 0) + scan = head; + else + scan = TAILQ_NEXT(head, entry); while (scan && (scan->flags & HAMMER2_TRANS_ISFLUSH) == 0) { if (scan->blocked) { scan->blocked = 0; @@ -289,19 +314,15 @@ hammer2_trans_done(hammer2_trans_t *trans) * these sorts of synchronization points. * * This routine can be called from several places but the most important - * is from the hammer2_vop_reclaim() function. We want to try to completely - * clean out the inode structure to prevent disconnected inodes from - * building up and blowing out the kmalloc pool. However, it is not actually - * necessary to flush reclaimed inodes to maintain HAMMER2's crash recovery - * capability. + * is from VFS_SYNC. * * chain is locked on call and will remain locked on return. If a flush - * occured, the chain's MOVED bit will be set indicating that its parent - * (which is not part of the flush) should be updated. 
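The unblock behaviour of hammer2_trans_done() described above can be sketched with the same sys/queue.h TAILQ macros the kernel uses (the header is also available in user space on BSD and glibc systems). This is a hypothetical simplification: lksleep/wakeup are replaced by a plain flag, the CONCURRENT-clearing walk is folded in, and the walk always advances through the scan pointer itself so it cannot revisit the same element.

    #include <sys/queue.h>
    #include <stddef.h>

    struct sim_trans {
        TAILQ_ENTRY(sim_trans) entry;
        int is_flush;
        int blocked;
    };
    TAILQ_HEAD(sim_transq, sim_trans);

    static void
    sim_wakeup(struct sim_trans *trans)
    {
        trans->blocked = 0;             /* stands in for wakeup() */
    }

    static void
    sim_trans_done(struct sim_transq *transq, struct sim_trans *trans,
                   int synchronous_flush)
    {
        struct sim_trans *head;
        struct sim_trans *scan;

        TAILQ_REMOVE(transq, trans, entry);
        head = TAILQ_FIRST(transq);
        if (head == NULL || !head->blocked)
            return;

        sim_wakeup(head);

        /* in synchronous mode nothing runs concurrently with a flush head */
        if (synchronous_flush && head->is_flush)
            return;

        for (scan = TAILQ_NEXT(head, entry);
             scan != NULL && !scan->is_flush;
             scan = TAILQ_NEXT(scan, entry)) {
            if (scan->blocked)
                sim_wakeup(scan);
        }
    }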
The chain may be - * replaced by the call. + * occured, the chain's FLUSH_CREATE and/or FLUSH_DELETE bit will be set + * indicating that its parent (which is not part of the flush) should be + * updated. The chain may be replaced by the call if it was modified. */ void -hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp) +hammer2_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp) { hammer2_chain_t *chain = *chainp; hammer2_chain_t *scan; @@ -324,9 +345,6 @@ hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp) info.cache_index = -1; core = chain->core; -#if FLUSH_DEBUG - kprintf("CHAIN FLUSH trans %p.%016jx chain %p.%d mod %016jx upd %016jx\n", trans, trans->sync_tid, chain, chain->bref.type, chain->modify_tid, core->update_lo); -#endif /* * Extra ref needed because flush_core expects it when replacing @@ -338,8 +356,8 @@ hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp) for (;;) { /* * Unwind deep recursions which had been deferred. This - * can leave MOVED set for these chains, which will be - * handled when we [re]flush chain after the unwind. + * can leave the FLUSH_* bits set for these chains, which + * will be handled when we [re]flush chain after the unwind. */ while ((scan = TAILQ_FIRST(&info.flush_list)) != NULL) { KKASSERT(scan->flags & HAMMER2_CHAIN_DEFERRED); @@ -350,13 +368,13 @@ hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp) * Now that we've popped back up we can do a secondary * recursion on the deferred elements. * - * NOTE: hammer2_chain_flush() may replace scan. + * NOTE: hammer2_flush() may replace scan. */ if (hammer2_debug & 0x0040) kprintf("deferred flush %p\n", scan); hammer2_chain_lock(scan, HAMMER2_RESOLVE_MAYBE); hammer2_chain_drop(scan); /* ref from deferral */ - hammer2_chain_flush(trans, &scan); + hammer2_flush(trans, &scan); hammer2_chain_unlock(scan); } @@ -364,11 +382,7 @@ hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp) * [re]flush chain. */ info.diddeferral = 0; - hammer2_chain_flush_core(&info, &chain); -#if FLUSH_DEBUG - kprintf("flush_core_done parent= chain=%p.%d %08x\n", - chain, chain->bref.type, chain->flags); -#endif + hammer2_flush_core(&info, &chain, 0); /* * Only loop if deep recursions have been deferred. @@ -377,7 +391,7 @@ hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp) break; if (++loops % 1000 == 0) { - kprintf("hammer2_chain_flush: excessive loops on %p\n", + kprintf("hammer2_flush: excessive loops on %p\n", chain); if (hammer2_debug & 0x100000) Debugger("hell4"); @@ -390,76 +404,85 @@ hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp) /* * This is the core of the chain flushing code. The chain is locked by the * caller and must also have an extra ref on it by the caller, and remains - * locked and will have an extra ref on return. + * locked and will have an extra ref on return. Upon return, the caller can + * test the FLUSH_CREATE and FLUSH_DELETE bits to determine what action must + * be taken on the parent. * - * If the flush accomplished any work chain will be flagged MOVED - * indicating a copy-on-write propagation back up is required. - * Deep sub-nodes may also have been entered onto the deferral list. - * MOVED is never set on the volume root. + * (1) Determine if this node is a candidate for the flush, return if it is + * not. fchain and vchain are always candidates for the flush. * - * NOTE: modify_tid is different from MODIFIED. 
modify_tid is updated - * only when a chain is specifically modified, and not updated - * for copy-on-write propagations. MODIFIED is set on any modification - * including copy-on-write propagations. + * (2) If we recurse too deep the chain is entered onto the deferral list and + * the current flush stack is aborted until after the deferral list is + * run. * - * NOTE: We are responsible for updating chain->bref.mirror_tid and - * core->update_lo The caller is responsible for processing us into - * our parent (if any). + * (3) Recursively flush live children (rbtree). This can create deferrals. + * A successful flush clears the MODIFIED bit in the children. + * + * (4) Recursively flush deleted children (dbtree). Deletions may be + * considered 'live' if the delete_tid is beyond the flush_tid. If + * considered 'dead' the recursion is still needed in order to clean + * up the chain. This can create deferrals. * - * We are also responsible for updating chain->core->update_lo to - * prevent repeated recursions due to deferrals. + * A successful flush clears the MODIFIED bit in the children. * - * WARNING! bref.mirror_tid may only be updated if either the MODIFIED bit - * is already zero or if we clear it. + * (5) Calculate block table updates on chain based on the children scans + * in (3) and (4) by testing the FLUSH_CREATE and FLUSH_DELETE bits, + * modifying chain if necessary to perform the block table updates. + * Deletions must be removed from dbtree when removed from the + * chain's block table. * + * If 'chain' itself is marked DELETED but treated as live, the block + * table update(s) must be propagated to all contemporary chains. In + * fact, all contemporary chains must be locked and updated uninterrupted + * to avoid lookup races. Once MODIFIED and FLUSH_CREATE is cleared, + * a chain can be unloaded from memory with the expectation that it can + * be reloaded later via the block table at any time. + * + * NOTE: chain->bref.modify_tid is different from chain->modify_tid. COW + * propagations for block updates do not update chain->bref.modify_tid, + * only chain->bref.mirror_tid. The MODIFIED bit is set on any + * modified chain, including COW propagations, but the flusher normally + * just keys off of the FLUSH_* bits. FLUSH_CREATE will also be set + * in this situation. + * + * NOTE: We are responsible for updating chain->bref.mirror_tid and + * chain->update_lo. The caller is responsible for processing us into + * our parent (if any). */ static void -hammer2_chain_flush_core(hammer2_flush_info_t *info, hammer2_chain_t **chainp) +hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t **chainp, + int deleting) { hammer2_chain_t *chain = *chainp; hammer2_chain_t *saved_parent; hammer2_mount_t *hmp; hammer2_chain_core_t *core; -#if 0 - hammer2_blockref_t *bref; - char *bdata; - hammer2_io_t *dio; - int error; -#endif int diddeferral; + int saved_domodify; hmp = chain->hmp; core = chain->core; diddeferral = info->diddeferral; /* - * Check if we even have any work to do. - * - * We do not update core->update_lo because there might be other - * paths to the core and we haven't actually checked it. + * (1) Check if we even have any work to do. * * This bit of code is capable of short-cutting entire sub-trees - * if they have not been touched. + * if they have not been touched or if they have already been + * flushed. 
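The five numbered steps in that comment map onto a fairly small control flow. The skeleton below is a hypothetical outline only, not hammer2_flush_core(): the child walks, block table work and media write are reduced to stubs, and the deferral list is reduced to a flag, but the ordering (prune, defer, recurse live, recurse deleted, update block table, then write on the way back up) follows the description above.

    #include <stdint.h>

    #define SIM_DEPTH_LIMIT 10

    struct sim_chain { uint64_t update_lo, update_hi; int deferred; };
    struct sim_flush_info { uint64_t sync_tid; int depth; int diddeferral; };

    /* placeholders; the real code walks rbtree, dbtree and dbq */
    static void sim_flush_children(struct sim_flush_info *info,
                                   struct sim_chain *c, int deleted)
    { (void)info; (void)c; (void)deleted; }
    static void sim_update_block_table(struct sim_flush_info *info,
                                       struct sim_chain *c)
    { (void)info; (void)c; }
    static void sim_write_chain(struct sim_flush_info *info,
                                struct sim_chain *c)
    { (void)info; (void)c; }

    static void
    sim_flush_core(struct sim_flush_info *info, struct sim_chain *chain)
    {
        /* (1) prune: already synchronized, or untouched since last flush */
        if (chain->update_lo >= info->sync_tid ||
            chain->update_lo >= chain->update_hi)
            return;

        /* (2) defer overly deep recursions; they are re-run from the top */
        if (info->depth == SIM_DEPTH_LIMIT) {
            chain->deferred = 1;
            ++info->diddeferral;
            return;
        }

        /* (3) recurse into live children (rbtree) */
        ++info->depth;
        sim_flush_children(info, chain, 0);

        /* (4) recurse into deleted children (dbtree / dbq) */
        sim_flush_children(info, chain, 1);
        --info->depth;

        if (info->diddeferral)
            return;                     /* retry after deferrals run */

        /* (5) fold child FLUSH_CREATE/FLUSH_DELETE into the block table */
        sim_update_block_table(info, chain);

        /* write the chain itself on the way back up the recursion */
        sim_write_chain(info, chain);
        chain->update_lo = info->sync_tid;
    }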
*/ - if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0 && - (core->update_lo >= info->sync_tid || - chain->bref.mirror_tid >= info->sync_tid || - chain->bref.mirror_tid >= core->update_hi)) { - KKASSERT(chain->modify_tid <= info->sync_tid); - /* don't update update_lo, there may be other paths to core */ - /* don't update bref.mirror_tid, scan2 is not called */ + if (/*(chain->flags & HAMMER2_CHAIN_MODIFIED) == 0 &&*/ + (chain->update_lo >= info->sync_tid || /* already synced */ + chain->update_lo >= chain->update_hi)) { /* old/unchanged */ + /* update_lo/_hi already filters chain out, do not update */ + /* don't update bref.mirror_tid, pass2 is not called */ return; } /* - * Ignore chains which have already been flushed through the current - * synchronization point. + * mirror_tid should not be forward-indexed */ - KKASSERT (chain->bref.mirror_tid <= info->sync_tid); - if (chain->bref.mirror_tid == info->sync_tid) { - /* do not update core->update_lo, there may be another path */ - return; - } + KKASSERT(chain->bref.mirror_tid <= info->sync_tid); /* * Ignore chains modified beyond the current flush point. These @@ -472,442 +495,315 @@ hammer2_chain_flush_core(hammer2_flush_info_t *info, hammer2_chain_t **chainp) * to 1 during inode flush tid 1 the blockrefs would only be partially * updated (and likely panic). * - * Do not update core->update_lo here, there might be other paths - * to the core and we haven't actually flushed it. + * We must update chain->update_lo here to prevent re-entry in this + * flush transaction. * * (vchain and fchain are exceptions since they cannot be duplicated) */ if (chain->modify_tid > info->sync_tid && chain != &hmp->fchain && chain != &hmp->vchain) { - /* do not update bref.mirror_tid, scan2 ignores chain */ - /* do not update core->update_lo, there may be another path */ + /* do not update bref.mirror_tid, pass2 ignores chain */ + /* chain->update_lo = info->sync_tid; */ return; } - saved_parent = info->parent; - info->parent = chain; -retry: - /* - * Chains deleted as-of the flush synchronization point require some - * special early handling to avoid double flushing because multiple - * deletions are sometimes forced within the same transaction. - * Allowing the flush to proceed through more than one path can wind - * up updating the chain's block table twice and cause an assertion. + * (2) Recurse downward and check recursion depth. + * (3) Flush live children + * (4) Flush deleted children * - * We don't check the 'same transaction' part but simply punt in this - * situation. We must still check for multiple deletions, since any - * terminal (non-stale) deletion still requires processing to at - * least clean up the children, and also (for inodes) there might - * still be an open descriptor. + * We adjust update_lo if not deferring chain to prevent re-entry + * in this flush cycle, but it must be set AFTER the flush in case + * a deeper flush hits the chain. Otherwise the deeper flush cannot + * complete. We re-check the condition after finishing the flushes. * - * Clear MODIFIED but set MOVED to ensure that the parent still - * deals with it. + * update_hi was already checked and prevents initial recursions on + * subtrees which have not been modified. */ - if (chain->delete_tid <= info->sync_tid && - (chain->flags & HAMMER2_CHAIN_DUPLICATED)) { - if (chain->flags & HAMMER2_CHAIN_MODIFIED) { -#if 0 - /* - * XXX should be able to invalidate the buffer here. - * XXX problem if reused, snapshotted, or reactivated. 
- */ - if (chain->dio) { - hammer2_io_setinval(chain->dio, chain->bytes); - } -#endif - if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) { - hammer2_chain_ref(chain); - atomic_set_int(&chain->flags, - HAMMER2_CHAIN_MOVED); - } - atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); - hammer2_chain_memory_wakeup(chain->pmp); - hammer2_chain_drop(chain); + saved_parent = info->parent; + saved_domodify = info->domodify; + info->parent = chain; + info->domodify = 0; + + if (chain->flags & HAMMER2_CHAIN_DEFERRED) { + ++info->diddeferral; + } else if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT) { + if ((chain->flags & HAMMER2_CHAIN_DEFERRED) == 0) { + hammer2_chain_ref(chain); + TAILQ_INSERT_TAIL(&info->flush_list, + chain, flush_node); + atomic_set_int(&chain->flags, + HAMMER2_CHAIN_DEFERRED); } + ++info->diddeferral; + } else { + hammer2_chain_t *scan; /* - * Update mirror_tid, indicating that chain is synchronized - * on its modification and block table. This probably isn't - * needed since scan2 should ignore deleted chains anyway. + * The flush is queue-agnostic when running pass1, but order + * is important to catch any races where an existing + * flush-visible child is moved from rbtree->dbtree/dbq. * - * NOTE: bref.mirror_tid cannot be updated - * unless MODIFIED is cleared or already - * clear. + * New children added by concurrent operations are not visible + * to the flush anyway so we don't care about those races. + * However, the flush itself can move a child from dbq to + * dbtree (rare in pass1 but it is possible). + * + * pass1 can handle re-execution of a child. */ - if (chain->bref.mirror_tid < info->sync_tid) - chain->bref.mirror_tid = info->sync_tid; - /* do not update core->update_lo, there may be another path */ - info->parent = saved_parent; - return; + spin_lock(&core->cst.spin); + KKASSERT(core->good == 0x1234 && core->sharecnt > 0); + RB_SCAN(hammer2_chain_tree, &core->rbtree, + NULL, hammer2_flush_pass1, info); + RB_SCAN(hammer2_chain_tree, &core->dbtree, + NULL, hammer2_flush_pass1, info); + scan = TAILQ_FIRST(&core->dbq); + while (scan) { + KKASSERT(scan->flags & HAMMER2_CHAIN_ONDBQ); + hammer2_flush_pass1(scan, info); + if (scan->flags & HAMMER2_CHAIN_ONDBQ) + scan = TAILQ_NEXT(scan, db_entry); + else + scan = TAILQ_FIRST(&core->dbq); + } + spin_unlock(&core->cst.spin); } /* - * Recurse if we are not up-to-date. Once we are done we will - * update update_lo if there were no deferrals. update_lo can become - * higher than update_hi and is used to prevent re-recursions during - * the same flush cycle. - * - * update_hi was already checked and prevents initial recursions on - * subtrees which have not been modified. + * Stop if deferred, do not update update_lo. + */ + if (info->diddeferral) + goto done; + + /* + * If a block table update is needed place the parent in a modified + * state, which might delete-duplicate it. * - * NOTE: We must recurse whether chain is flagged DELETED or not. - * However, if it is flagged DELETED we limit sync_tid to - * delete_tid to ensure that the chain's bref.mirror_tid is - * not fully updated and causes it to miss the non-DELETED - * path. + * - To prevent loops and other confusion, we synchronize update_lo + * for the original chain. * - * NOTE: If a deferral occurs hammer2_chain_flush() will flush the - * deferred chain independently which will update it's - * bref.mirror_tid and prevent it from deferred again. + * - The original parent will not be used by the flush so we can + * clear its MODIFIED bit. 
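The dbq walk in the pass1 code above uses a defensive iteration pattern: the callback may move the current element off the queue, so the walk only advances with TAILQ_NEXT() while the element is still on the queue and otherwise restarts from the head. The sketch below isolates that idiom in user space; the names are hypothetical, and as noted above the restart can re-execute entries, which the real code tolerates via update_lo.

    #include <sys/queue.h>
    #include <stddef.h>

    struct sim_node {
        TAILQ_ENTRY(sim_node) db_entry;
        int on_queue;                   /* stands in for CHAIN_ONDBQ */
    };
    TAILQ_HEAD(sim_dbq, sim_node);

    static void
    sim_walk_dbq(struct sim_dbq *dbq, void (*func)(struct sim_node *))
    {
        struct sim_node *scan;

        scan = TAILQ_FIRST(dbq);
        while (scan != NULL) {
            func(scan);                 /* may remove scan from dbq */
            if (scan->on_queue)
                scan = TAILQ_NEXT(scan, db_entry);
            else
                scan = TAILQ_FIRST(dbq);
        }
    }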
*/ - if (chain->bref.mirror_tid < info->sync_tid && - chain->bref.mirror_tid < core->update_hi) { - hammer2_chain_layer_t *layer; - int saved_domodify; - int save_gen; - - /* - * Races will bump update_hi above trans->sync_tid and should - * not affect this test. - * - * We don't want to set our chain to MODIFIED gratuitously. - * - * We need an extra ref on chain because we are going to - * release its lock temporarily in our child loop. - */ - - /* - * Run two passes. The first pass handles MODIFIED and - * update_lo recursions while the second pass handles - * MOVED chains on the way back up. - * - * If the stack gets too deep we defer the chain. Since - * hammer2_chain_core's can be shared at multiple levels - * in the tree, we may encounter a chain that we had already - * deferred. We could undefer it but it will probably just - * defer again so it is best to leave it deferred. - * - * Scan1 is recursive. - * - * NOTE: The act of handling a modified/submodified chain can - * cause the MOVED Flag to be set. It can also be set - * via hammer2_chain_delete() and in other situations. - * - * NOTE: RB_SCAN() must be used instead of RB_FOREACH() - * because children can be physically removed during - * the scan. - * - * NOTE: We would normally not care about insertions except - * that some insertions might occur from the flush - * itself, so loop on generation number changes. - */ - saved_domodify = info->domodify; - info->domodify = 0; - - if (chain->flags & HAMMER2_CHAIN_DEFERRED) { - ++info->diddeferral; - } else if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT) { - if ((chain->flags & HAMMER2_CHAIN_DEFERRED) == 0) { - hammer2_chain_ref(chain); - TAILQ_INSERT_TAIL(&info->flush_list, - chain, flush_node); - atomic_set_int(&chain->flags, - HAMMER2_CHAIN_DEFERRED); - } - ++info->diddeferral; - } else { - spin_lock(&core->cst.spin); - KKASSERT(core->good == 0x1234 && core->sharecnt > 0); - do { - save_gen = core->generation; - TAILQ_FOREACH_REVERSE(layer, &core->layerq, - h2_layer_list, entry) { - ++layer->refs; - KKASSERT(layer->good == 0xABCD); - RB_SCAN(hammer2_chain_tree, - &layer->rbtree, - NULL, hammer2_chain_flush_scan1, - info); - --layer->refs; - } - } while (core->generation != save_gen); - spin_unlock(&core->cst.spin); - } - + if (info->domodify) { + hammer2_chain_modify(info->trans, &info->parent, + HAMMER2_MODIFY_NO_MODIFY_TID); if (info->parent != chain) { - kprintf("ZZZ\n"); + /* + * chain - old + * info->parent - new + * + * NOTE: bref.mirror_tid cannot be updated + * unless MODIFIED is cleared or already + * clear. 
+ */ + chain->inode_reason += 0x10000000; + info->parent->inode_reason += 0x100; + KKASSERT(info->parent->core == chain->core); + if (chain->flags & HAMMER2_CHAIN_MODIFIED) { + atomic_clear_int(&chain->flags, + HAMMER2_CHAIN_MODIFIED); + hammer2_chain_memory_wakeup(chain->pmp); + hammer2_chain_drop(chain); + } +#if 0 + if (chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) { + atomic_clear_int(&chain->flags, + HAMMER2_CHAIN_FLUSH_CREATE); + hammer2_chain_drop(chain); + } + if (info->parent->flags & HAMMER2_CHAIN_FLUSH_DELETE) { + atomic_clear_int(&info->parent->flags, + HAMMER2_CHAIN_FLUSH_DELETE); + hammer2_chain_drop(info->parent); + } +#endif +#if 0 + if (chain->bref.mirror_tid < info->sync_tid) + chain->bref.mirror_tid = info->sync_tid; +#endif + if (chain->update_lo < info->sync_tid) + chain->update_lo = info->sync_tid; + KKASSERT(info->parent->update_lo < info->sync_tid); hammer2_chain_drop(chain); hammer2_chain_ref(info->parent); } chain = info->parent; + } - /* - * chain was unlocked during the scan1 recursion and may - * have been deleted, destroyed, or even synchronously - * flushed due to aliasing. - * - * The flush continues normally in the first two places as - * the deletion or destruction does NOT affect the flush - * as-of the flush synchronization point. - * - * We must detect the last case and avoid flushing chain twice. - */ -#if 0 - if (chain->delete_tid <= info->sync_tid && - (chain->flags & HAMMER2_CHAIN_DUPLICATED)) { - kprintf("xxx\n"); - goto retry; - } -#endif - if (chain->bref.mirror_tid >= info->sync_tid || - chain->bref.mirror_tid >= core->update_hi) { - kprintf("yyy\n"); - goto retry; - } + /* + * If a blocktable update is needed determine if this is the last + * parent requiring modification (check all parents using the core). + * + * Set bit 1 (0x02) of domodify if this is the last parent, + * which will cause scan2 to clear FLUSH_CREATE and FLUSH_DELETE. + */ + if (1) { + hammer2_chain_t *scan; - /* - * If any deferral occurred we must set domodify to 0 to avoid - * potentially modifying the parent twice (now and when we run - * the deferral list), as doing so could cause the blockref - * update to run on a block array which has already been - * updated. - */ - if (info->domodify && diddeferral != info->diddeferral) - info->domodify = 0; + spin_lock(&core->cst.spin); + TAILQ_FOREACH(scan, &core->ownerq, core_entry) { + /* + * Ignore chains which have already been updated + * Ignore unmodified chains + */ + if ((scan->flags & HAMMER2_CHAIN_MODIFIED) == 0 && + (scan->update_lo >= info->sync_tid || + scan->update_lo >= scan->update_hi)) { + continue; + } - /* - * THIS IS THE ONLY POINT IN THE FLUSH WHERE A PARENT IN THE - * NOMINAL TOPOLOGY, OTHER THAN FREEMAP ALLOCATIONS, IS - * MODIFIED. FREEMAP ALLOCATIONS WILL MODIFY THE FREEMAP - * TOPOLOGY WITH SYNC_TID+1 AND DO NOT AFFECT THE CURRENT - * FLUSH. - * - * Modifying the parent can create issues if the current - * parent is already in a modified state with an earlier - * transaction id. We want to avoid an endless flush loop - * on the original parent so we must clear its modified bit - * after creating the new parent, if they wind up being - * different. Care must also be taken to avoid flushing the - * same parent twice. - * - * We are responsible for setting the parent into a modified - * state before we scan the children to update the parent's - * block table. 
This must essentially be done as an atomic - * operation (the parent must remain locked throughout the - * operation), otherwise other transactions can squeeze a - * delete-duplicate in and create block table havoc. - * - * NOTE: Blockrefs are only updated on live chains. - * - * NOTE: Modifying the parent generally causes a - * delete-duplicate to occur from within the flush - * itself, with an allocation from the freemap occuring - * as an additional side-effect. - * - * NOTE: If the parent was deleted our modified chain will - * also be marked deleted, but since it inherits the - * parent's delete_tid it will still appear to be - * 'live' for the purposes of the flush. - */ - if (info->domodify && !h2ignore_deleted(info, chain)) { - KKASSERT(chain->modify_tid < info->sync_tid); + /* + * Ignore the current parent being processed (we do + * not adjust update_lo until after the fixup). + */ + if (scan == chain) + continue; /* - * The scan1 loop and/or flush_core is reentrant, - * particularly when core->generation changes. To - * avoid havoc we have to prevent repetitive - * delete-duplicates of the same chain. - * - * After executing the modify set the original chain's - * bref.mirror_tid to prevent any reentrancy during - * the current flush cycle. + * Cannot exhaust all parents if one is not visible + * to the flush. The root chains are special-cased + * because they cannot really be delete-duplicated. */ - hammer2_chain_modify(info->trans, &info->parent, - HAMMER2_MODIFY_NO_MODIFY_TID); - if (info->parent != chain) { - /* - * NOTE: bref.mirror_tid cannot be updated - * unless MODIFIED is cleared or already - * clear. - */ - if (chain->flags & HAMMER2_CHAIN_MODIFIED) { - atomic_clear_int(&chain->flags, - HAMMER2_CHAIN_MODIFIED); - hammer2_chain_memory_wakeup(chain->pmp); - hammer2_chain_drop(chain); - } - if (chain->bref.mirror_tid < info->sync_tid) - chain->bref.mirror_tid = info->sync_tid; - hammer2_chain_drop(chain); - hammer2_chain_ref(info->parent); + if (scan != &scan->hmp->fchain && + scan != &scan->hmp->vchain && + scan->modify_tid > info->sync_tid) { + break; } - chain = info->parent; + + /* + * Fail if update_lo has not been synchronized to + * at least our sync_tid on any modified parent chain. + */ + if (scan->update_lo < info->sync_tid) + break; } + spin_unlock(&core->cst.spin); + if (scan == NULL) + info->domodify |= 2; + } - KKASSERT(chain == info->parent); + /* + * (5) Calculate block table updates or child cleanups. + * (this whole operation has to be atomic) + * + * domodify 0x01 - block table updates + * 0x02 - child cleanups + * + * pass2 - Process deletions from dbtree and dbq. + * pass3 - Process insertions from rbtree, dbtree, and dbq. + * pass4 - Cleanup child flags on the last parent and + * Adjust queues on the live parent. + */ + if (info->domodify) { + hammer2_chain_t *scan; - /* - * Handle successfully flushed children who are in the MOVED - * state on the way back up the recursion. This can have - * the side-effect of clearing MOVED. - * - * Scan2 may replace info->parent. If it does it will also - * replace the extra ref we made. - * - * Scan2 is non-recursive. 
- */ - if (diddeferral != info->diddeferral) { - spin_lock(&core->cst.spin); - } else { - KKASSERT(chain == info->parent); - KKASSERT(info->domodify == 0 || - (chain->flags & HAMMER2_CHAIN_FLUSHED) == 0); - atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSHED); - spin_lock(&core->cst.spin); - KKASSERT(core->good == 0x1234 && core->sharecnt > 0); - KKASSERT(info->parent->core == core); - TAILQ_FOREACH_REVERSE(layer, &core->layerq, - h2_layer_list, entry) { - info->pass = 1; - ++layer->refs; - KKASSERT(layer->good == 0xABCD); - RB_SCAN(hammer2_chain_tree, &layer->rbtree, - NULL, hammer2_chain_flush_scan2, info); - info->pass = 2; - RB_SCAN(hammer2_chain_tree, &layer->rbtree, - NULL, hammer2_chain_flush_scan2, info); - --layer->refs; + spin_lock(&core->cst.spin); + + while ((info->domodify & 1) && info->parent) { + /* PASS2 - Deletions */ + RB_SCAN(hammer2_chain_tree, &core->rbtree, + NULL, hammer2_flush_pass2, info); + RB_SCAN(hammer2_chain_tree, &core->dbtree, + NULL, hammer2_flush_pass2, info); + scan = TAILQ_FIRST(&core->dbq); + TAILQ_FOREACH(scan, &core->dbq, db_entry) { + KKASSERT(scan->flags & HAMMER2_CHAIN_ONDBQ); + hammer2_flush_pass2(scan, info); } - } - - /* - * info->parent must not have been replaced again - */ - KKASSERT(info->parent == chain); - *chainp = chain; + /* PASS3 - Insertions */ + RB_SCAN(hammer2_chain_tree, &core->rbtree, + NULL, hammer2_flush_pass3, info); + RB_SCAN(hammer2_chain_tree, &core->dbtree, + NULL, hammer2_flush_pass3, info); + TAILQ_FOREACH(scan, &core->dbq, db_entry) { + KKASSERT(scan->flags & HAMMER2_CHAIN_ONDBQ); + hammer2_flush_pass3(scan, info); + } + info->parent = TAILQ_NEXT(info->parent, core_entry); + if (info->parent) + kprintf("FLUSH SPECIAL UPDATE (%p) %p.%d %08x\n", + chain, info->parent, + info->parent->bref.type, + info->parent->flags); + } + info->parent = chain; + + /* PASS4 - Cleanup */ + RB_SCAN(hammer2_chain_tree, &core->rbtree, + NULL, hammer2_flush_pass4, info); + scan = TAILQ_FIRST(&core->dbq); + while (scan) { + KKASSERT(scan->flags & HAMMER2_CHAIN_ONDBQ); + hammer2_flush_pass4(scan, info); + if (scan->flags & HAMMER2_CHAIN_ONDBQ) + scan = TAILQ_NEXT(scan, db_entry); + else + scan = TAILQ_FIRST(&core->dbq); + } + RB_SCAN(hammer2_chain_tree, &core->dbtree, + NULL, hammer2_flush_pass4, info); - hammer2_chain_layer_check_locked(chain->hmp, core); spin_unlock(&core->cst.spin); - - /* - * Update the core only if no deferrals occurred. Otherwise - * we could end up clearing the MOVED bit in the children - * prematurely. - */ - if (diddeferral == info->diddeferral) - hammer2_flush_core_update(core, info); - - info->domodify = saved_domodify; - KKASSERT(chain->refs > 1); - } else { - /* - * Update the core, no deferrals occurred in this path. - */ - hammer2_flush_core_update(core, info); } - info->parent = saved_parent; - -#if FLUSH_DEBUG - kprintf("POP %p.%d defer=%d\n", chain, chain->bref.type, diddeferral); -#endif /* - * Do not flush the chain if there were any deferrals. It will be - * retried later after the deferrals are independently handled. - * Do not update update_lo or bref.mirror_tid. + * Synchronize update_lo to prevent reentrant block updates of this + * parent. 
 	 */
-	if (diddeferral != info->diddeferral) {
-		if (hammer2_debug & 0x0008) {
-			kprintf("%*.*s} %p/%d %04x (deferred)",
-				info->depth, info->depth, "",
-				chain, chain->refs, chain->flags);
-		}
-		/* do not update core->update_lo */
-		/* do not update bref.mirror_tid */
-		return;
-	}
+	chain->update_lo = info->sync_tid;
 
-	KKASSERT(chain->bref.mirror_tid < info->sync_tid);
+	/*
+	 * Skip the flush if the chain was not placed in a modified state
+	 * or was not already in a modified state.
+	 */
+	if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0)
+		goto done;
 
 	/*
-	 * Non-deferral path, chain is now deterministically being flushed.
+	 * FLUSH THE CHAIN (on the way back up the recursion)
+	 *
+	 * Chain is now deterministically being flushed and not being deferred.
 	 * We've finished running the recursion and the blockref update.
 	 *
 	 * update bref.mirror_tid.  update_lo has already been updated.
-	 *
-	 * After this point we MUST dipose of the MODIFIED bit on chain.
 	 */
 	if (chain->bref.mirror_tid < info->sync_tid)
 		chain->bref.mirror_tid = info->sync_tid;
 
 	/*
-	 * Deal with deleted and destroyed chains on the way back up.
-	 *
-	 * Otherwise a deleted chain can be optimized by clearing MODIFIED
-	 * without bothering to write it out.
-	 *
-	 * NOTE: We optimize this by noting that only 'inode' chains require
-	 *	 this treatment.  When a file with an open descriptor is
-	 *	 deleted only its inode is marked deleted.  Other deletions,
-	 *	 such as indirect block deletions, will no longer be visible
-	 *	 to the live filesystem and do not need to be updated.
+	 * Dispose of the modified bit.  FLUSH_CREATE should already be
+	 * set.
 	 */
-	if (h2ignore_deleted(info, chain)) {
+	KKASSERT(chain->flags & HAMMER2_CHAIN_FLUSH_CREATE);
+	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
+	hammer2_chain_memory_wakeup(chain->pmp);
+
+	if ((chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) ||
+	    chain == &hmp->vchain ||
+	    chain == &hmp->fchain) {
 		/*
-		 * At the moment we unconditionally set the MOVED bit because
-		 * there are situations where it might not have been set due
-		 * to similar delete-destroyed optimizations, and the parent
-		 * of the parent still may need to be notified of the deletion.
+		 * Drop the ref from the MODIFIED bit we cleared,
+		 * net -1 ref.
 		 */
-		if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) {
-			hammer2_chain_ref(chain);
-			atomic_set_int(&chain->flags,
-				       HAMMER2_CHAIN_MOVED);
-		}
-		if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
-#if 0
-			/*
-			 * XXX should be able to invalidate the buffer here.
-			 * XXX problem if reused, snapshotted, or reactivated.
-			 */
-			if (chain->dio) {
-				hammer2_io_setinval(chain->dio, chain->bytes);
-			}
-#endif
-			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
-			hammer2_chain_memory_wakeup(chain->pmp);
-			hammer2_chain_drop(chain);
-		}
-		return;
+		hammer2_chain_drop(chain);
+	} else {
+		/*
+		 * Drop the ref from the MODIFIED bit we cleared and
+		 * set a ref for the FLUSH_CREATE bit we are setting.
+		 * Net 0 refs.
+		 */
+		atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSH_CREATE);
 	}
 
 	/*
-	 * A degenerate flush might not have flushed anything and thus not
-	 * processed modified blocks on the way back up.  Detect the case.
-	 *
-	 * This case can occur when a create, modify, and rename (to a
-	 * different part of the topology) occurs in the same flush,
-	 * resulting in a parent which effectively needs no modification.
+	 * Skip the actual flush operation if the chain has been deleted
+	 * in our flush view.  There will be no block table entry that
+	 * references it.
*/ - if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) { -#if 0 - kprintf("chain %p.%d %08x recursed but wasn't " - "modified mirr=%016jx " - "update_lo=%016jx synctid=%016jx\n", - chain, chain->bref.type, chain->flags, - chain->bref.mirror_tid, - core->update_lo, info->sync_tid); -#endif -#if 0 - if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) { - hammer2_chain_ref(chain); - atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED); - } -#endif - return; - } + if (h2ignore_deleted(info, chain)) + goto done; /* * Issue flush. @@ -933,25 +829,6 @@ retry: Debugger("Flush hell"); } - atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); - hammer2_chain_memory_wakeup(chain->pmp); - - if ((chain->flags & HAMMER2_CHAIN_MOVED) || - chain == &hmp->vchain || - chain == &hmp->fchain) { - /* - * Drop the ref from the MODIFIED bit we cleared, - * net -1 ref. - */ - hammer2_chain_drop(chain); - } else { - /* - * Drop the ref from the MODIFIED bit we cleared and - * set a ref for the MOVED bit we are setting. Net 0 refs. - */ - atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED); - } - /* * If this is part of a recursive flush we can go ahead and write * out the buffer cache buffer and pass a new bref back up the chain @@ -1025,10 +902,6 @@ retry: * Make sure any device buffer(s) have been flushed out here. * (there aren't usually any to flush). */ -#if 0 - /* XXX */ - /* chain and chain->bref, NOWAIT operation */ -#endif break; #if 0 case HAMMER2_BREF_TYPE_INDIRECT: @@ -1064,105 +937,59 @@ retry: panic("hammer2_chain_flush_core: unsupported embedded bref %d", chain->bref.type); /* NOT REACHED */ -#if 0 - /* - * Embedded elements have to be flushed out. - * (Basically just BREF_TYPE_INODE). - */ - KKASSERT(chain->flags & HAMMER2_CHAIN_EMBEDDED); - KKASSERT(chain->data != NULL); - KKASSERT(chain->dio == NULL); - bref = &chain->bref; - - KKASSERT((bref->data_off & HAMMER2_OFF_MASK) != 0); - KKASSERT(HAMMER2_DEC_CHECK(chain->bref.methods) == - HAMMER2_CHECK_ISCSI32 || - HAMMER2_DEC_CHECK(chain->bref.methods) == - HAMMER2_CHECK_FREEMAP); - - /* - * The data is embedded, we have to acquire the - * buffer cache buffer and copy the data into it. - */ - error = hammer2_io_bread(hmp, bref->data_off, chain->bytes, - &dio); - KKASSERT(error == 0); - bdata = hammer2_io_data(dio, bref->data_off); + } - /* - * Copy the data to the buffer, mark the buffer - * dirty, and convert the chain to unmodified. - */ - bcopy(chain->data, bdata, chain->bytes); - hammer2_io_bdwrite(&dio); + /* + * Final cleanup after flush + */ +done: + KKASSERT(chain->refs > 1); + info->domodify = saved_domodify; + info->parent = saved_parent; + *chainp = chain; - switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { - case HAMMER2_CHECK_FREEMAP: - chain->bref.check.freemap.icrc32 = - hammer2_icrc32(chain->data, chain->bytes); - break; - case HAMMER2_CHECK_ISCSI32: - chain->bref.check.iscsi32.value = - hammer2_icrc32(chain->data, chain->bytes); - break; - default: - panic("hammer2_flush_core: bad crc type"); - break; /* NOT REACHED */ - } - if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) - ++hammer2_iod_meta_write; - else - ++hammer2_iod_indr_write; -#endif - } + KKASSERT(chain->bref.mirror_tid <= info->sync_tid); } /* - * Flush helper scan1 (recursive) + * Flush helper pass1 (recursive) * - * Flushes the children of the caller's chain (parent) and updates - * the blockref, restricted by sync_tid. + * Flushes the children of the caller's chain (info->parent), restricted + * by sync_tid. 
Set info->domodify if the child's blockref must propagate
+ *	back up to the parent.
 *
- * Ripouts during the loop should not cause any problems.  Because we are
- * flushing to a synchronization point, modification races will occur after
- * sync_tid and do not have to be flushed anyway.
+ * Ripouts can move child from rbtree to dbtree or dbq but the caller's
+ * flush scan order prevents any chains from being lost.  A child can be
+ * executed more than once (update_lo is used to prevent infinite recursions).
 *
- * It is also ok if the parent is chain_duplicate()'d while unlocked because
- * the delete/duplication will install a delete_tid that is still larger than
- * our current sync_tid.
+ * WARNING! If we do not call hammer2_flush_core() we must update
+ *	    bref.mirror_tid ourselves to indicate that the flush has
+ *	    processed the child.
 *
- * WARNING! If we do not call chain_flush_core we must update bref.mirror_tid
- *	    ourselves.
+ * WARNING! parent->core spinlock is held on entry and return.
 */
 static int
-hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data)
+hammer2_flush_pass1(hammer2_chain_t *child, void *data)
 {
 	hammer2_flush_info_t *info = data;
 	hammer2_trans_t *trans = info->trans;
 	hammer2_chain_t *parent = info->parent;
-	int diddeferral;
-
-	if (hammer2_debug & 0x80000)
-		Debugger("hell3");
-	diddeferral = info->diddeferral;
 
 	/*
-	 * Child is beyond the flush synchronization zone, don't persue.
+	 * Child modified in a later transaction, nothing to flush in this
+	 * transaction.
+	 *
 	 * Remember that modifications generally delete-duplicate so if the
 	 * sub-tree is dirty another child will get us there.  But not this
 	 * one.
 	 *
-	 * Or MODIFIED is not set and child is already fully synchronized
-	 * with its sub-tree.  Don't persue.
-	 *
 	 * (child can never be fchain or vchain so a special check isn't
 	 * needed).
 	 */
 	if (child->modify_tid > trans->sync_tid) {
 		KKASSERT(child->delete_tid >= child->modify_tid);
-		/* do not update child->core->update_lo, core not flushed */
-		/* do not update core->update_lo, there may be another path */
-		/* do not update mirror_tid, scan2 will ignore chain */
+		/*child->update_lo = info->sync_tid;*/
+		/* do not update mirror_tid, pass2 will ignore chain */
 		return (0);
 	}
 
@@ -1179,78 +1006,49 @@ hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data)
 	hammer2_chain_lock(child, HAMMER2_RESOLVE_MAYBE);
 
 	/*
-	 * No recursion needed if neither the child or anything under it
-	 * was messed with.
+	 * Recurse and collect deferral data.  We only recursively sync
+	 * (basically) if update_lo has not been updated, indicating that
+	 * the child has not already been processed.
 	 */
-	if ((child->flags & HAMMER2_CHAIN_MODIFIED) == 0 &&
-	    child->core->update_lo >= info->sync_tid) {
-		KKASSERT((child->flags & HAMMER2_CHAIN_MODIFIED) == 0);
-		if (child->bref.mirror_tid < info->sync_tid)
-			child->bref.mirror_tid = info->sync_tid;
-		goto skip;
+	if ((child->flags & HAMMER2_CHAIN_MODIFIED) ||
+	    (child->update_lo < info->sync_tid &&
+	     child->update_lo < child->update_hi)) {
+		++info->depth;
+		hammer2_flush_core(info, &child, 0); /* XXX deleting */
+		--info->depth;
 	}
 
 	/*
-	 * XXX delete child if parent is deleted.  Propagate deletion
-	 * downward.  TODO
-	 */
-
-
-	/*
-	 * Re-check original pre-lock conditions after locking.
+	 * Determine if domodify should be set.  Do not otherwise adjust
+	 * the child or pass2 will get confused.
+	 *
+	 * Insertion:
+	 * - child is flagged as possibly needing block table insertion.
+ * - child not deleted or deletion is beyond transaction id + * - child created beyond parent synchronization point + * - parent not deleted as-of this transaction */ - if (child->modify_tid > trans->sync_tid) { - hammer2_chain_unlock(child); - hammer2_chain_drop(child); - hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE); - spin_lock(&parent->core->cst.spin); - return (0); - } - - if ((child->flags & HAMMER2_CHAIN_MODIFIED) == 0 && - child->core->update_lo >= info->sync_tid) { - KKASSERT((child->flags & HAMMER2_CHAIN_MODIFIED) == 0); - if (child->bref.mirror_tid < info->sync_tid) - child->bref.mirror_tid = info->sync_tid; - goto skip; + if ((child->flags & HAMMER2_CHAIN_FLUSH_CREATE) && + child->delete_tid > trans->sync_tid && + child->modify_tid > parent->update_lo && + parent->delete_tid > trans->sync_tid) { + info->domodify = 1; } /* - * Recurse and collect deferral data. - */ - ++info->depth; - hammer2_chain_flush_core(info, &child); - --info->depth; - -skip: - /* - * Check the conditions that could cause SCAN2 to modify the parent. - * Modify the parent here instead of in SCAN2, which would cause - * rollup chicken-and-egg races. - * - * Scan2 is expected to update bref.mirror_tid in the domodify case, - * but will skip the child otherwise giving us the responsibility to - * update bref.mirror_tid. - * - * WARNING! Do NOT update the child's bref.mirror_tid right here, - * even if there was no deferral. Doing so would cause - * confusion with the child's block array state in a - * future flush. + * Removal: + * - child is flagged as possibly needing block table removal. + * - child deleted before or during this transaction + * - child created prior or during parent synchronization point + * - parent not yet synchronized to child deletion + * - parent not deleted as-of this transaction */ - if (h2ignore_deleted(info, parent)) { - /* - * Special optimization matching similar tests done in - * flush_core, scan1, and scan2. Avoid updating the block - * table in the parent if the parent is no longer visible. - */ - ; - } else if (child->delete_tid <= trans->sync_tid && - child->delete_tid > parent->bref.mirror_tid && - child->modify_tid <= parent->bref.mirror_tid) { + if ((child->flags & HAMMER2_CHAIN_FLUSH_DELETE) && + child->delete_tid <= trans->sync_tid && + child->modify_tid <= parent->update_lo && + child->delete_tid > parent->update_lo && + parent->delete_tid > trans->sync_tid) { info->domodify = 1; - } else if (child->delete_tid > trans->sync_tid && - child->modify_tid > parent->bref.mirror_tid) { - info->domodify = 1; /* base insertion */ } /* @@ -1266,128 +1064,157 @@ skip: } /* - * Flush helper scan2 (non-recursive) - * - * This pass on a chain's children propagates any MOVED or DELETED - * elements back up the chain towards the root after those elements have - * been fully flushed. Unlike scan1, this function is NOT recursive and - * the parent remains locked across the entire scan. - * - * SCAN2 is called twice, once with pass set to 1 and once with it set to 2. - * We have to do this so base[] elements can be deleted in pass 1 to make - * room for adding new elements in pass 2. - * - * This function also rolls up storage statistics. - * - * NOTE! A deletion is a visbility issue, there can still be references to - * deleted elements (for example, to an unlinked file which is still - * open), and there can also be multiple chains pointing to the same - * bref where some are deleted and some are not (for example due to - * a rename). 
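The insertion and removal criteria listed above reduce to two TID comparisons against the parent's synchronization point. The helpers below restate them as standalone predicates; the struct and names are hypothetical, but the conditions mirror the ones the pass1 code tests before setting info->domodify.

    #include <stdint.h>

    #define SIM_FLUSH_CREATE 0x01u
    #define SIM_FLUSH_DELETE 0x02u

    struct sim_tidstate {
        uint64_t modify_tid;
        uint64_t delete_tid;        /* very large value if never deleted */
        uint64_t update_lo;         /* parent: how far its table is synced */
        unsigned int flags;
    };

    static int
    sim_needs_insertion(const struct sim_tidstate *child,
                        const struct sim_tidstate *parent, uint64_t sync_tid)
    {
        return (child->flags & SIM_FLUSH_CREATE) &&
               child->delete_tid > sync_tid &&          /* still live       */
               child->modify_tid > parent->update_lo && /* not in table yet */
               parent->delete_tid > sync_tid;           /* parent is live   */
    }

    static int
    sim_needs_removal(const struct sim_tidstate *child,
                      const struct sim_tidstate *parent, uint64_t sync_tid)
    {
        return (child->flags & SIM_FLUSH_DELETE) &&
               child->delete_tid <= sync_tid &&          /* deleted by now   */
               child->modify_tid <= parent->update_lo && /* was in the table */
               child->delete_tid > parent->update_lo &&  /* removal unsynced */
               parent->delete_tid > sync_tid;            /* parent is live   */
    }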
So a chain marked for deletion is basically considered - * to be live until it is explicitly destroyed or until its ref-count - * reaches zero (also implying that MOVED and MODIFIED are clear). - * - * NOTE! Info->parent will be locked but will only be instantiated/modified - * if it is either MODIFIED or if scan1 determined that block table - * updates will occur. - * - * NOTE! SCAN2 is responsible for updating child->bref.mirror_tid only in - * the case where it modifies the parent (does a base insertion - * or deletion). SCAN1 handled all other cases. + * PASS2 - BLOCKTABLE DELETIONS */ static int -hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) +hammer2_flush_pass2(hammer2_chain_t *child, void *data) { hammer2_flush_info_t *info = data; hammer2_chain_t *parent = info->parent; - hammer2_chain_core_t *above = child->above; hammer2_mount_t *hmp = child->hmp; hammer2_trans_t *trans = info->trans; hammer2_blockref_t *base; int count; - int ok; -#if FLUSH_DEBUG - kprintf("SCAN2 %p.%d %08x mod=%016jx del=%016jx trans=%016jx\n", child, child->bref.type, child->flags, child->modify_tid, child->delete_tid, info->trans->sync_tid); -#endif /* - * Ignore children created after our flush point, treating them as - * if they did not exist). These children will not cause the parent - * to be updated. - * - * Children deleted after our flush point are treated as having been - * created for the purposes of the flush. The parent's update_hi - * will already be higher than our trans->sync_tid so the path for - * the next flush is left intact. - * - * When we encounter such children and the parent chain has not been - * deleted, delete/duplicated, or delete/duplicated-for-move, then - * the parent may be used to funnel through several flush points. - * These chains will still be visible to later flushes due to having - * a higher update_hi than we can set in the current flush. + * Prefilter - Ignore children not flagged as needing a parent + * blocktable update. + */ + if ((child->flags & HAMMER2_CHAIN_FLUSH_DELETE) == 0) + return (0); + + /* + * Prefilter - Ignore children created after our flush_tid (not + * visible to our flush). */ if (child->modify_tid > trans->sync_tid) { KKASSERT(child->delete_tid >= child->modify_tid); - goto finalize; + return 0; } -#if 0 /* - * Ignore children which have not changed. The parent's block table - * is already correct. + * Prefilter - Don't bother updating the blockrefs for a deleted + * parent (from the flush's perspective). Otherwise, + * we need to be COUNTEDBREFS synchronized for the + * hammer2_base_*() functions. * - * XXX The MOVED bit is only cleared when all multi-homed parents - * have flushed, creating a situation where a re-flush can occur - * via a parent which has already flushed. The hammer2_base_*() - * functions currently have a hack to deal with this case but - * we need something better. + * NOTE: This test must match the similar one in flush_core. */ - if ((child->flags & HAMMER2_CHAIN_MOVED) == 0) { - KKASSERT((child->flags & HAMMER2_CHAIN_MODIFIED) == 0); - goto finalize; + if (h2ignore_deleted(info, parent)) + return 0; + + /* + * Calculate blockmap pointer + */ + switch(parent->bref.type) { + case HAMMER2_BREF_TYPE_INODE: + /* + * Access the inode's block array. However, there is no + * block array if the inode is flagged DIRECTDATA. The + * DIRECTDATA case typicaly only occurs when a hardlink has + * been shifted up the tree and the original inode gets + * replaced with an OBJTYPE_HARDLINK placeholding inode. 
+ */ + if (parent->data && + (parent->data->ipdata.op_flags & + HAMMER2_OPFLAG_DIRECTDATA) == 0) { + base = &parent->data->ipdata.u.blockset.blockref[0]; + } else { + base = NULL; + } + count = HAMMER2_SET_COUNT; + break; + case HAMMER2_BREF_TYPE_INDIRECT: + case HAMMER2_BREF_TYPE_FREEMAP_NODE: + if (parent->data) + base = &parent->data->npdata[0]; + else + base = NULL; + count = parent->bytes / sizeof(hammer2_blockref_t); + break; + case HAMMER2_BREF_TYPE_VOLUME: + base = &hmp->voldata.sroot_blockset.blockref[0]; + count = HAMMER2_SET_COUNT; + break; + case HAMMER2_BREF_TYPE_FREEMAP: + base = &parent->data->npdata[0]; + count = HAMMER2_SET_COUNT; + break; + default: + base = NULL; + count = 0; + panic("hammer2_chain_flush_pass2: " + "unrecognized blockref type: %d", + parent->bref.type); } -#endif /* - * Make sure child is referenced before we unlock. + * Removal + * - child is flagged for removal + * - child deleted before or during this transaction + * - child created prior or during parent synchronization point + * - parent not yet synchronized to child's deletion */ - hammer2_chain_ref(child); - spin_unlock(&above->cst.spin); + if (child->delete_tid <= trans->sync_tid && + child->modify_tid <= parent->update_lo && + child->delete_tid > parent->update_lo) { + /* can't assert BMAPPED because state adjustment may occur + * before we are done, and BMAPPED only applies to the live + * parent. + *KKASSERT(child->flags & HAMMER2_CHAIN_BMAPPED);*/ + if (base) { + hammer2_rollup_stats(parent, child, -1); + hammer2_base_delete(trans, parent, base, count, + &info->cache_index, child); + } + } + + return 0; +} + +/* + * PASS3 - BLOCKTABLE INSERTIONS + */ +static int +hammer2_flush_pass3(hammer2_chain_t *child, void *data) +{ + hammer2_flush_info_t *info = data; + hammer2_chain_t *parent = info->parent; + hammer2_mount_t *hmp = child->hmp; + hammer2_trans_t *trans = info->trans; + hammer2_blockref_t *base; + int count; /* - * Parent reflushed after the child has passed them by should skip - * due to the modify_tid test. XXX + * Prefilter - Ignore children not flagged as needing a parent + * blocktable update. */ - hammer2_chain_lock(child, HAMMER2_RESOLVE_NEVER); - KKASSERT(child->above == above); - KKASSERT(parent->core == above); + if ((child->flags & HAMMER2_CHAIN_FLUSH_CREATE) == 0) + return (0); /* - * The parent's blockref to the child must be deleted or updated. - * - * This point is not reached on successful DELETED optimizations - * but can be reached on recursive deletions and restricted flushes. - * - * The chain_modify here may delete-duplicate the block. This can - * cause a multitude of issues if the block was already modified - * by a later (post-flush) transaction. Primarily blockrefs in - * the later block can be out-of-date, so if the situation occurs - * we can't throw away the MOVED bit on the current blocks until - * the later blocks are flushed (so as to be able to regenerate all - * the changes that were made). - * - * Because flushes are ordered we do not have to make a - * modify/duplicate of indirect blocks. That is, the flush - * code does not have to kmalloc or duplicate anything. We - * can adjust the indirect block table in-place and reuse the - * chain. It IS possible that the chain has already been duplicated - * or may wind up being duplicated on-the-fly by modifying code - * on the frontend. We simply use the original and ignore such - * chains. However, it does mean we can't clear the MOVED bit. 
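Pass2 above and pass3 below contain the same switch for locating the parent's block array. A compressed restatement is sketched here with stand-in types (the x-prefixed names and XSET_COUNT value are illustrative, not the real hammer2 definitions); it returns NULL whenever the array cannot be touched, in which case the caller simply skips the block table update.

/* Illustrative sketch only -- not the in-tree hammer2 code. */
#include <stddef.h>
#include <stdint.h>

enum xbref_type { XBREF_INODE, XBREF_INDIRECT, XBREF_FREEMAP_NODE,
                  XBREF_VOLUME, XBREF_FREEMAP };

#define XSET_COUNT      4               /* arbitrary stand-in for HAMMER2_SET_COUNT */
#define XOP_DIRECTDATA  0x01            /* stand-in for HAMMER2_OPFLAG_DIRECTDATA   */

struct xblockref { uint64_t key; };     /* stand-in for hammer2_blockref_t */

struct xparent {                        /* hypothetical, trimmed-down parent */
    enum xbref_type   type;
    unsigned int      op_flags;         /* inode op_flags, if an inode */
    size_t            bytes;            /* size of an indirect block */
    struct xblockref *inode_blockset;   /* embedded inode block array, NULL if data unloaded */
    struct xblockref *npdata;           /* indirect/freemap block array, NULL if not instantiated */
    struct xblockref *sroot_blockset;   /* volume header block array */
};

/*
 * Select the block array the flush passes operate on.  Returns NULL when
 * the array is not directly accessible (DIRECTDATA inodes, unloaded
 * data), and the caller then skips the block table update.
 */
static struct xblockref *
blockmap_base(struct xparent *p, int *countp)
{
    switch (p->type) {
    case XBREF_INODE:
        *countp = XSET_COUNT;
        if (p->op_flags & XOP_DIRECTDATA)
            return NULL;                /* inode carries file data, no brefs */
        return p->inode_blockset;
    case XBREF_INDIRECT:
    case XBREF_FREEMAP_NODE:
        *countp = (int)(p->bytes / sizeof(struct xblockref));
        return p->npdata;
    case XBREF_VOLUME:
        *countp = XSET_COUNT;
        return p->sroot_blockset;
    case XBREF_FREEMAP:
        *countp = XSET_COUNT;
        return p->npdata;
    }
    *countp = 0;
    return NULL;
}

Running the deletions (pass2) across all children before any insertions (pass3) preserves the rationale of the old two-call scan2: entries removed from base[] make room for the entries added afterwards.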
+ * Prefilter - Ignore children created after our flush_tid (not + * visible to our flush). + */ + if (child->modify_tid > trans->sync_tid) { + KKASSERT(child->delete_tid >= child->modify_tid); + return 0; + } + + /* + * Prefilter - Don't bother updating the blockrefs for a deleted + * parent (from the flush's perspective). Otherwise, + * we need to be COUNTEDBREFS synchronized for the + * hammer2_base_*() functions. * - * XXX recursive deletions not optimized. + * NOTE: This test must match the similar one in flush_core. */ + if (h2ignore_deleted(info, parent)) + return 0; + /* + * Calculate blockmap pointer + */ switch(parent->bref.type) { case HAMMER2_BREF_TYPE_INODE: /* @@ -1425,212 +1252,237 @@ hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) default: base = NULL; count = 0; - panic("hammer2_chain_flush_scan2: " + panic("hammer2_chain_flush_pass2: " "unrecognized blockref type: %d", parent->bref.type); } /* - * Don't bother updating a deleted parent's blockrefs. - * - * Otherwise, we need to be COUNTEDBREFS synchronized for the - * hammer2_base_*() functions. - * - * This test must match the similar one in flush_core. + * Insertion + * - child is flagged as possibly needing block table insertion. + * - child not deleted or deletion is beyond transaction id + * - child created beyond parent synchronization point */ -#if FLUSH_DEBUG - kprintf("SCAN2 base=%p pass=%d PARENT %p.%d DTID=%016jx SYNC=%016jx\n", - base, - info->pass, parent, parent->bref.type, parent->delete_tid, trans->sync_tid); -#endif - if (h2ignore_deleted(info, parent)) - base = NULL; + if (child->delete_tid > trans->sync_tid && + child->modify_tid > parent->update_lo) { + if (base) { + hammer2_rollup_stats(parent, child, 1); + hammer2_base_insert(trans, parent, base, count, + &info->cache_index, child); + } + } + + return 0; +} + +/* + * PASS4 - CLEANUP CHILDREN (non-recursive, but CAN be re-entrant) + * + * Adjust queues and set or clear BMAPPED appropriately if processing + * the live parent. + * + * Cleanup FLUSH_CREATE/FLUSH_DELETE on the last parent. + */ +static int +hammer2_flush_pass4(hammer2_chain_t *child, void *data) +{ + hammer2_flush_info_t *info = data; + hammer2_chain_t *parent = info->parent; + hammer2_chain_t *xchain; + hammer2_chain_core_t *above = child->above; + hammer2_trans_t *trans = info->trans; /* - * Update the parent's blockref table and propagate mirror_tid. - * - * NOTE! Children with modify_tid's beyond our flush point are - * considered to not exist for the purposes of updating the - * parent's blockref array. - * - * NOTE! SCAN1 has already put the parent in a modified state - * so if it isn't we panic. - * - * NOTE! chain->modify_tid vs chain->bref.modify_tid. The chain's - * internal modify_tid is always updated based on creation - * or delete-duplicate. However, the bref.modify_tid is NOT - * updated due to simple blockref updates. + * Prefilter - Ignore children created after our flush_tid (not + * visible to our flush). 
*/ -#if FLUSH_DEBUG - kprintf("chain %p->%p pass %d trans %016jx sync %p.%d %016jx/%d C=%016jx D=%016jx PMIRROR %016jx\n", - parent, child, - info->pass, trans->sync_tid, - child, child->bref.type, - child->bref.key, child->bref.keybits, - child->modify_tid, child->delete_tid, parent->bref.mirror_tid); -#endif + if (child->modify_tid > trans->sync_tid) { + KKASSERT(child->delete_tid >= child->modify_tid); + return 0; + } - if (info->pass == 1 && child->delete_tid <= trans->sync_tid) { + /* + * Ref and lock child for operation, spinlock must be temporarily + * Make sure child is referenced before we unlock. + */ + hammer2_chain_ref(child); + spin_unlock(&above->cst.spin); + hammer2_chain_lock(child, HAMMER2_RESOLVE_NEVER); + KKASSERT(child->above == above); + KKASSERT(parent->core == above); + + /* + * Adjust BMAPPED state and rbtree/queue only when we hit the + * actual live parent. + */ + if ((parent->flags & HAMMER2_CHAIN_DELETED) == 0) { /* - * Deleting. The block array is expected to contain the - * child's entry if: - * - * (1) The deletion occurred after the parent's block table - * was last synchronized (delete_tid), and - * - * (2) The creation occurred before or during the parent's - * last block table synchronization. + * Deleting from blockmap, move child out of dbtree + * and clear BMAPPED. Child should not be on RBTREE. */ -#if FLUSH_DEBUG - kprintf("S2A %p.%d b=%p d/b=%016jx/%016jx m/b=%016jx/%016jx\n", - child, child->bref.type, - base, child->delete_tid, parent->bref.mirror_tid, - child->modify_tid, parent->bref.mirror_tid); -#endif - if (base && - child->delete_tid > parent->bref.mirror_tid && - child->modify_tid <= parent->bref.mirror_tid) { - KKASSERT(child->flags & HAMMER2_CHAIN_MOVED); - KKASSERT(parent->modify_tid == trans->sync_tid || - (parent == &hmp->vchain || - parent == &hmp->fchain)); - hammer2_rollup_stats(parent, child, -1); - spin_lock(&above->cst.spin); -#if FLUSH_DEBUG - kprintf("trans %jx parent %p.%d child %p.%d m/d %016jx/%016jx " - "flg=%08x %016jx/%d delete\n", - trans->sync_tid, - parent, parent->bref.type, - child, child->bref.type, - child->modify_tid, child->delete_tid, - child->flags, - child->bref.key, child->bref.keybits); -#endif - hammer2_base_delete(trans, parent, base, count, - &info->cache_index, child); - spin_unlock(&above->cst.spin); + spin_lock(&above->cst.spin); + if (child->delete_tid <= trans->sync_tid && + child->modify_tid <= parent->update_lo && + child->delete_tid > parent->update_lo && + (child->flags & HAMMER2_CHAIN_BMAPPED)) { + KKASSERT(child->flags & HAMMER2_CHAIN_ONDBTREE); + RB_REMOVE(hammer2_chain_tree, &above->dbtree, child); + atomic_clear_int(&child->flags, HAMMER2_CHAIN_ONDBTREE); + atomic_clear_int(&child->flags, HAMMER2_CHAIN_BMAPPED); } - } else if (info->pass == 2 && child->delete_tid > trans->sync_tid) { + /* - * Inserting. The block array is expected to NOT contain - * the child's entry if: - * - * (1) The creation occurred after the parent's block table - * was last synchronized (modify_tid), and - * - * (2) The child is not being deleted in the same - * transaction. + * Inserting into blockmap, place child in rbtree or dbtree. 
*/ -#if FLUSH_DEBUG - kprintf("S2B %p.%d b=%p d/b=%016jx/%016jx m/b=%016jx/%016jx\n", - child, child->bref.type, - base, child->delete_tid, parent->bref.mirror_tid, - child->modify_tid, parent->bref.mirror_tid); -#endif - if (base && - child->modify_tid > parent->bref.mirror_tid) { - KKASSERT(child->flags & HAMMER2_CHAIN_MOVED); - KKASSERT(parent->modify_tid == trans->sync_tid || - (parent == &hmp->vchain || - parent == &hmp->fchain)); - hammer2_rollup_stats(parent, child, 1); + if (child->delete_tid > trans->sync_tid && + child->modify_tid > parent->update_lo && + (child->flags & HAMMER2_CHAIN_BMAPPED) == 0) { + if (child->flags & HAMMER2_CHAIN_ONDBQ) { + TAILQ_REMOVE(&above->dbq, child, db_entry); + atomic_clear_int(&child->flags, + HAMMER2_CHAIN_ONDBQ); + } + if ((child->flags & HAMMER2_CHAIN_DELETED) == 0 && + (child->flags & HAMMER2_CHAIN_ONRBTREE) == 0) { + KKASSERT((child->flags & + (HAMMER2_CHAIN_ONDBTREE | + HAMMER2_CHAIN_ONDBQ)) == 0); + xchain = RB_INSERT(hammer2_chain_tree, + &above->rbtree, child); + KKASSERT(xchain == NULL); + atomic_set_int(&child->flags, + HAMMER2_CHAIN_ONRBTREE); + } else + if ((child->flags & HAMMER2_CHAIN_DELETED) && + (child->flags & HAMMER2_CHAIN_ONDBTREE) == 0) { + KKASSERT((child->flags & + (HAMMER2_CHAIN_ONRBTREE | + HAMMER2_CHAIN_ONDBQ)) == 0); + xchain = RB_INSERT(hammer2_chain_tree, + &above->dbtree, child); + KKASSERT(xchain == NULL); + atomic_set_int(&child->flags, + HAMMER2_CHAIN_ONDBTREE); + } + atomic_set_int(&child->flags, HAMMER2_CHAIN_BMAPPED); + KKASSERT(child->flags & + (HAMMER2_CHAIN_ONRBTREE | + HAMMER2_CHAIN_ONDBTREE | + HAMMER2_CHAIN_ONDBQ)); + } + + /* + * Not on any list, place child on DBQ + */ + if ((child->flags & (HAMMER2_CHAIN_ONRBTREE | + HAMMER2_CHAIN_ONDBTREE | + HAMMER2_CHAIN_ONDBQ)) == 0) { + KKASSERT((child->flags & HAMMER2_CHAIN_BMAPPED) == 0); + TAILQ_INSERT_TAIL(&above->dbq, child, db_entry); + atomic_set_int(&child->flags, HAMMER2_CHAIN_ONDBQ); + } + spin_unlock(&above->cst.spin); + } + +#if 0 + /* + * Adjust queues and adjust BMAPPED on the live parent only + * (there should be only one). + * + * First handle deletions + */ + if (parent->delete_tid > info->sync_tid && + child->delete_tid <= trans->sync_tid && + child->modify_tid <= parent->update_lo && + child->delete_tid > parent->update_lo) { + KKASSERT(child->flags & HAMMER2_CHAIN_FLUSH_DELETE); + if (child->flags & HAMMER2_CHAIN_ONDBTREE) { + KKASSERT(child->flags & HAMMER2_CHAIN_FLUSH_DELETE); + KKASSERT(child->flags & HAMMER2_CHAIN_BMAPPED); spin_lock(&above->cst.spin); -#if FLUSH_DEBUG - kprintf("trans %jx parent %p.%d child %p.%d m/d %016jx/%016jx " - "flg=%08x %016jx/%d insert\n", - trans->sync_tid, - parent, parent->bref.type, - child, child->bref.type, - child->modify_tid, child->delete_tid, - child->flags, - child->bref.key, child->bref.keybits); -#endif - hammer2_base_insert(trans, parent, base, count, - &info->cache_index, child); + RB_REMOVE(hammer2_chain_tree, &above->dbtree, child); + atomic_clear_int(&child->flags, HAMMER2_CHAIN_ONDBTREE); + atomic_clear_int(&child->flags, HAMMER2_CHAIN_BMAPPED); + TAILQ_INSERT_TAIL(&above->dbq, child, db_entry); + atomic_set_int(&child->flags, HAMMER2_CHAIN_ONDBQ); spin_unlock(&above->cst.spin); + } else { + KKASSERT(child->flags & HAMMER2_CHAIN_ONDBQ); } - } else if (info->pass == 3 && - (child->delete_tid == HAMMER2_MAX_TID || - child->delete_tid <= trans->sync_tid) && - (child->flags & HAMMER2_CHAIN_MOVED)) { + } + + /* + * Adjust child state when updating a live parent. 
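The live-parent branch of pass4 above performs three bookkeeping steps on each child. A simplified model is sketched below, using plain booleans for membership in rbtree/dbtree/dbq and for the BMAPPED flag; the real code performs the same transitions under above->cst.spin with RB_REMOVE/RB_INSERT and the TAILQ macros.

/* Illustrative sketch only -- models the pass4 live-parent bookkeeping. */
#include <stdbool.h>
#include <stdint.h>

struct xchild {                 /* hypothetical, trimmed-down chain */
    bool     deleted;           /* CHAIN_DELETED */
    bool     bmapped;           /* CHAIN_BMAPPED: blockref present in parent */
    bool     on_rbtree;         /* member of the live rbtree */
    bool     on_dbtree;         /* member of the deleted/shadow rbtree */
    bool     on_dbq;            /* member of the set-aside deleted queue */
    uint64_t modify_tid;
    uint64_t delete_tid;
};

static void
pass4_adjust_live(struct xchild *c, uint64_t parent_update_lo,
                  uint64_t sync_tid)
{
    /*
     * Step 1: the blockref was removed from the live parent in pass2;
     * the child leaves dbtree and is no longer block-mapped.
     */
    if (c->delete_tid <= sync_tid &&
        c->modify_tid <= parent_update_lo &&
        c->delete_tid > parent_update_lo && c->bmapped) {
        c->on_dbtree = false;
        c->bmapped = false;
    }

    /*
     * Step 2: the blockref was inserted into the live parent in pass3;
     * live children go on rbtree, deleted children on dbtree, and the
     * child is block-mapped either way.
     */
    if (c->delete_tid > sync_tid &&
        c->modify_tid > parent_update_lo && !c->bmapped) {
        c->on_dbq = false;
        if (!c->deleted && !c->on_rbtree)
            c->on_rbtree = true;
        else if (c->deleted && !c->on_dbtree)
            c->on_dbtree = true;
        c->bmapped = true;
    }

    /*
     * Step 3: anything left on no list at all is parked on dbq so the
     * flush can still reach it later.
     */
    if (!c->on_rbtree && !c->on_dbtree && !c->on_dbq)
        c->on_dbq = true;
}

The net effect is that block-mapped live children sit on rbtree, block-mapped deleted children sit on dbtree, and everything else is parked on dbq.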
+ * Adjust child state on the last parent. + */ + if (parent->delete_tid > info->sync_tid && + child->delete_tid > trans->sync_tid && + child->modify_tid > parent->update_lo) { /* - * We can't clear the MOVED bit on children whos modify_tid - * is beyond our current trans (was tested at top of scan2), - * or on deleted children which have not yet been flushed - * (handled above). - * - * Scan all parents of this child and determine if any of - * them still need the child's MOVED bit. + * NOTE: FLUSH_CREATE may have already been cleared and + * BMAPPED may have already been set due to + * reentrancy due to the queue movement below. */ - hammer2_chain_t *scan; - - if (hammer2_debug & 0x4000) - kprintf("CHECKMOVED %p (parent=%p)", child, parent); + atomic_set_int(&child->flags, HAMMER2_CHAIN_BMAPPED); - ok = 1; - spin_lock(&above->cst.spin); - TAILQ_FOREACH(scan, &above->ownerq, core_entry) { + if ((child->flags & HAMMER2_CHAIN_DELETED) && + (child->flags & HAMMER2_CHAIN_ONDBQ)) { /* - * Can't clear child's MOVED until all parent's have - * synchronized with it. + * A deleted child that is on DBQ which has been + * inserted must be moved to DBTREE. However, + * temporary deleted children are only applicable to + * the current flush and must not have any visibility + * beyond it, so temporary children are left on DBQ. * - * Ignore deleted parents as-of this flush TID. - * Ignore the current parent being flushed. + * We run DBQ before DBTREE so this can cause pass4 + * to be called twice on this child. */ - if (h2ignore_deleted(info, scan)) - continue; - if (scan == parent) - continue; - + KKASSERT((child->flags & HAMMER2_CHAIN_ONRBTREE) == 0); + if ((child->flags & (HAMMER2_CHAIN_ONDBQ | + HAMMER2_CHAIN_FLUSH_TEMPORARY)) == + HAMMER2_CHAIN_ONDBQ) { + spin_lock(&above->cst.spin); + + KKASSERT(child->flags & + HAMMER2_CHAIN_FLUSH_CREATE); + TAILQ_REMOVE(&above->dbq, child, db_entry); + atomic_clear_int(&child->flags, + HAMMER2_CHAIN_ONDBQ); + xchain = RB_INSERT(hammer2_chain_tree, + &above->dbtree, child); + KASSERT(xchain == NULL, ("pass4: collision with %p moving %p to dbtree", xchain, child)); + atomic_set_int(&child->flags, + HAMMER2_CHAIN_ONDBTREE); + spin_unlock(&above->cst.spin); + } + } else if (child->flags & HAMMER2_CHAIN_DELETED) { + KKASSERT(child->flags & HAMMER2_CHAIN_ONDBTREE); + } else { /* - * For parents not already synchronized check to see - * if the flush has gotten past them yet or not. - * - * This must roughly mimic the tests that - * hammer2_chain_flush_core() runs or we could leave - * children hanging around with MOVED set and cause - * a memory leak. + * If the child is not deleted it must be on + * the rbtree. */ - if (scan->bref.mirror_tid >= trans->sync_tid) - continue; - if (scan->bref.mirror_tid >= above->update_hi) - continue; - - if (hammer2_debug & 0x4000) { - kprintf("(fail scan %p %016jx/%016jx)", - scan, scan->bref.mirror_tid, - child->modify_tid); - } - ok = 0; - break; + KKASSERT(child->flags & HAMMER2_CHAIN_ONRBTREE); } - if (hammer2_debug & 0x4000) - kprintf("\n"); - spin_unlock(&above->cst.spin); + } +#endif - /* - * Can we finally clear MOVED? 
- */ - if (ok) { - if (hammer2_debug & 0x4000) - kprintf("clear moved %p.%d %016jx/%d\n", - child, child->bref.type, - child->bref.key, child->bref.keybits); - atomic_clear_int(&child->flags, HAMMER2_CHAIN_MOVED); - if (child->flags & HAMMER2_CHAIN_MODIFIED) { - kprintf("modified child %p all parents updated\n", - child); - atomic_clear_int(&child->flags, - HAMMER2_CHAIN_MODIFIED); - hammer2_chain_memory_wakeup(child->pmp); - hammer2_chain_drop(child);/* cleared MODIFIED */ - } - hammer2_chain_drop(child); /* cleared MOVED */ - } else { - if (hammer2_debug & 0x4000) - kprintf("keep moved %p.%d %016jx/%d\n", - child, child->bref.type, - child->bref.key, child->bref.keybits); + /* + * Cleanup flags on last parent iterated for flush. + */ + if (info->domodify & 2) { + if (child->flags & HAMMER2_CHAIN_FLUSH_CREATE) { + atomic_clear_int(&child->flags, + HAMMER2_CHAIN_FLUSH_CREATE); + hammer2_chain_drop(child); + } + if ((child->flags & HAMMER2_CHAIN_FLUSH_DELETE) && + child->delete_tid <= trans->sync_tid) { + KKASSERT((child->flags & HAMMER2_CHAIN_ONDBTREE) == 0); + KKASSERT((child->flags & HAMMER2_CHAIN_BMAPPED) == 0); + atomic_clear_int(&child->flags, + HAMMER2_CHAIN_FLUSH_DELETE); + hammer2_chain_drop(child); } } @@ -1647,43 +1499,9 @@ hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) /* * The parent may have been delete-duplicated. */ - info->parent = parent; -finalize: return (0); } -/* - * Update core->update_lo and attempt to clear the MOVED bit - * for its children. - * - * This routine is only called after a sub-tree has been fully flushed - * up to the current flush synchronization point. Calling it under any - * other condition will blow up flush tracking. - */ -static -void -hammer2_flush_core_update(hammer2_chain_core_t *core, - hammer2_flush_info_t *info) -{ - hammer2_chain_layer_t *layer; - - spin_lock(&core->cst.spin); - if (core->update_lo < info->sync_tid) - core->update_lo = info->sync_tid; - TAILQ_FOREACH_REVERSE(layer, &core->layerq, - h2_layer_list, entry) { - info->pass = 3; - ++layer->refs; - KKASSERT(layer->good == 0xABCD); - RB_SCAN(hammer2_chain_tree, &layer->rbtree, - NULL, hammer2_chain_flush_scan2, info); - --layer->refs; - KKASSERT(info->parent->core == core); - } - spin_unlock(&core->cst.spin); -} - -static void hammer2_rollup_stats(hammer2_chain_t *parent, hammer2_chain_t *child, int how) { diff --git a/sys/vfs/hammer2/hammer2_freemap.c b/sys/vfs/hammer2/hammer2_freemap.c index d10bf9ef99..cfd7d654c8 100644 --- a/sys/vfs/hammer2/hammer2_freemap.c +++ b/sys/vfs/hammer2/hammer2_freemap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. + * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. 
* * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -45,6 +45,8 @@ #include "hammer2.h" +#define FREEMAP_DEBUG 0 + struct hammer2_fiterate { hammer2_off_t bpref; hammer2_off_t bnext; @@ -203,8 +205,9 @@ hammer2_freemap_reserve(hammer2_trans_t *trans, hammer2_chain_t *chain, break; } bref->data_off = off | radix; -#if 0 - kprintf("-> %016jx\n", bref->data_off); +#if FREEMAP_DEBUG + kprintf("FREEMAP BLOCK TYPE %d %016jx/%d DATA_OFF=%016jx\n", + bref->type, bref->key, bref->keybits, bref->data_off); #endif return (0); } @@ -894,6 +897,12 @@ hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_mount_t *hmp, /* XXX handle error */ } +#if FREEMAP_DEBUG + kprintf("FREEMAP ADJUST TYPE %d %016jx/%d DATA_OFF=%016jx\n", + chain->bref.type, chain->bref.key, + chain->bref.keybits, chain->bref.data_off); +#endif + /* * Calculate the bitmask (runs in 2-bit pairs). */ @@ -917,12 +926,15 @@ hammer2_freemap_adjust(hammer2_trans_t *trans, hammer2_mount_t *hmp, /* * [re]load the bmap and bitmap pointers. Each bmap entry covers * a 2MB swath. The bmap itself (LEVEL1) covers 2GB. + * + * Be sure to reset the linear iterator to ensure that the adjustment + * is not ignored. */ again: bmap = &chain->data->bmdata[(int)(data_off >> HAMMER2_SEGRADIX) & (HAMMER2_FREEMAP_COUNT - 1)]; bitmap = &bmap->bitmap[(int)(data_off >> (HAMMER2_SEGRADIX - 3)) & 7]; - + bmap->linear = 0; while (count) { KKASSERT(bmmask11); diff --git a/sys/vfs/hammer2/hammer2_inode.c b/sys/vfs/hammer2/hammer2_inode.c index d295947b9b..37f7dd3684 100644 --- a/sys/vfs/hammer2/hammer2_inode.c +++ b/sys/vfs/hammer2/hammer2_inode.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. + * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -41,6 +41,8 @@ #include "hammer2.h" +#define INODE_DEBUG 0 + static void hammer2_inode_move_to_hidden(hammer2_trans_t *trans, hammer2_chain_t **chainp, hammer2_tid_t inum); @@ -619,6 +621,10 @@ retry: HAMMER2_BREF_TYPE_INODE, HAMMER2_INODE_BYTES); } +#if INODE_DEBUG + kprintf("CREATE INODE %*.*s chain=%p\n", + (int)name_len, (int)name_len, name, chain); +#endif /* * Cleanup and handle retries. diff --git a/sys/vfs/hammer2/hammer2_io.c b/sys/vfs/hammer2/hammer2_io.c index f2e2db6608..1b52ed5e2f 100644 --- a/sys/vfs/hammer2/hammer2_io.c +++ b/sys/vfs/hammer2/hammer2_io.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 The DragonFly Project. All rights reserved. + * Copyright (c) 2013-2014 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon diff --git a/sys/vfs/hammer2/hammer2_ioctl.c b/sys/vfs/hammer2/hammer2_ioctl.c index 76c9724bf6..05bedfcfb0 100644 --- a/sys/vfs/hammer2/hammer2_ioctl.c +++ b/sys/vfs/hammer2/hammer2_ioctl.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. + * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon diff --git a/sys/vfs/hammer2/hammer2_ioctl.h b/sys/vfs/hammer2/hammer2_ioctl.h index dafa4f3c9a..82b8778348 100644 --- a/sys/vfs/hammer2/hammer2_ioctl.h +++ b/sys/vfs/hammer2/hammer2_ioctl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved. + * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. 
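The bmap->linear = 0 added to hammer2_freemap_adjust() in the freemap hunk above resets a next-fit style allocation hint. The toy allocator below is only an analogy (it is not the hammer2 freemap, whose bitmaps run in 2-bit pairs over 2MB swaths); it shows why a stale hint would make the bitmap adjustment invisible to subsequent allocations.

/* Illustrative sketch only -- a generic next-fit bitmap, not the hammer2 freemap. */
#include <stdint.h>

#define NBLOCKS 64

struct toy_bmap {
    uint64_t bitmap;    /* 1 bit per block, 1 = allocated */
    int      linear;    /* next-fit hint: scans start here */
};

/* Next-fit allocate: scanning from the hint keeps repeated allocations cheap. */
static int
toy_alloc(struct toy_bmap *bm)
{
    for (int i = bm->linear; i < NBLOCKS; ++i) {
        if ((bm->bitmap & ((uint64_t)1 << i)) == 0) {
            bm->bitmap |= (uint64_t)1 << i;
            bm->linear = i + 1;
            return i;
        }
    }
    return -1;          /* nothing free beyond the hint */
}

/*
 * Adjusting the bitmap behind the hint: without resetting the hint the
 * freed block would be skipped by toy_alloc(), i.e. the adjustment would
 * be ignored.
 */
static void
toy_free(struct toy_bmap *bm, int i)
{
    bm->bitmap &= ~((uint64_t)1 << i);
    bm->linear = 0;     /* reset the hint, as the hunk above now does
                         * with bmap->linear = 0 */
}

Without the reset, toy_alloc() keeps scanning from the old hint and never revisits the block freed behind it, which is the "adjustment is ignored" failure the new comment in hammer2_freemap_adjust() warns about.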
* * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon diff --git a/sys/vfs/hammer2/hammer2_mount.h b/sys/vfs/hammer2/hammer2_mount.h index 23a169232b..e3c4a73e40 100644 --- a/sys/vfs/hammer2/hammer2_mount.h +++ b/sys/vfs/hammer2/hammer2_mount.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved. + * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon diff --git a/sys/vfs/hammer2/hammer2_msgops.c b/sys/vfs/hammer2/hammer2_msgops.c index 7d9cabd5ad..e7e7042695 100644 --- a/sys/vfs/hammer2/hammer2_msgops.c +++ b/sys/vfs/hammer2/hammer2_msgops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 The DragonFly Project. All rights reserved. + * Copyright (c) 2012-2014 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon diff --git a/sys/vfs/hammer2/hammer2_subr.c b/sys/vfs/hammer2/hammer2_subr.c index 6aa96142b3..5c934a45c7 100644 --- a/sys/vfs/hammer2/hammer2_subr.c +++ b/sys/vfs/hammer2/hammer2_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. + * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon diff --git a/sys/vfs/hammer2/hammer2_vfsops.c b/sys/vfs/hammer2/hammer2_vfsops.c index 0edeacdd89..e94eb3d0fd 100644 --- a/sys/vfs/hammer2/hammer2_vfsops.c +++ b/sys/vfs/hammer2/hammer2_vfsops.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. + * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -82,6 +82,7 @@ int hammer2_debug; int hammer2_cluster_enable = 1; int hammer2_hardlink_enable = 1; int hammer2_flush_pipe = 100; +int hammer2_synchronous_flush = 0; long hammer2_limit_dirty_chains; long hammer2_iod_file_read; long hammer2_iod_meta_read; @@ -120,6 +121,8 @@ SYSCTL_INT(_vfs_hammer2, OID_AUTO, hardlink_enable, CTLFLAG_RW, &hammer2_hardlink_enable, 0, ""); SYSCTL_INT(_vfs_hammer2, OID_AUTO, flush_pipe, CTLFLAG_RW, &hammer2_flush_pipe, 0, ""); +SYSCTL_INT(_vfs_hammer2, OID_AUTO, synchronous_flush, CTLFLAG_RW, + &hammer2_synchronous_flush, 0, ""); SYSCTL_LONG(_vfs_hammer2, OID_AUTO, limit_dirty_chains, CTLFLAG_RW, &hammer2_limit_dirty_chains, 0, ""); @@ -532,10 +535,16 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, return error; } + /* + * Really important to get these right or flush will get + * confused. 
+ */ hmp->vchain.bref.mirror_tid = hmp->voldata.mirror_tid; hmp->vchain.modify_tid = hmp->voldata.mirror_tid; + hmp->vchain.update_lo = hmp->voldata.mirror_tid; hmp->fchain.bref.mirror_tid = hmp->voldata.freemap_tid; hmp->fchain.modify_tid = hmp->voldata.freemap_tid; + hmp->fchain.update_lo = hmp->voldata.freemap_tid; /* * First locate the super-root inode, which is key 0 @@ -1519,8 +1528,8 @@ hammer2_vfs_unmount_hmp1(struct mount *mp, hammer2_mount_t *hmp) hammer2_voldata_lock(hmp); if (((hmp->vchain.flags | hmp->fchain.flags) & HAMMER2_CHAIN_MODIFIED) || - hmp->vchain.core->update_hi > hmp->voldata.mirror_tid || - hmp->fchain.core->update_hi > hmp->voldata.freemap_tid) { + hmp->vchain.update_hi > hmp->voldata.mirror_tid || + hmp->fchain.update_hi > hmp->voldata.freemap_tid) { hammer2_voldata_unlock(hmp, 0); hammer2_vfs_sync(mp, MNT_WAIT); /*hammer2_vfs_sync(mp, MNT_WAIT);*/ @@ -1530,20 +1539,20 @@ hammer2_vfs_unmount_hmp1(struct mount *mp, hammer2_mount_t *hmp) if (hmp->pmp_count == 0) { if (((hmp->vchain.flags | hmp->fchain.flags) & HAMMER2_CHAIN_MODIFIED) || - (hmp->vchain.core->update_hi > + (hmp->vchain.update_hi > hmp->voldata.mirror_tid) || - (hmp->fchain.core->update_hi > + (hmp->fchain.update_hi > hmp->voldata.freemap_tid)) { kprintf("hammer2_unmount: chains left over " "after final sync\n"); kprintf(" vchain %08x update_hi %jx/%jx\n", hmp->vchain.flags, hmp->voldata.mirror_tid, - hmp->vchain.core->update_hi); + hmp->vchain.update_hi); kprintf(" fchain %08x update_hi %jx/%jx\n", hmp->fchain.flags, hmp->voldata.freemap_tid, - hmp->fchain.core->update_hi); + hmp->fchain.update_hi); if (hammer2_debug & 0x0010) Debugger("entered debugger"); @@ -1576,8 +1585,7 @@ hammer2_vfs_unmount_hmp2(struct mount *mp, hammer2_mount_t *hmp) vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0); hmp->devvp = NULL; - VOP_CLOSE(devvp, - (ronly ? FREAD : FREAD|FWRITE)); + VOP_CLOSE(devvp, (ronly ? FREAD : FREAD|FWRITE), NULL); vn_unlock(devvp); vrele(devvp); devvp = NULL; @@ -1598,9 +1606,9 @@ hammer2_vfs_unmount_hmp2(struct mount *mp, hammer2_mount_t *hmp) * rot). 
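Both the unmount path above and hammer2_vfs_sync() below decide whether a top-level chain still needs flushing with the same two-part test: the chain itself is MODIFIED, or some child was modified after the transaction id last written to the volume header. A small sketch with stand-in types (xtopchain and the flag value are illustrative, not the real hammer2_chain):

/* Illustrative sketch only -- the dirtiness test used at unmount/sync time. */
#include <stdbool.h>
#include <stdint.h>

#define XCHAIN_MODIFIED 0x0001  /* stand-in for HAMMER2_CHAIN_MODIFIED */

struct xtopchain {              /* hypothetical vchain/fchain stand-in */
    uint32_t flags;
    uint64_t update_hi;         /* highest tid of unsynchronized children */
};

/*
 * A topology rooted at vchain (media_tid = voldata.mirror_tid) or fchain
 * (media_tid = voldata.freemap_tid) still needs flushing if the root is
 * itself modified or if children were modified after the last tid
 * recorded in the volume header.
 */
static bool
needs_flush(const struct xtopchain *c, uint64_t media_tid)
{
    return (c->flags & XCHAIN_MODIFIED) || c->update_hi > media_tid;
}

needs_flush(&vchain, voldata.mirror_tid) and needs_flush(&fchain, voldata.freemap_tid) correspond to the two checks in the unmount and sync hunks.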
*/ dumpcnt = 50; - hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt); + hammer2_dump_chain(&hmp->vchain, 0, &dumpcnt, 'v'); dumpcnt = 50; - hammer2_dump_chain(&hmp->fchain, 0, &dumpcnt); + hammer2_dump_chain(&hmp->fchain, 0, &dumpcnt, 'f'); hammer2_mount_unlock(hmp); hammer2_chain_drop(&hmp->vchain); @@ -1838,7 +1846,7 @@ hammer2_recovery_scan(hammer2_trans_t *trans, hammer2_mount_t *hmp, HAMMER2_LOOKUP_NODATA); while (chain) { atomic_set_int(&chain->flags, HAMMER2_CHAIN_RELEASE); - if (chain->bref.mirror_tid >= hmp->voldata.mirror_tid) { + if (chain->bref.mirror_tid >= hmp->voldata.alloc_tid - 1) { error = hammer2_recovery_scan(trans, hmp, chain, list, depth + 1); if (error) @@ -1936,21 +1944,25 @@ hammer2_vfs_sync(struct mount *mp, int waitfor) */ #if 1 hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS); + kprintf("sync tid test fmap %016jx %016jx\n", + hmp->fchain.update_hi, hmp->voldata.freemap_tid); if ((hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) || - hmp->fchain.core->update_hi > hmp->voldata.freemap_tid) { + hmp->fchain.update_hi > hmp->voldata.freemap_tid) { /* this will also modify vchain as a side effect */ chain = &hmp->fchain; - hammer2_chain_flush(&info.trans, &chain); + hammer2_flush(&info.trans, &chain); KKASSERT(chain == &hmp->fchain); } hammer2_chain_unlock(&hmp->fchain); #endif hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS); + kprintf("sync tid test vmap %016jx %016jx\n", + hmp->vchain.update_hi, hmp->voldata.mirror_tid); if ((hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) || - hmp->vchain.core->update_hi > hmp->voldata.mirror_tid) { + hmp->vchain.update_hi > hmp->voldata.mirror_tid) { chain = &hmp->vchain; - hammer2_chain_flush(&info.trans, &chain); + hammer2_flush(&info.trans, &chain); KKASSERT(chain == &hmp->vchain); force_fchain = 1; } else { @@ -1961,11 +1973,11 @@ hammer2_vfs_sync(struct mount *mp, int waitfor) #if 0 hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS); if ((hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) || - hmp->fchain.core->update_hi > hmp->voldata.freemap_tid || + hmp->fchain.update_hi > hmp->voldata.freemap_tid || force_fchain) { /* this will also modify vchain as a side effect */ chain = &hmp->fchain; - hammer2_chain_flush(&info.trans, &chain); + hammer2_flush(&info.trans, &chain); KKASSERT(chain == &hmp->fchain); } hammer2_chain_unlock(&hmp->fchain); @@ -2039,12 +2051,7 @@ hammer2_vfs_sync(struct mount *mp, int waitfor) /* * Sync passes. - * - * NOTE: We don't test update_lo/update_hi or MOVED here because the fsync - * code won't flush on those flags. The syncer code above will do a - * general meta-data flush globally that will catch these flags. */ - static int hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data) { @@ -2091,7 +2098,7 @@ hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data) * below. */ parent = hammer2_inode_lock_ex(ip); - hammer2_chain_flush(&info->trans, &parent); + hammer2_flush(&info->trans, &parent); hammer2_inode_unlock_ex(ip, parent); #endif hammer2_inode_drop(ip); @@ -2460,9 +2467,8 @@ hammer2_lwinprog_wait(hammer2_pfsmount_t *pmp) } void -hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp) +hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp, char pfx) { - hammer2_chain_layer_t *layer; hammer2_chain_t *scan; hammer2_chain_t *first_parent; @@ -2474,44 +2480,49 @@ hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp) if (*countp < 0) return; first_parent = chain->core ? 
TAILQ_FIRST(&chain->core->ownerq) : NULL; - kprintf("%*.*schain %p.%d %016jx/%d mir=%016jx\n", - tab, tab, "", + kprintf("%*.*s%c-chain %p.%d %016jx/%d mir=%016jx\n", + tab, tab, "", pfx, chain, chain->bref.type, chain->bref.key, chain->bref.keybits, chain->bref.mirror_tid); - kprintf("%*.*s [%08x] (%s) dt=%016jx refs=%d\n", + kprintf("%*.*s [%08x] (%s) mod=%016jx del=%016jx " + "lo=%08jx hi=%08jx refs=%d\n", tab, tab, "", chain->flags, ((chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data) ? (char *)chain->data->ipdata.filename : "?"), + chain->modify_tid, chain->delete_tid, + chain->update_lo, + chain->update_hi, chain->refs); - kprintf("%*.*s core %p [%08x] lo=%08jx hi=%08jx fp=%p np=%p", + kprintf("%*.*s core %p [%08x]", tab, tab, "", - chain->core, (chain->core ? chain->core->flags : 0), - (chain->core ? chain->core->update_lo : -1), - (chain->core ? chain->core->update_hi : -1), - first_parent, - (first_parent ? TAILQ_NEXT(chain, core_entry) : NULL)); + chain->core, (chain->core ? chain->core->flags : 0)); if (first_parent) - kprintf(" [fpflags %08x fprefs %d\n", + kprintf("\n%*.*s fp=%p np=%p [fpflags %08x fprefs %d", + tab, tab, "", + first_parent, + (first_parent ? TAILQ_NEXT(first_parent, core_entry) : + NULL), first_parent->flags, first_parent->refs); - if (chain->core == NULL || TAILQ_EMPTY(&chain->core->layerq)) + if (chain->core == NULL || RB_EMPTY(&chain->core->rbtree)) kprintf("\n"); else kprintf(" {\n"); if (chain->core) { - TAILQ_FOREACH(layer, &chain->core->layerq, entry) { - RB_FOREACH(scan, hammer2_chain_tree, &layer->rbtree) { - hammer2_dump_chain(scan, tab + 4, countp); - } - } + RB_FOREACH(scan, hammer2_chain_tree, &chain->core->rbtree) + hammer2_dump_chain(scan, tab + 4, countp, 'a'); + RB_FOREACH(scan, hammer2_chain_tree, &chain->core->dbtree) + hammer2_dump_chain(scan, tab + 4, countp, 'r'); + TAILQ_FOREACH(scan, &chain->core->dbq, db_entry) + hammer2_dump_chain(scan, tab + 4, countp, 'd'); } - if (chain->core && !TAILQ_EMPTY(&chain->core->layerq)) { + if (chain->core && !RB_EMPTY(&chain->core->rbtree)) { if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data) kprintf("%*.*s}(%s)\n", tab, tab, "", chain->data->ipdata.filename); diff --git a/sys/vfs/hammer2/hammer2_vnops.c b/sys/vfs/hammer2/hammer2_vnops.c index 38cfb1ca81..1d0b3214ca 100644 --- a/sys/vfs/hammer2/hammer2_vnops.c +++ b/sys/vfs/hammer2/hammer2_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. + * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -305,9 +305,6 @@ hammer2_vop_reclaim(struct vop_reclaim_args *ap) return(0); /* - * Set update_hi so we can detect and propagate the DELETED - * bit in the flush code. - * * ip->chain might be stale, correct it before checking as older * versions of the chain are likely marked deleted even if the * file hasn't been. XXX ip->chain should never be stale on @@ -327,11 +324,7 @@ hammer2_vop_reclaim(struct vop_reclaim_args *ap) * * HAMMER2 usually does not try to optimize the freemap by returning * deleted blocks to it as it does not usually know how many snapshots - * might be referencing portions of the file/dir. XXX TODO. - * - * XXX TODO - However, any modified file as-of when a snapshot is made - * cannot use this optimization as some of the modifications - * may wind up being part of the snapshot. + * might be referencing portions of the file/dir. 
*/ vp->v_data = NULL; ip->vp = NULL; @@ -341,11 +334,6 @@ hammer2_vop_reclaim(struct vop_reclaim_args *ap) hammer2_trans_init(&trans, ip->pmp, NULL, HAMMER2_TRANS_BUFCACHE); hammer2_chain_delete(&trans, chain, 0); - hammer2_chain_setsubmod(&trans, chain); - spin_lock(&chain->core->cst.spin); - if (chain->core->update_hi < trans.sync_tid) - chain->core->update_hi = trans.sync_tid; /* needed? */ - spin_unlock(&chain->core->cst.spin); hammer2_trans_done(&trans); } @@ -408,7 +396,7 @@ hammer2_vop_fsync(struct vop_fsync_args *ap) * XXX creates discontinuity w/modify_tid */ if (ap->a_flags & VOP_FSYNC_SYSCALL) { - hammer2_chain_flush(&trans, &chain); + hammer2_flush(&trans, &chain); } #endif hammer2_inode_unlock_ex(ip, chain); diff --git a/sys/vfs/hammer2/mkvntest b/sys/vfs/hammer2/mkvntest deleted file mode 100755 index fad5923c7e..0000000000 --- a/sys/vfs/hammer2/mkvntest +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/csh -# - -vnconfig -u vn0 >& /dev/null -rm -f /usr/obj/hammer2.img -truncate -s 1G /usr/obj/hammer2.img -newfs_hammer2 -L ROOT /usr/obj/hammer2.img -vnconfig -c vn0 /usr/obj/hammer2.img - -echo "hammer2.img on /dev/vn0" -- 2.41.0
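Taken together, the callbacks in this patch split the old two-scan flush into four passes: pass1 recurses and decides whether the parent must be modified, pass2 removes stale blockrefs, pass3 inserts new ones, and pass4 fixes up rbtree/dbtree/dbq placement and the FLUSH_* flags. The driver that iterates the children is presumably hammer2_flush_core() walking the parent's rbtree/dbtree/dbq with RB_SCAN/TAILQ_FOREACH; it is not part of this excerpt, so the sketch below only captures the ordering contract between the callbacks, with a plain array standing in for those containers.

/*
 * Illustrative sketch only -- the ordering contract between the flush
 * callbacks above, not the in-tree driver.
 */
#include <stddef.h>

struct xchild;                          /* opaque stand-in for hammer2_chain_t */
typedef int (*flush_cb)(struct xchild *child, void *data);

static void
run_flush_passes(struct xchild **children, size_t n,
                 flush_cb pass2, flush_cb pass3, flush_cb pass4, void *info)
{
    size_t i;

    /*
     * Deletions first: every stale blockref leaves the parent's base[]
     * array before any insertion runs, so insertions can reuse the freed
     * slots (the same reason the old scan2 was called once for deletions
     * and once for insertions).
     */
    for (i = 0; i < n; ++i)
        pass2(children[i], info);
    for (i = 0; i < n; ++i)
        pass3(children[i], info);

    /*
     * Cleanup and queue adjustment run last, once the block table is
     * consistent for this transaction.
     */
    for (i = 0; i < n; ++i)
        pass4(children[i], info);
}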