From 9797e933b125aaafa99bf816ad3e1c9e0143395f Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Fri, 3 May 2013 01:28:00 -0700 Subject: [PATCH] hammer2 - Major restructuring, part 4/several * Add inumber -> inode structure tracking and lookup. This is needed to ensure that only a single inode structure be used to track multiple hardlinks to the same place. * Continue stabilization. Remove modify_tid/delete_tid checks in the flush code and (for now) only flush along the live path. Refactor held chains when creating new chains. The creation of a new chain can move around existing chains, causing the held chain to be marked deleted. When a hardlink is consolidated in a parent directory the source chain used in the duplication is not deleted. Numerous chain->duplink handling code was assuming that the source was always deleted. Fix that. * The shared inode lock now refactors ip->chain (the exclusive inode lock already did so). * Fix most ref-counting of the chain structure, fixing most of the memory leakage issues on unmount. * There are still some issues with small files not inheriting their data on duplication. cpdup /usr/share works but a significant number of small files lose their data references on re-mount. --- sys/vfs/hammer2/hammer2.h | 30 ++- sys/vfs/hammer2/hammer2_ccms.c | 9 +- sys/vfs/hammer2/hammer2_ccms.h | 3 +- sys/vfs/hammer2/hammer2_chain.c | 162 ++++++++------- sys/vfs/hammer2/hammer2_flush.c | 130 ++++++++++-- sys/vfs/hammer2/hammer2_inode.c | 341 +++++++++++++++++++++---------- sys/vfs/hammer2/hammer2_ioctl.c | 3 +- sys/vfs/hammer2/hammer2_subr.c | 43 +++- sys/vfs/hammer2/hammer2_vfsops.c | 43 +++- sys/vfs/hammer2/hammer2_vnops.c | 36 ++-- 10 files changed, 567 insertions(+), 233 deletions(-) diff --git a/sys/vfs/hammer2/hammer2.h b/sys/vfs/hammer2/hammer2.h index 432db1fa17..c62d5ddaa2 100644 --- a/sys/vfs/hammer2/hammer2.h +++ b/sys/vfs/hammer2/hammer2.h @@ -243,6 +243,8 @@ RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); * detached. */ +RB_HEAD(hammer2_inode_tree, hammer2_inode); + /* * A hammer2 inode. * @@ -250,6 +252,7 @@ RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); * is embedded in the chain (chain.cst) and aliased w/ attr_cst. */ struct hammer2_inode { + RB_ENTRY(hammer2_inode) rbnode; /* inumber lookup (HL) */ ccms_cst_t topo_cst; /* directory topology cst */ struct hammer2_mount *hmp; /* Global mount */ struct hammer2_pfsmount *pmp; /* PFS mount */ @@ -257,6 +260,7 @@ struct hammer2_inode { struct vnode *vp; hammer2_chain_t *chain; /* NOTE: rehomed on rename */ struct lockf advlock; + hammer2_tid_t inum; u_int flags; u_int refs; /* +vpref, +flushref */ }; @@ -266,6 +270,11 @@ typedef struct hammer2_inode hammer2_inode_t; #define HAMMER2_INODE_MODIFIED 0x0001 #define HAMMER2_INODE_DIRTYEMBED 0x0002 #define HAMMER2_INODE_RENAME_INPROG 0x0004 +#define HAMMER2_INODE_ONRBTREE 0x0008 + +int hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2); +RB_PROTOTYPE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp, + hammer2_tid_t); /* * A hammer2 transaction placeholder. @@ -338,6 +347,8 @@ struct hammer2_pfsmount { int ronly; /* read-only mount */ struct malloc_type *mmsg; kdmsg_iocom_t iocom; + struct spinlock inum_spin; /* inumber lookup */ + struct hammer2_inode_tree inum_tree; }; typedef struct hammer2_pfsmount hammer2_pfsmount_t; @@ -400,8 +411,9 @@ void hammer2_inode_unlock_sh(hammer2_inode_t *ip); void hammer2_voldata_lock(hammer2_mount_t *hmp); void hammer2_voldata_unlock(hammer2_mount_t *hmp, int modify); ccms_state_t hammer2_inode_lock_temp_release(hammer2_inode_t *ip); +void hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, ccms_state_t ostate); ccms_state_t hammer2_inode_lock_upgrade(hammer2_inode_t *ip); -void hammer2_inode_lock_restore(hammer2_inode_t *ip, ccms_state_t ostate); +void hammer2_inode_lock_downgrade(hammer2_inode_t *ip, ccms_state_t ostate); void hammer2_mount_exlock(hammer2_mount_t *hmp); void hammer2_mount_shlock(hammer2_mount_t *hmp); @@ -429,6 +441,8 @@ struct vnode *hammer2_igetv(hammer2_inode_t *ip, int *errorp); void hammer2_inode_lock_nlinks(hammer2_inode_t *ip); void hammer2_inode_unlock_nlinks(hammer2_inode_t *ip); +hammer2_inode_t *hammer2_inode_lookup(hammer2_pfsmount_t *pmp, + hammer2_tid_t inum); hammer2_inode_t *hammer2_inode_get(hammer2_mount_t *hmp, hammer2_pfsmount_t *pmp, hammer2_inode_t *dip, hammer2_chain_t *chain); @@ -445,18 +459,15 @@ hammer2_inode_t *hammer2_inode_create(hammer2_trans_t *trans, struct vattr *vap, struct ucred *cred, const uint8_t *name, size_t name_len, int *errorp); -hammer2_chain_t *hammer2_inode_duplicate(hammer2_trans_t *trans, - hammer2_chain_t *ochain, - hammer2_inode_t *dip, int *errorp); -int hammer2_inode_connect(hammer2_trans_t *trans, - hammer2_inode_t *dip, - hammer2_chain_t **chainp, +int hammer2_inode_connect(hammer2_trans_t *trans, int hlink, + hammer2_inode_t *dip, hammer2_chain_t **chainp, const uint8_t *name, size_t name_len); hammer2_inode_t *hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip); int hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip, - const uint8_t *name, size_t name_len, int isdir); + const uint8_t *name, size_t name_len, int isdir, + int *hlinkp); int hammer2_hardlink_consolidate(hammer2_trans_t *trans, hammer2_inode_t *ip, hammer2_chain_t **chainp, hammer2_inode_t *tdip, int linkcnt); @@ -502,7 +513,7 @@ hammer2_chain_t *hammer2_chain_next(hammer2_chain_t **parentp, hammer2_key_t key_beg, hammer2_key_t key_end, int flags); int hammer2_chain_create(hammer2_trans_t *trans, - hammer2_chain_t *parent, + hammer2_chain_t **parentp, hammer2_chain_t **chainp, hammer2_key_t key, int keybits, int type, size_t bytes); @@ -540,6 +551,7 @@ int hammer2_msg_adhoc_input(kdmsg_msg_t *msg); void hammer2_clusterctl_wakeup(kdmsg_iocom_t *iocom); void hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index); void hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp); +void hammer2_dump_chain(hammer2_chain_t *chain, int tab); /* * hammer2_freemap.c diff --git a/sys/vfs/hammer2/hammer2_ccms.c b/sys/vfs/hammer2/hammer2_ccms.c index f86171312e..81850f9ebf 100644 --- a/sys/vfs/hammer2/hammer2_ccms.c +++ b/sys/vfs/hammer2/hammer2_ccms.c @@ -270,6 +270,12 @@ ccms_thread_lock_temp_release(ccms_cst_t *cst) return (CCMS_STATE_INVALID); } +void +ccms_thread_lock_temp_restore(ccms_cst_t *cst, ccms_state_t ostate) +{ + ccms_thread_lock(cst, ostate); +} + /* * Temporarily upgrade a thread lock for making local structural changes. * No new shared or exclusive locks can be acquired by others while we are @@ -308,7 +314,7 @@ ccms_thread_lock_upgrade(ccms_cst_t *cst) } void -ccms_thread_lock_restore(ccms_cst_t *cst, ccms_state_t ostate) +ccms_thread_lock_downgrade(ccms_cst_t *cst, ccms_state_t ostate) { if (ostate == CCMS_STATE_SHARED) { KKASSERT(cst->td == curthread); @@ -325,6 +331,7 @@ ccms_thread_lock_restore(ccms_cst_t *cst, ccms_state_t ostate) spin_unlock(&cst->spin); } } + /* else nothing to do if excl->excl */ } /* diff --git a/sys/vfs/hammer2/hammer2_ccms.h b/sys/vfs/hammer2/hammer2_ccms.h index 0b010c9fa9..5ade494a5e 100644 --- a/sys/vfs/hammer2/hammer2_ccms.h +++ b/sys/vfs/hammer2/hammer2_ccms.h @@ -236,8 +236,9 @@ void ccms_cst_uninit(ccms_cst_t *cst); void ccms_thread_lock(ccms_cst_t *cst, ccms_state_t state); int ccms_thread_lock_nonblock(ccms_cst_t *cst, ccms_state_t state); ccms_state_t ccms_thread_lock_temp_release(ccms_cst_t *cst); +void ccms_thread_lock_temp_restore(ccms_cst_t *cst, ccms_state_t ostate); ccms_state_t ccms_thread_lock_upgrade(ccms_cst_t *cst); -void ccms_thread_lock_restore(ccms_cst_t *cst, ccms_state_t ostate); +void ccms_thread_lock_downgrade(ccms_cst_t *cst, ccms_state_t ostate); void ccms_thread_unlock(ccms_cst_t *cst); int ccms_thread_unlock_zero(ccms_cst_t *cst); int ccms_thread_lock_owned(ccms_cst_t *cst); diff --git a/sys/vfs/hammer2/hammer2_chain.c b/sys/vfs/hammer2/hammer2_chain.c index 17e3653a0a..80e7ae490b 100644 --- a/sys/vfs/hammer2/hammer2_chain.c +++ b/sys/vfs/hammer2/hammer2_chain.c @@ -124,8 +124,13 @@ hammer2_chain_parent_setsubmod(hammer2_chain_t *chain) while ((parent = chain->parent) != NULL) { core = parent->core; spin_lock(&core->cst.spin); - while (parent->duplink) + /* + * XXX flush synchronization + */ + while (parent->duplink && + (parent->flags & HAMMER2_CHAIN_DELETED)) { parent = parent->duplink; + } if (parent->flags & HAMMER2_CHAIN_SUBMODIFIED) { spin_unlock(&core->cst.spin); break; @@ -363,6 +368,7 @@ hammer2_chain_t * hammer2_chain_lastdrop(hammer2_chain_t *chain) { hammer2_chain_t *parent; + hammer2_chain_t *tmp; hammer2_chain_core_t *parent_core; parent = chain->parent; @@ -375,9 +381,9 @@ hammer2_chain_lastdrop(hammer2_chain_t *chain) atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); chain->parent = NULL; /* NULL field, must drop implied ref */ spin_unlock(&parent_core->cst.spin); - if (chain->duplink) { - hammer2_chain_drop(chain->duplink); + if ((tmp = chain->duplink) != NULL) { chain->duplink = NULL; + hammer2_chain_drop(tmp); } hammer2_chain_dealloc(chain); chain = parent; /* recursively drop parent */ @@ -493,7 +499,7 @@ hammer2_chain_lock(hammer2_chain_t *chain, int how) */ ostate = ccms_thread_lock_upgrade(&core->cst); if (chain->data) { - ccms_thread_lock_restore(&core->cst, ostate); + ccms_thread_lock_downgrade(&core->cst, ostate); return (0); } @@ -540,7 +546,7 @@ hammer2_chain_lock(hammer2_chain_t *chain, int how) (intmax_t)pbase, error); bqrelse(chain->bp); chain->bp = NULL; - ccms_thread_lock_restore(&core->cst, ostate); + ccms_thread_lock_downgrade(&core->cst, ostate); return (error); } @@ -611,7 +617,7 @@ hammer2_chain_lock(hammer2_chain_t *chain, int how) */ if (chain->bp) BUF_KERNPROC(chain->bp); - ccms_thread_lock_restore(&core->cst, ostate); + ccms_thread_lock_downgrade(&core->cst, ostate); return (0); } @@ -764,16 +770,10 @@ hammer2_chain_unlock(hammer2_chain_t *chain) * exclusively locked chain at the same index and unlocks the old chain. * Flushes the buffer if necessary. * - * If you want the resize code to copy the data to the new block then the - * caller should lock the chain RESOLVE_MAYBE or RESOLVE_ALWAYS. - * - * If the caller already holds a logical buffer containing the data and - * intends to bdwrite() that buffer resolve with RESOLVE_NEVER. The resize - * operation will then not copy the (stale) data from the media. - * * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order * to avoid instantiating a device buffer that conflicts with the vnode - * data buffer. + * data buffer. That is, the passed-in bp is a logical buffer, whereas + * any chain-oriented bp would be a device buffer. * * XXX flags currently ignored, uses chain->bp to detect data/no-data. * XXX return error if cannot resize. @@ -786,14 +786,16 @@ hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip, { hammer2_mount_t *hmp = trans->hmp; hammer2_chain_t *chain = *chainp; +#if 0 struct buf *nbp; + char *bdata; + int error; +#endif hammer2_off_t pbase; size_t obytes; size_t nbytes; size_t bbytes; int boff; - char *bdata; - int error; /* * Only data and indirect blocks can be resized for now. @@ -838,8 +840,10 @@ hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip, atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); hammer2_chain_ref(chain); } else { +#if 0 hammer2_freemap_free(hmp, chain->bref.data_off, chain->bref.type); +#endif } /* @@ -859,9 +863,14 @@ hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip, pbase = chain->bref.data_off & ~(hammer2_off_t)(bbytes - 1); boff = chain->bref.data_off & HAMMER2_OFF_MASK & (bbytes - 1); + KKASSERT(chain->bp == NULL); +#if 0 /* * Only copy the data if resolved, otherwise the caller is * responsible. + * + * XXX handle device-buffer resizing case too. Right now we + * only handle logical buffer resizing. */ if (chain->bp) { KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || @@ -911,6 +920,7 @@ hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip, chain->data = (void *)bdata; hammer2_chain_modify(trans, chain, 0); } +#endif /* * Make sure the chain is marked MOVED and SUBMOD is set in the @@ -1450,16 +1460,20 @@ hammer2_chain_lookup(hammer2_chain_t **parentp, ((hammer2_key_t)1 << parent->bref.keybits) - 1; if (key_beg >= scan_beg && key_end <= scan_end) break; - hammer2_chain_ref(parent); /* ref old parent */ - hammer2_chain_unlock(parent); /* unlock old parent */ - parent = parent->parent; - while (parent->duplink) - parent = parent->duplink; - - /* lock new parent */ - hammer2_chain_lock(parent, how_maybe); - hammer2_chain_drop(*parentp); /* drop old parent */ - *parentp = parent; /* new parent */ + /* + * XXX flush synchronization + */ + tmp = parent->parent; + while (tmp->duplink && + (tmp->flags & HAMMER2_CHAIN_DELETED)) { + tmp = tmp->duplink; + } + hammer2_chain_ref(tmp); /* ref new parent */ + hammer2_chain_unlock(parent); /* unlock old parent */ + /* lock new parent */ + hammer2_chain_lock(tmp, how_maybe | + HAMMER2_RESOLVE_NOREF); + *parentp = parent = tmp; /* new parent */ } again: @@ -1526,8 +1540,10 @@ again: for (i = 0; i < count; ++i) { tmp = hammer2_chain_find(parent, i); if (tmp) { - if (tmp->flags & HAMMER2_CHAIN_DELETED) + if (tmp->flags & HAMMER2_CHAIN_DELETED) { + hammer2_chain_drop(tmp); continue; + } bref = &tmp->bref; KKASSERT(bref->type != 0); } else if (base == NULL || base[i].type == 0) { @@ -1576,8 +1592,8 @@ again: hammer2_chain_unlock(parent); *parentp = parent = chain; if (flags & HAMMER2_LOOKUP_NOLOCK) { - hammer2_chain_lock(chain, how_maybe); - hammer2_chain_drop(chain); /* excess ref */ + hammer2_chain_lock(chain, how_maybe | + HAMMER2_RESOLVE_NOREF); } else if ((flags & HAMMER2_LOOKUP_NODATA) && chain->data == NULL) { hammer2_chain_ref(chain); @@ -1662,8 +1678,6 @@ again: * Continue iteration with next parent unless the current * parent covers the range. */ - hammer2_chain_t *nparent; - scan_beg = parent->bref.key; scan_end = scan_beg + ((hammer2_key_t)1 << parent->bref.keybits) - 1; @@ -1671,15 +1685,20 @@ again: return (NULL); i = parent->index + 1; - nparent = parent->parent; - while (nparent->duplink) - nparent = nparent->duplink; - hammer2_chain_ref(nparent); /* ref new parent */ + /* + * XXX flush synchronization + */ + tmp = parent->parent; + while (tmp->duplink && + (tmp->flags & HAMMER2_CHAIN_DELETED)) { + tmp = tmp->duplink; + } + hammer2_chain_ref(tmp); /* ref new parent */ hammer2_chain_unlock(parent); /* unlock old parent */ /* lock new parent */ - hammer2_chain_lock(nparent, how_maybe); - hammer2_chain_drop(nparent); /* drop excess ref */ - *parentp = parent = nparent; + hammer2_chain_lock(tmp, how_maybe | + HAMMER2_RESOLVE_NOREF); + *parentp = parent = tmp; } again2: @@ -1732,6 +1751,7 @@ again2: tmp = hammer2_chain_find(parent, i); if (tmp) { if (tmp->flags & HAMMER2_CHAIN_DELETED) { + hammer2_chain_drop(tmp); ++i; continue; } @@ -1781,8 +1801,8 @@ again2: *parentp = parent = chain; chain = NULL; if (flags & HAMMER2_LOOKUP_NOLOCK) { - hammer2_chain_lock(parent, how_maybe); - hammer2_chain_drop(parent); /* excess ref */ + hammer2_chain_lock(parent, how_maybe | + HAMMER2_RESOLVE_NOREF); } else if ((flags & HAMMER2_LOOKUP_NODATA) && parent->data == NULL) { hammer2_chain_ref(parent); @@ -1834,16 +1854,16 @@ again2: * Requires an exclusively locked parent. */ int -hammer2_chain_create(hammer2_trans_t *trans, hammer2_chain_t *parent, +hammer2_chain_create(hammer2_trans_t *trans, hammer2_chain_t **parentp, hammer2_chain_t **chainp, hammer2_key_t key, int keybits, int type, size_t bytes) { hammer2_mount_t *hmp; hammer2_chain_t *chain; hammer2_chain_t *child; + hammer2_chain_t *parent = *parentp; hammer2_blockref_t dummy; hammer2_blockref_t *base; - int unlock_parent = 0; int allocated = 0; int error = 0; int count; @@ -1995,10 +2015,8 @@ again: goto done; } if (parent != nparent) { - if (unlock_parent) - hammer2_chain_unlock(parent); - parent = nparent; - unlock_parent = 1; + hammer2_chain_unlock(parent); + parent = *parentp = nparent; } goto again; } @@ -2078,8 +2096,7 @@ again: done: *chainp = chain; - if (unlock_parent) - hammer2_chain_unlock(parent); + return (error); } @@ -2224,19 +2241,13 @@ hammer2_chain_duplicate(hammer2_trans_t *trans, hammer2_chain_t *parent, int i, * Insert nchain at the end of the duplication list. */ hammer2_chain_lock(nchain, HAMMER2_RESOLVE_NEVER); + /* extra ref still present from original allocation */ - for (;;) { - for (scan = ochain; scan->duplink; scan = scan->duplink) - ; - spin_lock(&scan->core->cst.spin); - if (scan->duplink == NULL) { - /* inherits excess ref from alloc */ - scan->duplink = nchain; - spin_unlock(&scan->core->cst.spin); - break; - } - spin_unlock(&scan->core->cst.spin); - } + spin_lock(&ochain->core->cst.spin); + KKASSERT(nchain->duplink == NULL); + nchain->duplink = ochain->duplink; + ochain->duplink = nchain; /* inherits excess ref from alloc */ + spin_unlock(&ochain->core->cst.spin); hammer2_chain_unlock(ochain); *chainp = nchain; @@ -2285,15 +2296,21 @@ hammer2_chain_duplicate(hammer2_trans_t *trans, hammer2_chain_t *parent, int i, break; } KKASSERT(i >= 0 && i < count); - KKASSERT(base == NULL || base[i].type == 0); nchain->parent = parent; nchain->index = i; KKASSERT((nchain->flags & HAMMER2_CHAIN_DELETED) == 0); KKASSERT(parent->refs > 0); + spin_lock(&parent->core->cst.spin); - if (RB_INSERT(hammer2_chain_tree, &parent->core->rbtree, nchain)) + scan = hammer2_chain_find_locked(parent, i); + KKASSERT(base == NULL || base[i].type == 0 || + scan == NULL || + (scan->flags & HAMMER2_CHAIN_DELETED)); + if (RB_INSERT(hammer2_chain_tree, &parent->core->rbtree, + nchain)) { panic("hammer2_chain_link: collision"); + } atomic_set_int(&nchain->flags, HAMMER2_CHAIN_ONRBTREE); hammer2_chain_ref(parent); /* nchain->parent ref */ spin_unlock(&parent->core->cst.spin); @@ -2722,11 +2739,13 @@ hammer2_chain_create_indirect(hammer2_trans_t *trans, hammer2_chain_t *parent, * Sets CHAIN_DELETED and CHAIN_MOVED in the chain being deleted and * set chain->delete_tid. * - * This function does NOT generate a modification to the parent. Such - * modifications are handled by the flush code and are properly merged - * using the flush synchronization point. The find/get code will - * properly overload the RBTREE check on top of the bref check to detect - * deleted entries prior to flush. + * This function does NOT generate a modification to the parent. It + * would be nearly impossible to figure out which parent to modify anyway. + * Such modifications are handled by the flush code and are properly merged + * using the flush synchronization point. + * + * The find/get code will properly overload the RBTREE check on top of + * the bref check to detect deleted entries. * * This function is NOT recursive. Any entity already pushed into the * chain (such as an inode) may still need visibility into its contents, @@ -2736,15 +2755,16 @@ hammer2_chain_create_indirect(hammer2_trans_t *trans, hammer2_chain_t *parent, * NOTE: This function does NOT set chain->modify_tid, allowing future * code to distinguish between live and deleted chains by testing * sync_tid. + * + * NOTE: Deletions normally do not occur in the middle of a duplication + * chain but we use a trick for hardlink migration that refactors + * the originating inode without deleting it, so we make no assumptions + * here. */ void hammer2_chain_delete(hammer2_trans_t *trans, hammer2_chain_t *parent, hammer2_chain_t *chain) { - if (chain->parent->core != parent->core) - panic("hammer2_chain_delete: parent mismatch"); - if (parent->duplink) - panic("hammer2_chain_delete: deleting via stale path"); KKASSERT(ccms_thread_lock_owned(&parent->core->cst)); /* diff --git a/sys/vfs/hammer2/hammer2_flush.c b/sys/vfs/hammer2/hammer2_flush.c index 4e883c3090..ae43cca972 100644 --- a/sys/vfs/hammer2/hammer2_flush.c +++ b/sys/vfs/hammer2/hammer2_flush.c @@ -184,6 +184,10 @@ hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t *chain) */ info.diddeferral = 0; hammer2_chain_flush_core(&info, chain); +#if FLUSH_DEBUG + kprintf("flush_core_done parent= chain=%p.%d %08x\n", + chain, chain->bref.type, chain->flags); +#endif /* * Only loop if deep recursions have been deferred. @@ -241,6 +245,22 @@ hammer2_chain_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain) hmp = info->hmp; +#if FLUSH_DEBUG + if (info->parent) + kprintf("flush_core %p->%p.%d %08x (%s)\n", + info->parent, chain, chain->bref.type, + chain->flags, + ((chain->bref.type == HAMMER2_BREF_TYPE_INODE) ? + chain->data->ipdata.filename : "?")); + else + kprintf("flush_core NULL->%p.%d %08x (%s)\n", + chain, chain->bref.type, + chain->flags, + ((chain->bref.type == HAMMER2_BREF_TYPE_INODE) ? + chain->data->ipdata.filename : "?")); +#endif + +#if 0 /* * A chain modified beyond our flush point is ignored by the current * flush. We could safely flush such chains if we wanted to (they @@ -254,6 +274,7 @@ hammer2_chain_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain) */ if (chain->modify_tid > info->sync_tid) return; +#endif /* * Restrict the synchronization point when we encounter a @@ -264,8 +285,12 @@ hammer2_chain_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain) * only changes made through that deletion point. */ saved_sync = info->sync_tid; - if (chain->duplink && chain->delete_tid < saved_sync) +#if 0 + if (chain->duplink && (chain->flags & HAMMER2_CHAIN_DELETED) && + chain->delete_tid < saved_sync) { info->sync_tid = chain->delete_tid; + } +#endif /* * If SUBMODIFIED is set we recurse the flush and adjust the @@ -348,10 +373,16 @@ hammer2_chain_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain) * * Scan2 is non-recursive. */ +#if FLUSH_DEBUG + kprintf("scan2_start parent %p %08x\n", chain, chain->flags); +#endif spin_lock(&chain->core->cst.spin); RB_SCAN(hammer2_chain_tree, &chain->core->rbtree, NULL, hammer2_chain_flush_scan2, info); spin_unlock(&chain->core->cst.spin); +#if FLUSH_DEBUG + kprintf("scan2_stop parent %p %08x\n", chain, chain->flags); +#endif chain->bref.mirror_tid = info->mirror_tid; info->mirror_tid = saved_mirror; info->parent = saved_parent; @@ -401,7 +432,10 @@ hammer2_chain_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain) * DESTROYED chains stop processing here. */ if ((chain->flags & HAMMER2_CHAIN_DESTROYED) && + (chain->flags & HAMMER2_CHAIN_DELETED)) { +#if 0 (chain->delete_tid <= info->sync_tid)) { +#endif if (chain->flags & HAMMER2_CHAIN_MODIFIED) { if (chain->bp) chain->bp->b_flags |= B_INVAL|B_RELBUF; @@ -697,7 +731,10 @@ hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data) * the destruction occurred after our flush sync_tid. */ if ((parent->flags & HAMMER2_CHAIN_DESTROYED) && + (child->flags & HAMMER2_CHAIN_DELETED) && +#if 0 child->delete_tid <= info->sync_tid && +#endif (child->flags & HAMMER2_CHAIN_DESTROYED) == 0) { KKASSERT(child->duplink == NULL); atomic_set_int(&child->flags, @@ -711,6 +748,10 @@ hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data) diddeferral = info->diddeferral; ++info->depth; hammer2_chain_flush_core(info, child); +#if FLUSH_DEBUG + kprintf("flush_core_done parent=%p flags=%08x child=%p.%d %08x\n", + parent, parent->flags, child, child->bref.type, child->flags); +#endif --info->depth; info->diddeferral += diddeferral; @@ -767,6 +808,39 @@ hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) int count; int child_flags; + /* + * Check update conditions prior to locking child. + * We may not be able to safely test the 64-bit TIDs + * but we can certainly test the flags. + * + * NOTE: DELETED always also sets MOVED. + */ +#if FLUSH_DEBUG + kprintf(" scan2 parent %p %08x child %p %08x ", parent, parent->flags, child, child->flags); +#endif + if (parent->flags & HAMMER2_CHAIN_DELETED) { +#if FLUSH_DEBUG + kprintf("A"); +#endif + child_flags = 0; + goto finalize; + } + + /* + * Inodes with stale children that have been converted to DIRECTDATA + * mode (file extension or hardlink conversion typically) need to + * skipped right now before we start messing with a non-existant + * block table. + */ + if (parent->bref.type == HAMMER2_BREF_TYPE_INODE && + (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA)) { +#if FLUSH_DEBUG + kprintf("B"); +#endif + child_flags = 0; + goto finalize; + } + /* * Ignore children modified beyond our flush point. If the parent * is deleted within our flush we don't have to re-set SUBMODIFIED, @@ -779,23 +853,24 @@ hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) * * XXX spin-lock on child->modify_tid ? */ +#if 0 if (child->modify_tid > info->sync_tid) { if (parent->delete_tid <= info->sync_tid) child_flags = 0; else child_flags = child->flags; +#if FLUSH_DEBUG + kprintf("C"); +#endif goto finalize; } +#endif - /* - * Check update conditions prior to locking child. - * We may not be able to safely test the 64-bit TIDs - * but we can certainly test the flags. - * - * NOTE: DELETED always also sets MOVED. - */ if ((child->flags & HAMMER2_CHAIN_MOVED) == 0) { child_flags = child->flags; +#if FLUSH_DEBUG + kprintf("D"); +#endif goto finalize; } @@ -815,12 +890,17 @@ hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) */ hammer2_chain_lock(child, HAMMER2_RESOLVE_NEVER); +#if 0 if (child->parent != parent) { child_flags = child->flags; hammer2_chain_unlock(child); spin_lock(&parent->core->cst.spin); +#if FLUSH_DEBUG + kprintf("E"); +#endif goto finalize; } +#endif /* * The parent's blockref to the child must be deleted or updated. @@ -831,9 +911,6 @@ hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) * XXX recursive deletions not optimized. */ hammer2_chain_modify(info->trans, parent, HAMMER2_MODIFY_NO_MODIFY_TID); - if (parent->flags & HAMMER2_CHAIN_INITIAL) - kprintf("parent %p INITIAL still set! data %p\n", - parent, parent->data); switch(parent->bref.type) { case HAMMER2_BREF_TYPE_INODE: @@ -896,7 +973,10 @@ hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) * We adjust the parent's bref pointer to the child but * we do not modify the contents of the child. */ +#if 0 if (child->delete_tid <= info->sync_tid) { +#endif + if (child->flags & HAMMER2_CHAIN_DELETED) { if (base) { KKASSERT(child->index < count); bzero(&base[child->index], sizeof(child->bref)); @@ -924,20 +1004,29 @@ hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) hmp->voldata.mirror_tid = child->bref.mirror_tid; } +/*cleanup:*/ /* - * We can only clear the MOVED flag when the child has progressed + * Cleanup the children. Clear the MOVED flag + * + * XXXWe can only clear the MOVED flag when the child has progressed * to the last parent in the duplication chain. * - * MOVED might not be set if we are reflushing this chain due to + * XXXMOVED might not be set if we are reflushing this chain due to * the previous chain overwriting the same index in the parent. */ - if (child->parent == parent && parent->duplink) { +#if 0 + if (child->parent == parent && + parent->duplink && (parent->flags & HAMMER2_CHAIN_DELETED)) { hammer2_chain_ref(parent->duplink); child->parent = parent->duplink; child->modify_tid = child->parent->modify_tid; hammer2_chain_drop(parent); } - if ((child->flags & HAMMER2_CHAIN_MOVED) && parent->duplink == NULL) { +#endif + if (child->flags & HAMMER2_CHAIN_MOVED) { + atomic_clear_int(&child->flags, HAMMER2_CHAIN_MOVED); + hammer2_chain_drop(child); /* flag */ +#if 0 if (child->delete_tid == HAMMER2_MAX_TID) { atomic_clear_int(&child->flags, HAMMER2_CHAIN_MOVED); hammer2_chain_drop(child); /* flag */ @@ -945,6 +1034,7 @@ hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) atomic_clear_int(&child->flags, HAMMER2_CHAIN_MOVED); hammer2_chain_drop(child); /* flag */ } +#endif } /* @@ -957,6 +1047,9 @@ hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) hammer2_chain_unlock(child); hammer2_chain_drop(child); spin_lock(&parent->core->cst.spin); +#if FLUSH_DEBUG + kprintf("F"); +#endif /* * The parent cleared SUBMODIFIED prior to the scan. If the child @@ -965,10 +1058,13 @@ hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) * up. */ finalize: +#if FLUSH_DEBUG + kprintf("G child %08x act=%08x\n", child_flags, child->flags); +#endif if (child_flags & (HAMMER2_CHAIN_MOVED | - HAMMER2_CHAIN_DELETED | + HAMMER2_CHAIN_DELETED /* | HAMMER2_CHAIN_MODIFIED | - HAMMER2_CHAIN_SUBMODIFIED)) { + HAMMER2_CHAIN_SUBMODIFIED*/)) { atomic_set_int(&parent->flags, HAMMER2_CHAIN_SUBMODIFIED); } diff --git a/sys/vfs/hammer2/hammer2_inode.c b/sys/vfs/hammer2/hammer2_inode.c index 7bcdb64723..9ddececfb5 100644 --- a/sys/vfs/hammer2/hammer2_inode.c +++ b/sys/vfs/hammer2/hammer2_inode.c @@ -41,6 +41,36 @@ #include "hammer2.h" +RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp, + hammer2_tid_t, inum); + +int +hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2) +{ + if (ip1->inum < ip2->inum) + return(-1); + if (ip1->inum > ip2->inum) + return(1); + return(0); +} + +hammer2_inode_t * +hammer2_inode_lookup(hammer2_pfsmount_t *pmp, hammer2_tid_t inum) +{ + hammer2_inode_t *ip; + + if (pmp) { + spin_lock(&pmp->inum_spin); + ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum); + if (ip) + hammer2_inode_ref(ip); + spin_unlock(&pmp->inum_spin); + } else { + ip = NULL; + } + return(ip); +} + /* * Adding a ref to an inode is only legal if the inode already has at least * one ref. @@ -67,8 +97,26 @@ hammer2_inode_drop(hammer2_inode_t *ip) refs = ip->refs; cpu_ccfence(); if (refs == 1) { + /* + * Transition to zero, must interlock with + * the inode inumber lookup tree (if applicable). + * + * NOTE: The super-root inode has no pmp. + */ + if (ip->pmp) + spin_lock(&ip->pmp->inum_spin); + if (atomic_cmpset_int(&ip->refs, 1, 0)) { KKASSERT(ip->topo_cst.count == 0); + if (ip->flags & HAMMER2_INODE_ONRBTREE) { + atomic_clear_int(&ip->flags, + HAMMER2_INODE_ONRBTREE); + RB_REMOVE(hammer2_inode_tree, + &ip->pmp->inum_tree, + ip); + } + if (ip->pmp) + spin_unlock(&ip->pmp->inum_spin); hmp = ip->hmp; ip->hmp = NULL; @@ -87,8 +135,14 @@ hammer2_inode_drop(hammer2_inode_t *ip) kfree(ip, hmp->minode); ip = pip; /* continue with pip (can be NULL) */ + } else { + if (ip->pmp) + spin_unlock(&ip->pmp->inum_spin); } } else { + /* + * Non zero transition + */ if (atomic_cmpset_int(&ip->refs, refs, refs - 1)) break; } @@ -138,10 +192,10 @@ hammer2_igetv(hammer2_inode_t *ip, int *errorp) ostate = hammer2_inode_lock_temp_release(ip); if (vget(vp, LK_EXCLUSIVE)) { vdrop(vp); - hammer2_inode_lock_restore(ip, ostate); + hammer2_inode_lock_temp_restore(ip, ostate); continue; } - hammer2_inode_lock_restore(ip, ostate); + hammer2_inode_lock_temp_restore(ip, ostate); vdrop(vp); /* vp still locked and ref from vget */ if (ip->vp != vp) { @@ -174,7 +228,7 @@ hammer2_igetv(hammer2_inode_t *ip, int *errorp) if (ip->vp != NULL) { vp->v_type = VBAD; vx_put(vp); - hammer2_inode_lock_restore(ip, ostate); + hammer2_inode_lock_downgrade(ip, ostate); continue; } @@ -211,7 +265,7 @@ hammer2_igetv(hammer2_inode_t *ip, int *errorp) vp->v_data = ip; ip->vp = vp; hammer2_inode_ref(ip); /* vp association */ - hammer2_inode_lock_restore(ip, ostate); + hammer2_inode_lock_downgrade(ip, ostate); break; } @@ -227,17 +281,12 @@ hammer2_igetv(hammer2_inode_t *ip, int *errorp) /* * The passed-in chain must be locked and the returned inode will also be - * locked. A ref is added to both the chain and the inode. + * locked. A ref is added to both the chain and the inode. The chain lock + * is inherited by the inode structure and should not be separately released. * * The hammer2_inode structure regulates the interface between the high level * kernel VNOPS API and the filesystem backend (the chains). * - * NOTE! This routine allocates the hammer2_inode structure - * unconditionally, and thus there might be several which - * are associated with the same chain. Particularly for hardlinks - * but this can also happen temporarily for normal files and - * directories. - * * WARNING! This routine sucks up the chain's lock (makes it part of the * inode lock from the point of view of the inode lock API), * so callers need to be careful. @@ -250,11 +299,44 @@ hammer2_inode_get(hammer2_mount_t *hmp, hammer2_pfsmount_t *pmp, hammer2_inode_t *dip, hammer2_chain_t *chain) { hammer2_inode_t *nip; + hammer2_chain_t *ochain; KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE); - nip = kmalloc(sizeof(*nip), hmp->minode, M_WAITOK | M_ZERO); + /* + * Interlocked lookup/ref of the inode. This code is only needed + * when looking up inodes with nlinks != 0 (TODO: optimize out + * otherwise and test for duplicates). + */ +again: + for (;;) { + nip = hammer2_inode_lookup(pmp, chain->data->ipdata.inum); + if (nip == NULL) + break; + ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE); + if ((nip->flags & HAMMER2_INODE_ONRBTREE) == 0) { + ccms_thread_unlock(&nip->topo_cst); + hammer2_inode_drop(nip); + continue; + } + if (nip->chain != chain) { + hammer2_chain_ref(chain); /* new nip->chain */ + ochain = nip->chain; + nip->chain = chain; /* fully locked */ + hammer2_chain_drop(ochain); /* old nip->chain */ + } + /* + * Consolidated nip/nip->chain is locked (chain locked + * by caller). + */ + return nip; + } + /* + * We couldn't find the inode number, create a new inode. + */ + nip = kmalloc(sizeof(*nip), hmp->minode, M_WAITOK | M_ZERO); + nip->inum = chain->data->ipdata.inum; nip->chain = chain; hammer2_chain_ref(chain); /* nip->chain */ nip->pip = dip; /* can be NULL */ @@ -273,6 +355,22 @@ hammer2_inode_get(hammer2_mount_t *hmp, hammer2_pfsmount_t *pmp, ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE); /* combination of thread lock and chain lock == inode lock */ + /* + * Attempt to add the inode. If it fails we raced another inode + * get. Undo all the work and try again. + */ + if (pmp) { + spin_lock(&pmp->inum_spin); + if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) { + spin_unlock(&pmp->inum_spin); + ccms_thread_unlock(&nip->topo_cst); + hammer2_inode_drop(nip); + goto again; + } + atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE); + spin_unlock(&pmp->inum_spin); + } + return (nip); } @@ -353,6 +451,8 @@ hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip, * Locate the inode or indirect block to create the new * entry in. At the same time check for key collisions * and iterate until we don't get one. + * + * NOTE: hidden inodes do not have iterators. */ retry: hammer2_inode_lock_ex(dip); @@ -376,7 +476,7 @@ retry: ++lhc; } if (error == 0) { - error = hammer2_chain_create(trans, parent, &chain, + error = hammer2_chain_create(trans, &parent, &chain, lhc, 0, HAMMER2_BREF_TYPE_INODE, HAMMER2_INODE_BYTES); @@ -411,6 +511,7 @@ retry: * transaction. If the need arises we can adjust * hammer2_trans_init() to allow more. */ + chain->data->ipdata.inum = trans->sync_tid; nip = hammer2_inode_get(dip->hmp, dip->pmp, dip, chain); nipdata = &chain->data->ipdata; @@ -474,23 +575,55 @@ retry: } /* - * Create a duplicate of (ochain) in the specified target directory (dip). - * ochain must represent an inode. The new chain is returned locked and - * referenced and its filename is changed to a representation of the - * inode number "0xBLAH", as a hidden directory entry. + * chain may have been moved around by the create. + */ +static +void +hammer2_chain_refactor(hammer2_chain_t **chainp) +{ + hammer2_chain_t *chain = *chainp; + hammer2_chain_t *tmp; + + while (chain->duplink && (chain->flags & HAMMER2_CHAIN_DELETED)) { + tmp = chain->duplink; + while (tmp->duplink && (tmp->flags & HAMMER2_CHAIN_DELETED)) + tmp = tmp->duplink; + hammer2_chain_ref(chain); + hammer2_chain_unlock(chain); + hammer2_chain_lock(tmp, HAMMER2_RESOLVE_ALWAYS); + hammer2_chain_drop(chain); + chain = tmp; + *chainp = chain; + } +} + + +/* + * ochain represents the target file inode. We need to move it to the + * specified common parent directory (dip) and rename it to a special + * invisible "0xINODENUMBER" filename. + * + * We use chain_duplicate and duplicate ochain at the new location, + * renaming it appropriately. We create a temporary chain and + * then delete it to placemark where the duplicate will go. Both of + * these use the inode number for (lhc) (the key), generating the + * invisible filename. */ +static hammer2_chain_t * -hammer2_inode_duplicate(hammer2_trans_t *trans, hammer2_chain_t *ochain, +hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_chain_t **ochainp, hammer2_inode_t *dip, int *errorp) { hammer2_inode_data_t *nipdata; hammer2_mount_t *hmp; hammer2_chain_t *parent; - hammer2_chain_t *chain; - hammer2_chain_t *tmpchain; + hammer2_chain_t *ochain; + hammer2_chain_t *nchain; + hammer2_chain_t *tmp; hammer2_key_t lhc; hammer2_blockref_t bref; + ochain = *ochainp; *errorp = 0; hmp = dip->hmp; lhc = ochain->data->ipdata.inum; @@ -505,22 +638,30 @@ hammer2_inode_duplicate(hammer2_trans_t *trans, hammer2_chain_t *ochain, */ retry: parent = hammer2_chain_lookup_init(dip->chain, 0); - chain = hammer2_chain_lookup(&parent, lhc, lhc, 0); - if (chain) { - hammer2_chain_unlock(chain); - chain = NULL; + nchain = hammer2_chain_lookup(&parent, lhc, lhc, 0); + if (nchain) { + kprintf("X3 chain %p parent %p dip %p dip->chain %p\n", + nchain, parent, dip, dip->chain); + hammer2_chain_unlock(nchain); + nchain = NULL; *errorp = ENOSPC; +#if 0 + Debugger("X3"); +#endif } /* - * Create entry in common parent directory. + * Create entry in common parent directory using the seek position + * calculated above. */ if (*errorp == 0) { - KKASSERT(chain == NULL); - *errorp = hammer2_chain_create(trans, parent, &chain, + KKASSERT(nchain == NULL); + *errorp = hammer2_chain_create(trans, &parent, &nchain, lhc, 0, HAMMER2_BREF_TYPE_INODE,/* n/a */ HAMMER2_INODE_BYTES); /* n/a */ + hammer2_chain_refactor(&ochain); + *ochainp = ochain; } /* @@ -538,81 +679,64 @@ retry: * Handle the error case */ if (*errorp) { - KKASSERT(chain == NULL); + KKASSERT(nchain == NULL); hammer2_chain_lookup_done(parent); return (NULL); } /* - * Use chain as a placeholder for our duplication. + * Use chain as a placeholder for (lhc), delete it and replace + * it with our duplication. * * Gain a second lock on ochain for the duplication function to - * unlock, maintain the original lock across the calls. + * unlock, maintain the caller's original lock across the call. * * This is a bit messy. */ - hammer2_chain_delete(trans, parent, chain); + hammer2_chain_delete(trans, parent, nchain); hammer2_chain_lock(ochain, HAMMER2_RESOLVE_ALWAYS); - tmpchain = ochain; - bref = tmpchain->bref; - bref.key = lhc; /* key for create placeholder must match */ + tmp = ochain; + bref = tmp->bref; + bref.key = lhc; /* invisible dir entry key */ bref.keybits = 0; - hammer2_chain_duplicate(trans, parent, chain->index, &tmpchain, &bref); + hammer2_chain_duplicate(trans, parent, nchain->index, &tmp, &bref); hammer2_chain_lookup_done(parent); - - hammer2_chain_unlock(chain); - chain = tmpchain; - tmpchain = NULL; /* safety */ + hammer2_chain_unlock(nchain); /* no longer needed */ /* + * Now set chain to our duplicate and modify it appropriately. + * * Directory entries are inodes but this is a hidden hardlink * target. The name isn't used but to ease debugging give it * a name after its inode number. */ - hammer2_chain_modify(trans, chain, 0); - nipdata = &chain->data->ipdata; + nchain = tmp; + tmp = NULL; /* safety */ + + hammer2_chain_modify(trans, nchain, 0); + nipdata = &nchain->data->ipdata; ksnprintf(nipdata->filename, sizeof(nipdata->filename), "0x%016jx", (intmax_t)nipdata->inum); nipdata->name_len = strlen(nipdata->filename); nipdata->name_key = lhc; - return (chain); + return (nchain); } /* - * Connect the target inode to the media topology at (dip, name, len). - * This function creates a directory entry and replace (*chainp). - * - * The caller usually holds the related inode exclusive locked through this - * call and is also responsible for replacing ip->chain after we return. - * - * If (*chainp) was marked DELETED then it represents a terminus inode - * with no other nlinks, we can simply duplicate the chain (in-memory - * chain structures cannot be moved within the in-memory topology, only - * duplicated, but the duplicate uses the same bref). - * - * if (*chainp) is not marked DELETED then it represents a hardlink - * terminus which still has a non-zero nlink count. Instead of duplicating - * it (which would be like a snapshot), we need to create a - * OBJTYPE_HARDLINK directory entry which references (*chainp)'s inode - * number and bump (*chainp)'s nlinks. In this situation we return - * the terminus as *chainp. - * - * (*chainp) is adjusted if necessary and returned locked. If different, - * the original (*chainp) is unlocked. Note that the (*chainp) that is - * returned is always the hardlink terminus (the actual inode), which - * might reside in some parent directory. It will not be the - * OBJTYPE_HARDLINK pointer. + * Connect the target inode represented by (*chainp) to the media topology + * at (dip, name, len). * - * WARNING! The caller is likely holding ip/ip->chain locked exclusively. - * Replacing ip->chain here would create confusion so we leave - * it to the caller to do that. + * If hlink is TRUE this function creates an OBJTYPE_HARDLINK directory + * entry instead of connecting (*chainp). * - * (The caller is expected to hold the related inode exclusively) + * If hlink is FALSE this function uses chain_duplicate() to make a copy + * if (*chainp) in the directory entry. (*chainp) is likely to be deleted + * by the caller in this case (e.g. rename). */ int -hammer2_inode_connect(hammer2_trans_t *trans, hammer2_inode_t *dip, - hammer2_chain_t **chainp, +hammer2_inode_connect(hammer2_trans_t *trans, int hlink, + hammer2_inode_t *dip, hammer2_chain_t **chainp, const uint8_t *name, size_t name_len) { hammer2_inode_data_t *ipdata; @@ -622,7 +746,6 @@ hammer2_inode_connect(hammer2_trans_t *trans, hammer2_inode_t *dip, hammer2_chain_t *ochain; hammer2_key_t lhc; int error; - int hlink; hmp = dip->hmp; @@ -636,18 +759,6 @@ hammer2_inode_connect(hammer2_trans_t *trans, hammer2_inode_t *dip, parent = hammer2_chain_lookup_init(dip->chain, 0); lhc = hammer2_dirhash(name, name_len); - hlink = ((ochain->flags & HAMMER2_CHAIN_DELETED) == 0); - kprintf("reconnect hlink=%d name=%*.*s\n", - hlink, (int)name_len, (int)name_len, name); - - /* - * In fake mode flush oip so we can just snapshot it downbelow. - * A flush is not otherwise needed as the new chain inherits - * all active children of the old chain (they will share the same - * chain_core). - */ - if (hlink && hammer2_hardlink_enable < 0) - hammer2_chain_flush(trans, ochain); /* * Locate the inode or indirect block to create the new @@ -666,11 +777,6 @@ hammer2_inode_connect(hammer2_trans_t *trans, hammer2_inode_t *dip, ++lhc; } - /* - * Passing a non-NULL chain to hammer2_chain_create() reconnects the - * existing chain instead of creating a new one. The chain's bref - * will be properly updated. - */ if (error == 0) { if (hlink) { /* @@ -678,20 +784,22 @@ hammer2_inode_connect(hammer2_trans_t *trans, hammer2_inode_t *dip, * directory entry. */ KKASSERT(nchain == NULL); - error = hammer2_chain_create(trans, parent, &nchain, + error = hammer2_chain_create(trans, &parent, &nchain, lhc, 0, HAMMER2_BREF_TYPE_INODE, HAMMER2_INODE_BYTES); + hammer2_chain_refactor(&ochain); } else { /* - * Original inode reconnected, duplicate as a - * new directory entry, leave unconnected and - * then call chain_create() to connect it. + * Reconnect the original chain and rename. Use + * chain_duplicate(). The caller will likely delete + * or has already deleted the original chain in + * this case. */ nchain = ochain; ochain = NULL; hammer2_chain_duplicate(trans, NULL, -1, &nchain, NULL); - error = hammer2_chain_create(trans, parent, &nchain, + error = hammer2_chain_create(trans, &parent, &nchain, lhc, 0, HAMMER2_BREF_TYPE_INODE, HAMMER2_INODE_BYTES); @@ -739,8 +847,6 @@ hammer2_inode_connect(hammer2_trans_t *trans, hammer2_inode_t *dip, ipdata->type = HAMMER2_OBJTYPE_HARDLINK; ipdata->inum = ochain->data->ipdata.inum; ipdata->nlinks = 1; - kprintf("created hardlink %*.*s\n", - (int)name_len, (int)name_len, name); hammer2_chain_unlock(nchain); nchain = ochain; ochain = NULL; @@ -764,21 +870,17 @@ hammer2_inode_connect(hammer2_trans_t *trans, hammer2_inode_t *dip, (int)name_len, (int)name_len, name); } else { /* - * We are reconnecting a previously DELETED node in a new - * location. nchain is a duplication of the deleted node. - * - * We must fixup the name stored in oip. + * nchain is a duplicate of ochain at the new location. + * We must fixup the name stored in oip. The bref key + * has already been set up. */ hammer2_chain_modify(trans, nchain, 0); ipdata = &nchain->data->ipdata; - if (ipdata->name_len != name_len || - bcmp(ipdata->filename, name, name_len) != 0) { - KKASSERT(name_len < HAMMER2_INODE_MAXNAME); - bcopy(name, ipdata->filename, name_len); - ipdata->name_key = lhc; - ipdata->name_len = name_len; - } + KKASSERT(name_len < HAMMER2_INODE_MAXNAME); + bcopy(name, ipdata->filename, name_len); + ipdata->name_key = lhc; + ipdata->name_len = name_len; ipdata->nlinks = 1; } @@ -867,7 +969,8 @@ hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip, */ int hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip, - const uint8_t *name, size_t name_len, int isdir) + const uint8_t *name, size_t name_len, + int isdir, int *hlinkp) { hammer2_inode_data_t *ipdata; hammer2_mount_t *hmp; @@ -890,6 +993,8 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip, /* * Search for the filename in the directory */ + if (hlinkp) + *hlinkp = 0; hammer2_inode_lock_ex(dip); parent = hammer2_chain_lookup_init(dip->chain, 0); @@ -916,8 +1021,11 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip, error = ENOENT; goto done; } - if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK) + if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK) { + if (hlinkp) + *hlinkp = 1; type = chain->data->ipdata.target_type; + } if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) { error = ENOTDIR; @@ -1133,6 +1241,10 @@ hammer2_hardlink_consolidate(hammer2_trans_t *trans, hammer2_inode_t *ip, * If no change in the hardlink's target directory is required and * this is already a hardlink target, all we need to do is adjust * the link count. + * + * XXX The common parent is a big wiggly due to duplication from + * renames. Compare the core (RBTREE) pointer instead of the + * ip's. */ if (cdip == fdip && (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) { @@ -1152,7 +1264,7 @@ hammer2_hardlink_consolidate(hammer2_trans_t *trans, hammer2_inode_t *ip, * Hardlink targets are hidden inodes in a parent directory common * to all directory entries referencing the hardlink. */ - nchain = hammer2_inode_duplicate(trans, chain, cdip, &error); + nchain = hammer2_hardlink_shiftup(trans, &chain, cdip, &error); if (error == 0) { /* @@ -1170,6 +1282,13 @@ hammer2_hardlink_consolidate(hammer2_trans_t *trans, hammer2_inode_t *ip, * If the old chain IS a hardlink target then delete it. */ if (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE) { + /* + * Replace original non-hardlink that's been dup'd + * with a special hardlink directory entry. We must + * set the DIRECTDATA flag to prevent sub-chains + * from trying to synchronize to the inode if the + * file is extended afterwords. + */ hammer2_chain_modify(trans, chain, 0); ipdata = &chain->data->ipdata; ipdata->target_type = ipdata->type; @@ -1373,6 +1492,8 @@ hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip) return(scan1); } scan2 = scan2->pip; + if (scan2 == NULL) + break; } } panic("hammer2_inode_common_parent: no common parent %p %p\n", diff --git a/sys/vfs/hammer2/hammer2_ioctl.c b/sys/vfs/hammer2/hammer2_ioctl.c index 3c685880da..9aecf00c17 100644 --- a/sys/vfs/hammer2/hammer2_ioctl.c +++ b/sys/vfs/hammer2/hammer2_ioctl.c @@ -486,7 +486,8 @@ hammer2_ioctl_pfs_delete(hammer2_inode_t *ip, void *data) hammer2_trans_init(&trans, hmp); error = hammer2_unlink_file(&trans, hmp->sroot, - pfs->name, strlen(pfs->name), 0); + pfs->name, strlen(pfs->name), + 0, NULL); hammer2_trans_done(&trans); return (error); diff --git a/sys/vfs/hammer2/hammer2_subr.c b/sys/vfs/hammer2/hammer2_subr.c index f65889c850..1fc174a832 100644 --- a/sys/vfs/hammer2/hammer2_subr.c +++ b/sys/vfs/hammer2/hammer2_subr.c @@ -55,9 +55,6 @@ * * NOTE: We don't combine the inode/chain lock because putting away an * inode would otherwise confuse multiple lock holders of the inode. - * - * WARNING! hammer2_inode_repoint() can replace ip->chain and assumes that - * there is only one 'inode' lock. XXX */ void hammer2_inode_lock_ex(hammer2_inode_t *ip) @@ -67,8 +64,16 @@ hammer2_inode_lock_ex(hammer2_inode_t *ip) hammer2_inode_ref(ip); ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE); + /* + * ip->chain fixup. Certain duplications used to move inodes + * into indirect blocks (for example) can cause ip->chain to + * become stale. + * + * + */ +again: chain = ip->chain; - while (chain->duplink) + while (chain->duplink && (chain->flags & HAMMER2_CHAIN_DELETED)) chain = chain->duplink; if (chain != ip->chain) { hammer2_chain_ref(chain); @@ -78,6 +83,14 @@ hammer2_inode_lock_ex(hammer2_inode_t *ip) KKASSERT(chain != NULL); /* for now */ hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS); + + /* + * Resolve duplication races + */ + if (chain->duplink && (chain->flags & HAMMER2_CHAIN_DELETED)) { + hammer2_chain_unlock(chain); + goto again; + } } void @@ -116,6 +129,7 @@ hammer2_inode_lock_sh(hammer2_inode_t *ip) hammer2_chain_t *chain; hammer2_inode_ref(ip); +again: ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED); chain = ip->chain; @@ -123,6 +137,17 @@ hammer2_inode_lock_sh(hammer2_inode_t *ip) hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS | HAMMER2_RESOLVE_SHARED); + /* + * Resolve duplication races + */ + if (chain->duplink && (chain->flags & HAMMER2_CHAIN_DELETED)) { + hammer2_chain_unlock(chain); + ccms_thread_unlock(&ip->topo_cst); + hammer2_inode_lock_ex(ip); + hammer2_inode_unlock_ex(ip); + goto again; + } + } void @@ -140,6 +165,12 @@ hammer2_inode_lock_temp_release(hammer2_inode_t *ip) return(ccms_thread_lock_temp_release(&ip->topo_cst)); } +void +hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, ccms_state_t ostate) +{ + ccms_thread_lock_temp_restore(&ip->topo_cst, ostate); +} + ccms_state_t hammer2_inode_lock_upgrade(hammer2_inode_t *ip) { @@ -147,9 +178,9 @@ hammer2_inode_lock_upgrade(hammer2_inode_t *ip) } void -hammer2_inode_lock_restore(hammer2_inode_t *ip, ccms_state_t ostate) +hammer2_inode_lock_downgrade(hammer2_inode_t *ip, ccms_state_t ostate) { - ccms_thread_lock_restore(&ip->topo_cst, ostate); + ccms_thread_lock_downgrade(&ip->topo_cst, ostate); } /* diff --git a/sys/vfs/hammer2/hammer2_vfsops.c b/sys/vfs/hammer2/hammer2_vfsops.c index 19086b5cb4..a179d4be7f 100644 --- a/sys/vfs/hammer2/hammer2_vfsops.c +++ b/sys/vfs/hammer2/hammer2_vfsops.c @@ -355,6 +355,9 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, mp->mnt_data = (qaddr_t)pmp; pmp->mp = mp; + spin_init(&pmp->inum_spin); + RB_INIT(&pmp->inum_tree); + kmalloc_create(&pmp->mmsg, "HAMMER2-pfsmsg"); kdmsg_iocom_init(&pmp->iocom, pmp, KDMSG_IOCOMF_AUTOCONN | @@ -660,6 +663,8 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags) } hammer2_mount_unlock(hmp); + hammer2_dump_chain(&hmp->vchain, 0); + /* * Final drop of embedded volume root chain to clean up * vchain.core (vchain structure is not flagged ALLOCATED @@ -796,6 +801,10 @@ hammer2_vfs_sync(struct mount *mp, int waitfor) int i; hmp = MPTOHMP(mp); +#if 0 + if ((waitfor & MNT_LAZY) == 0) + hammer2_dump_chain(&hmp->vchain, 0); +#endif flags = VMSC_GETVP; if (waitfor & MNT_LAZY) @@ -908,7 +917,8 @@ hammer2_sync_scan1(struct mount *mp, struct vnode *vp, void *data) ip = VTOI(vp); if (vp->v_type == VNON || ip == NULL || - ((ip->flags & HAMMER2_INODE_MODIFIED) == 0 && + ((ip->flags & (HAMMER2_INODE_MODIFIED | + HAMMER2_INODE_DIRTYEMBED)) == 0 && RB_EMPTY(&vp->v_rbdirty_tree))) { return(-1); } @@ -924,7 +934,8 @@ hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data) ip = VTOI(vp); if (vp->v_type == VNON || vp->v_type == VBAD || - ((ip->flags & HAMMER2_INODE_MODIFIED) == 0 && + ((ip->flags & (HAMMER2_INODE_MODIFIED | + HAMMER2_INODE_DIRTYEMBED)) == 0 && RB_EMPTY(&vp->v_rbdirty_tree))) { return(0); } @@ -1236,3 +1247,31 @@ hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index) kdmsg_msg_write(msg); } } + +void +hammer2_dump_chain(hammer2_chain_t *chain, int tab) +{ + hammer2_chain_t *scan; + + kprintf("%*.*schain[%d] %p.%d [%08x][core=%p] (%s) dl=%p refs=%d", + tab, tab, "", + chain->index, chain, chain->bref.type, chain->flags, + chain->core, + ((chain->bref.type == HAMMER2_BREF_TYPE_INODE && + chain->data) ? (char *)chain->data->ipdata.filename : "?"), + chain->duplink, chain->refs); + if (chain->core == NULL || RB_EMPTY(&chain->core->rbtree)) + kprintf("\n"); + else + kprintf(" {\n"); + RB_FOREACH(scan, hammer2_chain_tree, &chain->core->rbtree) { + hammer2_dump_chain(scan, tab + 4); + } + if (chain->core && !RB_EMPTY(&chain->core->rbtree)) { + if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data) + kprintf("%*.*s}(%s)\n", tab, tab, "", + chain->data->ipdata.filename); + else + kprintf("%*.*s}\n", tab, tab, ""); + } +} diff --git a/sys/vfs/hammer2/hammer2_vnops.c b/sys/vfs/hammer2/hammer2_vnops.c index 016bf5bef5..1a6d2743e9 100644 --- a/sys/vfs/hammer2/hammer2_vnops.c +++ b/sys/vfs/hammer2/hammer2_vnops.c @@ -1075,7 +1075,7 @@ retry: * NOTE: DATA chains are created without device backing * store (nor do we want any). */ - *errorp = hammer2_chain_create(trans, parent, &chain, + *errorp = hammer2_chain_create(trans, &parent, &chain, lbase, HAMMER2_PBUFRADIX, HAMMER2_BREF_TYPE_DATA, lblksize); @@ -1413,7 +1413,7 @@ retry: obase, obase, HAMMER2_LOOKUP_NODATA); if (chain == NULL) { - error = hammer2_chain_create(trans, parent, &chain, + error = hammer2_chain_create(trans, &parent, &chain, obase, nblksize, HAMMER2_BREF_TYPE_DATA, nblksize); @@ -1827,19 +1827,23 @@ hammer2_vop_nlink(struct vop_nlink_args *ap) /* * Create a directory entry connected to the specified chain. - * This function unlocks and NULL's chain on return. + * The hardlink consolidation code has already adjusted ip->pip + * to the common parent directory containing the actual hardlink + * + * (which may be different from dip where we created our hardlink + * entry. ip->chain always represents the actual hardlink and not + * any of the pointers to the actual hardlink). */ - error = hammer2_inode_connect(&trans, dip, &chain, name, name_len); + error = hammer2_inode_connect(&trans, 1, + dip, &chain, + name, name_len); if (error == 0) { - hammer2_inode_repoint(ip, ip->pip, chain); cache_setunresolved(ap->a_nch); cache_setvp(ap->a_nch, ap->a_vp); } done: - if (chain) { + if (chain) hammer2_chain_unlock(chain); - chain = NULL; - } hammer2_inode_unlock_ex(ip); hammer2_trans_done(&trans); @@ -2004,7 +2008,7 @@ hammer2_vop_nremove(struct vop_nremove_args *ap) name = ncp->nc_name; name_len = ncp->nc_nlen; hammer2_trans_init(&trans, hmp); - error = hammer2_unlink_file(&trans, dip, name, name_len, 0); + error = hammer2_unlink_file(&trans, dip, name, name_len, 0, NULL); hammer2_trans_done(&trans); if (error == 0) { cache_unlink(ap->a_nch); @@ -2037,7 +2041,7 @@ hammer2_vop_nrmdir(struct vop_nrmdir_args *ap) name_len = ncp->nc_nlen; hammer2_trans_init(&trans, hmp); - error = hammer2_unlink_file(&trans, dip, name, name_len, 1); + error = hammer2_unlink_file(&trans, dip, name, name_len, 1, NULL); hammer2_trans_done(&trans); if (error == 0) { cache_unlink(ap->a_nch); @@ -2065,6 +2069,7 @@ hammer2_vop_nrename(struct vop_nrename_args *ap) const uint8_t *tname; size_t tname_len; int error; + int hlink; if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount) return(EXDEV); @@ -2109,7 +2114,7 @@ hammer2_vop_nrename(struct vop_nrename_args *ap) /* * Remove target if it exists */ - error = hammer2_unlink_file(&trans, tdip, tname, tname_len, -1); + error = hammer2_unlink_file(&trans, tdip, tname, tname_len, -1, NULL); if (error && error != ENOENT) goto done; cache_setunresolved(ap->a_tnch); @@ -2142,7 +2147,7 @@ hammer2_vop_nrename(struct vop_nrename_args *ap) * The target chain may be marked DELETED but will not be destroyed * since we retain our hold on ip and chain. */ - error = hammer2_unlink_file(&trans, fdip, fname, fname_len, -1); + error = hammer2_unlink_file(&trans, fdip, fname, fname_len, -1, &hlink); KKASSERT(error != EAGAIN); if (error) goto done; @@ -2156,11 +2161,12 @@ hammer2_vop_nrename(struct vop_nrename_args *ap) * deadlocks we want to unlock before issuing a cache_*() * op (that might have to lock a vnode). */ - error = hammer2_inode_connect(&trans, tdip, - &chain, tname, tname_len); + error = hammer2_inode_connect(&trans, hlink, + tdip, &chain, + tname, tname_len); if (error == 0) { KKASSERT(chain != NULL); - hammer2_inode_repoint(ip, tdip, chain); + hammer2_inode_repoint(ip, (hlink ? ip->pip : tdip), chain); cache_rename(ap->a_fnch, ap->a_tnch); } done: -- 2.41.0