From 2efe493b366f9b5f683af645f94e85f257fec556 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Tue, 17 Nov 2015 17:21:14 -0800 Subject: [PATCH] hammer2 - Present hardlink solution, misc stability work * The H2 design has had a long-standing problem of losing track of hardlinks when intermediate directories are renamed, breaking the common-parent-directory design for the inode target. Fix this issue by placing the hardlink target in the first common parent directory chflagged 'xlink' (or the root of the mount if no parent is chflagged 'xlink'). Thus, by default, hardlink targets for cross-directory situations will be placed in the root of the mount and thus no confusion can ever occur if a mid-path directory is moved within the hierarchy. * This change does not modify the behavior of placing the hardlink target in the same directory as the hardlink pointers when they are all in the same directory. * installworld now chflags /*, /usr/*, /var/*, and /home/* 'xlink'. * Note that hardlink and rename operations across 'xlink' boundaries are disallowed (for obvious reasons), but system operators have very rarely used hardlinks in major directory crossings. However, we might be pushing it a bit much to automatically chflag /home/*. In the past there have been a few minor situations where major-crossing hardlinks are attempted. 'lpr' for example tries to hardlink a user file into /var/spool, but will copy it if it cannot. Similarly there have been situations in the past where hardlinks between home directories have been used to save space, but franklyl softlinks can be used almost as easily and such situations have fallen into disfavor. * Stability improvements to the XOP FIFO mechanics, locking cleanups. --- sys/sys/mutex2.h | 12 +++++++ sys/vfs/hammer2/hammer2.h | 19 +++------- sys/vfs/hammer2/hammer2_chain.c | 60 +++++++++++++++++++++----------- sys/vfs/hammer2/hammer2_inode.c | 54 +++++++++++++++++++++------- sys/vfs/hammer2/hammer2_thread.c | 31 +++++++++++++++-- sys/vfs/hammer2/hammer2_vnops.c | 33 +++++++++++++----- sys/vfs/hammer2/hammer2_xops.c | 7 ++-- 7 files changed, 153 insertions(+), 63 deletions(-) diff --git a/sys/sys/mutex2.h b/sys/sys/mutex2.h index 8b1162781f..282eef76ed 100644 --- a/sys/sys/mutex2.h +++ b/sys/sys/mutex2.h @@ -186,6 +186,18 @@ mtx_lock_sh_quick(mtx_t *mtx) return(0); } +/* + * Adds a shared lock reference to a lock already locked shared, + * does not block on pending exclusive request. + */ +static __inline void +mtx_lock_sh_again(mtx_t *mtx) +{ + KKASSERT((mtx->mtx_lock & MTX_EXCLUSIVE) == 0 && + (mtx->mtx_lock & MTX_MASK) > 0); + atomic_add_int(&mtx->mtx_lock, 1); +} + /* * Short-form exclusive spinlock a mutex. Must be paired with * mtx_spinunlock(). diff --git a/sys/vfs/hammer2/hammer2.h b/sys/vfs/hammer2/hammer2.h index 41804abba8..e3270e509f 100644 --- a/sys/vfs/hammer2/hammer2.h +++ b/sys/vfs/hammer2/hammer2.h @@ -120,7 +120,9 @@ typedef struct spinlock hammer2_spin_t; #define hammer2_mtx_ex mtx_lock_ex_quick #define hammer2_mtx_sh mtx_lock_sh_quick +#define hammer2_mtx_sh_again mtx_lock_sh_again #define hammer2_mtx_unlock mtx_unlock +#define hammer2_mtx_downgrade mtx_downgrade #define hammer2_mtx_owned mtx_owned #define hammer2_mtx_init mtx_init #define hammer2_mtx_temp_release mtx_lock_temp_release @@ -157,19 +159,6 @@ hammer2_mtx_upgrade(hammer2_mtx_t *mtx) return wasexclusive; } -/* - * Downgrade an inode lock from exclusive to shared only if the inode - * lock was previously shared. If the inode lock was previously exclusive, - * this is a NOP. - */ -static __inline -void -hammer2_mtx_downgrade(hammer2_mtx_t *mtx, int wasexclusive) -{ - if (wasexclusive == 0) - mtx_downgrade(mtx); -} - /* * The xid tracks internal transactional updates. * @@ -849,6 +838,7 @@ struct hammer2_xop_fifo { typedef struct hammer2_xop_fifo hammer2_xop_fifo_t; #define HAMMER2_XOP_FIFO_RUN 0x0001 +#define HAMMER2_XOP_FIFO_STALL 0x0002 struct hammer2_xop_head { hammer2_xop_func_t func; @@ -1346,7 +1336,7 @@ int hammer2_inode_connect(hammer2_inode_t *dip, hammer2_inode_t *ip, const char *name, size_t name_len, hammer2_key_t lhc); hammer2_inode_t *hammer2_inode_common_parent(hammer2_inode_t *fdip, - hammer2_inode_t *tdip); + hammer2_inode_t *tdip, int *errorp, int ishardlink); void hammer2_inode_chain_sync(hammer2_inode_t *ip); int hammer2_inode_unlink_finisher(hammer2_inode_t *ip, int isopen); void hammer2_inode_install_hidden(hammer2_pfs_t *pmp); @@ -1364,6 +1354,7 @@ void hammer2_chain_core_init(hammer2_chain_t *chain); void hammer2_chain_ref(hammer2_chain_t *chain); void hammer2_chain_drop(hammer2_chain_t *chain); void hammer2_chain_lock(hammer2_chain_t *chain, int how); +void hammer2_chain_lock_downgrade(hammer2_chain_t *chain); void hammer2_chain_push_shared_lock(hammer2_chain_t *chain); void hammer2_chain_pull_shared_lock(hammer2_chain_t *chain); void hammer2_chain_load_data(hammer2_chain_t *chain); diff --git a/sys/vfs/hammer2/hammer2_chain.c b/sys/vfs/hammer2/hammer2_chain.c index 446b1e6638..73e8ff9bd1 100644 --- a/sys/vfs/hammer2/hammer2_chain.c +++ b/sys/vfs/hammer2/hammer2_chain.c @@ -678,16 +678,31 @@ hammer2_chain_lock(hammer2_chain_t *chain, int how) hammer2_chain_load_data(chain); } +/* + * Downgrade an exclusive chain lock to a shared chain lock. + * + * NOTE: There is no upgrade equivalent due to the ease of + * deadlocks in that direction. + */ +void +hammer2_chain_lock_downgrade(hammer2_chain_t *chain) +{ + hammer2_mtx_downgrade(&chain->lock); +} + /* * Obtains a second shared lock on the chain, does not account the second * shared lock as being owned by the current thread. * * Caller must already own a shared lock on this chain. + * + * The lock function is required to obtain the second shared lock without + * blocking on pending exclusive requests. */ void hammer2_chain_push_shared_lock(hammer2_chain_t *chain) { - hammer2_mtx_sh(&chain->lock); + hammer2_mtx_sh_again(&chain->lock); atomic_add_int(&chain->lockcnt, 1); /* do not count in td_tracker for this thread */ } @@ -4276,7 +4291,8 @@ hammer2_chain_hardlink_find(hammer2_inode_t *dip, rchain = NULL; for (;;) { - int nloops; + int again; + rchain = hammer2_chain_lookup(parentp, &key_dummy, lhc, lhc, &cache_index, flags); @@ -4287,41 +4303,43 @@ hammer2_chain_hardlink_find(hammer2_inode_t *dip, * Iterate parents, handle parent rename races by retrying * the operation. */ - nloops = -1; - while (nloops) { - --nloops; + again = 0; + for (;;) { parent = *parentp; - if (nloops < 0 && - parent->bref.type == HAMMER2_BREF_TYPE_INODE) { - nloops = 1; + if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { + if (again == 1) + break; + ++again; } if (parent->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) goto done; - if (parent->parent == NULL) - goto done; - parent = parent->parent; - hammer2_chain_ref(parent); - hammer2_chain_unlock(*parentp); - hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS | + for (;;) { + if (parent->parent == NULL) + goto done; + parent = parent->parent; + hammer2_chain_ref(parent); + hammer2_chain_unlock(*parentp); + hammer2_chain_lock(parent, + HAMMER2_RESOLVE_ALWAYS | resolve_flags); - if ((*parentp)->parent == parent) { - hammer2_chain_drop(*parentp); - *parentp = parent; - } else { + if ((*parentp)->parent == parent) { + hammer2_chain_drop(*parentp); + *parentp = parent; + break; + } hammer2_chain_unlock(parent); hammer2_chain_drop(parent); hammer2_chain_lock(*parentp, HAMMER2_RESOLVE_ALWAYS | resolve_flags); - parent = NULL; /* safety */ - /* retry */ + parent = *parentp; } } } done: *chainp = rchain; - return (rchain ? EINVAL : 0); + return (rchain ? 0 : EINVAL); } /* diff --git a/sys/vfs/hammer2/hammer2_inode.c b/sys/vfs/hammer2/hammer2_inode.c index 9ceb13a523..3bccf89caf 100644 --- a/sys/vfs/hammer2/hammer2_inode.c +++ b/sys/vfs/hammer2/hammer2_inode.c @@ -1255,16 +1255,22 @@ hammer2_inode_install_hidden(hammer2_pfs_t *pmp) } /* - * Find the directory common to both fdip and tdip. + * Find the directory common to both fdip and tdip that satisfies the + * conditions. The common directory is not allowed to cross a XLINK + * boundary. If ishardlink is non-zero and we successfully find the + * common parent, we will continue to iterate parents until we hit a + * XLINK boundary. * * Returns a held but not locked inode. Caller typically locks the inode, * and when through unlocks AND drops it. */ hammer2_inode_t * -hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip) +hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip, + int *errorp, int ishardlink) { hammer2_inode_t *scan1; hammer2_inode_t *scan2; + int state; /* * We used to have a depth field but it complicated matters too @@ -1273,33 +1279,55 @@ hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip) * * XXX need a bottom-up topology stability lock */ - if (fdip == tdip || fdip == tdip->pip) { + if (fdip == tdip) { hammer2_inode_ref(fdip); return(fdip); } - if (fdip->pip == tdip) { - hammer2_inode_ref(tdip); - return(tdip); - } /* * XXX not MPSAFE + * + * state: -1 sub-scan failed + * 0 + * +1 sub-scan succeeded (find xlink boundary if rename) */ for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) { scan2 = tdip; + state = 0; while (scan2->pmp == tdip->pmp) { - if (scan1 == scan2) { - hammer2_inode_ref(scan1); - return(scan1); + if (state == 0 && scan1 == scan2) { + /* + * Found common parent, stop here on rename, + * continue if creating a hardlink. + */ + if (ishardlink == 0) { + hammer2_inode_ref(scan1); + return(scan1); + } + state = 1; } + if (state == 1) { + /* + * Search for XLINK boundary when hardlink. + */ + if ((scan2->meta.uflags & + (SF_XLINK | UF_XLINK)) || + scan2->pip == NULL || + scan2->pip->pmp != scan1->pmp) { + hammer2_inode_ref(scan2); + return(scan2); + } + } + if (scan2->meta.uflags & (SF_XLINK | UF_XLINK)) + break; scan2 = scan2->pip; if (scan2 == NULL) break; } + if (scan1->meta.uflags & (SF_XLINK | UF_XLINK)) + break; } - panic("hammer2_inode_common_parent: no common parent %p %p\n", - fdip, tdip); - /* NOT REACHED */ + *errorp = EXDEV; return(NULL); } diff --git a/sys/vfs/hammer2/hammer2_thread.c b/sys/vfs/hammer2/hammer2_thread.c index 06d9b3846b..8b657fc6fe 100644 --- a/sys/vfs/hammer2/hammer2_thread.c +++ b/sys/vfs/hammer2/hammer2_thread.c @@ -478,22 +478,35 @@ hammer2_xop_feed(hammer2_xop_head_t *xop, hammer2_chain_t *chain, { hammer2_xop_fifo_t *fifo; + /* + * Early termination (typicaly of xop_readir) + */ + if (hammer2_xop_active(xop) == 0) { + error = EINTR; + goto done; + } + /* * Multi-threaded entry into the XOP collector. We own the * fifo->wi for our clindex. */ fifo = &xop->collect[clindex]; + if (fifo->ri == fifo->wi - HAMMER2_XOPFIFO) + lwkt_yield(); while (fifo->ri == fifo->wi - HAMMER2_XOPFIFO) { + atomic_set_int(&fifo->flags, HAMMER2_XOP_FIFO_STALL); tsleep_interlock(xop, 0); if (hammer2_xop_active(xop) == 0) { error = EINTR; goto done; } + cpu_lfence(); if (fifo->ri == fifo->wi - HAMMER2_XOPFIFO) { tsleep(xop, PINTERLOCKED, "h2feed", hz*60); } } + atomic_clear_int(&fifo->flags, HAMMER2_XOP_FIFO_STALL); if (chain) { hammer2_chain_ref(chain); hammer2_chain_push_shared_lock(chain); @@ -600,8 +613,14 @@ loop: xop->cluster.array[i].flags |= HAMMER2_CITEM_NULL; } - if (fifo->wi - fifo->ri < HAMMER2_XOPFIFO / 2) - wakeup(xop); /* XXX optimize */ + if (fifo->wi - fifo->ri <= HAMMER2_XOPFIFO / 2) { + if (fifo->flags & HAMMER2_XOP_FIFO_STALL) { + atomic_clear_int(&fifo->flags, + HAMMER2_XOP_FIFO_STALL); + wakeup(xop); + lwkt_yield(); + } + } --i; /* loop on same index */ } else { /* @@ -783,6 +802,14 @@ hammer2_xop_dequeue(hammer2_thread_t *thr, hammer2_xop_head_t *xop) atomic_clear_int(&xop->collect[clindex].flags, HAMMER2_XOP_FIFO_RUN); hammer2_spin_unex(&pmp->xop_spin); + + /* + * We do not necessarily need to wakeup a dependent waiter, we will + * loop up and process any unlocked dependency immediately. However, + * if there is more than one waking up other threads might improve + * performance. + */ + /*wakeup_one(thr->xopq);*/ } /* diff --git a/sys/vfs/hammer2/hammer2_vnops.c b/sys/vfs/hammer2/hammer2_vnops.c index d8619c703b..6dbde4df61 100644 --- a/sys/vfs/hammer2/hammer2_vnops.c +++ b/sys/vfs/hammer2/hammer2_vnops.c @@ -1337,12 +1337,18 @@ hammer2_vop_nlink(struct vop_nlink_args *ap) nlink_locked = 0; } fdip = ip->pip; - cdip = hammer2_inode_common_parent(fdip, tdip); - hammer2_inode_lock(cdip, 0); + error = 0; + + /* + * Can return NULL and error == EXDEV if the common parent + * crosses a directory with the xlink flag set. + */ + cdip = hammer2_inode_common_parent(fdip, tdip, &error, 1); + if (cdip) + hammer2_inode_lock(cdip, 0); hammer2_inode_lock(fdip, 0); hammer2_inode_lock(tdip, 0); hammer2_inode_lock(ip, 0); - error = 0; /* * Dispatch xop_nlink unconditionally since we have to update nlinks. @@ -1358,7 +1364,7 @@ hammer2_vop_nlink(struct vop_nlink_args *ap) #if 0 if (fdip != cdip || (ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE)) #endif - { + if (error == 0) { xop1 = hammer2_xop_alloc(fdip, HAMMER2_XOP_MODIFYING); hammer2_xop_setip2(&xop1->head, ip); hammer2_xop_setip3(&xop1->head, cdip); @@ -1401,8 +1407,10 @@ hammer2_vop_nlink(struct vop_nlink_args *ap) hammer2_inode_unlock(ip); hammer2_inode_unlock(tdip); hammer2_inode_unlock(fdip); - hammer2_inode_unlock(cdip); - hammer2_inode_drop(cdip); + if (cdip) { + hammer2_inode_unlock(cdip); + hammer2_inode_drop(cdip); + } if (nlink_locked) lockmgr(&ip->pmp->lock_nlink, LK_RELEASE); @@ -1784,12 +1792,20 @@ hammer2_vop_nrename(struct vop_nrename_args *ap) nlink_locked = 0; } - cdip = hammer2_inode_common_parent(ip->pip, tdip); + /* + * Can return NULL and error == EXDEV if the common parent + * crosses a directory with the xlink flag set. + */ + error = 0; + cdip = hammer2_inode_common_parent(ip->pip, tdip, &error, 0); + if (cdip == NULL) { + tnch_error = error; + goto done3; + } hammer2_inode_lock(cdip, 0); hammer2_inode_lock(fdip, 0); hammer2_inode_lock(tdip, 0); hammer2_inode_ref(ip); /* extra ref */ - error = 0; /* * If ip is a hardlink target and fdip != cdip we must shift the @@ -1950,6 +1966,7 @@ done2: hammer2_inode_unlock(cdip); hammer2_inode_drop(ip); hammer2_inode_drop(cdip); +done3: hammer2_inode_run_sideq(fdip->pmp); if (nlink_locked) diff --git a/sys/vfs/hammer2/hammer2_xops.c b/sys/vfs/hammer2/hammer2_xops.c index 8884067fd1..7e7f6094f4 100644 --- a/sys/vfs/hammer2/hammer2_xops.c +++ b/sys/vfs/hammer2/hammer2_xops.c @@ -399,11 +399,8 @@ hammer2_xop_unlink(hammer2_xop_t *arg, int clindex) /* * Chains passed to feed are expected to be locked shared. */ - if (chain) { - hammer2_chain_unlock(chain); - hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS | - HAMMER2_RESOLVE_SHARED); - } + if (chain) + hammer2_chain_lock_downgrade(chain); /* * We always return the hardlink target (the real inode) for -- 2.41.0