From: Matthew Dillon
Date: Tue, 26 May 2015 04:42:00 +0000 (-0700)
Subject: hammer2 - cleanup data load, unlink optimization
X-Git-Tag: v4.2.0rc~112
X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/f1c7c22451833bded1d0e54b96abd903f42ae75d

hammer2 - cleanup data load, unlink optimization

* Clean up data loading on chain lock.  Use chain flags to interlock data
  loading with either a shared or exclusive lock.

* We no longer upgrade a shared lock to exclusive in order to load data,
  preventing a potentially unexpected deadlock from occurring.

* Clean up the chain_core (chain->core) structure.  Remove flags and move
  the main lock from the core to the main chain structure proper.

* Attempt to avoid I/O when unlinking files by not updating the inode on
  the final 1->0 transition of nlinks.  This greatly reduces the amount of
  write I/O occurring during a rm -rf and improves performance by a
  factor of 3x.
---

diff --git a/sys/vfs/hammer2/hammer2.h b/sys/vfs/hammer2/hammer2.h
index 7c2ad880d2..93e30ffc3b 100644
--- a/sys/vfs/hammer2/hammer2.h
+++ b/sys/vfs/hammer2/hammer2.h
@@ -242,12 +242,13 @@ TAILQ_HEAD(h2_iocb_list, hammer2_iocb);
 #define CHAIN_CORE_DELETE_BMAP_ENTRIES \
         (HAMMER2_PBUFSIZE / sizeof(hammer2_blockref_t) / sizeof(uint32_t))

+/*
+ * Core topology for chain (embedded in chain).  Protected by a spinlock.
+ */
 struct hammer2_chain_core {
-        hammer2_mtx_t   lock;
         hammer2_spin_t  spin;
         struct hammer2_chain_tree rbtree;      /* sub-chains */
         int             live_zero;             /* blockref array opt */
-        u_int           flags;
         u_int           live_count;    /* live (not deleted) chains in tree */
         u_int           chain_count;   /* live + deleted chains under core */
         int             generation;    /* generation number (inserts only) */
@@ -255,9 +256,6 @@ struct hammer2_chain_core {

 typedef struct hammer2_chain_core hammer2_chain_core_t;

-#define HAMMER2_CORE_UNUSED0001        0x0001
-#define HAMMER2_CORE_COUNTEDBREFS      0x0002
-
 RB_HEAD(hammer2_io_tree, hammer2_io);

 /*
@@ -319,6 +317,7 @@ typedef struct hammer2_io hammer2_io_t;
  * Primary chain structure keeps track of the topology in-memory.
  */
 struct hammer2_chain {
+        hammer2_mtx_t   lock;
         hammer2_chain_core_t    core;
         RB_ENTRY(hammer2_chain) rbnode;        /* live chain(s) */
         hammer2_blockref_t      bref;
@@ -381,15 +380,15 @@ RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
 #define HAMMER2_CHAIN_FICTITIOUS       0x00000400      /* unsuitable for I/O */
 #define HAMMER2_CHAIN_VOLUMESYNC       0x00000800      /* needs volume sync */
 #define HAMMER2_CHAIN_DELAYED          0x00001000      /* delayed flush */
-#define HAMMER2_CHAIN_UNUSED00002000   0x00002000
+#define HAMMER2_CHAIN_COUNTEDBREFS     0x00002000      /* block table stats */
 #define HAMMER2_CHAIN_ONRBTREE         0x00004000      /* on parent RB tree */
 #define HAMMER2_CHAIN_UNUSED00008000   0x00008000
 #define HAMMER2_CHAIN_EMBEDDED         0x00010000      /* embedded data */
 #define HAMMER2_CHAIN_RELEASE          0x00020000      /* don't keep around */
 #define HAMMER2_CHAIN_BMAPPED          0x00040000      /* present in blkmap */
 #define HAMMER2_CHAIN_BMAPUPD          0x00080000      /* +needs updating */
-#define HAMMER2_CHAIN_UNUSED00100000   0x00100000
-#define HAMMER2_CHAIN_UNUSED00200000   0x00200000
+#define HAMMER2_CHAIN_IOINPROG         0x00100000      /* I/O interlock */
+#define HAMMER2_CHAIN_IOSIGNAL         0x00200000      /* I/O interlock */
 #define HAMMER2_CHAIN_PFSBOUNDARY      0x00400000      /* super->pfs inode */

 #define HAMMER2_CHAIN_FLUSH_MASK       (HAMMER2_CHAIN_MODIFIED |       \
@@ -682,8 +681,8 @@ struct hammer2_inode {
         u_int           flags;
         u_int           refs;          /* +vpref, +flushref */
         uint8_t         comp_heuristic;
-        hammer2_off_t   size;
-        uint64_t        mtime;
+        hammer2_off_t   size;          /* cache file size */
+        uint64_t        mtime;         /* cache mtime */
 };

 typedef struct hammer2_inode hammer2_inode_t;
@@ -1144,6 +1143,7 @@ void hammer2_chain_core_init(hammer2_chain_t *chain);
 void hammer2_chain_ref(hammer2_chain_t *chain);
 void hammer2_chain_drop(hammer2_chain_t *chain);
 void hammer2_chain_lock(hammer2_chain_t *chain, int how);
+void hammer2_chain_load_data(hammer2_chain_t *chain);
 const hammer2_media_data_t *hammer2_chain_rdata(hammer2_chain_t *chain);
 hammer2_media_data_t *hammer2_chain_wdata(hammer2_chain_t *chain);

diff --git a/sys/vfs/hammer2/hammer2_chain.c b/sys/vfs/hammer2/hammer2_chain.c
index 97c811e445..8d16305434 100644
--- a/sys/vfs/hammer2/hammer2_chain.c
+++ b/sys/vfs/hammer2/hammer2_chain.c
@@ -231,14 +231,12 @@ hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp,
 void
 hammer2_chain_core_init(hammer2_chain_t *chain)
 {
-        hammer2_chain_core_t *core = &chain->core;
-
         /*
          * Fresh core under nchain (no multi-homing of ochain's
          * sub-tree).
          */
-        RB_INIT(&core->rbtree);        /* live chains */
-        hammer2_mtx_init(&core->lock, "h2chain");
+        RB_INIT(&chain->core.rbtree);  /* live chains */
+        hammer2_mtx_init(&chain->lock, "h2chain");
 }

 /*
@@ -554,35 +552,30 @@ hammer2_chain_drop_data(hammer2_chain_t *chain, int lastdrop)
  * a logical file buffer.  However, if ALWAYS is specified the
  * device buffer will be instantiated anyway.
  *
- * WARNING! If data must be fetched a shared lock will temporarily be
- *          upgraded to exclusive.  However, a deadlock can occur if
- *          the caller owns more than one shared lock.
+ * WARNING! This function blocks on I/O if data needs to be fetched.  This
+ *          blocking can run concurrent with other compatible lock holders
+ *          who do not need data returning.  The lock is not upgraded to
+ *          exclusive during a data fetch, a separate bit is used to
+ *          interlock I/O.  However, an exclusive lock holder can still count
+ *          on being interlocked against an I/O fetch managed by a shared
+ *          lock holder.
  */
 void
 hammer2_chain_lock(hammer2_chain_t *chain, int how)
 {
-        hammer2_dev_t *hmp;
-        hammer2_blockref_t *bref;
-        hammer2_mtx_state_t ostate;
-        char *bdata;
-        int error;
-
         /*
          * Ref and lock the element.  Recursive locks are allowed.
          */
         KKASSERT(chain->refs > 0);
         atomic_add_int(&chain->lockcnt, 1);

-        hmp = chain->hmp;
-        KKASSERT(hmp != NULL);
-
         /*
          * Get the appropriate lock.
          */
         if (how & HAMMER2_RESOLVE_SHARED)
-                hammer2_mtx_sh(&chain->core.lock);
+                hammer2_mtx_sh(&chain->lock);
         else
-                hammer2_mtx_ex(&chain->core.lock);
+                hammer2_mtx_ex(&chain->lock);

         /*
          * If we already have a valid data pointer no further action is
@@ -614,16 +607,69 @@ hammer2_chain_lock(hammer2_chain_t *chain, int how)
         }

         /*
-         * Upgrade to an exclusive lock so we can safely manipulate the
-         * buffer cache.  If another thread got to it before us we
-         * can just return.
+         * Caller requires data
+         */
+        hammer2_chain_load_data(chain);
+}
+
+/*
+ * Issue I/O and install chain->data.  Caller must hold a chain lock, lock
+ * may be of any type.
+ *
+ * Once chain->data is set it cannot be disposed of until all locks are
+ * released.
+ */
+void
+hammer2_chain_load_data(hammer2_chain_t *chain)
+{
+        hammer2_blockref_t *bref;
+        hammer2_dev_t *hmp;
+        char *bdata;
+        int error;
+
+        /*
+         * Degenerate case, data already present.
          */
-        ostate = hammer2_mtx_upgrade(&chain->core.lock);
-        if (chain->data) {
-                hammer2_mtx_downgrade(&chain->core.lock, ostate);
+        if (chain->data)
                 return;
+
+        hmp = chain->hmp;
+        KKASSERT(hmp != NULL);
+
+        /*
+         * Gain the IOINPROG bit, interlocked block.
+         */
+        for (;;) {
+                u_int oflags;
+                u_int nflags;
+
+                oflags = chain->flags;
+                cpu_ccfence();
+                if (oflags & HAMMER2_CHAIN_IOINPROG) {
+                        nflags = oflags | HAMMER2_CHAIN_IOSIGNAL;
+                        tsleep_interlock(&chain->flags, 0);
+                        if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
+                                tsleep(&chain->flags, PINTERLOCKED,
+                                       "h2iocw", 0);
+                        }
+                        /* retry */
+                } else {
+                        nflags = oflags | HAMMER2_CHAIN_IOINPROG;
+                        if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
+                                break;
+                        }
+                        /* retry */
+                }
         }

+        /*
+         * We own CHAIN_IOINPROG
+         *
+         * Degenerate case if we raced another load.
+         */
+        if (chain->data)
+                goto done;
+
         /*
          * We must resolve to a device buffer, either by issuing I/O or
          * by creating a zero-fill element.  We do not mark the buffer
@@ -656,8 +702,7 @@ hammer2_chain_lock(hammer2_chain_t *chain, int how)
                 kprintf("hammer2_chain_lock: I/O error %016jx: %d\n",
                         (intmax_t)bref->data_off, error);
                 hammer2_io_bqrelse(&chain->dio);
-                hammer2_mtx_downgrade(&chain->core.lock, ostate);
-                return;
+                goto done;
         }
         chain->error = 0;

@@ -702,6 +747,9 @@ hammer2_chain_lock(hammer2_chain_t *chain, int how)
          * The buffer is not retained when copying to an embedded data
          * structure in order to avoid potential deadlocks or recursions
          * on the same physical buffer.
+         *
+         * WARNING! Other threads can start using the data the instant we
+         *          set chain->data non-NULL.
          */
         switch (bref->type) {
         case HAMMER2_BREF_TYPE_VOLUME:
@@ -723,7 +771,25 @@ hammer2_chain_lock(hammer2_chain_t *chain, int how)
                 chain->data = (void *)bdata;
                 break;
         }
-        hammer2_mtx_downgrade(&chain->core.lock, ostate);
+
+        /*
+         * Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested.
+         */
+done:
+        for (;;) {
+                u_int oflags;
+                u_int nflags;
+
+                oflags = chain->flags;
+                nflags = oflags & ~(HAMMER2_CHAIN_IOINPROG |
+                                    HAMMER2_CHAIN_IOSIGNAL);
+                KKASSERT(oflags & HAMMER2_CHAIN_IOINPROG);
+                if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
+                        if (oflags & HAMMER2_CHAIN_IOSIGNAL)
+                                wakeup(&chain->flags);
+                        break;
+                }
+        }
 }

 /*
@@ -752,7 +818,7 @@ hammer2_chain_unlock(hammer2_chain_t *chain)
         if (lockcnt > 1) {
                 if (atomic_cmpset_int(&chain->lockcnt,
                                       lockcnt, lockcnt - 1)) {
-                        hammer2_mtx_unlock(&chain->core.lock);
+                        hammer2_mtx_unlock(&chain->lock);
                         return;
                 }
         } else {
@@ -774,9 +840,9 @@
          * all that will happen is that the chain will be reloaded after we
          * unload it.
          */
-        ostate = hammer2_mtx_upgrade(&chain->core.lock);
+        ostate = hammer2_mtx_upgrade(&chain->lock);
         if (chain->lockcnt) {
-                hammer2_mtx_unlock(&chain->core.lock);
+                hammer2_mtx_unlock(&chain->lock);
                 return;
         }

@@ -789,7 +855,7 @@
         if (chain->dio == NULL) {
                 if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0)
                         hammer2_chain_drop_data(chain, 0);
-                hammer2_mtx_unlock(&chain->core.lock);
+                hammer2_mtx_unlock(&chain->lock);
                 return;
         }

@@ -863,7 +929,7 @@
         } else {
                 hammer2_io_bqrelse(&chain->dio);
         }
-        hammer2_mtx_unlock(&chain->core.lock);
+        hammer2_mtx_unlock(&chain->lock);
 }

 /*
@@ -885,7 +951,7 @@ hammer2_chain_countbrefs(hammer2_chain_t *chain,
                          hammer2_blockref_t *base, int count)
 {
         hammer2_spin_ex(&chain->core.spin);
-        if ((chain->core.flags & HAMMER2_CORE_COUNTEDBREFS) == 0) {
+        if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) {
                 if (base) {
                         while (--count >= 0) {
                                 if (base[count].type)
@@ -902,7 +968,7 @@
                         chain->core.live_zero = 0;
                 }
                 /* else do not modify live_count */
-                atomic_set_int(&chain->core.flags, HAMMER2_CORE_COUNTEDBREFS);
+                atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS);
         }
         hammer2_spin_unex(&chain->core.spin);
 }

 /*
@@ -1682,7 +1748,7 @@ again:
          * We need to hold the spinlock to access the block array and RB tree
          * and to interlock chain creation.
          */
-        if ((parent->core.flags & HAMMER2_CORE_COUNTEDBREFS) == 0)
+        if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
                 hammer2_chain_countbrefs(parent, base, count);

         /*
@@ -2022,7 +2088,7 @@ again:
          * We need to hold the spinlock to access the block array and RB tree
          * and to interlock chain creation.
          */
-        if ((parent->core.flags & HAMMER2_CORE_COUNTEDBREFS) == 0)
+        if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
                 hammer2_chain_countbrefs(parent, base, count);

         next_key = 0;
@@ -2156,7 +2222,7 @@ hammer2_chain_create(hammer2_trans_t *trans, hammer2_chain_t **parentp,
          * Topology may be crossing a PFS boundary.
          */
         parent = *parentp;
-        KKASSERT(hammer2_mtx_owned(&parent->core.lock));
+        KKASSERT(hammer2_mtx_owned(&parent->lock));
         KKASSERT(parent->error == 0);
         hmp = parent->hmp;
         chain = *chainp;
@@ -2182,7 +2248,7 @@
          * to 1 by chain_alloc() for us, but lockcnt is not).
          */
         chain->lockcnt = 1;
-        hammer2_mtx_ex(&chain->core.lock);
+        hammer2_mtx_ex(&chain->lock);
         allocated = 1;

         /*
@@ -2301,7 +2367,7 @@ again:
         /*
          * Make sure we've counted the brefs
          */
-        if ((parent->core.flags & HAMMER2_CORE_COUNTEDBREFS) == 0)
+        if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
                 hammer2_chain_countbrefs(parent, base, count);

         KKASSERT(parent->core.live_count >= 0 &&
@@ -2487,7 +2553,7 @@ hammer2_chain_rename(hammer2_trans_t *trans, hammer2_blockref_t *bref,
          * Having both chains locked is extremely important for atomicy.
          */
         if (parentp && (parent = *parentp) != NULL) {
-                KKASSERT(hammer2_mtx_owned(&parent->core.lock));
+                KKASSERT(hammer2_mtx_owned(&parent->lock));
                 KKASSERT(parent->refs > 0);
                 KKASSERT(parent->error == 0);

@@ -2734,7 +2800,7 @@ hammer2_chain_create_indirect(hammer2_trans_t *trans, hammer2_chain_t *parent,
          */
         hmp = parent->hmp;
         *errorp = 0;
-        KKASSERT(hammer2_mtx_owned(&parent->core.lock));
+        KKASSERT(hammer2_mtx_owned(&parent->lock));
         /*hammer2_chain_modify(trans, &parent, HAMMER2_MODIFY_OPTDATA);*/

         if (parent->flags & HAMMER2_CHAIN_INITIAL) {
@@ -3328,7 +3394,7 @@ void
 hammer2_chain_delete(hammer2_trans_t *trans, hammer2_chain_t *parent,
                      hammer2_chain_t *chain, int flags)
 {
-        KKASSERT(hammer2_mtx_owned(&chain->core.lock));
+        KKASSERT(hammer2_mtx_owned(&chain->lock));

         /*
          * Nothing to do if already marked.
@@ -3388,7 +3454,7 @@ hammer2_base_find(hammer2_chain_t *parent,
          * Require the live chain's already have their core's counted
          * so we can optimize operations.
          */
-        KKASSERT(parent->core.flags & HAMMER2_CORE_COUNTEDBREFS);
+        KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS);

         /*
          * Degenerate case
diff --git a/sys/vfs/hammer2/hammer2_inode.c b/sys/vfs/hammer2/hammer2_inode.c
index abb8fd11bb..4dd6958ec4 100644
--- a/sys/vfs/hammer2/hammer2_inode.c
+++ b/sys/vfs/hammer2/hammer2_inode.c
@@ -1273,6 +1273,7 @@ hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
         hammer2_key_t key_dummy;
         hammer2_key_t key_next;
         hammer2_key_t lhc;
+        int last_link;
         int error;
         int hlink;
         uint8_t type;
@@ -1418,7 +1419,9 @@ again:
          * or directory in (cparent, cluster).
          *
          * Delete the target when nlinks reaches 0 with special handling
-         * if (isopen) is set.
+         * to avoid I/O (to avoid actually updating the inode) for the 1->0
+         * transition, if possible.  This optimization makes rm -rf very
+         * fast.
          *
          * NOTE! In DragonFly the vnops function calls cache_unlink() after
          *       calling us here to clean out the namecache association,
@@ -1431,16 +1434,15 @@ again:
          *       will bump nlinks.
          */
         KKASSERT(cluster != NULL);

-        hammer2_cluster_modify(trans, cluster, 0);
-        wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
-        ripdata = wipdata;
-        wipdata->nlinks += nlinks;
-        if ((int64_t)wipdata->nlinks < 0) {    /* XXX debugging */
-                wipdata->nlinks = 0;
-        }
-        hammer2_cluster_modsync(cluster);
-        if (wipdata->nlinks == 0) {
+        /*
+         * Note: nlinks is negative when decrementing, positive when
+         *       incrementing.
+         */
+        ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
+        last_link = (ripdata->nlinks + nlinks == 0);
+
+        if (last_link) {
                 /*
                  * Target nlinks has reached 0, file now unlinked (but may
                  * still be open).
@@ -1457,6 +1459,19 @@ again:
                  */
                 hammer2_cluster_set_chainflags(cluster, HAMMER2_CHAIN_UNLINKED);
                 if (nch && cache_isopen(nch)) {
+                        /*
+                         * If an unlinked file is still open we must update
+                         * the inodes link count.
+                         */
+                        hammer2_cluster_modify(trans, cluster, 0);
+                        wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
+                        ripdata = wipdata;
+                        wipdata->nlinks += nlinks;
+                        /* XXX debugging */
+                        if ((int64_t)wipdata->nlinks < 0) {
+                                wipdata->nlinks = 0;
+                        }
+                        hammer2_cluster_modsync(cluster);
                         hammer2_inode_move_to_hidden(trans, &cparent, &cluster,
                                                      wipdata->inum);
                 } else {
@@ -1476,13 +1491,26 @@
                          * a nlinks adjustment of 0, and only wishes to remove file
                          * in order to be able to reconnect it under a different name.
                          *
-                         * In this situation we do a non-permanent deletion of the
+                         * In this situation we do a temporary deletion of the
                          * chain in order to allow the file to be reconnected in
                          * a different location.
                          */
                         KKASSERT(nlinks == 0);
                         hammer2_cluster_delete(trans, cparent, cluster, 0);
                 }
+        } else {
+                /*
+                 * Links remain, must update the inode link count.
+                 */
+                hammer2_cluster_modify(trans, cluster, 0);
+                wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
+                ripdata = wipdata;
+                wipdata->nlinks += nlinks;
+                if ((int64_t)wipdata->nlinks < 0) {    /* XXX debugging */
+                        wipdata->nlinks = 0;
+                }
+                hammer2_cluster_modsync(cluster);
         }
+        error = 0;
 done:
         if (cparent) {
diff --git a/sys/vfs/hammer2/hammer2_subr.c b/sys/vfs/hammer2/hammer2_subr.c
index f7a48b9a0a..212145cb83 100644
--- a/sys/vfs/hammer2/hammer2_subr.c
+++ b/sys/vfs/hammer2/hammer2_subr.c
@@ -48,19 +48,19 @@
 void
 hammer2_dev_exlock(hammer2_dev_t *hmp)
 {
-        hammer2_mtx_ex(&hmp->vchain.core.lock);
+        hammer2_mtx_ex(&hmp->vchain.lock);
 }

 void
 hammer2_dev_shlock(hammer2_dev_t *hmp)
 {
-        hammer2_mtx_sh(&hmp->vchain.core.lock);
+        hammer2_mtx_sh(&hmp->vchain.lock);
 }

 void
 hammer2_dev_unlock(hammer2_dev_t *hmp)
 {
-        hammer2_mtx_unlock(&hmp->vchain.core.lock);
+        hammer2_mtx_unlock(&hmp->vchain.lock);
 }

 /*
diff --git a/sys/vfs/hammer2/hammer2_vfsops.c b/sys/vfs/hammer2/hammer2_vfsops.c
index 5bff3aad18..f2e6c35069 100644
--- a/sys/vfs/hammer2/hammer2_vfsops.c
+++ b/sys/vfs/hammer2/hammer2_vfsops.c
@@ -3064,17 +3064,13 @@ hammer2_dump_chain(hammer2_chain_t *chain, int tab, int *countp, char pfx)
                 chain->bref.key, chain->bref.keybits,
                 chain->bref.mirror_tid);

-        kprintf("%*.*s [%08x] (%s) refs=%d\n",
+        kprintf("%*.*s [%08x] (%s) refs=%d",
                 tab, tab, "",
                 chain->flags,
                 ((chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
                 chain->data) ? (char *)chain->data->ipdata.filename : "?"),
                 chain->refs);

-        kprintf("%*.*s core [%08x]",
-                tab, tab, "",
-                chain->core.flags);
-
         parent = chain->parent;
         if (parent)
                 kprintf("\n%*.*s p=%p [pflags %08x prefs %d",
diff --git a/sys/vfs/hammer2/hammer2_vnops.c b/sys/vfs/hammer2/hammer2_vnops.c
index 51bbcda746..662b1209b6 100644
--- a/sys/vfs/hammer2/hammer2_vnops.c
+++ b/sys/vfs/hammer2/hammer2_vnops.c
@@ -2437,8 +2437,13 @@ hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfs_t *pmp)
                         kprintf("hammer2: unlink on reclaim: %s refs=%d\n",
                                 ripdata->filename, ip->refs);
                 }
-                KKASSERT(ripdata->nlinks == 0);
+                /*
+                 * NOTE: Due to optimizations to avoid I/O on the inode for
+                 *       the last unlink, ripdata->nlinks is not necessarily
+                 *       0 here.
+                 */
+                /* KKASSERT(ripdata->nlinks == 0); (see NOTE) */

                 cparent = hammer2_cluster_parent(cluster);
                 hammer2_cluster_delete(trans, cparent, cluster,
                                        HAMMER2_DELETE_PERMANENT);
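
Editor's note (not part of the commit): the heart of the data-load cleanup above is the
HAMMER2_CHAIN_IOINPROG/IOSIGNAL interlock in hammer2_chain_load_data(): exactly one thread
performs the media read while other lock holders sleep on the flag word instead of upgrading
their chain lock.  The following is a minimal user-space C11 sketch of that same pattern,
using <stdatomic.h> and pthreads in place of the kernel's atomic_cmpset_int(), tsleep() and
wakeup().  All names here (struct chain, chain_load_data, do_io, the flag values) are
hypothetical illustrations, not DragonFly APIs.

        #include <pthread.h>
        #include <stdatomic.h>

        #define CHAIN_IOINPROG 0x1      /* an I/O load is in progress */
        #define CHAIN_IOSIGNAL 0x2      /* a waiter asked for a wakeup */

        struct chain {
                _Atomic unsigned flags;
                void *data;                     /* NULL until loaded */
                pthread_mutex_t wait_mtx;       /* stands in for tsleep/wakeup */
                pthread_cond_t wait_cv;
        };

        /* Placeholder for the real media read. */
        static void *do_io(struct chain *chain)
        {
                static char buf[65536];
                (void)chain;
                return buf;
        }

        /* Load chain->data exactly once, even with many concurrent callers. */
        void chain_load_data(struct chain *chain)
        {
                unsigned oflags, nflags;

                if (chain->data)                /* already resident */
                        return;

                for (;;) {
                        oflags = atomic_load(&chain->flags);
                        if (oflags & CHAIN_IOINPROG) {
                                /* Another thread is loading: request a signal, sleep. */
                                nflags = oflags | CHAIN_IOSIGNAL;
                                pthread_mutex_lock(&chain->wait_mtx);
                                if (atomic_compare_exchange_strong(&chain->flags,
                                                                   &oflags, nflags)) {
                                        pthread_cond_wait(&chain->wait_cv,
                                                          &chain->wait_mtx);
                                }
                                pthread_mutex_unlock(&chain->wait_mtx);
                                /* retry */
                        } else {
                                /* Try to become the loading thread. */
                                nflags = oflags | CHAIN_IOINPROG;
                                if (atomic_compare_exchange_strong(&chain->flags,
                                                                   &oflags, nflags))
                                        break;
                                /* retry */
                        }
                }

                if (chain->data == NULL)        /* we may have raced a loader */
                        chain->data = do_io(chain);

                /* Drop IOINPROG/IOSIGNAL; wake waiters if any asked for it. */
                oflags = atomic_fetch_and(&chain->flags,
                                          ~(unsigned)(CHAIN_IOINPROG | CHAIN_IOSIGNAL));
                if (oflags & CHAIN_IOSIGNAL) {
                        pthread_mutex_lock(&chain->wait_mtx);
                        pthread_cond_broadcast(&chain->wait_cv);
                        pthread_mutex_unlock(&chain->wait_mtx);
                }
        }

The structure mirrors the kernel version: the compare-and-swap both publishes a waiter's
IOSIGNAL request and, on the loader side, claims IOINPROG, so waiters never block the
loader's progress and no chain lock ever has to be upgraded to exclusive just to fetch data.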