From 0729c8c846bbed334033de1cf717c06a8202685a Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Tue, 29 Apr 2008 01:10:37 +0000 Subject: [PATCH] HAMMER 39/Many: Parallel operations optimizations * Implement a per-directory cache of new object IDs. Up to 128 directories will be managed in LRU fashion. The cache provides a pool of object IDs to better localize the object ids of files created in a directory, so parallel operations on the filesystem do not create a fragmented object id space. * Cache numerous fields in the root volume's header to avoid creating undo records for them, greatly improving performance (ultimately we can sync an undo space representing the volume header using a direct comparison mechanic but for now we assume the write of the volume header to be atomic). * Implement a zone limit for the blockmap which newfs_hammer can install. The blockmap zones have an ultimate limit of 2^60 bytes, or around one million terabytes. If you create a 100G filesystem there is no reason to let the blockmap iterate over its entire range as that would result in a lot of fragmentation and blockmap overhead. By default newfs_hammer sets the zone limit to 100x the size of the filesystem. * Fix a bug in the crash recovery code. Do not sync newly added inodes once the flusher is running, otherwise the volume header can get out of sync. Just create a dummy marker structure and move it to the tail of the inode flush_list when the flush starts, and stop when we hit it. * Adjust hammer_vfs_sync() to sync twice. The second sync is needed to update the volume header's undo fifo indices, otherwise HAMMER will believe that it must undo the last fully synchronized flush. 
--- sys/vfs/hammer/hammer.h | 30 ++++++++- sys/vfs/hammer/hammer_blockmap.c | 15 +++-- sys/vfs/hammer/hammer_disk.h | 10 +-- sys/vfs/hammer/hammer_flusher.c | 42 ++++++++++--- sys/vfs/hammer/hammer_freemap.c | 9 ++- sys/vfs/hammer/hammer_inode.c | 12 +++- sys/vfs/hammer/hammer_recover.c | 9 ++- sys/vfs/hammer/hammer_transaction.c | 98 +++++++++++++++++++++++------ sys/vfs/hammer/hammer_undo.c | 6 +- sys/vfs/hammer/hammer_vfsops.c | 58 ++++++++++++++--- 10 files changed, 227 insertions(+), 62 deletions(-) diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index e5a2093441..a0a88333fe 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.53 2008/04/27 00:45:37 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.54 2008/04/29 01:10:37 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS @@ -146,6 +146,24 @@ typedef struct hammer_depend { TAILQ_HEAD(hammer_depend_list, hammer_depend); +/* + * Cache object ids. A fixed number of objid cache structures are + * created to reserve object id's for newly created files in multiples + * of 100,000, localized to a particular directory, and recycled as + * needed. This allows parallel create operations in different + * directories to retain fairly localized object ids which in turn + * improves reblocking performance and layout. + */ +#define OBJID_CACHE_SIZE 128 +#define OBJID_CACHE_BULK 100000 + +typedef struct hammer_objid_cache { + TAILQ_ENTRY(hammer_objid_cache) entry; + struct hammer_inode *dip; + hammer_tid_t next_tid; + int count; +} *hammer_objid_cache_t; + /* * Structure used to represent an inode in-memory. 
* @@ -190,6 +208,7 @@ struct hammer_inode { u_int64_t obj_id; /* (key) object identifier */ hammer_tid_t obj_asof; /* (key) snapshot or 0 */ struct hammer_mount *hmp; + hammer_objid_cache_t objid_cache; int flags; int error; /* flush error */ int depend_count; @@ -506,6 +525,7 @@ struct hammer_mount { struct hammer_io_list lose_list; /* loose buffers */ int locked_dirty_count; /* meta/volu count */ int io_running_count; + int objid_cache_count; hammer_tid_t asof; hammer_off_t next_tid; u_int32_t namekey_iterator; @@ -513,8 +533,12 @@ struct hammer_mount { struct netexport export; struct hammer_lock sync_lock; struct lock blockmap_lock; + hammer_inode_t flusher_demark; + struct hammer_blockmap blockmap[HAMMER_MAX_ZONES]; struct hammer_holes holes[HAMMER_MAX_ZONES]; TAILQ_HEAD(, hammer_inode) flush_list; + TAILQ_HEAD(, hammer_inode) flush_alt_list; + TAILQ_HEAD(, hammer_objid_cache) objid_cache_list; }; typedef struct hammer_mount *hammer_mount_t; @@ -601,9 +625,11 @@ u_int32_t hammer_to_unix_xid(uuid_t *uuid); void hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid); void hammer_to_timespec(hammer_tid_t tid, struct timespec *ts); hammer_tid_t hammer_timespec_to_transid(struct timespec *ts); -hammer_tid_t hammer_alloc_tid(hammer_transaction_t trans); hammer_tid_t hammer_now_tid(void); hammer_tid_t hammer_str_to_tid(const char *str); +hammer_tid_t hammer_alloc_objid(hammer_transaction_t trans, hammer_inode_t dip); +void hammer_clear_objid(hammer_inode_t dip); +void hammer_destroy_objid_cache(hammer_mount_t hmp); enum vtype hammer_get_vnode_type(u_int8_t obj_type); int hammer_get_dtype(u_int8_t obj_type); diff --git a/sys/vfs/hammer/hammer_blockmap.c b/sys/vfs/hammer/hammer_blockmap.c index c40db777ac..35f8535999 100644 --- a/sys/vfs/hammer/hammer_blockmap.c +++ b/sys/vfs/hammer/hammer_blockmap.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.8 2008/04/25 21:49:49 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.9 2008/04/29 01:10:37 dillon Exp $ */ /* @@ -71,7 +71,7 @@ hammer_blockmap_alloc(hammer_transaction_t trans, int zone, root_volume = hammer_get_root_volume(trans->hmp, errorp); if (*errorp) return(0); - rootmap = &root_volume->ondisk->vol0_blockmap[zone]; + rootmap = &trans->hmp->blockmap[zone]; KKASSERT(rootmap->phys_offset != 0); KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) == HAMMER_ZONE_RAW_BUFFER_INDEX); @@ -245,8 +245,7 @@ again: * be big-block aligned. */ if (used_hole == 0) { - hammer_modify_volume(trans, root_volume, - rootmap, sizeof(*rootmap)); + hammer_modify_volume(trans, root_volume, NULL, 0); rootmap->next_offset = next_offset + bytes; if (rootmap->alloc_offset < rootmap->next_offset) { rootmap->alloc_offset = @@ -295,7 +294,7 @@ hammer_blockmap_free(hammer_transaction_t trans, lockmgr(&trans->hmp->blockmap_lock, LK_EXCLUSIVE|LK_RETRY); - rootmap = &root_volume->ondisk->vol0_blockmap[zone]; + rootmap = &trans->hmp->blockmap[zone]; KKASSERT(rootmap->phys_offset != 0); KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) == HAMMER_ZONE_RAW_BUFFER_INDEX); @@ -370,7 +369,7 @@ hammer_blockmap_free(hammer_transaction_t trans, */ #if 0 hammer_modify_volume(trans, root_volume, - rootmap, sizeof(*rootmap)); + NULL, 0); rootmap->next_offset &= ~HAMMER_LARGEBLOCK_MASK64; hammer_modify_volume_done(root_volume); #endif @@ -412,7 +411,7 @@ hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t bmap_off, *curp = 0; return(0); } - rootmap = &root_volume->ondisk->vol0_blockmap[zone]; + rootmap = &hmp->blockmap[zone]; KKASSERT(rootmap->phys_offset != 0); KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) == HAMMER_ZONE_RAW_BUFFER_INDEX); @@ -484,7 +483,7 @@ hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t bmap_off, int *errorp) root_volume = hammer_get_root_volume(hmp, errorp); if (*errorp) return(0); - 
rootmap = &root_volume->ondisk->vol0_blockmap[zone]; + rootmap = &hmp->blockmap[zone]; KKASSERT(rootmap->phys_offset != 0); KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) == HAMMER_ZONE_RAW_BUFFER_INDEX); diff --git a/sys/vfs/hammer/hammer_disk.h b/sys/vfs/hammer/hammer_disk.h index a31f2fcaa9..20cc923bfc 100644 --- a/sys/vfs/hammer/hammer_disk.h +++ b/sys/vfs/hammer/hammer_disk.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.29 2008/04/28 09:38:38 swildner Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.30 2008/04/29 01:10:37 dillon Exp $ */ #ifndef VFS_HAMMER_DISK_H_ @@ -154,7 +154,7 @@ typedef u_int32_t hammer_crc_t; * to deal with by preventing an iterator overflow. */ #define HAMMER_ZONE_LIMIT \ - (0x1000000000000000ULL - HAMMER_BLOCKMAP_LAYER2) + (0x1000000000000000ULL - HAMMER_BLOCKMAP_LAYER2 * 2) #define HAMMER_MAX_ZONES 16 @@ -468,11 +468,11 @@ struct hammer_volume_ondisk { int64_t vol0_stat_records; /* total records in filesystem */ hammer_off_t vol0_btree_root; /* B-Tree root */ hammer_tid_t vol0_next_tid; /* highest synchronized TID */ - u_int32_t vol0_reserved00; - u_int32_t vol0_reserved01; + hammer_off_t vol0_zone_limit; /* limit the zone size */ /* - * Blockmaps for zones. Not all zones use a blockmap. + * Blockmaps for zones. Not all zones use a blockmap. Note that + * the entire root blockmap is cached in the hammer_mount structure. */ struct hammer_blockmap vol0_blockmap[HAMMER_MAX_ZONES]; diff --git a/sys/vfs/hammer/hammer_flusher.c b/sys/vfs/hammer/hammer_flusher.c index cc5c5fe715..f74092c770 100644 --- a/sys/vfs/hammer/hammer_flusher.c +++ b/sys/vfs/hammer/hammer_flusher.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.6 2008/04/27 00:45:37 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.7 2008/04/29 01:10:37 dillon Exp $ */ /* * HAMMER dependancy flusher thread @@ -45,7 +45,7 @@ static void hammer_flusher_thread(void *arg); static void hammer_flusher_clean_loose_ios(hammer_mount_t hmp); static void hammer_flusher_flush(hammer_mount_t hmp); -static int hammer_must_finalize_undo(hammer_volume_t root_volume); +static int hammer_must_finalize_undo(hammer_mount_t hmp); static void hammer_flusher_finalize(hammer_mount_t hmp, hammer_volume_t root_volume, hammer_off_t start_offset); @@ -96,6 +96,10 @@ hammer_flusher_thread(void *arg) hammer_mount_t hmp = arg; int seq; + hmp->flusher_demark = kmalloc(sizeof(struct hammer_inode), + M_HAMMER, M_WAITOK | M_ZERO); + TAILQ_INSERT_TAIL(&hmp->flush_list, hmp->flusher_demark, flush_entry); + for (;;) { seq = hmp->flusher_seq; hammer_flusher_clean_loose_ios(hmp); @@ -106,8 +110,11 @@ hammer_flusher_thread(void *arg) if (hmp->flusher_exiting) break; while (hmp->flusher_seq == hmp->flusher_act) - tsleep(&hmp->flusher_seq, 0, "hmrflt", 0); + tsleep(&hmp->flusher_seq, 0, "hmrwwa", 0); } + TAILQ_REMOVE(&hmp->flush_list, hmp->flusher_demark, flush_entry); + kfree(hmp->flusher_demark, M_HAMMER); + hmp->flusher_demark = NULL; hmp->flusher_td = NULL; wakeup(&hmp->flusher_exiting); lwkt_exit(); @@ -147,13 +154,16 @@ hammer_flusher_flush(hammer_mount_t hmp) int error; root_volume = hammer_get_root_volume(hmp, &error); - rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]; + rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX]; start_offset = rootmap->next_offset; if (hammer_debug_general & 0x00010000) kprintf("x"); - while ((ip = TAILQ_FIRST(&hmp->flush_list)) != NULL) { + TAILQ_REMOVE(&hmp->flush_list, hmp->flusher_demark, flush_entry); + TAILQ_INSERT_TAIL(&hmp->flush_list, hmp->flusher_demark, flush_entry); + + while ((ip = TAILQ_FIRST(&hmp->flush_list)) != 
hmp->flusher_demark) { TAILQ_REMOVE(&hmp->flush_list, ip, flush_entry); /* @@ -162,7 +172,7 @@ hammer_flusher_flush(hammer_mount_t hmp) ip->error = hammer_sync_inode(ip, (ip->vp ? 0 : 1)); hammer_flush_inode_done(ip); if (hmp->locked_dirty_count > 64 || - hammer_must_finalize_undo(root_volume)) { + hammer_must_finalize_undo(hmp)) { hammer_flusher_finalize(hmp, root_volume, start_offset); start_offset = rootmap->next_offset; } @@ -178,13 +188,13 @@ hammer_flusher_flush(hammer_mount_t hmp) */ static int -hammer_must_finalize_undo(hammer_volume_t root_volume) +hammer_must_finalize_undo(hammer_mount_t hmp) { hammer_blockmap_t rootmap; int bytes; int max_bytes; - rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]; + rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX]; if (rootmap->first_offset <= rootmap->next_offset) { bytes = (int)(rootmap->next_offset - rootmap->first_offset); @@ -257,11 +267,25 @@ hammer_flusher_finalize(hammer_mount_t hmp, hammer_volume_t root_volume, /* * Update the volume header */ - rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]; + rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX]; if (rootmap->first_offset != start_offset) { hammer_modify_volume(NULL, root_volume, NULL, 0); rootmap->first_offset = start_offset; hammer_modify_volume_done(root_volume); + } + if (root_volume->ondisk->vol0_next_tid != hmp->next_tid) { + hammer_modify_volume(NULL, root_volume, NULL, 0); + root_volume->ondisk->vol0_next_tid = hmp->next_tid; + hammer_modify_volume_done(root_volume); + } + + /* + * Sync our cached blockmap array with the one in the root + * volume header. 
+ */ + if (root_volume->io.modified) { + bcopy(hmp->blockmap, root_volume->ondisk->vol0_blockmap, + sizeof(hmp->blockmap)); hammer_io_flush(&root_volume->io); } diff --git a/sys/vfs/hammer/hammer_freemap.c b/sys/vfs/hammer/hammer_freemap.c index cff1bf31ca..f3b7157351 100644 --- a/sys/vfs/hammer/hammer_freemap.c +++ b/sys/vfs/hammer/hammer_freemap.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.6 2008/04/25 21:49:49 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.7 2008/04/29 01:10:37 dillon Exp $ */ /* @@ -65,7 +65,7 @@ hammer_freemap_alloc(hammer_transaction_t trans, hammer_off_t owner, *errorp = 0; ondisk = trans->rootvol->ondisk; - blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX]; + blockmap = &trans->hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX]; result_offset = blockmap->next_offset; vol_no = HAMMER_VOL_DECODE(result_offset); for (;;) { @@ -126,8 +126,7 @@ new_volume: } } kprintf("hammer_freemap_alloc %016llx\n", result_offset); - hammer_modify_volume(trans, trans->rootvol, - blockmap, sizeof(*blockmap)); + hammer_modify_volume(trans, trans->rootvol, NULL, 0); blockmap->next_offset = result_offset + HAMMER_LARGEBLOCK_SIZE; hammer_modify_volume_done(trans->rootvol); done: @@ -159,7 +158,7 @@ hammer_freemap_free(hammer_transaction_t trans, hammer_off_t phys_offset, *errorp = 0; ondisk = trans->rootvol->ondisk; - blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX]; + blockmap = &trans->hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX]; layer1_offset = blockmap->phys_offset + HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset); layer1 = hammer_bread(trans->hmp, layer1_offset, errorp, &buffer1); diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index eeb44b46dc..097f3fcfc0 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF 
ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.42 2008/04/27 21:07:15 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.43 2008/04/29 01:10:37 dillon Exp $ */ #include "hammer.h" @@ -321,7 +321,7 @@ hammer_create_inode(hammer_transaction_t trans, struct vattr *vap, hmp = trans->hmp; ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO); ++hammer_count_inodes; - ip->obj_id = hammer_alloc_tid(trans); + ip->obj_id = hammer_alloc_objid(trans, dip); KKASSERT(ip->obj_id != 0); ip->obj_asof = hmp->asof; ip->hmp = hmp; @@ -657,6 +657,8 @@ hammer_unload_inode(struct hammer_inode *ip) hammer_uncache_node(&ip->cache[0]); hammer_uncache_node(&ip->cache[1]); + if (ip->objid_cache) + hammer_clear_objid(ip); --hammer_count_inodes; kfree(ip, M_HAMMER); @@ -870,6 +872,12 @@ hammer_flush_inode_done(hammer_inode_t ip) if (ip->flags & HAMMER_INODE_REFLUSH) { ip->flags &= ~HAMMER_INODE_REFLUSH; hammer_flush_inode(ip, 0); + if (ip->flush_state == HAMMER_FST_IDLE) { + if (ip->flags & HAMMER_INODE_FLUSHW) { + ip->flags &= ~HAMMER_INODE_FLUSHW; + wakeup(&ip->flags); + } + } } else { if (ip->flags & HAMMER_INODE_FLUSHW) { ip->flags &= ~HAMMER_INODE_FLUSHW; diff --git a/sys/vfs/hammer/hammer_recover.c b/sys/vfs/hammer/hammer_recover.c index 81126d86f5..f0e198f745 100644 --- a/sys/vfs/hammer/hammer_recover.c +++ b/sys/vfs/hammer/hammer_recover.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.12 2008/04/26 19:08:14 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.13 2008/04/29 01:10:37 dillon Exp $ */ #include "hammer.h" @@ -48,6 +48,10 @@ static int hammer_recover_undo(hammer_mount_t hmp, hammer_fifo_undo_t undo, /* * Recover a filesystem on mount + * + * NOTE: No information from the root volume has been cached in the + * hammer_mount structure yet, so we need to access the root volume's + * buffer directly. */ int hammer_recover(hammer_mount_t hmp, hammer_volume_t root_volume) @@ -63,6 +67,9 @@ hammer_recover(hammer_mount_t hmp, hammer_volume_t root_volume) /* * Examine the UNDO FIFO. If it is empty the filesystem is clean * and no action need be taken. + * + * NOTE: hmp->blockmap has not been initialized yet so use the + * root volume's ondisk buffer directly. */ rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]; if (rootmap->first_offset == rootmap->next_offset) diff --git a/sys/vfs/hammer/hammer_transaction.c b/sys/vfs/hammer/hammer_transaction.c index 1369c0d048..4aebba6709 100644 --- a/sys/vfs/hammer/hammer_transaction.c +++ b/sys/vfs/hammer/hammer_transaction.c @@ -31,11 +31,14 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.13 2008/04/25 21:49:49 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.14 2008/04/29 01:10:37 dillon Exp $ */ #include "hammer.h" +static hammer_tid_t hammer_alloc_tid(hammer_transaction_t trans, int count); + + /* * Start a standard transaction. 
*/ @@ -50,7 +53,7 @@ hammer_start_transaction(struct hammer_transaction *trans, trans->rootvol = hammer_get_root_volume(hmp, &error); KKASSERT(error == 0); trans->tid = 0; - trans->time = hammer_alloc_tid(trans); + trans->time = hammer_alloc_tid(trans, 1); } /* @@ -67,7 +70,7 @@ hammer_simple_transaction(struct hammer_transaction *trans, trans->rootvol = hammer_get_root_volume(hmp, &error); KKASSERT(error == 0); trans->tid = 0; - trans->time = hammer_alloc_tid(trans); + trans->time = hammer_alloc_tid(trans, 1); } /* @@ -84,7 +87,7 @@ hammer_start_transaction_fls(struct hammer_transaction *trans, trans->hmp = hmp; trans->rootvol = hammer_get_root_volume(hmp, &error); KKASSERT(error == 0); - trans->tid = hammer_alloc_tid(trans); + trans->tid = hammer_alloc_tid(trans, 1); trans->time = trans->tid; } @@ -100,8 +103,8 @@ hammer_done_transaction(struct hammer_transaction *trans) * B-Tree code can make a separator that does not match either the * left or right hand sides. */ -hammer_tid_t -hammer_alloc_tid(hammer_transaction_t trans) +static hammer_tid_t +hammer_alloc_tid(hammer_transaction_t trans, int count) { struct timespec ts; hammer_tid_t tid; @@ -110,23 +113,82 @@ hammer_alloc_tid(hammer_transaction_t trans) tid = ts.tv_sec * 1000000000LL + ts.tv_nsec; if (tid < trans->hmp->next_tid) tid = trans->hmp->next_tid; -#if 0 - hammer_modify_volume(trans, trans->rootvol, NULL, 0); - ondisk = trans->rootvol->ondisk; - if (tid < ondisk->vol0_next_tid) - tid = ondisk->vol0_next_tid; -#endif - if (tid >= 0xFFFFFFFFFFFFFFF0ULL) + if (tid >= 0xFFFFFFFFFFFFF000ULL) panic("hammer_start_transaction: Ran out of TIDs!"); + trans->hmp->next_tid = tid + count * 2; if (hammer_debug_tid) { kprintf("alloc_tid %016llx (0x%08x)\n", tid, (int)(tid / 1000000000LL)); } -#if 0 - ondisk->vol0_next_tid = tid + 2; - hammer_modify_volume_done(trans->rootvol); -#endif - trans->hmp->next_tid = tid + 2; return(tid); } +/* + * Allocate an object id + */ +hammer_tid_t 
+hammer_alloc_objid(hammer_transaction_t trans, hammer_inode_t dip) +{ + hammer_objid_cache_t ocp; + hammer_tid_t tid; + + while ((ocp = dip->objid_cache) == NULL) { + if (trans->hmp->objid_cache_count < OBJID_CACHE_SIZE) { + ocp = kmalloc(sizeof(*ocp), M_HAMMER, M_WAITOK|M_ZERO); + ocp->next_tid = hammer_alloc_tid(trans, + OBJID_CACHE_BULK); + ocp->count = OBJID_CACHE_BULK; + TAILQ_INSERT_HEAD(&trans->hmp->objid_cache_list, ocp, + entry); + ++trans->hmp->objid_cache_count; + /* may have blocked, recheck */ + if (dip->objid_cache == NULL) { + dip->objid_cache = ocp; + ocp->dip = dip; + } + } else { + ocp = TAILQ_FIRST(&trans->hmp->objid_cache_list); + if (ocp->dip) + ocp->dip->objid_cache = NULL; + dip->objid_cache = ocp; + ocp->dip = dip; + } + } + TAILQ_REMOVE(&trans->hmp->objid_cache_list, ocp, entry); + tid = ocp->next_tid; + ocp->next_tid += 2; + if (--ocp->count == 0) { + dip->objid_cache = NULL; + --trans->hmp->objid_cache_count; + ocp->dip = NULL; + kfree(ocp, M_HAMMER); + } else { + TAILQ_INSERT_TAIL(&trans->hmp->objid_cache_list, ocp, entry); + } + return(tid); +} + +void +hammer_clear_objid(hammer_inode_t dip) +{ + hammer_objid_cache_t ocp; + + if ((ocp = dip->objid_cache) != NULL) { + dip->objid_cache = NULL; + ocp->dip = NULL; + TAILQ_REMOVE(&dip->hmp->objid_cache_list, ocp, entry); + TAILQ_INSERT_HEAD(&dip->hmp->objid_cache_list, ocp, entry); + } +} + +void +hammer_destroy_objid_cache(hammer_mount_t hmp) +{ + hammer_objid_cache_t ocp; + + while ((ocp = TAILQ_FIRST(&hmp->objid_cache_list)) != NULL) { + TAILQ_REMOVE(&hmp->objid_cache_list, ocp, entry); + kfree(ocp, M_HAMMER); + } +} + diff --git a/sys/vfs/hammer/hammer_undo.c b/sys/vfs/hammer/hammer_undo.c index f070fc036c..012e599922 100644 --- a/sys/vfs/hammer/hammer_undo.c +++ b/sys/vfs/hammer/hammer_undo.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_undo.c,v 1.6 2008/04/26 02:54:00 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_undo.c,v 1.7 2008/04/29 01:10:37 dillon Exp $ */ /* @@ -56,7 +56,7 @@ hammer_undo_lookup(hammer_mount_t hmp, hammer_off_t zone3_off, int *errorp) root_volume = hammer_get_root_volume(hmp, errorp); if (*errorp) return(0); - undomap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]; + undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX]; KKASSERT(HAMMER_ZONE_DECODE(undomap->alloc_offset) == HAMMER_ZONE_UNDO_INDEX); KKASSERT (zone3_off < undomap->alloc_offset); @@ -92,7 +92,7 @@ hammer_generate_undo(hammer_transaction_t trans, hammer_io_t io, root_volume = trans->rootvol; ondisk = root_volume->ondisk; - undomap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]; + undomap = &trans->hmp->blockmap[HAMMER_ZONE_UNDO_INDEX]; /* no undo recursion */ hammer_modify_volume(NULL, root_volume, NULL, 0); diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c index 59c26a7c32..50f16f6302 100644 --- a/sys/vfs/hammer/hammer_vfsops.c +++ b/sys/vfs/hammer/hammer_vfsops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.29 2008/04/27 00:45:37 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.30 2008/04/29 01:10:37 dillon Exp $ */ #include @@ -197,14 +197,19 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, hmp->sync_lock.refs = 1; TAILQ_INIT(&hmp->flush_list); - + TAILQ_INIT(&hmp->objid_cache_list); + + /* + * Set default zone limits. This value can be reduced + * further by the zone limit specified in the root volume. + * + * The sysctl can force a small zone limit for debugging + * purposes. 
+ */ for (i = 0; i < HAMMER_MAX_ZONES; ++i) { hmp->zone_limits[i] = HAMMER_ZONE_ENCODE(i, HAMMER_ZONE_LIMIT); - /* - * Sysctl override for debugging (force the zone - * the cycle more quickly then every 2^60 bytes). - */ + if (hammer_zone_limit) { hmp->zone_limits[i] = HAMMER_ZONE_ENCODE(i, hammer_zone_limit); @@ -302,8 +307,12 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, goto failed; /* - * Perform any necessary UNDO operations + * Perform any necessary UNDO operations. The recover code does + * call hammer_undo_lookup() so we have to pre-cache the blockmap, + * and then re-copy it again after recovery is complete. */ + bcopy(rootvol->ondisk->vol0_blockmap, hmp->blockmap, + sizeof(hmp->blockmap)); error = hammer_recover(hmp, rootvol); if (error) { kprintf("Failed to recover HAMMER filesystem on mount\n"); @@ -321,8 +330,28 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, mp->mnt_stat.f_fsid.val[1] = crc32((char *)&rootvol->ondisk->vol_fsid + 8, 8); + /* + * Certain often-modified fields in the root volume are cached in + * the hammer_mount structure so we do not have to generate lots + * of little UNDO structures for them. + */ hmp->next_tid = rootvol->ondisk->vol0_next_tid; - kprintf("on-disk next_tid %016llx\n", hmp->next_tid); + bcopy(rootvol->ondisk->vol0_blockmap, hmp->blockmap, + sizeof(hmp->blockmap)); + + /* + * Use the zone limit set by newfs_hammer, or the zone limit set by + * sysctl (for debugging), whichever is smaller. 
+ */ + if (rootvol->ondisk->vol0_zone_limit) { + hammer_off_t vol0_zone_limit; + + vol0_zone_limit = rootvol->ondisk->vol0_zone_limit; + for (i = 0; i < HAMMER_MAX_ZONES; ++i) { + if (hmp->zone_limits[i] > vol0_zone_limit) + hmp->zone_limits[i] = vol0_zone_limit; + } + } hammer_flusher_create(hmp); @@ -424,6 +453,7 @@ hammer_free_hmp(struct mount *mp) mp->mnt_data = NULL; mp->mnt_flag &= ~MNT_LOCAL; hmp->mp = NULL; + hammer_destroy_objid_cache(hmp); kfree(hmp->zbuf, M_HAMMER); lockuninit(&hmp->blockmap_lock); @@ -511,11 +541,21 @@ hammer_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) return(0); } +/* + * Sync the filesystem. Currently we have to run it twice, the second + * one will advance the undo start index to the end index, so if a crash + * occurs no undos will be run on mount. + */ static int hammer_vfs_sync(struct mount *mp, int waitfor) { struct hammer_mount *hmp = (void *)mp->mnt_data; - return(hammer_sync_hmp(hmp, waitfor)); + int error; + + error = hammer_sync_hmp(hmp, waitfor); + if (error == 0) + error = hammer_sync_hmp(hmp, waitfor); + return (error); } /* -- 2.41.0