From 4e17f4657577ceed5625f8c8ef5e1d0885645ead Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 3 May 2008 05:28:55 +0000 Subject: [PATCH] HAMMER 40D/Many: Inode/link-count sequencer cleanup pass. * Move the vfsync from the frontend to the backend. This allows the frontend to passively move inodes to the backend without having to actually start the flush, greatly improving performance. * Use an inode lock to deal with directory entry syncing races between the frontend and the backend. It isn't optimal but it's ok for now. * Massively optimize the backend code by initializing a single cursor for an inode and passing the cursor to procedures, instead of having each procedure initialize its own cursor. * Fix a sequencing issue with the backend. While building the flush state for an inode another process could get in and initiate its own flush, screwing up the flush group and creating confusion. (hmp->flusher_lock) * Don't lose track of HAMMER_FLUSH_SIGNAL flush requests. If we get such a requet but have to flag a reflush, also flag that the reflush is to be signaled (done immediately when the current flush is done). * Remove shared inode locks from hammer_vnops.c. Their original purpose no longer exists. * Simplify the arguments passed to numerous procedures (hammer_ip_first(), etc). --- sys/vfs/hammer/hammer.h | 30 +-- sys/vfs/hammer/hammer_blockmap.c | 10 +- sys/vfs/hammer/hammer_cursor.c | 74 +++++- sys/vfs/hammer/hammer_cursor.h | 17 +- sys/vfs/hammer/hammer_flusher.c | 5 +- sys/vfs/hammer/hammer_freemap.c | 5 +- sys/vfs/hammer/hammer_inode.c | 325 +++++++++++++++---------- sys/vfs/hammer/hammer_ioctl.c | 6 +- sys/vfs/hammer/hammer_object.c | 402 ++++++++++++++++--------------- sys/vfs/hammer/hammer_reblock.c | 4 +- sys/vfs/hammer/hammer_vnops.c | 144 ++++------- 11 files changed, 564 insertions(+), 458 deletions(-) diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index d00748a29f..6cb7ec7deb 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.56 2008/05/02 06:51:57 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.57 2008/05/03 05:28:55 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS @@ -251,6 +251,7 @@ typedef struct hammer_inode *hammer_inode_t; #define HAMMER_INODE_TRUNCATED 0x00010000 #define HAMMER_INODE_DELETING 0x00020000 /* inode delete request (frontend)*/ +#define HAMMER_INODE_RESIGNAL 0x00040000 /* re-signal on re-flush */ #define HAMMER_INODE_MODMASK (HAMMER_INODE_DDIRTY|HAMMER_INODE_RDIRTY| \ HAMMER_INODE_XDIRTY|HAMMER_INODE_BUFS| \ @@ -263,8 +264,7 @@ typedef struct hammer_inode *hammer_inode_t; #define HAMMER_MAX_INODE_CURSORS 4 #define HAMMER_FLUSH_SIGNAL 0x0001 -#define HAMMER_FLUSH_FORCE 0x0002 -#define HAMMER_FLUSH_RECURSION 0x0004 +#define HAMMER_FLUSH_RECURSION 0x0002 /* * Structure used to represent an unsynchronized record in-memory. 
These @@ -522,6 +522,7 @@ struct hammer_mount { int flusher_act; /* currently active flush group */ int flusher_done; /* set to act when complete */ int flusher_next; /* next flush group */ + int flusher_lock; /* lock sequencing of the next flush */ int flusher_exiting; int reclaim_count; thread_t flusher_td; @@ -598,7 +599,7 @@ int hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused); int hammer_install_volume(hammer_mount_t hmp, const char *volname); int hammer_ip_lookup(hammer_cursor_t cursor, hammer_inode_t ip); -int hammer_ip_first(hammer_cursor_t cursor, hammer_inode_t ip); +int hammer_ip_first(hammer_cursor_t cursor); int hammer_ip_next(hammer_cursor_t cursor); int hammer_ip_resolve_record_and_data(hammer_cursor_t cursor); int hammer_ip_resolve_data(hammer_cursor_t cursor); @@ -645,8 +646,9 @@ u_int8_t hammer_get_obj_type(enum vtype vtype); int64_t hammer_directory_namekey(void *name, int len); int hammer_init_cursor(hammer_transaction_t trans, hammer_cursor_t cursor, - struct hammer_node **cache); - + struct hammer_node **cache, hammer_inode_t ip); +int hammer_reinit_cursor(hammer_cursor_t cursor); +void hammer_normalize_cursor(hammer_cursor_t cursor); void hammer_done_cursor(hammer_cursor_t cursor); void hammer_mem_done(hammer_cursor_t cursor); @@ -676,7 +678,7 @@ void *hammer_bnew(struct hammer_mount *hmp, hammer_off_t off, int *errorp, struct hammer_buffer **bufferp); hammer_volume_t hammer_get_root_volume(hammer_mount_t hmp, int *errorp); -int hammer_dowrite(hammer_transaction_t trans, hammer_inode_t ip, +int hammer_dowrite(hammer_cursor_t cursor, hammer_inode_t ip, struct bio *bio); hammer_volume_t hammer_get_volume(hammer_mount_t hmp, @@ -772,14 +774,14 @@ int hammer_ip_del_directory(struct hammer_transaction *trans, hammer_inode_t ip); int hammer_ip_add_record(struct hammer_transaction *trans, hammer_record_t record); -int hammer_ip_delete_range(struct hammer_transaction *trans, - hammer_inode_t ip, int64_t ran_beg, int64_t ran_end); -int hammer_ip_delete_range_all(struct hammer_transaction *trans, - hammer_inode_t ip, int *countp); -int hammer_ip_sync_data(struct hammer_transaction *trans, - hammer_inode_t ip, int64_t offset, - void *data, int bytes); +int hammer_ip_delete_range(hammer_cursor_t cursor, hammer_inode_t ip, + int64_t ran_beg, int64_t ran_end); +int hammer_ip_delete_range_all(hammer_cursor_t cursor, hammer_inode_t ip, + int *countp); +int hammer_ip_sync_data(hammer_cursor_t cursor, hammer_inode_t ip, + int64_t offset, void *data, int bytes); int hammer_ip_sync_record(hammer_transaction_t trans, hammer_record_t rec); +int hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t rec); int hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag, struct ucred *cred); diff --git a/sys/vfs/hammer/hammer_blockmap.c b/sys/vfs/hammer/hammer_blockmap.c index 35f8535999..a8769d17e0 100644 --- a/sys/vfs/hammer/hammer_blockmap.c +++ b/sys/vfs/hammer/hammer_blockmap.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.9 2008/04/29 01:10:37 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.10 2008/05/03 05:28:55 dillon Exp $ */ /* @@ -219,9 +219,15 @@ again: } } else { /* - * We are appending within a bigblock. + * We are appending within a bigblock. It is possible that + * the blockmap has been marked completely free via a prior + * pruning operation. 
We no longer reset the append index + * for that case because it compromises the UNDO by allowing + * data overwrites. */ + /* KKASSERT(layer2->u.phys_offset != HAMMER_BLOCKMAP_FREE); + */ } hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2)); diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c index f97320f0ad..0edcca81e9 100644 --- a/sys/vfs/hammer/hammer_cursor.c +++ b/sys/vfs/hammer/hammer_cursor.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.22 2008/04/24 21:20:33 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.23 2008/05/03 05:28:55 dillon Exp $ */ /* @@ -47,7 +47,7 @@ static int hammer_load_cursor_parent(hammer_cursor_t cursor); */ int hammer_init_cursor(hammer_transaction_t trans, hammer_cursor_t cursor, - struct hammer_node **cache) + struct hammer_node **cache, hammer_inode_t ip) { hammer_volume_t volume; hammer_node_t node; @@ -57,6 +57,18 @@ hammer_init_cursor(hammer_transaction_t trans, hammer_cursor_t cursor, cursor->trans = trans; + /* + * If the cursor operation is on behalf of an inode, lock + * the inode. + */ + if ((cursor->ip = ip) != NULL) { + ++ip->cursor_ip_refs; + if (trans->type == HAMMER_TRANS_FLS) + hammer_lock_ex(&ip->lock); + else + hammer_lock_sh(&ip->lock); + } + /* * Step 1 - acquire a locked node from the cache if possible */ @@ -106,6 +118,38 @@ hammer_init_cursor(hammer_transaction_t trans, hammer_cursor_t cursor, return(error); } +#if 0 +int +hammer_reinit_cursor(hammer_cursor_t cursor) +{ + hammer_transaction_t trans; + hammer_inode_t ip; + struct hammer_node **cache; + + trans = cursor->trans; + ip = cursor->ip; + hammer_done_cursor(cursor); + cache = ip ? &ip->cache[0] : NULL; + error = hammer_init_cursor(trans, cursor, cache, ip); + return (error); +} + +#endif + +/* + * Normalize a cursor. Sometimes cursors can be left in a state + * where node is NULL. If the cursor is in this state, cursor up. + */ +void +hammer_normalize_cursor(hammer_cursor_t cursor) +{ + if (cursor->node == NULL) { + KKASSERT(cursor->parent != NULL); + hammer_cursor_up(cursor); + } +} + + /* * We are finished with a cursor. We NULL out various fields as sanity * check, in case the structure is inappropriately used afterwords. @@ -113,6 +157,8 @@ hammer_init_cursor(hammer_transaction_t trans, hammer_cursor_t cursor, void hammer_done_cursor(hammer_cursor_t cursor) { + hammer_inode_t ip; + if (cursor->parent) { hammer_unlock(&cursor->parent->lock); hammer_rel_node(cursor->parent); @@ -131,8 +177,14 @@ hammer_done_cursor(hammer_cursor_t cursor) hammer_rel_buffer(cursor->record_buffer, 0); cursor->record_buffer = NULL; } - if (cursor->ip) + if ((ip = cursor->ip) != NULL) { hammer_mem_done(cursor); + KKASSERT(ip->cursor_ip_refs > 0); + --ip->cursor_ip_refs; + hammer_unlock(&ip->lock); + cursor->ip = NULL; + } + /* * If we deadlocked this node will be referenced. Do a quick @@ -297,8 +349,14 @@ hammer_cursor_up(hammer_cursor_t cursor) hammer_cursor_downgrade(cursor); /* - * Set the node to its parent. If the parent is NULL we are at - * the root of the filesystem and return ENOENT. + * If the parent is NULL we are at the root of the B-Tree and + * return ENOENT. + */ + if (cursor->parent == NULL) + return (ENOENT); + + /* + * Set the node to its parent. 
*/ hammer_unlock(&cursor->node->lock); hammer_rel_node(cursor->node); @@ -307,11 +365,7 @@ hammer_cursor_up(hammer_cursor_t cursor) cursor->parent = NULL; cursor->parent_index = 0; - if (cursor->node == NULL) { - error = ENOENT; - } else { - error = hammer_load_cursor_parent(cursor); - } + error = hammer_load_cursor_parent(cursor); return(error); } diff --git a/sys/vfs/hammer/hammer_cursor.h b/sys/vfs/hammer/hammer_cursor.h index 3dac4cf80d..1f843b4539 100644 --- a/sys/vfs/hammer/hammer_cursor.h +++ b/sys/vfs/hammer/hammer_cursor.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.16 2008/05/02 01:00:42 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.17 2008/05/03 05:28:55 dillon Exp $ */ /* @@ -53,14 +53,14 @@ struct hammer_cursor { * indices. */ hammer_transaction_t trans; - hammer_node_t parent; - int parent_index; + hammer_node_t parent; + int parent_index; - hammer_node_t node; - int index; + hammer_node_t node; + int index; /* - * Set of a deadlock occurs. hammer_done_cursor() will block on + * Set if a deadlock occurs. hammer_done_cursor() will block on * this after releasing parent and node, before returning. */ hammer_node_t deadlk_node; @@ -130,3 +130,8 @@ typedef struct hammer_cursor *hammer_cursor_t; #define HAMMER_CURSOR_ASOF 0x4000 /* as-of lookup */ #define HAMMER_CURSOR_CREATE_CHECK 0x8000 /* as-of lookup */ +/* + * Flags we can clear when reusing a cursor (we can clear all of them) + */ +#define HAMMER_CURSOR_INITMASK (~0) + diff --git a/sys/vfs/hammer/hammer_flusher.c b/sys/vfs/hammer/hammer_flusher.c index b777579f3c..d75c7e313f 100644 --- a/sys/vfs/hammer/hammer_flusher.c +++ b/sys/vfs/hammer/hammer_flusher.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.9 2008/05/02 01:00:42 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.10 2008/05/03 05:28:55 dillon Exp $ */ /* * HAMMER dependancy flusher thread @@ -106,6 +106,8 @@ hammer_flusher_thread(void *arg) hammer_mount_t hmp = arg; for (;;) { + while (hmp->flusher_lock) + tsleep(&hmp->flusher_lock, 0, "hmrhld", 0); hmp->flusher_act = hmp->flusher_next; ++hmp->flusher_next; kprintf("F"); @@ -183,7 +185,6 @@ hammer_flusher_flush(hammer_mount_t hmp) * its reference, sync, and clean-up. */ TAILQ_REMOVE(&hmp->flush_list, ip, flush_entry); - kprintf("s"); ip->error = hammer_sync_inode(ip); hammer_flush_inode_done(ip); diff --git a/sys/vfs/hammer/hammer_freemap.c b/sys/vfs/hammer/hammer_freemap.c index f3b7157351..f15b3521a9 100644 --- a/sys/vfs/hammer/hammer_freemap.c +++ b/sys/vfs/hammer/hammer_freemap.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.7 2008/04/29 01:10:37 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.8 2008/05/03 05:28:55 dillon Exp $ */ /* @@ -125,7 +125,6 @@ new_volume: } } } - kprintf("hammer_freemap_alloc %016llx\n", result_offset); hammer_modify_volume(trans, trans->rootvol, NULL, 0); blockmap->next_offset = result_offset + HAMMER_LARGEBLOCK_SIZE; hammer_modify_volume_done(trans->rootvol); @@ -152,8 +151,6 @@ hammer_freemap_free(hammer_transaction_t trans, hammer_off_t phys_offset, KKASSERT((phys_offset & HAMMER_LARGEBLOCK_MASK64) == 0); - kprintf("hammer_freemap_free %016llx\n", phys_offset); - hammer_uncache_buffer(trans->hmp, phys_offset); *errorp = 0; ondisk = trans->rootvol->ondisk; diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index 0cfb32a2db..a26d9889af 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.46 2008/05/02 06:51:57 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.47 2008/05/03 05:28:55 dillon Exp $ */ #include "hammer.h" @@ -42,7 +42,7 @@ static int hammer_unload_inode(struct hammer_inode *ip); static void hammer_flush_inode_core(hammer_inode_t ip, int flags); static int hammer_setup_child_callback(hammer_record_t rec, void *data); -static int hammer_inode_unloadable_check(hammer_inode_t ip); +static void hammer_inode_unloadable_check(hammer_inode_t ip); static int hammer_setup_parent_inodes(hammer_record_t record); /* @@ -69,7 +69,10 @@ hammer_vop_inactive(struct vop_inactive_args *ap) * fairly clean, try to recycle it immediately. This can deadlock * in vfsync() if we aren't careful. */ - if (hammer_inode_unloadable_check(ip) && ip->ino_rec.ino_nlinks == 0) + hammer_inode_unloadable_check(ip); + if (ip->flags & HAMMER_INODE_MODMASK) + hammer_flush_inode(ip, 0); + else if (ip->ino_rec.ino_nlinks == 0) vrecycle(ap->a_vp); return(0); } @@ -227,7 +230,7 @@ loop: * Locate the on-disk inode. */ retry: - hammer_init_cursor(trans, &cursor, cache); + hammer_init_cursor(trans, &cursor, cache, NULL); cursor.key_beg.obj_id = ip->obj_id; cursor.key_beg.key = 0; cursor.key_beg.create_tid = 0; @@ -379,9 +382,9 @@ hammer_create_inode(hammer_transaction_t trans, struct vattr *vap, * Called by hammer_sync_inode(). 
*/ static int -hammer_update_inode(hammer_transaction_t trans, hammer_inode_t ip) +hammer_update_inode(hammer_cursor_t cursor, hammer_inode_t ip) { - struct hammer_cursor cursor; + hammer_transaction_t trans = cursor->trans; hammer_record_t record; int error; @@ -397,25 +400,26 @@ retry: */ if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) == HAMMER_INODE_ONDISK) { - hammer_init_cursor(trans, &cursor, &ip->cache[0]); - cursor.key_beg.obj_id = ip->obj_id; - cursor.key_beg.key = 0; - cursor.key_beg.create_tid = 0; - cursor.key_beg.delete_tid = 0; - cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE; - cursor.key_beg.obj_type = 0; - cursor.asof = ip->obj_asof; - cursor.flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF; - cursor.flags |= HAMMER_CURSOR_BACKEND; - - error = hammer_btree_lookup(&cursor); + hammer_normalize_cursor(cursor); + cursor->key_beg.obj_id = ip->obj_id; + cursor->key_beg.key = 0; + cursor->key_beg.create_tid = 0; + cursor->key_beg.delete_tid = 0; + cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE; + cursor->key_beg.obj_type = 0; + cursor->asof = ip->obj_asof; + cursor->flags &= ~HAMMER_CURSOR_INITMASK; + cursor->flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF; + cursor->flags |= HAMMER_CURSOR_BACKEND; + + error = hammer_btree_lookup(cursor); if (error) { kprintf("error %d\n", error); Debugger("hammer_update_inode"); } if (error == 0) { - error = hammer_ip_delete_record(&cursor, trans->tid); + error = hammer_ip_delete_record(cursor, trans->tid); if (error && error != EDEADLK) { kprintf("error %d\n", error); Debugger("hammer_update_inode2"); @@ -423,11 +427,15 @@ retry: if (error == 0) { ip->flags |= HAMMER_INODE_DELONDISK; } - hammer_cache_node(cursor.node, &ip->cache[0]); + hammer_cache_node(cursor->node, &ip->cache[0]); + } + if (error == EDEADLK) { + hammer_done_cursor(cursor); + error = hammer_init_cursor(trans, cursor, + &ip->cache[0], ip); + if (error == 0) + goto retry; } - hammer_done_cursor(&cursor); - if (error == EDEADLK) - goto retry; } /* @@ -449,7 +457,16 @@ retry: record->rec.inode.base.data_len = sizeof(ip->sync_ino_data); record->data = (void *)&ip->sync_ino_data; record->flags |= HAMMER_RECF_INTERLOCK_BE; - error = hammer_ip_sync_record(trans, record); + for (;;) { + error = hammer_ip_sync_record_cursor(cursor, record); + if (error != EDEADLK) + break; + hammer_done_cursor(cursor); + error = hammer_init_cursor(trans, cursor, + &ip->cache[0], ip); + if (error) + break; + } if (error) { kprintf("error %d\n", error); Debugger("hammer_update_inode3"); @@ -503,9 +520,9 @@ retry: * record is updated in-place on the disk. 
*/ static int -hammer_update_itimes(hammer_transaction_t trans, hammer_inode_t ip) +hammer_update_itimes(hammer_cursor_t cursor, hammer_inode_t ip) { - struct hammer_cursor cursor; + hammer_transaction_t trans = cursor->trans; struct hammer_inode_record *rec; int error; @@ -513,18 +530,19 @@ retry: error = 0; if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) == HAMMER_INODE_ONDISK) { - hammer_init_cursor(trans, &cursor, &ip->cache[0]); - cursor.key_beg.obj_id = ip->obj_id; - cursor.key_beg.key = 0; - cursor.key_beg.create_tid = 0; - cursor.key_beg.delete_tid = 0; - cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE; - cursor.key_beg.obj_type = 0; - cursor.asof = ip->obj_asof; - cursor.flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF; - cursor.flags |= HAMMER_CURSOR_BACKEND; - - error = hammer_btree_lookup(&cursor); + hammer_normalize_cursor(cursor); + cursor->key_beg.obj_id = ip->obj_id; + cursor->key_beg.key = 0; + cursor->key_beg.create_tid = 0; + cursor->key_beg.delete_tid = 0; + cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE; + cursor->key_beg.obj_type = 0; + cursor->asof = ip->obj_asof; + cursor->flags &= ~HAMMER_CURSOR_INITMASK; + cursor->flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF; + cursor->flags |= HAMMER_CURSOR_BACKEND; + + error = hammer_btree_lookup(cursor); if (error) { kprintf("error %d\n", error); Debugger("hammer_update_itimes1"); @@ -534,19 +552,23 @@ retry: * Do not generate UNDO records for atime/mtime * updates. */ - rec = &cursor.record->inode; - hammer_modify_buffer(cursor.trans, cursor.record_buffer, + rec = &cursor->record->inode; + hammer_modify_buffer(trans, cursor->record_buffer, NULL, 0); rec->ino_atime = ip->sync_ino_rec.ino_atime; rec->ino_mtime = ip->sync_ino_rec.ino_mtime; - hammer_modify_buffer_done(cursor.record_buffer); + hammer_modify_buffer_done(cursor->record_buffer); ip->sync_flags &= ~HAMMER_INODE_ITIMES; /* XXX recalculate crc */ - hammer_cache_node(cursor.node, &ip->cache[0]); + hammer_cache_node(cursor->node, &ip->cache[0]); + } + if (error == EDEADLK) { + hammer_done_cursor(cursor); + error = hammer_init_cursor(trans, cursor, + &ip->cache[0], ip); + if (error == 0) + goto retry; } - hammer_done_cursor(&cursor); - if (error == EDEADLK) - goto retry; } return(error); } @@ -571,25 +593,17 @@ hammer_rel_inode(struct hammer_inode *ip, int flush) * Determine whether on-disk action is needed for * the inode's final disposition. */ - if (hammer_inode_unloadable_check(ip)) { + hammer_inode_unloadable_check(ip); + if (ip->flags & HAMMER_INODE_MODMASK) { + hammer_flush_inode(ip, 0); + } else if (ip->lock.refs == 1) { hammer_unload_inode(ip); break; } - hammer_flush_inode(ip, 0); } else { - /* - * We gotta flush inodes which do not have vnode - * associations. - */ -#if 0 - if (ip->vp == NULL) { - kprintf("v%d:%04x\n", ip->flush_state, ip->flags); + if (flush) hammer_flush_inode(ip, 0); - } else -#endif - if (flush) { - hammer_flush_inode(ip, 0); - } + /* * The inode still has multiple refs, try to drop * one ref. 
@@ -654,7 +668,7 @@ hammer_unload_inode(struct hammer_inode *ip) * HAMMER_INODE_RDIRTY: Inode record has been updated * HAMMER_INODE_DDIRTY: Inode data has been updated * HAMMER_INODE_XDIRTY: Dirty in-memory records - * HAMMER_INODE_BUFS: Dirty front-end buffer cache buffers + * HAMMER_INODE_BUFS: Dirty buffer cache buffers * HAMMER_INODE_DELETED: Inode record/data must be deleted * HAMMER_INODE_ITIMES: mtime/atime has been updated */ @@ -689,8 +703,7 @@ hammer_flush_inode(hammer_inode_t ip, int flags) * state we have to put it back into an IDLE state so we can * drop the extra ref. */ - if ((ip->flags & HAMMER_INODE_MODMASK) == 0 && - (flags & HAMMER_FLUSH_FORCE) == 0) { + if ((ip->flags & HAMMER_INODE_MODMASK) == 0) { if (ip->flush_state == HAMMER_FST_SETUP) { ip->flush_state = HAMMER_FST_IDLE; hammer_rel_inode(ip, 0); @@ -732,11 +745,14 @@ hammer_flush_inode(hammer_inode_t ip, int flags) * We can continue if good >= 0. Determine how many records * under our inode can be flushed (and mark them). */ - kprintf("g%d", good); if (good >= 0) { hammer_flush_inode_core(ip, flags); } else { ip->flags |= HAMMER_INODE_REFLUSH; + if (flags & HAMMER_FLUSH_SIGNAL) { + ip->flags |= HAMMER_INODE_RESIGNAL; + hammer_flusher_async(ip->hmp); + } } break; default: @@ -746,6 +762,10 @@ hammer_flush_inode(hammer_inode_t ip, int flags) */ if ((ip->flags & HAMMER_INODE_REFLUSH) == 0) ip->flags |= HAMMER_INODE_REFLUSH; + if (flags & HAMMER_FLUSH_SIGNAL) { + ip->flags |= HAMMER_INODE_RESIGNAL; + hammer_flusher_async(ip->hmp); + } break; } } @@ -874,13 +894,18 @@ static void hammer_flush_inode_core(hammer_inode_t ip, int flags) { int go_count; - int error; + /* + * Set flush state and prevent the flusher from cycling into + * the next flush group. Do not place the ip on the list yet. + * Inodes not in the idle state get an extra reference. + */ KKASSERT(ip->flush_state != HAMMER_FST_FLUSH); if (ip->flush_state == HAMMER_FST_IDLE) hammer_ref(&ip->lock); ip->flush_state = HAMMER_FST_FLUSH; ip->flush_group = ip->hmp->flusher_next; + ++ip->hmp->flusher_lock; /* * Figure out how many in-memory records we can actually flush @@ -903,18 +928,26 @@ hammer_flush_inode_core(hammer_inode_t ip, int flags) if ((ip->flags & HAMMER_INODE_MODMASK_NOXDIRTY) == 0) { ip->flags |= HAMMER_INODE_REFLUSH; ip->flush_state = HAMMER_FST_SETUP; + if (flags & HAMMER_FLUSH_SIGNAL) { + ip->flags |= HAMMER_INODE_RESIGNAL; + hammer_flusher_async(ip->hmp); + } + if (--ip->hmp->flusher_lock == 0) + wakeup(&ip->hmp->flusher_lock); return; } } +#if 0 /* - * Inodes not in an IDLE state get an extra reference. + * XXX - don't sync the buffer cache on the frontend, the backend + * will do it and we do not want to prematurely activate the backend. * - * Place the inode in a flush state and sync all frontend - * information to the backend. + * Sync the buffer cache if the caller wants to flush now, otherwise + * don't (any write bios will wake up the flusher). */ - - if ((flags & HAMMER_FLUSH_RECURSION) == 0) { + if ((flags & HAMMER_FLUSH_RECURSION) == 0 && + (flags & HAMMER_FLUSH_SIGNAL)) { if (ip->vp != NULL) error = vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL); else @@ -926,6 +959,7 @@ hammer_flush_inode_core(hammer_inode_t ip, int flags) * bioq. */ ip->flags |= HAMMER_INODE_WRITE_ALT; +#endif /* * Snapshot the state of the inode for the backend flusher. 
@@ -941,25 +975,14 @@ hammer_flush_inode_core(hammer_inode_t ip, int flags) ip->sync_trunc_off = ip->trunc_off; ip->sync_ino_rec = ip->ino_rec; ip->sync_ino_data = ip->ino_data; - ip->flags &= ~HAMMER_INODE_MODMASK | - HAMMER_INODE_TRUNCATED | HAMMER_INODE_BUFS; - - /* - * Fix up the dirty buffer status. - */ - if (ip->vp == NULL || RB_ROOT(&ip->vp->v_rbdirty_tree) == NULL) { - if (TAILQ_FIRST(&ip->bio_alt_list) == NULL) - ip->flags &= ~HAMMER_INODE_BUFS; - } - if (TAILQ_FIRST(&ip->bio_list)) - ip->sync_flags |= HAMMER_INODE_BUFS; - else - ip->sync_flags &= ~HAMMER_INODE_BUFS; + ip->flags &= ~HAMMER_INODE_MODMASK | HAMMER_INODE_TRUNCATED; /* - * The flusher inherits our inode and reference. + * The flusher list inherits our inode and reference. */ TAILQ_INSERT_TAIL(&ip->hmp->flush_list, ip, flush_entry); + if (--ip->hmp->flusher_lock == 0) + wakeup(&ip->hmp->flusher_lock); if (flags & HAMMER_FLUSH_SIGNAL) hammer_flusher_async(ip->hmp); @@ -1112,12 +1135,6 @@ hammer_flush_inode_done(hammer_inode_t ip) * Merge left-over flags back into the frontend and fix the state. */ ip->flags |= ip->sync_flags; - if (TAILQ_EMPTY(&ip->target_list) && RB_EMPTY(&ip->rec_tree)) { - ip->flush_state = HAMMER_FST_IDLE; - dorel = 1; - } else { - ip->flush_state = HAMMER_FST_SETUP; - } /* * The backend may have adjusted nlinks, so if the adjusted nlinks @@ -1133,35 +1150,60 @@ hammer_flush_inode_done(hammer_inode_t ip) while ((bio = TAILQ_FIRST(&ip->bio_alt_list)) != NULL) { TAILQ_REMOVE(&ip->bio_alt_list, bio, bio_act); TAILQ_INSERT_TAIL(&ip->bio_list, bio, bio_act); - kprintf("d"); + } + /* + * Fix up the dirty buffer status. + */ + if (TAILQ_FIRST(&ip->bio_list) || + (ip->vp && RB_ROOT(&ip->vp->v_rbdirty_tree))) { ip->flags |= HAMMER_INODE_BUFS; - ip->flags |= HAMMER_INODE_REFLUSH; } /* * Re-set the XDIRTY flag if some of the inode's in-memory records * could not be flushed. */ - if (RB_ROOT(&ip->rec_tree)) { + if (RB_ROOT(&ip->rec_tree)) ip->flags |= HAMMER_INODE_XDIRTY; + + /* + * Do not lose track of inodes which no longer have vnode + * assocations, otherwise they may never get flushed again. + */ + if ((ip->flags & HAMMER_INODE_MODMASK) && ip->vp == NULL) ip->flags |= HAMMER_INODE_REFLUSH; - kprintf("e"); + + /* + * Adjust flush_state. The target state (idle or setup) shouldn't + * be terribly important since we will reflush if we really need + * to do anything. XXX + */ + if (TAILQ_EMPTY(&ip->target_list) && RB_EMPTY(&ip->rec_tree)) { + ip->flush_state = HAMMER_FST_IDLE; + dorel = 1; + } else { + ip->flush_state = HAMMER_FST_SETUP; } /* * If the frontend made more changes and requested another flush, - * do it. + * then try to get it running. */ if (ip->flags & HAMMER_INODE_REFLUSH) { ip->flags &= ~HAMMER_INODE_REFLUSH; - hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL); - if (ip->flush_state == HAMMER_FST_IDLE) { - if (ip->flags & HAMMER_INODE_FLUSHW) { - ip->flags &= ~HAMMER_INODE_FLUSHW; - wakeup(&ip->flags); - } + if (ip->flags & HAMMER_INODE_RESIGNAL) { + ip->flags &= ~HAMMER_INODE_RESIGNAL; + hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL); + } else { + hammer_flush_inode(ip, 0); } - } else { + } + + /* + * Finally, if the frontend is waiting for a flush to complete, + * wake it up. 
+ */ + if (ip->flush_state != HAMMER_FST_FLUSH) { if (ip->flags & HAMMER_INODE_FLUSHW) { ip->flags &= ~HAMMER_INODE_FLUSHW; wakeup(&ip->flags); @@ -1178,7 +1220,8 @@ hammer_flush_inode_done(hammer_inode_t ip) static int hammer_sync_record_callback(hammer_record_t record, void *data) { - hammer_transaction_t trans = data; + hammer_cursor_t cursor = data; + hammer_transaction_t trans = cursor->trans; int error; /* @@ -1204,10 +1247,7 @@ hammer_sync_record_callback(hammer_record_t record, void *data) * record out, but the flush completion code converts it to * a delete-on-disk record instead of destroying it. */ - if (record->flags & HAMMER_RECF_INTERLOCK_BE) { - hammer_flush_record_done(record, 0); - return(0); - } + KKASSERT((record->flags & HAMMER_RECF_INTERLOCK_BE) == 0); record->flags |= HAMMER_RECF_INTERLOCK_BE; /* @@ -1229,7 +1269,17 @@ hammer_sync_record_callback(hammer_record_t record, void *data) */ if (record->type != HAMMER_MEM_RECORD_DEL) record->rec.inode.base.base.create_tid = trans->tid; - error = hammer_ip_sync_record(trans, record); + for (;;) { + error = hammer_ip_sync_record_cursor(cursor, record); + if (error != EDEADLK) + break; + hammer_done_cursor(cursor); + error = hammer_init_cursor(trans, cursor, &record->ip->cache[0], + record->ip); + if (error) + break; + } + record->flags &= ~HAMMER_RECF_CONVERT_DELETE; if (error) { error = -error; @@ -1250,6 +1300,7 @@ int hammer_sync_inode(hammer_inode_t ip) { struct hammer_transaction trans; + struct hammer_cursor cursor; struct bio *bio; hammer_record_t depend; hammer_record_t next; @@ -1260,6 +1311,9 @@ hammer_sync_inode(hammer_inode_t ip) return(0); hammer_start_transaction_fls(&trans, ip->hmp); + error = hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip); + if (error) + goto done; /* * Any directory records referencing this inode which are not in @@ -1299,6 +1353,34 @@ hammer_sync_inode(hammer_inode_t ip) ip->sync_flags |= HAMMER_INODE_RDIRTY; } + /* + * Queue up any pending dirty buffers then set a flag to cause + * any further BIOs to go to the alternative queue. + */ + if (ip->vp) + error = vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL); + ip->flags |= HAMMER_INODE_WRITE_ALT; + + /* + * The buffer cache may contain dirty buffers beyond the inode + * state we copied from the frontend to the backend. Because + * we are syncing our buffer cache on the backend, resync + * the truncation point and the file size so we don't wipe out + * any data. + * + * Syncing the buffer cache on the frontend has serious problems + * because it prevents us from passively queueing dirty inodes + * to the backend (the BIO's could stall indefinitely). + */ + if (ip->flags & HAMMER_INODE_TRUNCATED) { + ip->sync_trunc_off = ip->trunc_off; + ip->sync_flags |= HAMMER_INODE_TRUNCATED; + } + if (ip->sync_ino_rec.ino_size != ip->ino_rec.ino_size) { + ip->sync_ino_rec.ino_size = ip->ino_rec.ino_size; + ip->sync_flags |= HAMMER_INODE_RDIRTY; + } + /* * If there is a trunction queued destroy any data past the (aligned) * truncation point. Userland will have dealt with the buffer @@ -1328,7 +1410,7 @@ hammer_sync_inode(hammer_inode_t ip) * while we were blocked so do not just unconditionally * set it to the maximum offset. 
*/ - error = hammer_ip_delete_range(&trans, ip, + error = hammer_ip_delete_range(&cursor, ip, aligned_trunc_off, 0x7FFFFFFFFFFFFFFFLL); if (error) @@ -1352,7 +1434,7 @@ hammer_sync_inode(hammer_inode_t ip) */ if (error == 0) { tmp_error = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL, - hammer_sync_record_callback, &trans); + hammer_sync_record_callback, &cursor); if (tmp_error < 0) tmp_error = -error; if (tmp_error) @@ -1373,7 +1455,7 @@ hammer_sync_inode(hammer_inode_t ip) kprintf("Y"); ip->flags |= HAMMER_INODE_DELETED; - error = hammer_ip_delete_range_all(&trans, ip, &count1); + error = hammer_ip_delete_range_all(&cursor, ip, &count1); if (error == 0) { ip->sync_flags &= ~HAMMER_INODE_DELETING; ip->sync_flags &= ~HAMMER_INODE_TRUNCATED; @@ -1405,7 +1487,7 @@ hammer_sync_inode(hammer_inode_t ip) */ while ((bio = TAILQ_FIRST(&ip->bio_list)) != NULL) { TAILQ_REMOVE(&ip->bio_list, bio, bio_act); - tmp_error = hammer_dowrite(&trans, ip, bio); + tmp_error = hammer_dowrite(&cursor, ip, bio); if (tmp_error) error = tmp_error; } @@ -1479,23 +1561,24 @@ hammer_sync_inode(hammer_inode_t ip) * If *ONLY* the ITIMES flag is set we can update the record in-place. */ if (ip->flags & HAMMER_INODE_DELETED) { - error = hammer_update_inode(&trans, ip); + error = hammer_update_inode(&cursor, ip); } else if ((ip->sync_flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) == HAMMER_INODE_ITIMES) { - error = hammer_update_itimes(&trans, ip); + error = hammer_update_itimes(&cursor, ip); } else if (ip->sync_flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) { - error = hammer_update_inode(&trans, ip); + error = hammer_update_inode(&cursor, ip); } if (error) Debugger("hammer_update_itimes/inode errored"); - +done: /* * Save the TID we used to sync the inode with to make sure we * do not improperly reuse it. */ + hammer_done_cursor(&cursor); hammer_done_transaction(&trans); return(error); } @@ -1508,7 +1591,7 @@ hammer_sync_inode(hammer_inode_t ip) * At this point if the inode's nlinks count is zero we want to destroy * it, which may mean destroying it on-media too. */ -static int +static void hammer_inode_unloadable_check(hammer_inode_t ip) { /* @@ -1525,14 +1608,6 @@ hammer_inode_unloadable_check(hammer_inode_t ip) ip->flags |= HAMMER_INODE_TRUNCATED; ip->trunc_off = 0; } - - /* - * If only one ref remains and the inode is not dirty, telling - * the caller that he can dispose of the inode. - */ - if (ip->lock.refs == 1 && (ip->flags & HAMMER_INODE_MODMASK) == 0) - return(1); - return(0); } void diff --git a/sys/vfs/hammer/hammer_ioctl.c b/sys/vfs/hammer/hammer_ioctl.c index b792d22e80..54ee1f5e2f 100644 --- a/sys/vfs/hammer/hammer_ioctl.c +++ b/sys/vfs/hammer/hammer_ioctl.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.10 2008/04/27 00:45:37 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.11 2008/05/03 05:28:55 dillon Exp $ */ #include "hammer.h" @@ -107,7 +107,7 @@ hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip, return(EINVAL); retry: - error = hammer_init_cursor(trans, &cursor, NULL); + error = hammer_init_cursor(trans, &cursor, NULL, NULL); if (error) { hammer_done_cursor(&cursor); return(error); @@ -408,7 +408,7 @@ hammer_ioc_gethistory(hammer_transaction_t trans, hammer_inode_t ip, * (create_tid of 0) at the moment. A create_tid of 0 has * a special meaning and cannot be specified in the cursor. 
*/ - error = hammer_init_cursor(trans, &cursor, &ip->cache[0]); + error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL); if (error) { hammer_done_cursor(&cursor); return(error); diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c index 68e367d146..840cd360cb 100644 --- a/sys/vfs/hammer/hammer_object.c +++ b/sys/vfs/hammer/hammer_object.c @@ -31,14 +31,14 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.50 2008/05/02 16:41:26 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.51 2008/05/03 05:28:55 dillon Exp $ */ #include "hammer.h" static int hammer_mem_add(hammer_transaction_t trans, hammer_record_t record); static int hammer_mem_lookup(hammer_cursor_t cursor, hammer_inode_t ip); -static int hammer_mem_first(hammer_cursor_t cursor, hammer_inode_t ip); +static int hammer_mem_first(hammer_cursor_t cursor); /* * Red-black tree support. @@ -194,7 +194,6 @@ void hammer_flush_record_done(hammer_record_t record, int error) { hammer_inode_t target_ip; - int cleanup = 0; KKASSERT(record->flush_state == HAMMER_FST_FLUSH); KKASSERT(record->flags & HAMMER_RECF_INTERLOCK_BE); @@ -205,29 +204,9 @@ hammer_flush_record_done(hammer_record_t record, int error) * record to its media. Leave the record intact. */ Debugger("flush_record_done error"); - } else if (record->flags & HAMMER_RECF_CONVERT_DELETE) { - /* - * deleted-record to delete-on-disk conversion, occurs when - * we sync a record to disk which is marked deleted by the - * frontend, but not deleted from the point of view of the - * backend. - */ - if (record->flags & HAMMER_RECF_DELETED_BE) { - record->flags |= HAMMER_RECF_DELETED_FE; - cleanup = 1; - } else { - KKASSERT(record->type == HAMMER_MEM_RECORD_DEL); - } - } else { - /* - * Normal completion, record has been disposed of (by - * having been synchronized to the media). 
- */ - record->flags |= HAMMER_RECF_DELETED_FE; - record->flags |= HAMMER_RECF_DELETED_BE; - cleanup = 1; } - if (cleanup) { + + if (record->flags & HAMMER_RECF_DELETED_BE) { if ((target_ip = record->target_ip) != NULL) { TAILQ_REMOVE(&target_ip->target_list, record, target_entry); @@ -241,9 +220,7 @@ hammer_flush_record_done(hammer_record_t record, int error) else record->flush_state = HAMMER_FST_IDLE; } - record->flags &= ~HAMMER_RECF_INTERLOCK_BE; - record->flags &= ~HAMMER_RECF_CONVERT_DELETE; if (record->flags & HAMMER_RECF_WANTED) { record->flags &= ~HAMMER_RECF_WANTED; wakeup(record); @@ -312,8 +289,10 @@ hammer_ip_iterate_mem_good(hammer_cursor_t cursor, hammer_record_t record) if (cursor->flags & HAMMER_CURSOR_BACKEND) { if (record->flags & HAMMER_RECF_DELETED_BE) return(0); +#if 0 if ((record->flags & HAMMER_RECF_INTERLOCK_BE) == 0) return(0); +#endif } else { if (record->flags & HAMMER_RECF_DELETED_FE) return(0); @@ -438,29 +417,18 @@ hammer_mem_lookup(hammer_cursor_t cursor, hammer_inode_t ip) */ static int -hammer_mem_first(hammer_cursor_t cursor, hammer_inode_t ip) +hammer_mem_first(hammer_cursor_t cursor) { + hammer_inode_t ip; + + ip = cursor->ip; + KKASSERT(ip != NULL); + if (cursor->iprec) { hammer_rel_mem_record(cursor->iprec); cursor->iprec = NULL; } - if (cursor->ip) { - KKASSERT(cursor->ip->cursor_ip_refs > 0); - --cursor->ip->cursor_ip_refs; -#if 0 - hammer_rec_rb_tree_scan_info_done(&cursor->scan, - &cursor->ip->rec_tree); -#endif - } - cursor->ip = ip; -#if 0 - hammer_rec_rb_tree_scan_info_link(&cursor->scan, &ip->rec_tree); -#endif - ++ip->cursor_ip_refs; -#if 0 - cursor->scan.node = NULL; -#endif hammer_rec_rb_tree_RB_SCAN(&ip->rec_tree, hammer_rec_scan_cmp, hammer_rec_scan_callback, cursor); @@ -468,27 +436,14 @@ hammer_mem_first(hammer_cursor_t cursor, hammer_inode_t ip) * Adjust scan.node and keep it linked into the RB-tree so we can * hold the cursor through third party modifications of the RB-tree. */ - if (cursor->iprec) { -#if 0 - cursor->scan.node = hammer_rec_rb_tree_RB_NEXT(cursor->iprec); -#endif + if (cursor->iprec) return(0); - } return(ENOENT); } void hammer_mem_done(hammer_cursor_t cursor) { - if (cursor->ip) { - KKASSERT(cursor->ip->cursor_ip_refs > 0); - --cursor->ip->cursor_ip_refs; -#if 0 - hammer_rec_rb_tree_scan_info_done(&cursor->scan, - &cursor->ip->rec_tree); -#endif - cursor->ip = NULL; - } if (cursor->iprec) { hammer_rel_mem_record(cursor->iprec); cursor->iprec = NULL; @@ -694,10 +649,10 @@ hammer_ip_add_record(struct hammer_transaction *trans, hammer_record_t record) * swath of space whether the data is truncated or not. 
*/ int -hammer_ip_sync_data(hammer_transaction_t trans, hammer_inode_t ip, +hammer_ip_sync_data(hammer_cursor_t cursor, hammer_inode_t ip, int64_t offset, void *data, int bytes) { - struct hammer_cursor cursor; + hammer_transaction_t trans = cursor->trans; hammer_record_ondisk_t rec; union hammer_btree_elm elm; hammer_off_t rec_offset; @@ -706,29 +661,30 @@ hammer_ip_sync_data(hammer_transaction_t trans, hammer_inode_t ip, KKASSERT((offset & HAMMER_BUFMASK) == 0); KKASSERT(trans->type == HAMMER_TRANS_FLS); + KKASSERT(bytes != 0); retry: - error = hammer_init_cursor(trans, &cursor, &ip->cache[0]); - if (error) - return(error); - cursor.key_beg.obj_id = ip->obj_id; - cursor.key_beg.key = offset + bytes; - cursor.key_beg.create_tid = trans->tid; - cursor.key_beg.delete_tid = 0; - cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; - cursor.asof = trans->tid; - cursor.flags |= HAMMER_CURSOR_INSERT; - cursor.flags |= HAMMER_CURSOR_BACKEND; + hammer_normalize_cursor(cursor); + cursor->key_beg.obj_id = ip->obj_id; + cursor->key_beg.key = offset + bytes; + cursor->key_beg.create_tid = trans->tid; + cursor->key_beg.delete_tid = 0; + cursor->key_beg.rec_type = HAMMER_RECTYPE_DATA; + cursor->asof = trans->tid; + cursor->flags &= ~HAMMER_CURSOR_INITMASK; + cursor->flags |= HAMMER_CURSOR_INSERT; + cursor->flags |= HAMMER_CURSOR_BACKEND; /* * Issue a lookup to position the cursor. */ - error = hammer_btree_lookup(&cursor); + error = hammer_btree_lookup(cursor); if (error == 0) { kprintf("hammer_ip_sync_data: duplicate data at " "(%lld,%d) tid %016llx\n", offset, bytes, trans->tid); - hammer_print_btree_elm(&cursor.node->ondisk->elms[cursor.index], - HAMMER_BTREE_TYPE_LEAF, cursor.index); + hammer_print_btree_elm(&cursor->node->ondisk-> + elms[cursor->index], + HAMMER_BTREE_TYPE_LEAF, cursor->index); panic("Duplicate data"); error = EIO; } @@ -740,9 +696,9 @@ retry: * can cross buffer boundaries so we may have to split our bcopy. */ rec = hammer_alloc_record(trans, &rec_offset, HAMMER_RECTYPE_DATA, - &cursor.record_buffer, + &cursor->record_buffer, bytes, &bdata, - &cursor.data_buffer, &error); + &cursor->data_buffer, &error); if (rec == NULL) goto done; if (hammer_debug_general & 0x1000) @@ -755,7 +711,7 @@ retry: * buffers as modified. If we do it again we will generate * unnecessary undo elements. 
*/ - hammer_modify_buffer(trans, cursor.record_buffer, NULL, 0); + hammer_modify_buffer(trans, cursor->record_buffer, NULL, 0); rec->base.base.btype = HAMMER_BTREE_TYPE_RECORD; rec->base.base.obj_id = ip->obj_id; rec->base.base.key = offset + bytes; @@ -763,12 +719,12 @@ retry: rec->base.base.delete_tid = 0; rec->base.base.rec_type = HAMMER_RECTYPE_DATA; rec->base.data_crc = crc32(data, bytes); - hammer_modify_buffer_done(cursor.record_buffer); + hammer_modify_buffer_done(cursor->record_buffer); KKASSERT(rec->base.data_len == bytes); - hammer_modify_buffer(trans, cursor.data_buffer, NULL, 0); + hammer_modify_buffer(trans, cursor->data_buffer, NULL, 0); bcopy(data, bdata, bytes); - hammer_modify_buffer_done(cursor.data_buffer); + hammer_modify_buffer_done(cursor->data_buffer); elm.leaf.base = rec->base.base; elm.leaf.rec_offset = rec_offset; @@ -783,15 +739,18 @@ retry: */ ip->flags |= HAMMER_INODE_DONDISK; - error = hammer_btree_insert(&cursor, &elm); + error = hammer_btree_insert(cursor, &elm); if (error == 0) goto done; hammer_blockmap_free(trans, rec_offset, HAMMER_RECORD_SIZE); done: - hammer_done_cursor(&cursor); - if (error == EDEADLK) - goto retry; + if (error == EDEADLK) { + hammer_done_cursor(cursor); + error = hammer_init_cursor(trans, cursor, &ip->cache[0], ip); + if (error == 0) + goto retry; + } return(error); } @@ -801,12 +760,34 @@ done: * * This routine can only be called by the backend and the record * must have been interlocked with BE. It will remain interlocked on - * return. The caller is responsible for the record's disposition. + * return. If no error occurs the record will be marked deleted but + * the caller is responsible for its final disposition. + * + * Multiple calls may be aggregated with the same cursor using + * hammer_ip_sync_record_cursor(). The caller must handle EDEADLK + * in that case. */ int hammer_ip_sync_record(hammer_transaction_t trans, hammer_record_t record) { struct hammer_cursor cursor; + int error; + + do { + error = hammer_init_cursor(trans, &cursor, + &record->ip->cache[0], record->ip); + if (error) + return(error); + error = hammer_ip_sync_record_cursor(&cursor, record); + hammer_done_cursor(&cursor); + } while (error == EDEADLK); + return (error); +} + +int +hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record) +{ + hammer_transaction_t trans = cursor->trans; hammer_record_ondisk_t rec; union hammer_btree_elm elm; hammer_off_t rec_offset; @@ -816,23 +797,24 @@ hammer_ip_sync_record(hammer_transaction_t trans, hammer_record_t record) KKASSERT(record->flush_state == HAMMER_FST_FLUSH); KKASSERT(record->flags & HAMMER_RECF_INTERLOCK_BE); -retry: - /* - * Get a cursor, we will either be inserting or deleting. - */ - error = hammer_init_cursor(trans, &cursor, &record->ip->cache[0]); - if (error) - return(error); - cursor.key_beg = record->rec.base.base; - cursor.flags |= HAMMER_CURSOR_BACKEND; + hammer_normalize_cursor(cursor); + cursor->key_beg = record->rec.base.base; + cursor->flags &= ~HAMMER_CURSOR_INITMASK; + cursor->flags |= HAMMER_CURSOR_BACKEND; + cursor->flags &= ~HAMMER_CURSOR_INSERT; /* * If we are deleting an exact match must be found on-disk. 
*/ if (record->type == HAMMER_MEM_RECORD_DEL) { - error = hammer_btree_lookup(&cursor); - if (error == 0) - error = hammer_ip_delete_record(&cursor, trans->tid); + error = hammer_btree_lookup(cursor); + if (error == 0) { + error = hammer_ip_delete_record(cursor, trans->tid); + if (error == 0) { + record->flags |= HAMMER_RECF_DELETED_FE; + record->flags |= HAMMER_RECF_DELETED_BE; + } + } goto done; } @@ -844,10 +826,10 @@ retry: * we may have to iterate the low 32 bits of the key to find an unused * key. */ - cursor.flags |= HAMMER_CURSOR_INSERT; + cursor->flags |= HAMMER_CURSOR_INSERT; for (;;) { - error = hammer_btree_lookup(&cursor); + error = hammer_btree_lookup(cursor); if (error) break; if (record->rec.base.base.rec_type != HAMMER_RECTYPE_DIRENTRY) { @@ -861,7 +843,7 @@ retry: ++trans->hmp->namekey_iterator; record->rec.base.base.key &= ~(0xFFFFFFFFLL); record->rec.base.base.key |= trans->hmp->namekey_iterator; - cursor.key_beg.key = record->rec.base.base.key; + cursor->key_beg.key = record->rec.base.base.key; } if (error != ENOENT) goto done; @@ -876,7 +858,7 @@ retry: if (record->data == NULL) { rec = hammer_alloc_record(trans, &rec_offset, record->rec.base.base.rec_type, - &cursor.record_buffer, + &cursor->record_buffer, 0, &bdata, NULL, &error); if (hammer_debug_general & 0x1000) @@ -884,7 +866,7 @@ retry: } else if (record->flags & HAMMER_RECF_INBAND) { rec = hammer_alloc_record(trans, &rec_offset, record->rec.base.base.rec_type, - &cursor.record_buffer, + &cursor->record_buffer, record->rec.base.data_len, &bdata, NULL, &error); if (hammer_debug_general & 0x1000) @@ -892,9 +874,9 @@ retry: } else { rec = hammer_alloc_record(trans, &rec_offset, record->rec.base.base.rec_type, - &cursor.record_buffer, + &cursor->record_buffer, record->rec.base.data_len, &bdata, - &cursor.data_buffer, &error); + &cursor->data_buffer, &error); if (hammer_debug_general & 0x1000) kprintf("OOB RECORD DATA REC %016llx DATA %016llx LEN=%d\n", rec_offset, rec->base.data_off, record->rec.base.data_len); } @@ -905,7 +887,7 @@ retry: /* * Fill in the remaining fields and insert our B-Tree node. */ - hammer_modify_buffer(trans, cursor.record_buffer, NULL, 0); + hammer_modify_buffer(trans, cursor->record_buffer, NULL, 0); rec->base.base = record->rec.base.base; bcopy(&record->rec.base + 1, &rec->base + 1, HAMMER_RECORD_SIZE - sizeof(record->rec.base)); @@ -918,13 +900,13 @@ retry: bcopy(record->data, bdata, rec->base.data_len); } else if (record->data) { rec->base.data_crc = crc32(record->data, rec->base.data_len); - hammer_modify_buffer(trans, cursor.data_buffer, NULL, 0); + hammer_modify_buffer(trans, cursor->data_buffer, NULL, 0); bcopy(record->data, bdata, rec->base.data_len); - hammer_modify_buffer_done(cursor.data_buffer); + hammer_modify_buffer_done(cursor->data_buffer); } else { rec->base.data_len = record->rec.base.data_len; } - hammer_modify_buffer_done(cursor.record_buffer); + hammer_modify_buffer_done(cursor->record_buffer); elm.leaf.base = record->rec.base.base; elm.leaf.rec_offset = rec_offset; @@ -932,7 +914,7 @@ retry: elm.leaf.data_len = rec->base.data_len; elm.leaf.data_crc = rec->base.data_crc; - error = hammer_btree_insert(&cursor, &elm); + error = hammer_btree_insert(cursor, &elm); /* * This occurs when the frontend creates a record and queues it to @@ -945,26 +927,26 @@ retry: * The DEL record then masks the record synced to disk until another * round can delete it for real. 
*/ - if (error == 0 && (record->flags & HAMMER_RECF_CONVERT_DELETE)) { - KKASSERT(record->type == HAMMER_MEM_RECORD_ADD); - record->flags &= ~HAMMER_RECF_DELETED_FE; - record->type = HAMMER_MEM_RECORD_DEL; - if (record->flush_state == HAMMER_FST_SETUP) { - hammer_test_inode(record->ip); - hammer_test_inode(record->target_ip); + if (error == 0) { + if (record->flags & HAMMER_RECF_CONVERT_DELETE) { + KKASSERT(record->type == HAMMER_MEM_RECORD_ADD); + record->flags &= ~HAMMER_RECF_DELETED_FE; + record->type = HAMMER_MEM_RECORD_DEL; + if (record->flush_state == HAMMER_FST_SETUP) { + hammer_test_inode(record->ip); + hammer_test_inode(record->target_ip); + } + record->flags &= ~HAMMER_RECF_CONVERT_DELETE; + } else { + record->flags |= HAMMER_RECF_DELETED_FE; + record->flags |= HAMMER_RECF_DELETED_BE; } - } - - /* - * If the error occured unwind the operation. - */ - if (error) + } else { hammer_blockmap_free(trans, rec_offset, HAMMER_RECORD_SIZE); + /* XXX free data buffer? */ + } done: - hammer_done_cursor(&cursor); - if (error == EDEADLK) - goto retry; return(error); } @@ -1097,10 +1079,13 @@ hammer_ip_lookup(hammer_cursor_t cursor, struct hammer_inode *ip) * the cursor and try again. */ int -hammer_ip_first(hammer_cursor_t cursor, struct hammer_inode *ip) +hammer_ip_first(hammer_cursor_t cursor) { + hammer_inode_t ip = cursor->ip; int error; + KKASSERT(ip != NULL); + /* * Clean up fields and setup for merged scan */ @@ -1147,7 +1132,7 @@ hammer_ip_first(hammer_cursor_t cursor, struct hammer_inode *ip) * Search the in-memory record list (Red-Black tree). Unlike the * B-Tree search, mem_first checks for records in the range. */ - error = hammer_mem_first(cursor, ip); + error = hammer_mem_first(cursor); if (error && error != ENOENT) return(error); if (error == 0) { @@ -1231,16 +1216,24 @@ next_memory: if (cursor->iprec) { KKASSERT(cursor->iprec == rec); cursor->flags &= ~HAMMER_CURSOR_ATEMEM; -#if 0 - cursor->scan.node = - hammer_rec_rb_tree_RB_NEXT(rec); -#endif } else { cursor->flags |= HAMMER_CURSOR_MEMEOF; } } } + /* + * The memory record may have become stale while being held in + * cursor->iprec. We are interlocked against the backend on + * with regards to B-Tree entries. + */ + if ((cursor->flags & HAMMER_CURSOR_ATEMEM) == 0) { + if (hammer_ip_iterate_mem_good(cursor, cursor->iprec) == 0) { + cursor->flags |= HAMMER_CURSOR_ATEMEM; + goto next_memory; + } + } + /* * Extract either the disk or memory record depending on their * relative position. @@ -1249,7 +1242,8 @@ next_memory: switch(cursor->flags & (HAMMER_CURSOR_ATEDISK | HAMMER_CURSOR_ATEMEM)) { case 0: /* - * Both entries valid + * Both entries valid. Return the btree entry if it is + * in front of the memory entry. */ elm = &cursor->node->ondisk->elms[cursor->index]; r = hammer_btree_cmp(&elm->base, &cursor->iprec->rec.base.base); @@ -1278,7 +1272,7 @@ next_memory: goto next_btree; } } else { - KKASSERT(hammer_ip_iterate_mem_good(cursor, cursor->iprec) == 0); + panic("hammer_ip_next: duplicate mem/b-tree entry"); cursor->flags |= HAMMER_CURSOR_ATEMEM; goto next_memory; } @@ -1286,15 +1280,19 @@ next_memory: /* fall through to the memory entry */ case HAMMER_CURSOR_ATEDISK: /* - * Only the memory entry is valid. If the record is - * placemarking an on-disk deletion, we skip it unless - * the caller wants special record visibility. + * Only the memory entry is valid. 
*/ cursor->record = &cursor->iprec->rec; cursor->flags |= HAMMER_CURSOR_ATEMEM; + + /* + * If the memory entry is an on-disk deletion we should have + * also had found a B-Tree record. If the backend beat us + * to it it would have interlocked the cursor and we should + * have seen the in-memory record marked DELETED_FE. + */ if (cursor->iprec->type == HAMMER_MEM_RECORD_DEL) { - if ((cursor->flags & HAMMER_CURSOR_DELETE_VISIBILITY) == 0) - goto next_memory; + panic("hammer_ip_next: del-on-disk with no b-tree entry"); } break; case HAMMER_CURSOR_ATEMEM: @@ -1362,10 +1360,10 @@ hammer_ip_resolve_record_and_data(hammer_cursor_t cursor) * they indicate the end of the range (key = base + bytes). */ int -hammer_ip_delete_range(hammer_transaction_t trans, hammer_inode_t ip, +hammer_ip_delete_range(hammer_cursor_t cursor, hammer_inode_t ip, int64_t ran_beg, int64_t ran_end) { - struct hammer_cursor cursor; + hammer_transaction_t trans = cursor->trans; hammer_record_ondisk_t rec; hammer_base_elm_t base; int error; @@ -1377,23 +1375,23 @@ hammer_ip_delete_range(hammer_transaction_t trans, hammer_inode_t ip, KKASSERT(trans->type == HAMMER_TRANS_FLS); retry: - hammer_init_cursor(trans, &cursor, &ip->cache[0]); - - cursor.key_beg.obj_id = ip->obj_id; - cursor.key_beg.create_tid = 0; - cursor.key_beg.delete_tid = 0; - cursor.key_beg.obj_type = 0; - cursor.asof = ip->obj_asof; - cursor.flags |= HAMMER_CURSOR_ASOF; - cursor.flags |= HAMMER_CURSOR_DELETE_VISIBILITY; - cursor.flags |= HAMMER_CURSOR_BACKEND; - - cursor.key_end = cursor.key_beg; + hammer_normalize_cursor(cursor); + cursor->key_beg.obj_id = ip->obj_id; + cursor->key_beg.create_tid = 0; + cursor->key_beg.delete_tid = 0; + cursor->key_beg.obj_type = 0; + cursor->asof = ip->obj_asof; + cursor->flags &= ~HAMMER_CURSOR_INITMASK; + cursor->flags |= HAMMER_CURSOR_ASOF; + cursor->flags |= HAMMER_CURSOR_DELETE_VISIBILITY; + cursor->flags |= HAMMER_CURSOR_BACKEND; + + cursor->key_end = cursor->key_beg; if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) { - cursor.key_beg.key = ran_beg; - cursor.key_beg.rec_type = HAMMER_RECTYPE_DB; - cursor.key_end.rec_type = HAMMER_RECTYPE_DB; - cursor.key_end.key = ran_end; + cursor->key_beg.key = ran_beg; + cursor->key_beg.rec_type = HAMMER_RECTYPE_DB; + cursor->key_end.rec_type = HAMMER_RECTYPE_DB; + cursor->key_end.key = ran_end; } else { /* * The key in the B-Tree is (base+bytes), so the first possible @@ -1401,25 +1399,25 @@ retry: */ int64_t tmp64; - cursor.key_beg.key = ran_beg + 1; - cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; - cursor.key_end.rec_type = HAMMER_RECTYPE_DATA; + cursor->key_beg.key = ran_beg + 1; + cursor->key_beg.rec_type = HAMMER_RECTYPE_DATA; + cursor->key_end.rec_type = HAMMER_RECTYPE_DATA; tmp64 = ran_end + MAXPHYS + 1; /* work around GCC-4 bug */ if (tmp64 < ran_end) - cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL; + cursor->key_end.key = 0x7FFFFFFFFFFFFFFFLL; else - cursor.key_end.key = ran_end + MAXPHYS + 1; + cursor->key_end.key = ran_end + MAXPHYS + 1; } - cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; + cursor->flags |= HAMMER_CURSOR_END_INCLUSIVE; - error = hammer_ip_first(&cursor, ip); + error = hammer_ip_first(cursor); /* * Iterate through matching records and mark them as deleted. */ while (error == 0) { - rec = cursor.record; + rec = cursor->record; base = &rec->base.base; KKASSERT(base->delete_tid == 0); @@ -1475,14 +1473,17 @@ retry: * will set HAMMER_CURSOR_DELBTREE which hammer_ip_next() * uses to perform a fixup. 
*/ - error = hammer_ip_delete_record(&cursor, trans->tid); + error = hammer_ip_delete_record(cursor, trans->tid); if (error) break; - error = hammer_ip_next(&cursor); + error = hammer_ip_next(cursor); + } + if (error == EDEADLK) { + hammer_done_cursor(cursor); + error = hammer_init_cursor(trans, cursor, &ip->cache[0], ip); + if (error == 0) + goto retry; } - hammer_done_cursor(&cursor); - if (error == EDEADLK) - goto retry; if (error == ENOENT) error = 0; return(error); @@ -1494,41 +1495,41 @@ retry: * of or nlinks would get upset). */ int -hammer_ip_delete_range_all(hammer_transaction_t trans, hammer_inode_t ip, +hammer_ip_delete_range_all(hammer_cursor_t cursor, hammer_inode_t ip, int *countp) { - struct hammer_cursor cursor; + hammer_transaction_t trans = cursor->trans; hammer_record_ondisk_t rec; hammer_base_elm_t base; int error; KKASSERT(trans->type == HAMMER_TRANS_FLS); retry: - hammer_init_cursor(trans, &cursor, &ip->cache[0]); - - cursor.key_beg.obj_id = ip->obj_id; - cursor.key_beg.create_tid = 0; - cursor.key_beg.delete_tid = 0; - cursor.key_beg.obj_type = 0; - cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE + 1; - cursor.key_beg.key = HAMMER_MIN_KEY; - - cursor.key_end = cursor.key_beg; - cursor.key_end.rec_type = 0xFFFF; - cursor.key_end.key = HAMMER_MAX_KEY; - - cursor.asof = ip->obj_asof; - cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF; - cursor.flags |= HAMMER_CURSOR_DELETE_VISIBILITY; - cursor.flags |= HAMMER_CURSOR_BACKEND; - - error = hammer_ip_first(&cursor, ip); + hammer_normalize_cursor(cursor); + cursor->key_beg.obj_id = ip->obj_id; + cursor->key_beg.create_tid = 0; + cursor->key_beg.delete_tid = 0; + cursor->key_beg.obj_type = 0; + cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE + 1; + cursor->key_beg.key = HAMMER_MIN_KEY; + + cursor->key_end = cursor->key_beg; + cursor->key_end.rec_type = 0xFFFF; + cursor->key_end.key = HAMMER_MAX_KEY; + + cursor->asof = ip->obj_asof; + cursor->flags &= ~HAMMER_CURSOR_INITMASK; + cursor->flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF; + cursor->flags |= HAMMER_CURSOR_DELETE_VISIBILITY; + cursor->flags |= HAMMER_CURSOR_BACKEND; + + error = hammer_ip_first(cursor); /* * Iterate through matching records and mark them as deleted. */ while (error == 0) { - rec = cursor.record; + rec = cursor->record; base = &rec->base.base; KKASSERT(base->delete_tid == 0); @@ -1544,16 +1545,19 @@ retry: * must be synced and cannot be deleted. 
*/ if (rec->base.base.rec_type != HAMMER_RECTYPE_DIRENTRY) { - error = hammer_ip_delete_record(&cursor, trans->tid); + error = hammer_ip_delete_record(cursor, trans->tid); ++*countp; } if (error) break; - error = hammer_ip_next(&cursor); + error = hammer_ip_next(cursor); + } + if (error == EDEADLK) { + hammer_done_cursor(cursor); + error = hammer_init_cursor(trans, cursor, &ip->cache[0], ip); + if (error == 0) + goto retry; } - hammer_done_cursor(&cursor); - if (error == EDEADLK) - goto retry; if (error == ENOENT) error = 0; return(error); @@ -1719,7 +1723,7 @@ hammer_ip_check_directory_empty(hammer_transaction_t trans, if (ip->flush_state != HAMMER_FST_IDLE) { kprintf("FWAIT\n"); hammer_done_cursor(parent_cursor); - hammer_flush_inode(ip, HAMMER_FLUSH_FORCE|HAMMER_FLUSH_SIGNAL); + hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL); hammer_wait_inode(ip); return (EDEADLK); } @@ -1728,7 +1732,7 @@ hammer_ip_check_directory_empty(hammer_transaction_t trans, /* * Check directory empty */ - hammer_init_cursor(trans, &cursor, &ip->cache[0]); + hammer_init_cursor(trans, &cursor, &ip->cache[0], ip); cursor.key_beg.obj_id = ip->obj_id; cursor.key_beg.create_tid = 0; @@ -1744,7 +1748,7 @@ hammer_ip_check_directory_empty(hammer_transaction_t trans, cursor.asof = ip->obj_asof; cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF; - error = hammer_ip_first(&cursor, ip); + error = hammer_ip_first(&cursor); if (error == ENOENT) error = 0; else if (error == 0) diff --git a/sys/vfs/hammer/hammer_reblock.c b/sys/vfs/hammer/hammer_reblock.c index c3ef71a870..bfdeba1381 100644 --- a/sys/vfs/hammer/hammer_reblock.c +++ b/sys/vfs/hammer/hammer_reblock.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.8 2008/04/27 00:45:37 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.9 2008/05/03 05:28:55 dillon Exp $ */ /* * HAMMER reblocker - This code frees up fragmented physical space @@ -69,7 +69,7 @@ hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip, return(EINVAL); retry: - error = hammer_init_cursor(trans, &cursor, NULL); + error = hammer_init_cursor(trans, &cursor, NULL, NULL); if (error) { hammer_done_cursor(&cursor); return(error); diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index 81e5fd11b1..e377b144a4 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.44 2008/05/02 06:51:57 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.45 2008/05/03 05:28:55 dillon Exp $ */ #include @@ -372,7 +372,6 @@ hammer_vop_write(struct vop_write_args *ap) break; } /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */ - hammer_lock_sh(&ip->lock); if (ip->ino_rec.ino_size < uio->uio_offset) { ip->ino_rec.ino_size = uio->uio_offset; flags = HAMMER_INODE_RDIRTY; @@ -383,7 +382,6 @@ hammer_vop_write(struct vop_write_args *ap) ip->ino_rec.ino_mtime = trans.time; flags |= HAMMER_INODE_ITIMES | HAMMER_INODE_BUFS; hammer_modify_inode(&trans, ip, flags); - hammer_unlock(&ip->lock); if (ap->a_ioflag & IO_SYNC) { bwrite(bp); @@ -495,8 +493,6 @@ hammer_vop_ncreate(struct vop_ncreate_args *ap) *ap->a_vpp = NULL; return (error); } - hammer_lock_sh(&nip->lock); - hammer_lock_sh(&dip->lock); /* * Add the new filesystem object to the directory. 
This will also @@ -505,8 +501,6 @@ hammer_vop_ncreate(struct vop_ncreate_args *ap) error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip); if (error) kprintf("hammer_ip_add_directory error %d\n", error); - hammer_unlock(&dip->lock); - hammer_unlock(&nip->lock); /* * Finish up. @@ -662,7 +656,7 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap) */ namekey = hammer_directory_namekey(ncp->nc_name, nlen); - error = hammer_init_cursor(&trans, &cursor, &dip->cache[0]); + error = hammer_init_cursor(&trans, &cursor, &dip->cache[0], dip); cursor.key_beg.obj_id = dip->obj_id; cursor.key_beg.key = namekey; cursor.key_beg.create_tid = 0; @@ -682,23 +676,23 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap) * The hammer_ip_*() functions merge in-memory records with on-disk * records for the purposes of the search. */ - if (error == 0) - error = hammer_ip_first(&cursor, dip); - - rec = NULL; obj_id = 0; - while (error == 0) { - error = hammer_ip_resolve_data(&cursor); - if (error) - break; - rec = cursor.record; - if (nlen == rec->entry.base.data_len && - bcmp(ncp->nc_name, cursor.data, nlen) == 0) { - obj_id = rec->entry.obj_id; - break; + if (error == 0) { + rec = NULL; + error = hammer_ip_first(&cursor); + while (error == 0) { + error = hammer_ip_resolve_data(&cursor); + if (error) + break; + rec = cursor.record; + if (nlen == rec->entry.base.data_len && + bcmp(ncp->nc_name, cursor.data, nlen) == 0) { + obj_id = rec->entry.obj_id; + break; + } + error = hammer_ip_next(&cursor); } - error = hammer_ip_next(&cursor); } hammer_done_cursor(&cursor); if (error == 0) { @@ -708,6 +702,11 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap) error = hammer_get_vnode(ip, LK_EXCLUSIVE, &vp); hammer_rel_inode(ip, 0); } else { + kprintf("nresolve: lookup %s failed dip %p (%016llx) on" + " inode %016llx error %d\n", + ncp->nc_name, + dip, dip->obj_id, obj_id, error); + Debugger("x"); vp = NULL; } if (error == 0) { @@ -814,11 +813,7 @@ hammer_vop_nlink(struct vop_nlink_args *ap) * dip nor ip are referenced or locked, but their vnodes are * referenced. This function will bump the inode's link count. */ - hammer_lock_sh(&ip->lock); - hammer_lock_sh(&dip->lock); error = hammer_ip_add_directory(&trans, dip, nch->ncp, ip); - hammer_unlock(&dip->lock); - hammer_unlock(&ip->lock); /* * Finish up. @@ -873,11 +868,7 @@ hammer_vop_nmkdir(struct vop_nmkdir_args *ap) * Add the new filesystem object to the directory. This will also * bump the inode's link count. */ - hammer_lock_sh(&nip->lock); - hammer_lock_sh(&dip->lock); error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip); - hammer_unlock(&dip->lock); - hammer_unlock(&nip->lock); if (error) kprintf("hammer_mkdir (add) error %d\n", error); @@ -941,11 +932,7 @@ hammer_vop_nmknod(struct vop_nmknod_args *ap) * Add the new filesystem object to the directory. This will also * bump the inode's link count. */ - hammer_lock_sh(&nip->lock); - hammer_lock_sh(&dip->lock); error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip); - hammer_unlock(&dip->lock); - hammer_unlock(&nip->lock); /* * Finish up. @@ -1074,7 +1061,7 @@ hammer_vop_readdir(struct vop_readdir_args *ap) * Key range (begin and end inclusive) to scan. Directory keys * directly translate to a 64 bit 'seek' position. 
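 *
 * In other words the readdir cookie is the directory hash key
 * itself, so a scan can be resumed from any uio offset without
 * extra translation.  A sketch of the idea (variable names are
 * illustrative, not the exact locals used below):
 *
 *	saveoff = uio->uio_offset;	/* resume position == key */
 *	cursor.key_beg.key = saveoff;
 *	...
 *	uio->uio_offset = saveoff;	/* publish the new position */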
*/ - hammer_init_cursor(&trans, &cursor, &ip->cache[0]); + hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip); cursor.key_beg.obj_id = ip->obj_id; cursor.key_beg.create_tid = 0; cursor.key_beg.delete_tid = 0; @@ -1087,7 +1074,7 @@ hammer_vop_readdir(struct vop_readdir_args *ap) cursor.asof = ip->obj_asof; cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF; - error = hammer_ip_first(&cursor, ip); + error = hammer_ip_first(&cursor); while (error == 0) { error = hammer_ip_resolve_record_and_data(&cursor); @@ -1158,7 +1145,7 @@ hammer_vop_readlink(struct vop_readlink_args *ap) hammer_simple_transaction(&trans, ip->hmp); - hammer_init_cursor(&trans, &cursor, &ip->cache[0]); + hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip); /* * Key range (begin and end inclusive) to scan. Directory keys @@ -1238,15 +1225,6 @@ hammer_vop_nrename(struct vop_nrename_args *ap) hammer_start_transaction(&trans, fdip->hmp); - hammer_lock_sh(&ip->lock); - if (fdip->obj_id < tdip->obj_id) { - hammer_lock_sh(&fdip->lock); - hammer_lock_sh(&tdip->lock); - } else { - hammer_lock_sh(&tdip->lock); - hammer_lock_sh(&fdip->lock); - } - /* * Remove tncp from the target directory and then link ip as * tncp. XXX pass trans to dounlink @@ -1276,7 +1254,7 @@ hammer_vop_nrename(struct vop_nrename_args *ap) */ namekey = hammer_directory_namekey(fncp->nc_name, fncp->nc_nlen); retry: - hammer_init_cursor(&trans, &cursor, &fdip->cache[0]); + hammer_init_cursor(&trans, &cursor, &fdip->cache[0], fdip); cursor.key_beg.obj_id = fdip->obj_id; cursor.key_beg.key = namekey; cursor.key_beg.create_tid = 0; @@ -1296,7 +1274,7 @@ retry: * The hammer_ip_*() functions merge in-memory records with on-disk * records for the purposes of the search. */ - error = hammer_ip_first(&cursor, fdip); + error = hammer_ip_first(&cursor); while (error == 0) { if (hammer_ip_resolve_data(&cursor) != 0) break; @@ -1323,18 +1301,7 @@ retry: * of crash recovery. */ if (error == EDEADLK) { - hammer_unlock(&ip->lock); - hammer_unlock(&fdip->lock); - hammer_unlock(&tdip->lock); hammer_done_cursor(&cursor); - hammer_lock_sh(&ip->lock); - if (fdip->obj_id < tdip->obj_id) { - hammer_lock_sh(&fdip->lock); - hammer_lock_sh(&tdip->lock); - } else { - hammer_lock_sh(&tdip->lock); - hammer_lock_sh(&fdip->lock); - } goto retry; } @@ -1346,9 +1313,6 @@ retry: cache_rename(ap->a_fnch, ap->a_tnch); failed: - hammer_unlock(&ip->lock); - hammer_unlock(&fdip->lock); - hammer_unlock(&tdip->lock); hammer_done_transaction(&trans); return (error); } @@ -1397,7 +1361,6 @@ hammer_vop_setattr(struct vop_setattr_args *ap) return (EROFS); hammer_start_transaction(&trans, ip->hmp); - hammer_lock_sh(&ip->lock); error = 0; if (vap->va_flags != VNOVAL) { @@ -1445,7 +1408,6 @@ hammer_vop_setattr(struct vop_setattr_args *ap) * if we do not release the lock. Probably not a * big deal here. 
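 *
 * The size change below splits on direction: shrinking must throw
 * away buffers beyond the new EOF (with the on-media records pruned
 * later by the truncation logic), while growing only has to tell
 * the VM system about the new size.  In outline, mirroring the code
 * that follows:
 *
 *	if (vap->va_size < ip->ino_rec.ino_size) {
 *		vtruncbuf(ap->a_vp, vap->va_size, HAMMER_BUFSIZE);
 *		truncating = 1;		/* records must be pruned */
 *	} else {
 *		vnode_pager_setsize(ap->a_vp, vap->va_size);
 *		truncating = 0;		/* pure extension */
 *	}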
*/ - hammer_unlock(&ip->lock); if (vap->va_size < ip->ino_rec.ino_size) { vtruncbuf(ap->a_vp, vap->va_size, HAMMER_BUFSIZE); truncating = 1; } else { vnode_pager_setsize(ap->a_vp, vap->va_size); truncating = 0; } - hammer_lock_sh(&ip->lock); ip->ino_rec.ino_size = vap->va_size; modflags |= HAMMER_INODE_RDIRTY; aligned_size = (vap->va_size + HAMMER_BUFMASK) & @@ -1530,7 +1491,6 @@ hammer_vop_setattr(struct vop_setattr_args *ap) done: if (error == 0) hammer_modify_inode(&trans, ip, modflags); - hammer_unlock(&ip->lock); hammer_done_transaction(&trans); return (error); } @@ -1575,13 +1535,6 @@ hammer_vop_nsymlink(struct vop_nsymlink_args *ap) return (error); } - /* - * Add the new filesystem object to the directory. This will also - * bump the inode's link count. - */ - hammer_lock_sh(&nip->lock); - hammer_lock_sh(&dip->lock); - /* * Add a record representing the symlink. The symlink stores the link * as pure data, not a string, and is not \0 terminated. */ @@ -1607,8 +1560,6 @@ hammer_vop_nsymlink(struct vop_nsymlink_args *ap) } if (error == 0) error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip); - hammer_unlock(&dip->lock); - hammer_unlock(&nip->lock); /* * Finish up. @@ -1750,7 +1701,7 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap) ip = ap->a_vp->v_data; hammer_simple_transaction(&trans, ip->hmp); - hammer_init_cursor(&trans, &cursor, &ip->cache[0]); + hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip); /* * Key range (begin and end inclusive) to scan. Note that the key's @@ -1786,7 +1737,7 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap) } cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; - error = hammer_ip_first(&cursor, ip); + error = hammer_ip_first(&cursor); boff = 0; while (error == 0) { @@ -1906,7 +1857,20 @@ hammer_vop_strategy_write(struct vop_strategy_args *ap) TAILQ_INSERT_TAIL(&ip->bio_list, bio, bio_act); ++hammer_bio_count; hammer_modify_inode(NULL, ip, HAMMER_INODE_BUFS); - hammer_flush_inode(ip, HAMMER_FLUSH_FORCE|HAMMER_FLUSH_SIGNAL); + + hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL); +#if 0 + /* + * XXX + * + * If the write was not part of an integrated flush operation then + * signal a flush. + */ + if (ip->flush_state != HAMMER_FST_FLUSH || + (ip->flags & HAMMER_INODE_WRITE_ALT)) { + hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL); + } +#endif return(0); } @@ -1921,7 +1885,7 @@ hammer_vop_strategy_write(struct vop_strategy_args *ap) * original data. Then add a record to represent the buffer. */ int -hammer_dowrite(hammer_transaction_t trans, hammer_inode_t ip, struct bio *bio) +hammer_dowrite(hammer_cursor_t cursor, hammer_inode_t ip, struct bio *bio) { struct buf *bp = bio->bio_buf; int error; @@ -1942,10 +1906,10 @@ hammer_dowrite(hammer_transaction_t trans, hammer_inode_t ip, struct bio *bio) * (eventually) properly truncate partial overlaps.
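 *
 * The overlap range depends on the object type: database records
 * are keyed exactly, while data records are keyed by their ending
 * offset (key = base + bytes), so deleting a whole buffer covers
 * [offset, offset + bufsize - 1].  In outline, mirroring the code
 * that follows:
 *
 *	if (obj_type == HAMMER_OBJTYPE_DBFILE)
 *		hammer_ip_delete_range(cursor, ip, off, off);
 *	else
 *		hammer_ip_delete_range(cursor, ip, off,
 *				       off + bp->b_bufsize - 1);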
*/ if (ip->sync_ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) { - error = hammer_ip_delete_range(trans, ip, bio->bio_offset, + error = hammer_ip_delete_range(cursor, ip, bio->bio_offset, bio->bio_offset); } else { - error = hammer_ip_delete_range(trans, ip, bio->bio_offset, + error = hammer_ip_delete_range(cursor, ip, bio->bio_offset, bio->bio_offset + bp->b_bufsize - 1); } @@ -1970,10 +1934,10 @@ hammer_dowrite(hammer_transaction_t trans, hammer_inode_t ip, struct bio *bio) KKASSERT(limit_size >= 0); limit_size = (limit_size + 63) & ~63; } - - error = hammer_ip_sync_data(trans, ip, bio->bio_offset, - bp->b_data, limit_size); - + if (limit_size) { + error = hammer_ip_sync_data(cursor, ip, bio->bio_offset, + bp->b_data, limit_size); + } } if (error) Debugger("hammer_dowrite: error"); @@ -2022,7 +1986,7 @@ hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch, namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen); retry: - hammer_init_cursor(trans, &cursor, &dip->cache[0]); + hammer_init_cursor(trans, &cursor, &dip->cache[0], dip); cursor.key_beg.obj_id = dip->obj_id; cursor.key_beg.key = namekey; cursor.key_beg.create_tid = 0; @@ -2044,7 +2008,9 @@ retry: * The hammer_ip_*() functions merge in-memory records with on-disk * records for the purposes of the search. */ - error = hammer_ip_first(&cursor, dip); + rec = NULL; + error = hammer_ip_first(&cursor); + while (error == 0) { error = hammer_ip_resolve_data(&cursor); if (error) @@ -2093,12 +2059,8 @@ retry: * hammer_done_cursor() twice. */ if (error == 0) { - hammer_lock_sh(&ip->lock); - hammer_lock_sh(&dip->lock); error = hammer_ip_del_directory(trans, &cursor, dip, ip); - hammer_unlock(&dip->lock); - hammer_unlock(&ip->lock); } if (error == 0) { cache_setunresolved(nch); -- 2.41.0