From c0ade6907483ef11800d36a1bdd853e0c47d85de Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Mon, 26 Nov 2007 05:03:11 +0000 Subject: [PATCH] HAMMER 6/many - memory->disk flush, single-cluster sync to disk, more vnops. Get most operations within a single 64M cluster working. There are still numerous issues with the B-Tree algorithms so readbacks generate bad data at the moment. --- sys/vfs/hammer/hammer.h | 13 +- sys/vfs/hammer/hammer_btree.c | 159 ++++++++++++++++++++-- sys/vfs/hammer/hammer_disk.h | 4 +- sys/vfs/hammer/hammer_inode.c | 211 ++++++++++++++++++++++++++++- sys/vfs/hammer/hammer_io.c | 4 +- sys/vfs/hammer/hammer_object.c | 234 +++++++++++++++++++++++++++++---- sys/vfs/hammer/hammer_ondisk.c | 79 ++++++++++- sys/vfs/hammer/hammer_subs.c | 8 +- sys/vfs/hammer/hammer_vfsops.c | 8 +- sys/vfs/hammer/hammer_vnops.c | 100 +++++++++++--- 10 files changed, 749 insertions(+), 71 deletions(-) diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index eef3940ca5..4c6fef8673 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.8 2007/11/20 22:55:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.9 2007/11/26 05:03:11 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS @@ -160,6 +160,8 @@ typedef struct hammer_inode *hammer_inode_t; #define HAMMER_INODE_ITIMES 0x0004 /* in-memory mtime/atime modified */ #define HAMMER_INODE_ONDISK 0x0010 /* inode is on-disk (else not yet) */ #define HAMMER_INODE_FLUSH 0x0020 /* flush on last ref */ +#define HAMMER_INODE_TID 0x0040 /* update in-memory last_tid */ +#define HAMMER_INODE_DELETED 0x0080 /* inode ready for deletion */ #define HAMMER_MAX_INODE_CURSORS 4 @@ -187,6 +189,7 @@ typedef struct hammer_record *hammer_record_t; #define HAMMER_RECF_ALLOCDATA 0x0001 #define HAMMER_RECF_ONRBTREE 0x0002 #define HAMMER_RECF_DELETED 0x0004 +#define HAMMER_RECF_EMBEDDED_DATA 0x0008 /* * Structures used to internally represent a volume and a cluster @@ -399,6 +402,7 @@ int hammer_get_vnode(struct hammer_inode *ip, int lktype, struct vnode **vpp); struct hammer_inode *hammer_get_inode(hammer_mount_t hmp, u_int64_t obj_id, int *errorp); +int hammer_update_inode(hammer_transaction_t trans, hammer_inode_t ip); void hammer_put_inode(struct hammer_inode *ip); void hammer_put_inode_ref(struct hammer_inode *ip); @@ -454,6 +458,9 @@ int hammer_btree_iterate(hammer_cursor_t cursor); int hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm); int hammer_btree_delete(hammer_cursor_t cursor); int hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2); +int hammer_btree_range_cmp(hammer_cursor_t cursor, hammer_base_elm_t key2); +void hammer_print_btree_node(hammer_node_ondisk_t ondisk); +void hammer_print_btree_elm(hammer_btree_elm_t elm, u_int8_t type, int i); void *hammer_bread(struct hammer_cluster *cluster, int32_t cloff, u_int64_t buf_type, int *errorp, @@ -528,6 +535,7 @@ int hammer_create_inode(struct hammer_transaction *trans, struct vattr *vap, 
struct ucred *cred, struct hammer_inode *dip, struct hammer_inode **ipp); void hammer_rel_inode(hammer_inode_t ip, int flush); +int hammer_sync_inode(hammer_inode_t ip, int waitfor, int handle_delete); int hammer_ip_add_directory(struct hammer_transaction *trans, hammer_inode_t dip, struct namecache *ncp, @@ -537,9 +545,10 @@ int hammer_ip_del_directory(struct hammer_transaction *trans, hammer_inode_t ip); int hammer_ip_delete_range(struct hammer_transaction *trans, hammer_inode_t ip, int64_t ran_beg, int64_t ran_end); -int hammer_ip_add_data(struct hammer_transaction *trans, +int hammer_ip_sync_data(struct hammer_transaction *trans, hammer_inode_t ip, int64_t offset, void *data, int bytes); +int hammer_ip_sync_record(hammer_record_t rec); int hammer_io_read(struct vnode *devvp, struct hammer_io *io); int hammer_io_new(struct vnode *devvp, struct hammer_io *io); diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c index d3d2d987ee..f2c9fcf793 100644 --- a/sys/vfs/hammer/hammer_btree.c +++ b/sys/vfs/hammer/hammer_btree.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.5 2007/11/20 07:16:28 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.6 2007/11/26 05:03:11 dillon Exp $ */ /* @@ -129,8 +129,10 @@ hammer_btree_iterate(hammer_cursor_t cursor) node = cursor->node->ondisk; if (node == NULL) return(ENOENT); - if (cursor->index < node->count) + if (cursor->index < node->count && + (cursor->flags & HAMMER_CURSOR_ATEDISK)) { ++cursor->index; + } /* * Loop until an element is found or we are done. @@ -228,7 +230,7 @@ hammer_btree_iterate(hammer_cursor_t cursor) * old or too new but does not terminate the search. 
*/ elm = &node->elms[cursor->index]; - r = hammer_btree_cmp(&cursor->key_end, &elm->base); + r = hammer_btree_range_cmp(cursor, &elm->base); if (r == -1 || r == 1) { ++cursor->index; continue; @@ -281,6 +283,7 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags) hammer_node_ondisk_t node; hammer_btree_elm_t elm; hammer_cluster_t cluster; + u_int64_t buf_type; int32_t cloff; int error; @@ -308,11 +311,23 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags) if ((flags & HAMMER_CURSOR_GET_DATA) && error == 0) { if ((cloff ^ elm->leaf.data_offset) & ~HAMMER_BUFMASK) { /* - * Data in different buffer than record + * The data is not in the same buffer as the last + * record we cached, but it could still be embedded + * in a record. Note that we may not have loaded the + * record's buffer above, depending on flags. */ + if ((elm->leaf.rec_offset ^ elm->leaf.data_offset) & + ~HAMMER_BUFMASK) { + if (elm->leaf.data_len & HAMMER_BUFMASK) + buf_type = HAMMER_FSBUF_DATA; + else + buf_type = 0; /* pure data buffer */ + } else { + buf_type = HAMMER_FSBUF_RECORDS; + } cursor->data = hammer_bread(cluster, elm->leaf.data_offset, - HAMMER_FSBUF_DATA, &error, + buf_type, &error, &cursor->data_buffer); } else { /* @@ -321,6 +336,8 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags) * though we don't use it in this case, in case * other records extracted during an iteration * go back to it. + * + * Just assume the buffer type is correct. */ cursor->data = (void *) ((char *)cursor->record_buffer->ondisk + @@ -387,7 +404,12 @@ hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm) ++node->count; hammer_modify_node(cursor->node); - if ((parent = cursor->parent->ondisk) != NULL) { + /* + * Adjust the sub-tree count in the parent. note that the parent + * may be in a different cluster. 
+ */ + if (cursor->parent) { + parent = cursor->parent->ondisk; i = cursor->parent_index; ++parent->elms[i].internal.subtree_count; KKASSERT(parent->elms[i].internal.subtree_count <= node->count); @@ -560,6 +582,8 @@ btree_search(hammer_cursor_t cursor, int flags) */ KKASSERT(cursor->node != NULL && cursor->node->cluster == cluster); +/* hammer_print_btree_node(cursor->node->ondisk);*/ + /* * If we are inserting we can't start at a full node if the parent * is also full (because there is no way to split the node), @@ -903,6 +927,7 @@ btree_split_internal(hammer_cursor_t cursor) * a new root its parent pointer may have changed. */ elm->internal.subtree_offset = 0; + ondisk->count = split; /* * Insert the separator into the parent, fixup the parent's @@ -1060,9 +1085,10 @@ btree_split_leaf(hammer_cursor_t cursor) /* * Cleanup the original node. Because this is a leaf node and * leaf nodes do not have a right-hand boundary, there - * aren't any special edge cases to clean up. + * aren't any special edge cases to clean up. We just fixup the + * count. 
*/ - /* nothing to do */ + ondisk->count = split; /* * Insert the separator into the parent, fixup the parent's @@ -1113,7 +1139,6 @@ btree_split_leaf(hammer_cursor_t cursor) if (cursor->index >= split) { cursor->parent_index = parent_index + 1; cursor->index -= split; - cursor->node = new_leaf; hammer_unlock(&cursor->node->lock); hammer_rel_node(cursor->node); cursor->node = new_leaf; @@ -1656,6 +1681,65 @@ hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2) return(0); } +/* + * Compare the element against the cursor's beginning and ending keys + */ +int +hammer_btree_range_cmp(hammer_cursor_t cursor, hammer_base_elm_t key2) +{ + /* + * A cursor->key_beg.obj_id of 0 matches any object id + */ + if (cursor->key_beg.obj_id) { + if (cursor->key_end.obj_id < key2->obj_id) + return(-4); + if (cursor->key_beg.obj_id > key2->obj_id) + return(4); + } + + /* + * A cursor->key_beg.rec_type of 0 matches any record type. + */ + if (cursor->key_beg.rec_type) { + if (cursor->key_end.rec_type < key2->rec_type) + return(-3); + if (cursor->key_beg.rec_type > key2->rec_type) + return(3); + } + + /* + * There is no special case for key. 0 means 0. + */ + if (cursor->key_end.key < key2->key) + return(-2); + if (cursor->key_beg.key > key2->key) + return(2); + + /* + * This test has a number of special cases. create_tid in key_beg is + * the as-of transaction id, and delete_tid in key_beg is NOT USED. + * + * A key_beg.create_tid of 0 matches any record regardless of when + * it was created or destroyed. 0xFFFFFFFFFFFFFFFFULL should be + * used to search for the most current state of the object. + * + * key2->create_tid belongs to a HAMMER record and will never be + * 0. key2->delete_tid is the deletion transaction id or 0 if + * the record has not yet been deleted. + * + * NOTE: only key_beg.create_tid is used for create_tid, we can only + * do as-of scans at the moment. 
+ */ + if (cursor->key_beg.create_tid) { + if (cursor->key_beg.create_tid < key2->create_tid) + return(-1); + if (key2->delete_tid && cursor->key_beg.create_tid >= key2->delete_tid) + return(1); + } + + return(0); +} + /* * Create a separator half way inbetween key1 and key2. For fields just * one unit apart, the separator will match key2. @@ -1719,3 +1803,60 @@ btree_max_elements(u_int8_t type) } #endif +void +hammer_print_btree_node(hammer_node_ondisk_t ondisk) +{ + hammer_btree_elm_t elm; + int i; + + kprintf("node %p count=%d parent=%d type=%c\n", + ondisk, ondisk->count, ondisk->parent, ondisk->type); + + /* + * Dump both boundary elements if an internal node + */ + if (ondisk->type == HAMMER_BTREE_TYPE_INTERNAL) { + for (i = 0; i <= ondisk->count; ++i) { + elm = &ondisk->elms[i]; + hammer_print_btree_elm(elm, ondisk->type, i); + } + } else { + for (i = 0; i < ondisk->count; ++i) { + elm = &ondisk->elms[i]; + hammer_print_btree_elm(elm, ondisk->type, i); + } + } +} + +void +hammer_print_btree_elm(hammer_btree_elm_t elm, u_int8_t type, int i) +{ + kprintf(" %2d", i); + kprintf("\tobjid = %016llx\n", elm->base.obj_id); + kprintf("\tkey = %016llx\n", elm->base.key); + kprintf("\tcreate_tid = %016llx\n", elm->base.create_tid); + kprintf("\tdelete_tid = %016llx\n", elm->base.delete_tid); + kprintf("\trec_type = %04x\n", elm->base.rec_type); + kprintf("\tobj_type = %02x\n", elm->base.obj_type); + kprintf("\tsubtree_type = %02x\n", elm->subtree_type); + + if (type == HAMMER_BTREE_TYPE_INTERNAL) { + if (elm->internal.rec_offset) { + kprintf("\tcluster_rec = %08x\n", + elm->internal.rec_offset); + kprintf("\tcluster_id = %08x\n", + elm->internal.subtree_cluid); + kprintf("\tvolno = %08x\n", + elm->internal.subtree_volno); + } else { + kprintf("\tsubtree_off = %08x\n", + elm->internal.subtree_offset); + } + kprintf("\tsubtree_count= %d\n", elm->internal.subtree_count); + } else { + kprintf("\trec_offset = %08x\n", elm->leaf.rec_offset); + kprintf("\tdata_offset = 
%08x\n", elm->leaf.data_offset); + kprintf("\tdata_len = %08x\n", elm->leaf.data_len); + kprintf("\tdata_crc = %08x\n", elm->leaf.data_crc); + } +} diff --git a/sys/vfs/hammer/hammer_disk.h b/sys/vfs/hammer/hammer_disk.h index b3188069ee..11753a7530 100644 --- a/sys/vfs/hammer/hammer_disk.h +++ b/sys/vfs/hammer/hammer_disk.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.8 2007/11/20 22:55:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.9 2007/11/26 05:03:11 dillon Exp $ */ #ifndef _SYS_UUID_H_ @@ -318,7 +318,7 @@ struct hammer_cluster_ondisk { int32_t idx_data; /* data append point (element no) */ int32_t idx_index; /* index append point (element no) */ int32_t idx_record; /* record prepend point (element no) */ - u_int32_t idx_reserved03; + int32_t idx_ldata; /* large block data append pt (buf_no) */ /* * Specify the range of information stored in this cluster as two diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index ea7d1adc43..fcb8d9cf59 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.6 2007/11/20 22:55:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.7 2007/11/26 05:03:11 dillon Exp $ */ #include "hammer.h" @@ -43,8 +43,22 @@ hammer_vop_inactive(struct vop_inactive_args *ap) { struct hammer_inode *ip = VTOI(ap->a_vp); - if (ip == NULL) + /* + * Degenerate case + */ + if (ip == NULL) { vrecycle(ap->a_vp); + return(0); + } + + /* + * If the inode no longer has any references we recover its + * in-memory resources immediately. 
+ */ + if (ip->ino_rec.ino_nlinks == 0 && + (ip->hmp->mp->mnt_flag & MNT_RDONLY) == 0) { + hammer_sync_inode(ip, MNT_NOWAIT, 1); + } return(0); } @@ -55,6 +69,10 @@ hammer_vop_reclaim(struct vop_reclaim_args *ap) struct vnode *vp; vp = ap->a_vp; + + /* + * Release the vnode association and ask that the inode be flushed. + */ if ((ip = vp->v_data) != NULL) { vp->v_data = NULL; ip->vp = NULL; @@ -214,6 +232,7 @@ loop: kfree(ip, M_HAMMER); goto loop; } + ip->flags |= HAMMER_INODE_ONDISK; } else { kfree(ip, M_HAMMER); ip = NULL; @@ -297,6 +316,59 @@ hammer_create_inode(hammer_transaction_t trans, struct vattr *vap, return(0); } +int +hammer_update_inode(hammer_transaction_t trans, hammer_inode_t ip) +{ + struct hammer_cursor cursor; + hammer_record_t record; + int error; + + /* + * Locate the record on-disk and mark it as deleted + * + * XXX Update the inode record and data in-place if the retention + * policy allows it. + */ + error = 0; + + if (ip->flags & HAMMER_INODE_ONDISK) { + hammer_init_cursor_ip(&cursor, ip); + cursor.key_beg.obj_id = ip->obj_id; + cursor.key_beg.key = 0; + cursor.key_beg.create_tid = ip->obj_asof; + cursor.key_beg.delete_tid = 0; + cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE; + cursor.key_beg.obj_type = 0; + cursor.flags = HAMMER_CURSOR_GET_RECORD; + + error = hammer_btree_lookup(&cursor); + + if (error == 0) { + cursor.record->base.base.delete_tid = trans->tid; + hammer_modify_buffer(cursor.record_buffer); + } + hammer_cache_node(cursor.node, &ip->cache); + hammer_done_cursor(&cursor); + } + + /* + * Write out a new record if the in-memory inode is not marked + * as having been deleted. 
+ */ + if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) { + record = hammer_alloc_mem_record(trans, ip); + record->rec.inode = ip->ino_rec; + record->rec.inode.base.base.create_tid = trans->tid; + record->rec.inode.base.data_len = sizeof(ip->ino_data); + record->data = (void *)&ip->ino_data; + error = hammer_ip_sync_record(record); + hammer_free_mem_record(record); + ip->flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY); + ip->flags |= HAMMER_INODE_ONDISK; + } + return(error); +} + /* * Release a reference on an inode and unload it if told to flush. */ @@ -318,13 +390,16 @@ hammer_rel_inode(struct hammer_inode *ip, int flush) int hammer_unload_inode(struct hammer_inode *ip, void *data __unused) { + int error; + KASSERT(ip->lock.refs == 0, ("hammer_unload_inode: %d refs\n", ip->lock.refs)); KKASSERT(ip->vp == NULL); hammer_ref(&ip->lock); - /* XXX flush inode to disk */ - kprintf("flush inode %p\n", ip); + error = hammer_sync_inode(ip, MNT_WAIT, 1); + if (error) + kprintf("hammer_sync_inode failed error %d\n", error); RB_REMOVE(hammer_ino_rb_tree, &ip->hmp->rb_inos_root, ip); @@ -342,7 +417,129 @@ hammer_modify_inode(struct hammer_transaction *trans, struct hammer_inode *ip, int flags) { ip->flags |= flags; - ip->last_tid = trans->tid; + if (flags & HAMMER_INODE_TID) + ip->last_tid = trans->tid; +} + +/* + * Sync any dirty buffers and records associated with an inode. The + * inode's last_tid field is used as the transaction id for the sync, + * overriding any intermediate TIDs that were used for records. Note + * that the dirty buffer cache buffers do not have any knowledge of + * the transaction id they were modified under. 
+ */ +static int +hammer_sync_inode_callback(hammer_record_t rec, void *data __unused) +{ + int error; + + error = 0; + if ((rec->flags & HAMMER_RECF_DELETED) == 0) + error = hammer_ip_sync_record(rec); + + if (error) { + kprintf("hammer_sync_inode_callback: sync failed rec %p\n", + rec); + return(-1); + } + hammer_free_mem_record(rec); + return(0); +} + +/* + * XXX error handling + */ +int +hammer_sync_inode(hammer_inode_t ip, int waitfor, int handle_delete) +{ + struct hammer_transaction trans; + int error; + int r; + + hammer_lock_ex(&ip->lock); + hammer_start_transaction(&trans, ip->hmp); + + /* + * If the inode has been deleted (nlinks == 0), and the OS no longer + * has any references to it (handle_delete != 0), clean up in-memory + * data. + * + * NOTE: We do not set the RDIRTY flag when updating the delete_tid, + * setting HAMMER_INODE_DELETED takes care of it. + */ + if (ip->ino_rec.ino_nlinks == 0 && handle_delete) { + if (ip->vp) + vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE); + error = hammer_ip_delete_range(&trans, ip, + HAMMER_MIN_KEY, HAMMER_MAX_KEY); + KKASSERT(RB_EMPTY(&ip->rec_tree)); + ip->ino_rec.base.base.delete_tid = trans.tid; + hammer_modify_inode(&trans, ip, + HAMMER_INODE_DELETED | HAMMER_INODE_TID); + } + + /* + * Sync the buffer cache + */ + if (ip->vp != NULL) + error = vfsync(ip->vp, waitfor, 1, NULL, NULL); + else + error = 0; + + /* + * Now sync related records + */ + if (error == 0) { + r = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL, + hammer_sync_inode_callback, NULL); + if (r < 0) + error = EIO; + } + + /* + * Now update the inode's on-disk inode-data and/or on-disk record. + */ + switch(ip->flags & (HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK)) { + case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK: + /* + * If deleted and on-disk, don't set any additional flags. + * the delete flag takes care of things. 
+ */ + break; + case HAMMER_INODE_DELETED: + /* + * Take care of the case where a deleted inode was never + * flushed to the disk in the first place. + */ + ip->flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY); + while (RB_ROOT(&ip->rec_tree)) + hammer_free_mem_record(RB_ROOT(&ip->rec_tree)); + break; + case HAMMER_INODE_ONDISK: + /* + * If already on-disk, do not set any additional flags. + */ + break; + default: + /* + * If not on-disk and not deleted, set both dirty flags + * to force an initial record to be written. + */ + ip->flags |= HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY; + break; + } + + /* + * If RDIRTY or DDIRTY is set, write out a new record. If the + * inode is already on-disk, the old record is marked as deleted. + */ + if (ip->flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY | + HAMMER_INODE_DELETED)) { + error = hammer_update_inode(&trans, ip); + } + hammer_commit_transaction(&trans); + hammer_unlock(&ip->lock); + return(error); } /* @@ -379,14 +576,14 @@ hammer_bread(hammer_cluster_t cluster, int32_t cloff, if (buffer == NULL || buffer->cluster != cluster || buffer->buf_no != buf_no) { if (buffer) { - hammer_unlock(&buffer->io.lock); + /*hammer_unlock(&buffer->io.lock);*/ hammer_rel_buffer(buffer, 0); } buffer = hammer_get_buffer(cluster, buf_no, 0, errorp); *bufferp = buffer; if (buffer == NULL) return(NULL); - hammer_lock_ex(&buffer->io.lock); + /*hammer_lock_ex(&buffer->io.lock);*/ } /* diff --git a/sys/vfs/hammer/hammer_io.c b/sys/vfs/hammer/hammer_io.c index 8f01efb887..cf1d0e2935 100644 --- a/sys/vfs/hammer/hammer_io.c +++ b/sys/vfs/hammer/hammer_io.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.3 2007/11/20 07:16:28 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.4 2007/11/26 05:03:11 dillon Exp $ */ /* * IO Primitives and buffer cache management @@ -110,6 +110,7 @@ hammer_io_disassociate(union hammer_io_structure *io) bdwrite(bp); else bqrelse(bp); + io->io.released = 1; } } @@ -230,6 +231,7 @@ hammer_io_release(struct hammer_io *io, int flush) } else { bdwrite(bp); io->modified = 0; + io->released = 1; } } else if (flush) { /* diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c index 5b3422fe3d..aff9ac83ff 100644 --- a/sys/vfs/hammer/hammer_object.c +++ b/sys/vfs/hammer/hammer_object.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.4 2007/11/20 22:55:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.5 2007/11/26 05:03:11 dillon Exp $ */ #include "hammer.h" @@ -309,6 +309,7 @@ hammer_ip_add_directory(struct hammer_transaction *trans, record = hammer_alloc_mem_record(trans, dip); + kprintf("add to directory dip %p\n", dip); bytes = ncp->nc_nlen; /* NOTE: terminating \0 is NOT included */ if (++trans->hmp->namekey_iterator == 0) ++trans->hmp->namekey_iterator; @@ -323,6 +324,7 @@ hammer_ip_add_directory(struct hammer_transaction *trans, record->rec.entry.obj_id = ip->obj_id; if (bytes <= sizeof(record->rec.entry.den_name)) { record->data = (void *)record->rec.entry.den_name; + record->flags |= HAMMER_RECF_EMBEDDED_DATA; } else { record->data = kmalloc(bytes, M_HAMMER, M_WAITOK); record->flags |= HAMMER_RECF_ALLOCDATA; @@ -330,7 +332,8 @@ hammer_ip_add_directory(struct hammer_transaction *trans, bcopy(ncp->nc_name, record->data, bytes); record->rec.entry.base.data_len = bytes; ++ip->ino_rec.ino_nlinks; - hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY); + hammer_modify_inode(trans, ip, + HAMMER_INODE_RDIRTY | HAMMER_INODE_TID); 
error = hammer_mem_add(trans, record); return(error); } @@ -369,40 +372,210 @@ hammer_ip_del_directory(struct hammer_transaction *trans, cursor->record->base.base.delete_tid = trans->tid; hammer_modify_node(cursor->node); hammer_modify_buffer(cursor->record_buffer); - } } /* - * One less link. Mark the inode and all of its records as deleted - * when the last link goes away. The inode will be automatically - * flushed when its last reference goes away. + * One less link. The file may still be open in the OS even after + * all links have gone away so we don't destroy the inode's data + * here. */ if (error == 0) { --ip->ino_rec.ino_nlinks; - if (ip->ino_rec.ino_nlinks == 0) - ip->ino_rec.base.base.delete_tid = trans->tid; - error = hammer_ip_delete_range(trans, ip, - HAMMER_MIN_KEY, HAMMER_MAX_KEY); - KKASSERT(RB_EMPTY(&ip->rec_tree)); - hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY); + hammer_modify_inode(trans, ip, + HAMMER_INODE_RDIRTY | HAMMER_INODE_TID); } return(error); } /* - * Add a data record to the filesystem. - * - * This is called via the strategy code, typically when the kernel wants to - * flush a buffer cache buffer, so this operation goes directly to the disk. + * Sync data from a buffer cache buffer (typically) to the filesystem. This + * is called via the strategy code from a cached data source. This code + * is responsible for actually writing a data record out to the disk. 
*/ int -hammer_ip_add_data(hammer_transaction_t trans, hammer_inode_t ip, +hammer_ip_sync_data(hammer_transaction_t trans, hammer_inode_t ip, int64_t offset, void *data, int bytes) { - panic("hammer_ip_add_data"); + struct hammer_cursor cursor; + hammer_record_ondisk_t rec; + union hammer_btree_elm elm; + void *bdata; + int error; + + error = hammer_init_cursor_ip(&cursor, ip); + if (error) + return(error); + cursor.key_beg.obj_id = ip->obj_id; + cursor.key_beg.key = offset + bytes; + cursor.key_beg.create_tid = trans->tid; + cursor.key_beg.delete_tid = 0; + cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; + cursor.flags = HAMMER_CURSOR_INSERT; + + /* + * Issue a lookup to position the cursor and locate the cluster + */ + error = hammer_btree_lookup(&cursor); + if (error == 0) { + kprintf("hammer_ip_sync_data: duplicate data at (%lld,%d)\n", + offset, bytes); + error = EIO; + } + if (error != ENOENT) + goto done; + + /* + * Allocate record and data space now that we know which cluster + * the B-Tree node ended up in. + */ + bdata = hammer_alloc_data(cursor.node->cluster, bytes, &error, + &cursor.data_buffer); + if (bdata == NULL) + goto done; + rec = hammer_alloc_record(cursor.node->cluster, &error, + &cursor.record_buffer); + if (rec == NULL) + goto fail1; + + /* + * Fill everything in and insert our B-Tree node. 
+ */ + rec->base.base = cursor.key_beg; + rec->base.data_crc = crc32(data, bytes); + rec->base.rec_id = 0; /* XXX */ + rec->base.data_offset = hammer_bclu_offset(cursor.data_buffer, bdata); + rec->base.data_len = bytes; + hammer_modify_buffer(cursor.record_buffer); + + bcopy(data, bdata, bytes); + hammer_modify_buffer(cursor.data_buffer); + + elm.leaf.base = cursor.key_beg; + elm.leaf.rec_offset = hammer_bclu_offset(cursor.record_buffer, rec); + elm.leaf.data_offset = rec->base.data_offset; + elm.leaf.data_len = bytes; + elm.leaf.data_crc = rec->base.data_crc; + + error = hammer_btree_insert(&cursor, &elm); + if (error == 0) + goto done; + + hammer_free_record_ptr(cursor.record_buffer, rec); +fail1: + hammer_free_data_ptr(cursor.data_buffer, bdata, bytes); +done: + hammer_done_cursor(&cursor); + return(error); +} + +/* + * Sync an in-memory record to the disk. this is typically called via fsync + * from a cached record source. This code is responsible for actually + * writing a record out to the disk. + */ +int +hammer_ip_sync_record(hammer_record_t record) +{ + struct hammer_cursor cursor; + hammer_record_ondisk_t rec; + union hammer_btree_elm elm; + void *bdata; + int error; + + error = hammer_init_cursor_ip(&cursor, record->ip); + if (error) + return(error); + cursor.key_beg = record->rec.base.base; + cursor.flags = HAMMER_CURSOR_INSERT; + + /* + * Issue a lookup to position the cursor and locate the cluster + */ + error = hammer_btree_lookup(&cursor); + if (error == 0) { + kprintf("hammer_ip_sync_record: duplicate rec at (%016llx)\n", + record->rec.base.base.key); + error = EIO; + } + if (error != ENOENT) + goto done; + + /* + * Allocate record and data space now that we know which cluster + * the B-Tree node ended up in. 
+ */ + if (record->data == NULL || + (record->flags & HAMMER_RECF_EMBEDDED_DATA)) { + bdata = record->data; + } else { + bdata = hammer_alloc_data(cursor.node->cluster, + record->rec.base.data_len, &error, + &cursor.data_buffer); + if (bdata == NULL) + goto done; + } + rec = hammer_alloc_record(cursor.node->cluster, &error, + &cursor.record_buffer); + if (rec == NULL) + goto fail1; + + /* + * Fill everything in and insert our B-Tree node. + * + * XXX assign rec_id here + */ + *rec = record->rec; + kprintf("record->rec %p data %p\n", &record->rec, record->data); + if (bdata) { + rec->base.data_crc = crc32(record->data, + record->rec.base.data_len); + if (record->flags & HAMMER_RECF_EMBEDDED_DATA) { + /* + * Data embedded in record + */ + rec->base.data_offset = ((char *)bdata - + (char *)&record->rec); + KKASSERT(rec->base.data_offset >= 0 && + rec->base.data_offset + rec->base.data_len < + sizeof(*rec)); + rec->base.data_offset += hammer_bclu_offset(cursor.record_buffer, rec); + } else { + /* + * Data separate from record + */ + rec->base.data_offset = hammer_bclu_offset(cursor.data_buffer,bdata); + bcopy(record->data, bdata, rec->base.data_len); + hammer_modify_buffer(cursor.data_buffer); + } + } + rec->base.rec_id = 0; /* XXX */ + + hammer_modify_buffer(cursor.record_buffer); + + elm.leaf.base = cursor.key_beg; + elm.leaf.rec_offset = hammer_bclu_offset(cursor.record_buffer, rec); + elm.leaf.data_offset = rec->base.data_offset; + elm.leaf.data_len = rec->base.data_len; + elm.leaf.data_crc = rec->base.data_crc; + + error = hammer_btree_insert(&cursor, &elm); + if (error == 0) + goto done; + + hammer_free_record_ptr(cursor.record_buffer, rec); +fail1: + if (record->data && (record->flags & HAMMER_RECF_EMBEDDED_DATA) == 0) { + hammer_free_data_ptr(cursor.data_buffer, bdata, + rec->base.data_len); + } +done: + hammer_done_cursor(&cursor); + kprintf("hammer_ip_sync_record_done %d\n", error); + return(error); } + /* * Add the record to the inode's rec_tree. 
The low 32 bits of a directory * entry's key is used to deal with hash collisions in the upper 32 bits. @@ -491,20 +664,32 @@ hammer_ip_first(hammer_cursor_t cursor, struct hammer_inode *ip) hammer_rel_mem_record(&cursor->iprec); /* - * Search the on-disk B-Tree + * Search the on-disk B-Tree. hammer_btree_lookup() only does an + * exact lookup so if we get ENOENT we have to call the iterate + * function to validate the first record after the begin key. + * + * The ATEDISK flag is used by hammer_btree_iterate to determine + * whether it must index forwards or not. */ if (ip->flags & HAMMER_INODE_ONDISK) { error = hammer_btree_lookup(cursor); - if (error && error != ENOENT) + if (error == ENOENT) { + cursor->flags &= ~HAMMER_CURSOR_ATEDISK; + error = hammer_btree_iterate(cursor); + } + if (error && error != ENOENT) return(error); if (error == 0) { - cursor->flags &= ~HAMMER_CURSOR_DISKEOF ; - cursor->flags &= ~HAMMER_CURSOR_ATEDISK ; + cursor->flags &= ~HAMMER_CURSOR_DISKEOF; + cursor->flags &= ~HAMMER_CURSOR_ATEDISK; + } else { + cursor->flags |= HAMMER_CURSOR_ATEDISK; } } /* - * Search the in-memory record list (Red-Black tree) + * Search the in-memory record list (Red-Black tree). Unlike the + * B-Tree search, mem_search checks for records in the range. 
*/ error = hammer_mem_search(cursor, ip); if (error && error != ENOENT) @@ -727,7 +912,8 @@ hammer_ip_delete_range(hammer_transaction_t trans, hammer_inode_t ip, hammer_free_mem_record(cursor.iprec); } else { - cursor.node->ondisk->elms[cursor.index].base.delete_tid = trans->tid; + cursor.node->ondisk-> + elms[cursor.index].base.delete_tid = trans->tid; cursor.record->base.base.delete_tid = trans->tid; hammer_modify_node(cursor.node); hammer_modify_buffer(cursor.record_buffer); diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c index 66a915a8c0..bc4c5b0db9 100644 --- a/sys/vfs/hammer/hammer_ondisk.c +++ b/sys/vfs/hammer/hammer_ondisk.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.5 2007/11/20 07:16:28 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.6 2007/11/26 05:03:11 dillon Exp $ */ /* * Manage HAMMER's on-disk structures. These routines are primarily @@ -150,7 +150,7 @@ hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2) { if (node1->node_offset < node2->node_offset) return(-1); - if (node1->node_offset < node2->node_offset) + if (node1->node_offset > node2->node_offset) return(1); return(0); } @@ -1411,10 +1411,12 @@ hammer_alloc_btree(hammer_cluster_t cluster, int *errorp) *errorp = ENOSPC; if (buffer) hammer_rel_buffer(buffer, 0); + hammer_modify_cluster(cluster); return(NULL); } } cluster->ondisk->idx_index = elm_no; + hammer_modify_cluster(cluster); /* * Load and return the B-Tree element @@ -1448,7 +1450,40 @@ hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes, void *item; /* - * Allocate a data element + * Deal with large data blocks. The blocksize is HAMMER_BUFSIZE + * for these allocations. 
+ */ + if ((bytes & HAMMER_BUFMASK) == 0) { + nblks = bytes / HAMMER_BUFSIZE; + /* only one block allowed for now (so buffer can hold it) */ + KKASSERT(nblks == 1); + + buf_no = hammer_alist_alloc_fwd(&cluster->alist_master, + nblks, + cluster->ondisk->idx_ldata); + if (buf_no == HAMMER_ALIST_BLOCK_NONE) { + buf_no = hammer_alist_alloc_fwd(&cluster->alist_master, + nblks, + 0); + } + hammer_modify_cluster(cluster); + if (buf_no == HAMMER_ALIST_BLOCK_NONE) { + *errorp = ENOSPC; + return(NULL); + } + cluster->ondisk->idx_ldata = buf_no; + buffer = *bufferp; + *bufferp = hammer_get_buffer(cluster, buf_no, -1, errorp); + if (buffer) + hammer_rel_buffer(buffer, 0); + buffer = *bufferp; + kprintf("allocate large buffer %p (%d)\n", buffer, buf_no); + return(buffer->ondisk); + } + + /* + * Allocate a data element. The block size is HAMMER_DATA_BLKSIZE + * (64 bytes) for these allocations. */ nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK; live = &cluster->alist_mdata; @@ -1462,10 +1497,12 @@ hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes, elm_no = hammer_alist_alloc(live, nblks); if (elm_no == HAMMER_ALIST_BLOCK_NONE) { *errorp = ENOSPC; + hammer_modify_cluster(cluster); return(NULL); } } cluster->ondisk->idx_index = elm_no; + hammer_modify_cluster(cluster); /* * Load and return the B-Tree element @@ -1479,7 +1516,7 @@ hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes, buffer = hammer_get_buffer(cluster, buf_no, 0, errorp); *bufferp = buffer; } - KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_BTREE); + KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA); item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK]; bzero(item, nblks * HAMMER_DATA_BLKSIZE); *errorp = 0; @@ -1510,10 +1547,12 @@ hammer_alloc_record(hammer_cluster_t cluster, elm_no = hammer_alist_alloc(live, 1); if (elm_no == HAMMER_ALIST_BLOCK_NONE) { *errorp = ENOSPC; + hammer_modify_cluster(cluster); return(NULL); } } cluster->ondisk->idx_record = 
elm_no; + hammer_modify_cluster(cluster); /* * Load and return the B-Tree element @@ -1549,6 +1588,7 @@ hammer_free_btree_ptr(hammer_buffer_t buffer, hammer_node_ondisk_t node) elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS; live = &buffer->cluster->alist_btree; hammer_alist_free(live, elm_no, 1); + hammer_modify_cluster(buffer->cluster); } void @@ -1558,6 +1598,15 @@ hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes) int32_t nblks; hammer_alist_t live; + if ((bytes & HAMMER_BUFMASK) == 0) { + nblks = bytes / HAMMER_BUFSIZE; + KKASSERT(nblks == 1 && data == (void *)buffer->ondisk); + hammer_alist_free(&buffer->cluster->alist_master, + buffer->buf_no, nblks); + hammer_modify_cluster(buffer->cluster); + return; + } + elm_no = ((char *)data - (char *)buffer->ondisk->data.data) / HAMMER_DATA_BLKSIZE; KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES); @@ -1565,6 +1614,7 @@ hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes) nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK; live = &buffer->cluster->alist_mdata; hammer_alist_free(live, elm_no, nblks); + hammer_modify_cluster(buffer->cluster); } void @@ -1578,6 +1628,7 @@ hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec) elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS; live = &buffer->cluster->alist_record; hammer_alist_free(live, elm_no, 1); + hammer_modify_cluster(buffer->cluster); } void @@ -1594,6 +1645,7 @@ hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset) KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0); elm_no += fsbuf_offset / blksize; hammer_alist_free(live, elm_no, 1); + hammer_modify_cluster(cluster); } void @@ -1603,8 +1655,18 @@ hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes) int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK; hammer_alist_t live; int32_t elm_no; + int32_t buf_no; int32_t nblks; + if ((bytes & HAMMER_BUFMASK) == 0) { + nblks = bytes / HAMMER_BUFSIZE; + 
KKASSERT(nblks == 1 && (bclu_offset & HAMMER_BUFMASK) == 0); + buf_no = bclu_offset / HAMMER_BUFSIZE; + hammer_alist_free(&cluster->alist_master, buf_no, nblks); + hammer_modify_cluster(cluster); + return; + } + elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS; fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]); live = &cluster->alist_mdata; @@ -1612,6 +1674,7 @@ hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes) KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0); elm_no += fsbuf_offset / blksize; hammer_alist_free(live, elm_no, nblks); + hammer_modify_cluster(cluster); } void @@ -1628,6 +1691,7 @@ hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset) KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0); elm_no += fsbuf_offset / blksize; hammer_alist_free(live, elm_no, 1); + hammer_modify_cluster(cluster); } @@ -1662,6 +1726,7 @@ alloc_new_buffer(hammer_cluster_t cluster, hammer_alist_t live, } } KKASSERT(buf_no != HAMMER_ALIST_BLOCK_NONE); /* XXX */ + hammer_modify_cluster(cluster); /* * The new buffer must be initialized (type != 0) regardless of @@ -1870,6 +1935,7 @@ buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix, if (r != HAMMER_ALIST_BLOCK_NONE) r += blk; *fullp = hammer_alist_isfull(&buffer->alist); + hammer_modify_buffer(buffer); hammer_rel_buffer(buffer, 0); } else { r = HAMMER_ALIST_BLOCK_NONE; @@ -1896,6 +1962,7 @@ buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix, if (r != HAMMER_ALIST_BLOCK_NONE) r += blk; *fullp = hammer_alist_isfull(&buffer->alist); + hammer_modify_buffer(buffer); hammer_rel_buffer(buffer, 0); } else { r = HAMMER_ALIST_BLOCK_NONE; @@ -1919,6 +1986,7 @@ buffer_alist_free(void *info, int32_t blk, int32_t radix, KKASSERT(buffer->ondisk->head.buf_type != 0); hammer_alist_free(&buffer->alist, base_blk, count); *emptyp = hammer_alist_isempty(&buffer->alist); + hammer_modify_buffer(buffer); hammer_rel_buffer(buffer, 0); } else { 
*emptyp = 0; @@ -1980,6 +2048,7 @@ super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix, if (r != HAMMER_ALIST_BLOCK_NONE) r += blk; *fullp = hammer_alist_isfull(&supercl->alist); + hammer_modify_supercl(supercl); hammer_rel_supercl(supercl, 0); } else { r = HAMMER_ALIST_BLOCK_NONE; @@ -2005,6 +2074,7 @@ super_alist_alloc_rev(void *info, int32_t blk, int32_t radix, if (r != HAMMER_ALIST_BLOCK_NONE) r += blk; *fullp = hammer_alist_isfull(&supercl->alist); + hammer_modify_supercl(supercl); hammer_rel_supercl(supercl, 0); } else { r = HAMMER_ALIST_BLOCK_NONE; @@ -2027,6 +2097,7 @@ super_alist_free(void *info, int32_t blk, int32_t radix, if (supercl) { hammer_alist_free(&supercl->alist, base_blk, count); *emptyp = hammer_alist_isempty(&supercl->alist); + hammer_modify_supercl(supercl); hammer_rel_supercl(supercl, 0); } else { *emptyp = 0; diff --git a/sys/vfs/hammer/hammer_subs.c b/sys/vfs/hammer/hammer_subs.c index 9a7f3e3d44..d4a579933f 100644 --- a/sys/vfs/hammer/hammer_subs.c +++ b/sys/vfs/hammer/hammer_subs.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.5 2007/11/20 22:55:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.6 2007/11/26 05:03:11 dillon Exp $ */ /* * HAMMER structural locking @@ -48,9 +48,11 @@ hammer_lock_ex(struct hammer_lock *lock) KKASSERT(lock->refs > 0); crit_enter(); if (lock->locktd != td) { - while (lock->locktd != NULL) { + while (lock->locktd != NULL || lock->lockcount) { lock->wanted = 1; + kprintf("hammer_lock_ex: held by %p\n", lock->locktd); tsleep(lock, 0, "hmrlck", 0); + kprintf("hammer_lock_ex: try again\n"); } lock->locktd = td; } @@ -69,7 +71,7 @@ hammer_lock_ex_try(struct hammer_lock *lock) KKASSERT(lock->refs > 0); crit_enter(); if (lock->locktd != td) { - if (lock->locktd != NULL) + if (lock->locktd != NULL || lock->lockcount) return(EAGAIN); lock->locktd = td; } diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c index f39c6279e5..74628c1ff9 100644 --- a/sys/vfs/hammer/hammer_vfsops.c +++ b/sys/vfs/hammer/hammer_vfsops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.5 2007/11/20 07:16:28 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.6 2007/11/26 05:03:11 dillon Exp $ */ #include @@ -150,6 +150,12 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, mp->mnt_kern_flag |= MNTK_FSMID; mp->mnt_stat.f_fsid.val[0] = 0; /* XXX */ mp->mnt_stat.f_fsid.val[1] = 0; /* XXX */ + + /* + * note: f_iosize is used by vnode_pager_haspage() when constructing + * its VOP_BMAP call. 
+ */ + mp->mnt_stat.f_iosize = HAMMER_BUFSIZE; vfs_getnewfsid(mp); /* XXX */ mp->mnt_maxsymlinklen = 255; mp->mnt_flag |= MNT_LOCAL; diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index 09b55b0084..ee376833bb 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.5 2007/11/20 22:55:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.6 2007/11/26 05:03:11 dillon Exp $ */ #include @@ -43,6 +43,7 @@ #include #include #include +#include #include "hammer.h" /* @@ -78,6 +79,8 @@ static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *); struct vop_ops hammer_vnode_vops = { .vop_default = vop_defaultop, .vop_fsync = hammer_vop_fsync, + .vop_getpages = vop_stdgetpages, + .vop_putpages = vop_stdputpages, .vop_read = hammer_vop_read, .vop_write = hammer_vop_write, .vop_access = hammer_vop_access, @@ -127,7 +130,12 @@ static int hammer_vop_fsync(struct vop_fsync_args *ap) { - return EOPNOTSUPP; + hammer_inode_t ip; + int error; + + ip = VTOI(ap->a_vp); + error = hammer_sync_inode(ip, ap->a_waitfor, 0); + return (error); } /* @@ -138,7 +146,7 @@ int hammer_vop_read(struct vop_read_args *ap) { struct hammer_transaction trans; - struct hammer_inode *ip; + hammer_inode_t ip; off_t offset; struct buf *bp; struct uio *uio; @@ -160,14 +168,18 @@ hammer_vop_read(struct vop_read_args *ap) uio = ap->a_uio; while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_rec.ino_size) { offset = uio->uio_offset & HAMMER_BUFMASK; +#if 0 error = cluster_read(ap->a_vp, ip->ino_rec.ino_size, uio->uio_offset - offset, HAMMER_BUFSIZE, MAXBSIZE, seqcount, &bp); +#endif + error = bread(ap->a_vp, uio->uio_offset - offset, + HAMMER_BUFSIZE, &bp); if (error) { brelse(bp); break; } - bp->b_flags |= B_CLUSTEROK; + /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */ n = 
HAMMER_BUFSIZE - offset; if (n > uio->uio_resid) n = uio->uio_resid; @@ -200,6 +212,7 @@ hammer_vop_write(struct vop_write_args *ap) struct buf *bp; int error; int n; + int flags; if (ap->a_vp->v_type != VREG) return (EINVAL); @@ -229,14 +242,44 @@ hammer_vop_write(struct vop_write_args *ap) */ while (uio->uio_resid > 0) { offset = uio->uio_offset & HAMMER_BUFMASK; - if (offset == 0 && uio->uio_resid >= HAMMER_BUFSIZE) { + if (uio->uio_segflg == UIO_NOCOPY) { + /* + * Issuing a write with the same data backing the + * buffer. Instantiate the buffer to collect the + * backing vm pages, then read-in any missing bits. + * + * This case is used by vop_stdputpages(). + */ + bp = getblk(ap->a_vp, uio->uio_offset, HAMMER_BUFSIZE, + 0, 0); + if ((bp->b_flags & B_CACHE) == 0) { + bqrelse(bp); + error = bread(ap->a_vp, + uio->uio_offset - offset, + HAMMER_BUFSIZE, &bp); + if (error) { + brelse(bp); + break; + } + } + } else if (offset == 0 && uio->uio_resid >= HAMMER_BUFSIZE) { + /* + * entirely overwrite the buffer + */ bp = getblk(ap->a_vp, uio->uio_offset, HAMMER_BUFSIZE, 0, 0); } else if (offset == 0 && uio->uio_offset >= ip->ino_rec.ino_size) { + /* + * XXX + */ bp = getblk(ap->a_vp, uio->uio_offset, HAMMER_BUFSIZE, 0, 0); vfs_bio_clrbuf(bp); } else { + /* + * Partial overwrite, read in any missing bits then + * replace the portion being written. 
+ */ error = bread(ap->a_vp, uio->uio_offset - offset, HAMMER_BUFSIZE, &bp); if (error) { @@ -252,13 +295,17 @@ hammer_vop_write(struct vop_write_args *ap) brelse(bp); break; } - bp->b_flags |= B_CLUSTEROK; + /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */ if (ip->ino_rec.ino_size < uio->uio_offset) { ip->ino_rec.ino_size = uio->uio_offset; ip->ino_rec.ino_mtime = trans.tid; - hammer_modify_inode(&trans, ip, - HAMMER_INODE_RDIRTY | HAMMER_INODE_ITIMES); + flags = HAMMER_INODE_RDIRTY | HAMMER_INODE_ITIMES | + HAMMER_INODE_TID; + vnode_pager_setsize(ap->a_vp, ip->ino_rec.ino_size); + } else { + flags = HAMMER_INODE_TID; } + hammer_modify_inode(&trans, ip, flags); if (ap->a_ioflag & IO_SYNC) { bwrite(bp); } else if (ap->a_ioflag & IO_DIRECT) { @@ -920,12 +967,12 @@ hammer_vop_nrename(struct vop_nrename_args *ap) cache_setvp(ap->a_tnch, ip->vp); } done: + hammer_done_cursor(&cursor); if (error == 0) { hammer_commit_transaction(&trans); } else { hammer_abort_transaction(&trans); } - hammer_done_cursor(&cursor); return (error); } @@ -1001,13 +1048,22 @@ hammer_vop_setattr(struct vop_setattr_args *ap) modflags |= HAMMER_INODE_DDIRTY; } } - if (vap->va_size != VNOVAL) { + if (vap->va_size != VNOVAL && ip->ino_rec.ino_size != vap->va_size) { switch(ap->a_vp->v_type) { case VREG: + if (vap->va_size < ip->ino_rec.ino_size) { + vtruncbuf(ap->a_vp, vap->va_size, + HAMMER_BUFSIZE); + } else if (vap->va_size > ip->ino_rec.ino_size) { + vnode_pager_setsize(ap->a_vp, vap->va_size); + } + /* fall through */ case VDATABASE: error = hammer_ip_delete_range(&trans, ip, vap->va_size, 0x7FFFFFFFFFFFFFFFLL); + ip->ino_rec.ino_size = vap->va_size; + modflags |= HAMMER_INODE_RDIRTY; break; default: error = EINVAL; @@ -1034,8 +1090,9 @@ done: if (error) { hammer_abort_transaction(&trans); } else { - if (modflags) - hammer_modify_inode(&trans, ip, modflags); + if (modflags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY)) + modflags |= HAMMER_INODE_TID; + hammer_modify_inode(&trans, ip, 
modflags); hammer_commit_transaction(&trans); } return (error); @@ -1131,13 +1188,15 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap) /* * Key range (begin and end inclusive) to scan. Note that the key's - * stored in the actual records represent the + * stored in the actual records represent BASE+LEN, not BASE. The + * first record containing bio_offset will have a key > bio_offset. */ cursor.key_beg.obj_id = ip->obj_id; cursor.key_beg.create_tid = ip->obj_asof; cursor.key_beg.delete_tid = 0; cursor.key_beg.obj_type = 0; - cursor.key_beg.key = bio->bio_offset; + cursor.key_beg.key = bio->bio_offset + 1; + kprintf("READ AT OFFSET %lld\n", bio->bio_offset); cursor.key_end = cursor.key_beg; if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) { @@ -1145,7 +1204,7 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap) cursor.key_end.rec_type = HAMMER_RECTYPE_DB; cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL; } else { - ran_end = bio->bio_offset + bp->b_bufsize - 1; + ran_end = bio->bio_offset + bp->b_bufsize; cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; cursor.key_end.rec_type = HAMMER_RECTYPE_DATA; if (ran_end + MAXPHYS < ran_end) @@ -1164,7 +1223,8 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap) rec = cursor.record; base = &rec->base.base; - rec_offset = base->key - rec->data.base.data_len + 1; + rec_offset = base->key - rec->data.base.data_len; + kprintf("record offset %lld\n", rec_offset); /* * Calculate the gap, if any, and zero-fill it. 
@@ -1188,6 +1248,7 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap) */ roff = -n; n = rec->data.base.data_len - roff; + kprintf("roff = %d datalen %d\n", roff, rec->data.base.data_len); KKASSERT(n > 0); if (n > bp->b_bufsize - boff) n = bp->b_bufsize - boff; @@ -1232,6 +1293,8 @@ hammer_vop_strategy_write(struct vop_strategy_args *ap) struct buf *bp; int error; + kprintf("vop_strategy_write\n"); + bio = ap->a_bio; bp = bio->bio_buf; ip = ap->a_vp->v_data; @@ -1248,13 +1311,14 @@ hammer_vop_strategy_write(struct vop_strategy_args *ap) error = hammer_ip_delete_range(&trans, ip, bio->bio_offset, bio->bio_offset + bp->b_bufsize - 1); } + kprintf("delete_range %d\n", error); /* * Add a single record to cover the write */ if (error == 0) { - error = hammer_ip_add_data(&trans, ip, bio->bio_offset, - bp->b_data, bp->b_bufsize); + error = hammer_ip_sync_data(&trans, ip, bio->bio_offset, + bp->b_data, bp->b_bufsize); } /* -- 2.41.0