HAMMER 6/many - memory->disk flush, single-cluster sync to disk, more vnops.
author     Matthew Dillon <dillon@dragonflybsd.org>
           Mon, 26 Nov 2007 05:03:11 +0000 (05:03 +0000)
committer  Matthew Dillon <dillon@dragonflybsd.org>
           Mon, 26 Nov 2007 05:03:11 +0000 (05:03 +0000)
Get most operations within a single 64M cluster working.  There are still
numerous issues with the B-Tree algorithms so readbacks generate bad data at
the moment.

sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_btree.c
sys/vfs/hammer/hammer_disk.h
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_io.c
sys/vfs/hammer/hammer_object.c
sys/vfs/hammer/hammer_ondisk.c
sys/vfs/hammer/hammer_subs.c
sys/vfs/hammer/hammer_vfsops.c
sys/vfs/hammer/hammer_vnops.c

diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h
index eef3940..4c6fef8 100644
--- a/sys/vfs/hammer/hammer.h
+++ b/sys/vfs/hammer/hammer.h
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.8 2007/11/20 22:55:40 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.9 2007/11/26 05:03:11 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -160,6 +160,8 @@ typedef struct hammer_inode *hammer_inode_t;
 #define HAMMER_INODE_ITIMES    0x0004  /* in-memory mtime/atime modified */
 #define HAMMER_INODE_ONDISK    0x0010  /* inode is on-disk (else not yet) */
 #define HAMMER_INODE_FLUSH     0x0020  /* flush on last ref */
+#define HAMMER_INODE_TID       0x0040  /* update in-memory last_tid */
+#define HAMMER_INODE_DELETED   0x0080  /* inode ready for deletion */
 
 #define HAMMER_MAX_INODE_CURSORS       4
 
@@ -187,6 +189,7 @@ typedef struct hammer_record *hammer_record_t;
 #define HAMMER_RECF_ALLOCDATA          0x0001
 #define HAMMER_RECF_ONRBTREE           0x0002
 #define HAMMER_RECF_DELETED            0x0004
+#define HAMMER_RECF_EMBEDDED_DATA      0x0008
 
 /*
  * Structures used to internally represent a volume and a cluster
@@ -399,6 +402,7 @@ int hammer_get_vnode(struct hammer_inode *ip, int lktype,
                        struct vnode **vpp);
 struct hammer_inode *hammer_get_inode(hammer_mount_t hmp,
                        u_int64_t obj_id, int *errorp);
+int    hammer_update_inode(hammer_transaction_t trans, hammer_inode_t ip);
 void   hammer_put_inode(struct hammer_inode *ip);
 void   hammer_put_inode_ref(struct hammer_inode *ip);
 
@@ -454,6 +458,9 @@ int hammer_btree_iterate(hammer_cursor_t cursor);
 int    hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm);
 int    hammer_btree_delete(hammer_cursor_t cursor);
 int    hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2);
+int    hammer_btree_range_cmp(hammer_cursor_t cursor, hammer_base_elm_t key2);
+void   hammer_print_btree_node(hammer_node_ondisk_t ondisk);
+void   hammer_print_btree_elm(hammer_btree_elm_t elm, u_int8_t type, int i);
 
 void   *hammer_bread(struct hammer_cluster *cluster, int32_t cloff,
                      u_int64_t buf_type, int *errorp,
@@ -528,6 +535,7 @@ int  hammer_create_inode(struct hammer_transaction *trans, struct vattr *vap,
                        struct ucred *cred, struct hammer_inode *dip,
                        struct hammer_inode **ipp);
 void hammer_rel_inode(hammer_inode_t ip, int flush);
+int hammer_sync_inode(hammer_inode_t ip, int waitfor, int handle_delete);
 
 int  hammer_ip_add_directory(struct hammer_transaction *trans,
                        hammer_inode_t dip, struct namecache *ncp,
@@ -537,9 +545,10 @@ int  hammer_ip_del_directory(struct hammer_transaction *trans,
                        hammer_inode_t ip);
 int  hammer_ip_delete_range(struct hammer_transaction *trans,
                        hammer_inode_t ip, int64_t ran_beg, int64_t ran_end);
-int  hammer_ip_add_data(struct hammer_transaction *trans,
+int  hammer_ip_sync_data(struct hammer_transaction *trans,
                        hammer_inode_t ip, int64_t offset,
                        void *data, int bytes);
+int  hammer_ip_sync_record(hammer_record_t rec);
 
 int hammer_io_read(struct vnode *devvp, struct hammer_io *io);
 int hammer_io_new(struct vnode *devvp, struct hammer_io *io);
diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c
index d3d2d98..f2c9fcf 100644
--- a/sys/vfs/hammer/hammer_btree.c
+++ b/sys/vfs/hammer/hammer_btree.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.5 2007/11/20 07:16:28 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.6 2007/11/26 05:03:11 dillon Exp $
  */
 
 /*
@@ -129,8 +129,10 @@ hammer_btree_iterate(hammer_cursor_t cursor)
        node = cursor->node->ondisk;
        if (node == NULL)
                return(ENOENT);
-       if (cursor->index < node->count)
+       if (cursor->index < node->count && 
+           (cursor->flags & HAMMER_CURSOR_ATEDISK)) {
                ++cursor->index;
+       }
 
        /*
         * Loop until an element is found or we are done.
@@ -228,7 +230,7 @@ hammer_btree_iterate(hammer_cursor_t cursor)
                 * old or too new but does not terminate the search.
                 */
                elm = &node->elms[cursor->index];
-               r = hammer_btree_cmp(&cursor->key_end, &elm->base);
+               r = hammer_btree_range_cmp(cursor, &elm->base);
                if (r == -1 || r == 1) {
                        ++cursor->index;
                        continue;
@@ -281,6 +283,7 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags)
        hammer_node_ondisk_t node;
        hammer_btree_elm_t elm;
        hammer_cluster_t cluster;
+       u_int64_t buf_type;
        int32_t cloff;
        int error;
 
@@ -308,11 +311,23 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags)
        if ((flags & HAMMER_CURSOR_GET_DATA) && error == 0) {
                if ((cloff ^ elm->leaf.data_offset) & ~HAMMER_BUFMASK) {
                        /*
-                        * Data in different buffer than record
+                        * The data is not in the same buffer as the last
+                        * record we cached, but it could still be embedded
+                        * in a record.  Note that we may not have loaded the
+                        * record's buffer above, depending on flags.
                         */
+                       if ((elm->leaf.rec_offset ^ elm->leaf.data_offset) &
+                           ~HAMMER_BUFMASK) {
+                               if (elm->leaf.data_len & HAMMER_BUFMASK)
+                                       buf_type = HAMMER_FSBUF_DATA;
+                               else
+                                       buf_type = 0;   /* pure data buffer */
+                       } else {
+                               buf_type = HAMMER_FSBUF_RECORDS;
+                       }
                        cursor->data = hammer_bread(cluster,
                                                  elm->leaf.data_offset,
-                                                 HAMMER_FSBUF_DATA, &error,
+                                                 buf_type, &error,
                                                  &cursor->data_buffer);
                } else {
                        /*
@@ -321,6 +336,8 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags)
                         * though we don't use it in this case, in case
                         * other records extracted during an iteration
                         * go back to it.
+                        *
+                        * Just assume the buffer type is correct.
                         */
                        cursor->data = (void *)
                                ((char *)cursor->record_buffer->ondisk +
@@ -387,7 +404,12 @@ hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm)
        ++node->count;
        hammer_modify_node(cursor->node);
 
-       if ((parent = cursor->parent->ondisk) != NULL) {
+       /*
+        * Adjust the sub-tree count in the parent.  Note that the parent
+        * may be in a different cluster.
+        */
+       if (cursor->parent) {
+               parent = cursor->parent->ondisk;
                i = cursor->parent_index;
                ++parent->elms[i].internal.subtree_count;
                KKASSERT(parent->elms[i].internal.subtree_count <= node->count);
@@ -560,6 +582,8 @@ btree_search(hammer_cursor_t cursor, int flags)
         */
        KKASSERT(cursor->node != NULL && cursor->node->cluster == cluster);
 
+/*     hammer_print_btree_node(cursor->node->ondisk);*/
+
        /*
         * If we are inserting we can't start at a full node if the parent
         * is also full (because there is no way to split the node),
@@ -903,6 +927,7 @@ btree_split_internal(hammer_cursor_t cursor)
         * a new root its parent pointer may have changed.
         */
        elm->internal.subtree_offset = 0;
+       ondisk->count = split;
 
        /*
         * Insert the separator into the parent, fixup the parent's
@@ -1060,9 +1085,10 @@ btree_split_leaf(hammer_cursor_t cursor)
        /*
         * Cleanup the original node.  Because this is a leaf node and
         * leaf nodes do not have a right-hand boundary, there
-        * aren't any special edge cases to clean up.
+        * aren't any special edge cases to clean up.  We just fixup the
+        * count.
         */
-       /* nothing to do */
+       ondisk->count = split;
 
        /*
         * Insert the separator into the parent, fixup the parent's
@@ -1113,7 +1139,6 @@ btree_split_leaf(hammer_cursor_t cursor)
        if (cursor->index >= split) {
                cursor->parent_index = parent_index + 1;
                cursor->index -= split;
-               cursor->node = new_leaf;
                hammer_unlock(&cursor->node->lock);
                hammer_rel_node(cursor->node);
                cursor->node = new_leaf;
@@ -1656,6 +1681,65 @@ hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2)
        return(0);
 }
 
+/*
+ * Compare the element against the cursor's beginning and ending keys
+ */
+int
+hammer_btree_range_cmp(hammer_cursor_t cursor, hammer_base_elm_t key2)
+{
+       /*
+        * A cursor->key_beg.obj_id of 0 matches any object id
+        */
+       if (cursor->key_beg.obj_id) {
+               if (cursor->key_end.obj_id < key2->obj_id)
+                       return(-4);
+               if (cursor->key_beg.obj_id > key2->obj_id)
+                       return(4);
+       }
+
+       /*
+        * A cursor->key_beg.rec_type of 0 matches any record type.
+        */
+       if (cursor->key_beg.rec_type) {
+               if (cursor->key_end.rec_type < key2->rec_type)
+                       return(-3);
+               if (cursor->key_beg.rec_type > key2->rec_type)
+                       return(3);
+       }
+
+       /*
+        * There is no special case for key.  0 means 0.
+        */
+       if (cursor->key_end.key < key2->key)
+               return(-2);
+       if (cursor->key_beg.key > key2->key)
+               return(2);
+
+       /*
+        * This test has a number of special cases.  create_tid in key1 is
+        * the as-of transaction id, and delete_tid in key1 is NOT USED.
+        *
+        * A key1->create_tid of 0 matches any record regardless of when
+        * it was created or destroyed.  0xFFFFFFFFFFFFFFFFULL should be
+        * used to search for the most current state of the object.
+        *
+        * key2 refers to an actual HAMMER record, so key2->create_tid will
+        * never be 0.  key2->delete_tid is the deletion transaction id, or
+        * 0 if the record has not yet been deleted.
+        *
+        * NOTE: only key_beg.create_tid is used for create_tid; we can
+        * only do as-of scans at the moment.
+        */
+       if (cursor->key_beg.create_tid) {
+               if (cursor->key_beg.create_tid < key2->create_tid)
+                       return(-1);
+               if (key2->delete_tid && cursor->key_beg.create_tid >= key2->delete_tid)
+                       return(1);
+       }
+
+       return(0);
+}
+
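
The create_tid/delete_tid checks above encode HAMMER's as-of visibility
rule.  A minimal stand-alone sketch of that rule, assuming the
hammer_base_elm field semantics described in the comment (an
illustration, not code from this commit):

    #include <stdint.h>

    /*
     * A record is visible at as-of transaction id 'asof' when it was
     * created at or before 'asof' and not yet deleted as of 'asof'.
     * A delete_tid of 0 means the record is still live.
     */
    static int
    record_visible_asof(uint64_t create_tid, uint64_t delete_tid,
                        uint64_t asof)
    {
            if (create_tid > asof)
                    return (0);     /* too new: created after 'asof' */
            if (delete_tid != 0 && delete_tid <= asof)
                    return (0);     /* too old: deleted at or before 'asof' */
            return (1);
    }
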
 /*
  * Create a separator halfway between key1 and key2.  For fields just
  * one unit apart, the separator will match key2.
@@ -1719,3 +1803,60 @@ btree_max_elements(u_int8_t type)
 }
 #endif
 
+void
+hammer_print_btree_node(hammer_node_ondisk_t ondisk)
+{
+       hammer_btree_elm_t elm;
+       int i;
+
+       kprintf("node %p count=%d parent=%d type=%c\n",
+               ondisk, ondisk->count, ondisk->parent, ondisk->type);
+
+       /*
+        * Dump both boundary elements if an internal node
+        */
+       if (ondisk->type == HAMMER_BTREE_TYPE_INTERNAL) {
+               for (i = 0; i <= ondisk->count; ++i) {
+                       elm = &ondisk->elms[i];
+                       hammer_print_btree_elm(elm, ondisk->type, i);
+               }
+       } else {
+               for (i = 0; i < ondisk->count; ++i) {
+                       elm = &ondisk->elms[i];
+                       hammer_print_btree_elm(elm, ondisk->type, i);
+               }
+       }
+}
+
+void
+hammer_print_btree_elm(hammer_btree_elm_t elm, u_int8_t type, int i)
+{
+       kprintf("  %2d", i);
+       kprintf("\tobjid        = %016llx\n", elm->base.obj_id);
+       kprintf("\tkey          = %016llx\n", elm->base.key);
+       kprintf("\tcreate_tid   = %016llx\n", elm->base.create_tid);
+       kprintf("\tdelete_tid   = %016llx\n", elm->base.delete_tid);
+       kprintf("\trec_type     = %04x\n", elm->base.rec_type);
+       kprintf("\tobj_type     = %02x\n", elm->base.obj_type);
+       kprintf("\tsubtree_type = %02x\n", elm->subtree_type);
+
+       if (type == HAMMER_BTREE_TYPE_INTERNAL) {
+               if (elm->internal.rec_offset) {
+                       kprintf("\tcluster_rec  = %08x\n",
+                               elm->internal.rec_offset);
+                       kprintf("\tcluster_id   = %08x\n",
+                               elm->internal.subtree_cluid);
+                       kprintf("\tvolno        = %08x\n",
+                               elm->internal.subtree_volno);
+               } else {
+                       kprintf("\tsubtree_off  = %08x\n",
+                               elm->internal.subtree_offset);
+               }
+               kprintf("\tsubtree_count= %d\n", elm->internal.subtree_count);
+       } else {
+               kprintf("\trec_offset   = %08x\n", elm->leaf.rec_offset);
+               kprintf("\tdata_offset  = %08x\n", elm->leaf.data_offset);
+               kprintf("\tdata_len     = %08x\n", elm->leaf.data_len);
+               kprintf("\tdata_crc     = %08x\n", elm->leaf.data_crc);
+       }
+}
diff --git a/sys/vfs/hammer/hammer_disk.h b/sys/vfs/hammer/hammer_disk.h
index b318806..11753a7 100644
--- a/sys/vfs/hammer/hammer_disk.h
+++ b/sys/vfs/hammer/hammer_disk.h
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.8 2007/11/20 22:55:40 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.9 2007/11/26 05:03:11 dillon Exp $
  */
 
 #ifndef _SYS_UUID_H_
@@ -318,7 +318,7 @@ struct hammer_cluster_ondisk {
        int32_t idx_data;       /* data append point (element no) */
        int32_t idx_index;      /* index append point (element no) */
        int32_t idx_record;     /* record prepend point (element no) */
-       u_int32_t idx_reserved03;
+       int32_t idx_ldata;      /* large block data append pt (buf_no) */
 
        /* 
         * Specify the range of information stored in this cluster as two
diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c
index ea7d1ad..fcb8d9c 100644
--- a/sys/vfs/hammer/hammer_inode.c
+++ b/sys/vfs/hammer/hammer_inode.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.6 2007/11/20 22:55:40 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.7 2007/11/26 05:03:11 dillon Exp $
  */
 
 #include "hammer.h"
@@ -43,8 +43,22 @@ hammer_vop_inactive(struct vop_inactive_args *ap)
 {
        struct hammer_inode *ip = VTOI(ap->a_vp);
 
-       if (ip == NULL)
+       /*
+        * Degenerate case
+        */
+       if (ip == NULL) {
                vrecycle(ap->a_vp);
+               return(0);
+       }
+
+       /*
+        * If the inode no longer has any references we recover its
+        * in-memory resources immediately.
+        */
+       if (ip->ino_rec.ino_nlinks == 0 &&
+           (ip->hmp->mp->mnt_flag & MNT_RDONLY) == 0) {
+               hammer_sync_inode(ip, MNT_NOWAIT, 1);
+       }
        return(0);
 }
 
@@ -55,6 +69,10 @@ hammer_vop_reclaim(struct vop_reclaim_args *ap)
        struct vnode *vp;
 
        vp = ap->a_vp;
+
+       /*
+        * Release the vnode association and ask that the inode be flushed.
+        */
        if ((ip = vp->v_data) != NULL) {
                vp->v_data = NULL;
                ip->vp = NULL;
@@ -214,6 +232,7 @@ loop:
                        kfree(ip, M_HAMMER);
                        goto loop;
                }
+               ip->flags |= HAMMER_INODE_ONDISK;
        } else {
                kfree(ip, M_HAMMER);
                ip = NULL;
@@ -297,6 +316,59 @@ hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
        return(0);
 }
 
+int
+hammer_update_inode(hammer_transaction_t trans, hammer_inode_t ip)
+{
+       struct hammer_cursor cursor;
+       hammer_record_t record;
+       int error;
+
+       /*
+        * Locate the record on-disk and mark it as deleted
+        *
+        * XXX Update the inode record and data in-place if the retention
+        * policy allows it.
+        */
+       error = 0;
+
+       if (ip->flags & HAMMER_INODE_ONDISK) {
+               hammer_init_cursor_ip(&cursor, ip);
+               cursor.key_beg.obj_id = ip->obj_id;
+               cursor.key_beg.key = 0;
+               cursor.key_beg.create_tid = ip->obj_asof;
+               cursor.key_beg.delete_tid = 0;
+               cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
+               cursor.key_beg.obj_type = 0;
+               cursor.flags = HAMMER_CURSOR_GET_RECORD;
+
+               error = hammer_btree_lookup(&cursor);
+
+               if (error == 0) {
+                       cursor.record->base.base.delete_tid = trans->tid;
+                       hammer_modify_buffer(cursor.record_buffer);
+               }
+               hammer_cache_node(cursor.node, &ip->cache);
+               hammer_done_cursor(&cursor);
+       }
+
+       /*
+        * Write out a new record if the in-memory inode is not marked
+        * as having been deleted.
+        */
+       if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) { 
+               record = hammer_alloc_mem_record(trans, ip);
+               record->rec.inode = ip->ino_rec;
+               record->rec.inode.base.base.create_tid = trans->tid;
+               record->rec.inode.base.data_len = sizeof(ip->ino_data);
+               record->data = (void *)&ip->ino_data;
+               error = hammer_ip_sync_record(record);
+               hammer_free_mem_record(record);
+               ip->flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY);
+               ip->flags |= HAMMER_INODE_ONDISK;
+       }
+       return(error);
+}
+
 /*
  * Release a reference on an inode and unload it if told to flush.
  */
@@ -318,13 +390,16 @@ hammer_rel_inode(struct hammer_inode *ip, int flush)
 int
 hammer_unload_inode(struct hammer_inode *ip, void *data __unused)
 {
+       int error;
+
        KASSERT(ip->lock.refs == 0,
                ("hammer_unload_inode: %d refs\n", ip->lock.refs));
        KKASSERT(ip->vp == NULL);
        hammer_ref(&ip->lock);
 
-       /* XXX flush inode to disk */
-       kprintf("flush inode %p\n", ip);
+       error = hammer_sync_inode(ip, MNT_WAIT, 1);
+       if (error)
+               kprintf("hammer_sync_inode failed error %d\n", error);
 
        RB_REMOVE(hammer_ino_rb_tree, &ip->hmp->rb_inos_root, ip);
 
@@ -342,7 +417,129 @@ hammer_modify_inode(struct hammer_transaction *trans,
                    struct hammer_inode *ip, int flags)
 {
        ip->flags |= flags;
-       ip->last_tid = trans->tid;
+       if (flags & HAMMER_INODE_TID)
+               ip->last_tid = trans->tid;
+}
+
+/*
+ * Sync any dirty buffers and records associated with an inode.  The
+ * inode's last_tid field is used as the transaction id for the sync,
+ * overriding any intermediate TIDs that were used for records.  Note
+ * that the dirty buffer cache buffers do not have any knowledge of
+ * the transaction id they were modified under.
+ */
+static int
+hammer_sync_inode_callback(hammer_record_t rec, void *data __unused)
+{
+       int error;
+
+       error = 0;
+       if ((rec->flags & HAMMER_RECF_DELETED) == 0)
+               error = hammer_ip_sync_record(rec);
+
+       if (error) {
+               kprintf("hammer_sync_inode_callback: sync failed rec %p\n",
+                       rec);
+               return(-1);
+       }
+       hammer_free_mem_record(rec);
+       return(0);
+}
+
+/*
+ * XXX error handling
+ */
+int
+hammer_sync_inode(hammer_inode_t ip, int waitfor, int handle_delete)
+{
+       struct hammer_transaction trans;
+       int error;
+       int r;
+
+       hammer_lock_ex(&ip->lock);
+       hammer_start_transaction(&trans, ip->hmp);
+
+       /*
+        * If the inode has been deleted (nlinks == 0), and the OS no longer
+        * has any references to it (handle_delete != 0), clean up in-memory
+        * data.
+        *
+        * NOTE: We do not set the RDIRTY flag when updating the delete_tid;
+        * setting HAMMER_INODE_DELETED takes care of it.
+        */
+       if (ip->ino_rec.ino_nlinks == 0 && handle_delete) {
+               if (ip->vp)
+                       vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
+               error = hammer_ip_delete_range(&trans, ip,
+                                              HAMMER_MIN_KEY, HAMMER_MAX_KEY);
+               KKASSERT(RB_EMPTY(&ip->rec_tree));
+               ip->ino_rec.base.base.delete_tid = trans.tid;
+               hammer_modify_inode(&trans, ip,
+                                   HAMMER_INODE_DELETED | HAMMER_INODE_TID);
+       }
+
+       /*
+        * Sync the buffer cache
+        */
+       if (ip->vp != NULL)
+               error = vfsync(ip->vp, waitfor, 1, NULL, NULL);
+       else
+               error = 0;
+
+       /*
+        * Now sync related records
+        */
+       if (error == 0) {
+               r = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
+                           hammer_sync_inode_callback, NULL);
+               if (r < 0)
+                       error = EIO;
+       }
+
+       /*
+        * Now update the inode's on-disk inode-data and/or on-disk record.
+        */
+       switch(ip->flags & (HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK)) {
+       case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK:
+               /*
+                * If deleted and on-disk, don't set any additional flags.
+                * The delete flag takes care of things.
+                */
+               break;
+       case HAMMER_INODE_DELETED:
+               /*
+                * Take care of the case where a deleted inode was never
+                * flushed to the disk in the first place.
+                */
+               ip->flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY);
+               while (RB_ROOT(&ip->rec_tree))
+                       hammer_free_mem_record(RB_ROOT(&ip->rec_tree));
+               break;
+       case HAMMER_INODE_ONDISK:
+               /*
+                * If already on-disk, do not set any additional flags.
+                */
+               break;
+       default:
+               /*
+                * If not on-disk and not deleted, set both dirty flags
+                * to force an initial record to be written.
+                */
+               ip->flags |= HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY;
+               break;
+       }
+
+       /*
+        * If RDIRTY or DDIRTY is set, write out a new record.  If the
+        * inode is already on-disk, the old record is marked as deleted.
+        */
+       if (ip->flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
+                        HAMMER_INODE_DELETED)) {
+               error = hammer_update_inode(&trans, ip);
+       }
+       hammer_commit_transaction(&trans);
+       hammer_unlock(&ip->lock);
+       return(error);
 }
 
 /*
@@ -379,14 +576,14 @@ hammer_bread(hammer_cluster_t cluster, int32_t cloff,
        if (buffer == NULL || buffer->cluster != cluster ||
            buffer->buf_no != buf_no) {
                if (buffer) {
-                       hammer_unlock(&buffer->io.lock);
+                       /*hammer_unlock(&buffer->io.lock);*/
                        hammer_rel_buffer(buffer, 0);
                }
                buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
                *bufferp = buffer;
                if (buffer == NULL)
                        return(NULL);
-               hammer_lock_ex(&buffer->io.lock);
+               /*hammer_lock_ex(&buffer->io.lock);*/
        }
 
        /*
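
The DELETED/ONDISK switch in hammer_sync_inode() above covers four
states.  A hypothetical helper restating that switch (the flag names are
from this diff; the function itself is illustrative only):

    /*
     * Returns nonzero when both dirty flags must be forced so that an
     * initial inode record is written out.
     */
    static int
    initial_record_needed(int flags)
    {
            switch (flags & (HAMMER_INODE_DELETED | HAMMER_INODE_ONDISK)) {
            case HAMMER_INODE_DELETED | HAMMER_INODE_ONDISK:
                    return (0);     /* DELETED drives the on-disk update */
            case HAMMER_INODE_DELETED:
                    return (0);     /* never reached disk; discard in memory */
            case HAMMER_INODE_ONDISK:
                    return (0);     /* normal case; existing flags suffice */
            default:
                    return (1);     /* neither; force the initial record */
            }
    }
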
diff --git a/sys/vfs/hammer/hammer_io.c b/sys/vfs/hammer/hammer_io.c
index 8f01efb..cf1d0e2 100644
--- a/sys/vfs/hammer/hammer_io.c
+++ b/sys/vfs/hammer/hammer_io.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.3 2007/11/20 07:16:28 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.4 2007/11/26 05:03:11 dillon Exp $
  */
 /*
  * IO Primitives and buffer cache management
@@ -110,6 +110,7 @@ hammer_io_disassociate(union hammer_io_structure *io)
                        bdwrite(bp);
                else
                        bqrelse(bp);
+               io->io.released = 1;
        }
 }
 
@@ -230,6 +231,7 @@ hammer_io_release(struct hammer_io *io, int flush)
                        } else {
                                bdwrite(bp);
                                io->modified = 0;
+                               io->released = 1;
                        }
                } else if (flush) {
                        /*
diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c
index 5b3422f..aff9ac8 100644
--- a/sys/vfs/hammer/hammer_object.c
+++ b/sys/vfs/hammer/hammer_object.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.4 2007/11/20 22:55:40 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.5 2007/11/26 05:03:11 dillon Exp $
  */
 
 #include "hammer.h"
@@ -309,6 +309,7 @@ hammer_ip_add_directory(struct hammer_transaction *trans,
 
        record = hammer_alloc_mem_record(trans, dip);
 
+       kprintf("add to directory dip %p\n", dip);
        bytes = ncp->nc_nlen;   /* NOTE: terminating \0 is NOT included */
        if (++trans->hmp->namekey_iterator == 0)
                ++trans->hmp->namekey_iterator;
@@ -323,6 +324,7 @@ hammer_ip_add_directory(struct hammer_transaction *trans,
        record->rec.entry.obj_id = ip->obj_id;
        if (bytes <= sizeof(record->rec.entry.den_name)) {
                record->data = (void *)record->rec.entry.den_name;
+               record->flags |= HAMMER_RECF_EMBEDDED_DATA;
        } else {
                record->data = kmalloc(bytes, M_HAMMER, M_WAITOK);
                record->flags |= HAMMER_RECF_ALLOCDATA;
@@ -330,7 +332,8 @@ hammer_ip_add_directory(struct hammer_transaction *trans,
        bcopy(ncp->nc_name, record->data, bytes);
        record->rec.entry.base.data_len = bytes;
        ++ip->ino_rec.ino_nlinks;
-       hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY);
+       hammer_modify_inode(trans, ip,
+                           HAMMER_INODE_RDIRTY | HAMMER_INODE_TID);
        error = hammer_mem_add(trans, record);
        return(error);
 }
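
hammer_ip_add_directory() above advances a per-mount namekey_iterator;
a comment later in this diff explains why: the low 32 bits of a
directory entry's key disambiguate hash collisions in the upper 32 bits.
A sketch of that keying under assumed semantics (the helper and the
hash source are hypothetical):

    #include <stdint.h>

    /*
     * Combine a name hash occupying the upper 32 bits with a per-mount
     * iterator in the low 32 bits, so two names with the same hash still
     * receive distinct 64-bit directory keys.  Zero is skipped just as
     * the code above skips it.
     */
    static uint64_t
    directory_key(uint64_t name_hash_hi, uint32_t *iteratorp)
    {
            if (++(*iteratorp) == 0)
                    ++(*iteratorp);
            return ((name_hash_hi & ~(uint64_t)0xFFFFFFFF) | *iteratorp);
    }
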
@@ -369,40 +372,210 @@ hammer_ip_del_directory(struct hammer_transaction *trans,
                        cursor->record->base.base.delete_tid = trans->tid;
                        hammer_modify_node(cursor->node);
                        hammer_modify_buffer(cursor->record_buffer);
-
                }
        }
 
        /*
-        * One less link.  Mark the inode and all of its records as deleted
-        * when the last link goes away.  The inode will be automatically
-        * flushed when its last reference goes away.
+        * One less link.  The file may still be open in the OS even after
+        * all links have gone away so we don't destroy the inode's data
+        * here.
         */
        if (error == 0) {
                --ip->ino_rec.ino_nlinks;
-               if (ip->ino_rec.ino_nlinks == 0)
-                       ip->ino_rec.base.base.delete_tid = trans->tid;
-               error = hammer_ip_delete_range(trans, ip,
-                                              HAMMER_MIN_KEY, HAMMER_MAX_KEY);
-               KKASSERT(RB_EMPTY(&ip->rec_tree));
-               hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY);
+               hammer_modify_inode(trans, ip,
+                                   HAMMER_INODE_RDIRTY | HAMMER_INODE_TID);
        }
        return(error);
 }
 
 /*
- * Add a data record to the filesystem.
- *
- * This is called via the strategy code, typically when the kernel wants to
- * flush a buffer cache buffer, so this operation goes directly to the disk.
+ * Sync data from a buffer cache buffer (typically) to the filesystem.  This
+ * is called via the strategy code from a cached data source.  This code
+ * is responsible for actually writing a data record out to the disk.
  */
 int
-hammer_ip_add_data(hammer_transaction_t trans, hammer_inode_t ip,
+hammer_ip_sync_data(hammer_transaction_t trans, hammer_inode_t ip,
                       int64_t offset, void *data, int bytes)
 {
-       panic("hammer_ip_add_data");
+       struct hammer_cursor cursor;
+       hammer_record_ondisk_t rec;
+       union hammer_btree_elm elm;
+       void *bdata;
+       int error;
+
+       error = hammer_init_cursor_ip(&cursor, ip);
+       if (error)
+               return(error);
+       cursor.key_beg.obj_id = ip->obj_id;
+       cursor.key_beg.key = offset + bytes;
+       cursor.key_beg.create_tid = trans->tid;
+       cursor.key_beg.delete_tid = 0;
+       cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
+       cursor.flags = HAMMER_CURSOR_INSERT;
+
+       /*
+        * Issue a lookup to position the cursor and locate the cluster
+        */
+       error = hammer_btree_lookup(&cursor);
+       if (error == 0) {
+               kprintf("hammer_ip_sync_data: duplicate data at (%lld,%d)\n",
+                       offset, bytes);
+               error = EIO;
+       }
+       if (error != ENOENT)
+               goto done;
+
+       /*
+        * Allocate record and data space now that we know which cluster
+        * the B-Tree node ended up in.
+        */
+       bdata = hammer_alloc_data(cursor.node->cluster, bytes, &error,
+                                 &cursor.data_buffer);
+       if (bdata == NULL)
+               goto done;
+       rec = hammer_alloc_record(cursor.node->cluster, &error,
+                                 &cursor.record_buffer);
+       if (rec == NULL)
+               goto fail1;
+
+       /*
+        * Fill everything in and insert our B-Tree node.
+        */
+       rec->base.base = cursor.key_beg;
+       rec->base.data_crc = crc32(data, bytes);
+       rec->base.rec_id = 0;   /* XXX */
+       rec->base.data_offset = hammer_bclu_offset(cursor.data_buffer, bdata);
+       rec->base.data_len = bytes;
+       hammer_modify_buffer(cursor.record_buffer);
+
+       bcopy(data, bdata, bytes);
+       hammer_modify_buffer(cursor.data_buffer);
+
+       elm.leaf.base = cursor.key_beg;
+       elm.leaf.rec_offset = hammer_bclu_offset(cursor.record_buffer, rec);
+       elm.leaf.data_offset = rec->base.data_offset;
+       elm.leaf.data_len = bytes;
+       elm.leaf.data_crc = rec->base.data_crc;
+
+       error = hammer_btree_insert(&cursor, &elm);
+       if (error == 0)
+               goto done;
+
+       hammer_free_record_ptr(cursor.record_buffer, rec);
+fail1:
+       hammer_free_data_ptr(cursor.data_buffer, bdata, bytes);
+done:
+       hammer_done_cursor(&cursor);
+       return(error);
+}
+
+/*
+ * Sync an in-memory record to the disk.  This is typically called via fsync
+ * from a cached record source.  This code is responsible for actually
+ * writing a record out to the disk.
+ */
+int
+hammer_ip_sync_record(hammer_record_t record)
+{
+       struct hammer_cursor cursor;
+       hammer_record_ondisk_t rec;
+       union hammer_btree_elm elm;
+       void *bdata;
+       int error;
+
+       error = hammer_init_cursor_ip(&cursor, record->ip);
+       if (error)
+               return(error);
+       cursor.key_beg = record->rec.base.base;
+       cursor.flags = HAMMER_CURSOR_INSERT;
+
+       /*
+        * Issue a lookup to position the cursor and locate the cluster
+        */
+       error = hammer_btree_lookup(&cursor);
+       if (error == 0) {
+               kprintf("hammer_ip_sync_record: duplicate rec at (%016llx)\n",
+                       record->rec.base.base.key);
+               error = EIO;
+       }
+       if (error != ENOENT)
+               goto done;
+
+       /*
+        * Allocate record and data space now that we know which cluster
+        * the B-Tree node ended up in.
+        */
+       if (record->data == NULL ||
+           (record->flags & HAMMER_RECF_EMBEDDED_DATA)) {
+               bdata = record->data;
+       } else {
+               bdata = hammer_alloc_data(cursor.node->cluster,
+                                         record->rec.base.data_len, &error,
+                                         &cursor.data_buffer);
+               if (bdata == NULL)
+                       goto done;
+       }
+       rec = hammer_alloc_record(cursor.node->cluster, &error,
+                                 &cursor.record_buffer);
+       if (rec == NULL)
+               goto fail1;
+
+       /*
+        * Fill everything in and insert our B-Tree node.
+        *
+        * XXX assign rec_id here
+        */
+       *rec = record->rec;
+       kprintf("record->rec %p data %p\n", &record->rec, record->data);
+       if (bdata) {
+               rec->base.data_crc = crc32(record->data,
+                                          record->rec.base.data_len);
+               if (record->flags & HAMMER_RECF_EMBEDDED_DATA) {
+                       /*
+                        * Data embedded in record
+                        */
+                       rec->base.data_offset = ((char *)bdata -
+                                                (char *)&record->rec);
+                       KKASSERT(rec->base.data_offset >= 0 && 
+                                rec->base.data_offset + rec->base.data_len <
+                                 sizeof(*rec));
+                       rec->base.data_offset += hammer_bclu_offset(cursor.record_buffer, rec);
+               } else {
+                       /*
+                        * Data separate from record
+                        */
+                       rec->base.data_offset = hammer_bclu_offset(cursor.data_buffer,bdata);
+                       bcopy(record->data, bdata, rec->base.data_len);
+                       hammer_modify_buffer(cursor.data_buffer);
+               }
+       }
+       rec->base.rec_id = 0;   /* XXX */
+
+       hammer_modify_buffer(cursor.record_buffer);
+
+       elm.leaf.base = cursor.key_beg;
+       elm.leaf.rec_offset = hammer_bclu_offset(cursor.record_buffer, rec);
+       elm.leaf.data_offset = rec->base.data_offset;
+       elm.leaf.data_len = rec->base.data_len;
+       elm.leaf.data_crc = rec->base.data_crc;
+
+       error = hammer_btree_insert(&cursor, &elm);
+       if (error == 0)
+               goto done;
+
+       hammer_free_record_ptr(cursor.record_buffer, rec);
+fail1:
+       if (record->data && (record->flags & HAMMER_RECF_EMBEDDED_DATA) == 0) {
+               hammer_free_data_ptr(cursor.data_buffer, bdata,
+                                    rec->base.data_len);
+       }
+done:
+       hammer_done_cursor(&cursor);
+       kprintf("hammer_ip_sync_record_done %d\n", error);
+       return(error);
 }
 
+
 /*
  * Add the record to the inode's rec_tree.  The low 32 bits of a directory
  * entry's key is used to deal with hash collisions in the upper 32 bits.
@@ -491,20 +664,32 @@ hammer_ip_first(hammer_cursor_t cursor, struct hammer_inode *ip)
                hammer_rel_mem_record(&cursor->iprec);
 
        /*
-        * Search the on-disk B-Tree
+        * Search the on-disk B-Tree.  hammer_btree_lookup() only does an
+        * exact lookup so if we get ENOENT we have to call the iterate
+        * function to validate the first record after the begin key.
+        *
+        * The ATEDISK flag is used by hammer_btree_iterate to determine
+        * whether it must index forwards or not.
         */
        if (ip->flags & HAMMER_INODE_ONDISK) {
                error = hammer_btree_lookup(cursor);
-               if (error && error != ENOENT)
+               if (error == ENOENT) {
+                       cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
+                       error = hammer_btree_iterate(cursor);
+               }
+               if (error && error != ENOENT) 
                        return(error);
                if (error == 0) {
-                       cursor->flags &= ~HAMMER_CURSOR_DISKEOF ;
-                       cursor->flags &= ~HAMMER_CURSOR_ATEDISK ;
+                       cursor->flags &= ~HAMMER_CURSOR_DISKEOF;
+                       cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
+               } else {
+                       cursor->flags |= HAMMER_CURSOR_ATEDISK;
                }
        }
 
        /*
-        * Search the in-memory record list (Red-Black tree)
+        * Search the in-memory record list (Red-Black tree).  Unlike the
+        * B-Tree search, mem_search checks for records in the range.
         */
        error = hammer_mem_search(cursor, ip);
        if (error && error != ENOENT)
@@ -727,7 +912,8 @@ hammer_ip_delete_range(hammer_transaction_t trans, hammer_inode_t ip,
                        hammer_free_mem_record(cursor.iprec);
 
                } else {
-                       cursor.node->ondisk->elms[cursor.index].base.delete_tid = trans->tid;
+                       cursor.node->ondisk->
+                           elms[cursor.index].base.delete_tid = trans->tid;
                        cursor.record->base.base.delete_tid = trans->tid;
                        hammer_modify_node(cursor.node);
                        hammer_modify_buffer(cursor.record_buffer);
diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c
index 66a915a..bc4c5b0 100644
--- a/sys/vfs/hammer/hammer_ondisk.c
+++ b/sys/vfs/hammer/hammer_ondisk.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.5 2007/11/20 07:16:28 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.6 2007/11/26 05:03:11 dillon Exp $
  */
 /*
  * Manage HAMMER's on-disk structures.  These routines are primarily
@@ -150,7 +150,7 @@ hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
 {
        if (node1->node_offset < node2->node_offset)
                return(-1);
-       if (node1->node_offset < node2->node_offset)
+       if (node1->node_offset > node2->node_offset)
                return(1);
        return(0);
 }
@@ -1411,10 +1411,12 @@ hammer_alloc_btree(hammer_cluster_t cluster, int *errorp)
                        *errorp = ENOSPC;
                        if (buffer)
                                hammer_rel_buffer(buffer, 0);
+                       hammer_modify_cluster(cluster);
                        return(NULL);
                }
        }
        cluster->ondisk->idx_index = elm_no;
+       hammer_modify_cluster(cluster);
 
        /*
         * Load and return the B-Tree element
@@ -1448,7 +1450,40 @@ hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
        void *item;
 
        /*
-        * Allocate a data element
+        * Deal with large data blocks.  The blocksize is HAMMER_BUFSIZE
+        * for these allocations.
+        */
+       if ((bytes & HAMMER_BUFMASK) == 0) {
+               nblks = bytes / HAMMER_BUFSIZE;
+               /* only one block allowed for now (so buffer can hold it) */
+               KKASSERT(nblks == 1);
+
+               buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
+                                               nblks,
+                                               cluster->ondisk->idx_ldata);
+               if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
+                       buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
+                                               nblks,
+                                               0);
+               }
+               hammer_modify_cluster(cluster);
+               if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
+                       *errorp = ENOSPC;
+                       return(NULL);
+               }
+               cluster->ondisk->idx_ldata = buf_no;
+               buffer = *bufferp;
+               *bufferp = hammer_get_buffer(cluster, buf_no, -1, errorp);
+               if (buffer)
+                       hammer_rel_buffer(buffer, 0);
+               buffer = *bufferp;
+               kprintf("allocate large buffer %p (%d)\n", buffer, buf_no);
+               return(buffer->ondisk);
+       }
+
+       /*
+        * Allocate a data element.  The block size is HAMMER_DATA_BLKSIZE
+        * (64 bytes) for these allocations.
         */
        nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
        live = &cluster->alist_mdata;
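
hammer_alloc_data() above now distinguishes two allocation size classes.
A sketch of the decision, assuming the usual HAMMER constants (16K
filesystem buffers, 64-byte data blocks; the values are assumptions
restated here, not taken from this diff):

    #define HAMMER_BUFSIZE      16384                   /* assumed */
    #define HAMMER_BUFMASK      (HAMMER_BUFSIZE - 1)
    #define HAMMER_DATA_BLKSIZE 64                      /* assumed */

    /*
     * Buffer-aligned sizes take whole 16K buffers from the cluster's
     * master a-list; everything else is carved out of 64-byte chunks
     * in the cluster's data a-list.
     */
    static int
    uses_large_data_path(int bytes)
    {
            return ((bytes & HAMMER_BUFMASK) == 0);
    }
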
@@ -1462,10 +1497,12 @@ hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
                elm_no = hammer_alist_alloc(live, nblks);
                if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
                        *errorp = ENOSPC;
+                       hammer_modify_cluster(cluster);
                        return(NULL);
                }
        }
        cluster->ondisk->idx_index = elm_no;
+       hammer_modify_cluster(cluster);
 
        /*
         * Load and return the B-Tree element
@@ -1479,7 +1516,7 @@ hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
                buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
                *bufferp = buffer;
        }
-       KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_BTREE);
+       KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA);
        item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
        bzero(item, nblks * HAMMER_DATA_BLKSIZE);
        *errorp = 0;
@@ -1510,10 +1547,12 @@ hammer_alloc_record(hammer_cluster_t cluster,
                elm_no = hammer_alist_alloc(live, 1);
                if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
                        *errorp = ENOSPC;
+                       hammer_modify_cluster(cluster);
                        return(NULL);
                }
        }
        cluster->ondisk->idx_record = elm_no;
+       hammer_modify_cluster(cluster);
 
        /*
         * Load and return the B-Tree element
@@ -1549,6 +1588,7 @@ hammer_free_btree_ptr(hammer_buffer_t buffer, hammer_node_ondisk_t node)
        elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
        live = &buffer->cluster->alist_btree;
        hammer_alist_free(live, elm_no, 1);
+       hammer_modify_cluster(buffer->cluster);
 }
 
 void
@@ -1558,6 +1598,15 @@ hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
        int32_t nblks;
        hammer_alist_t live;
 
+       if ((bytes & HAMMER_BUFMASK) == 0) {
+               nblks = bytes / HAMMER_BUFSIZE;
+               KKASSERT(nblks == 1 && data == (void *)buffer->ondisk);
+               hammer_alist_free(&buffer->cluster->alist_master,
+                                 buffer->buf_no, nblks);
+               hammer_modify_cluster(buffer->cluster);
+               return;
+       }
+
        elm_no = ((char *)data - (char *)buffer->ondisk->data.data) /
                 HAMMER_DATA_BLKSIZE;
        KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES);
@@ -1565,6 +1614,7 @@ hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
        nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
        live = &buffer->cluster->alist_mdata;
        hammer_alist_free(live, elm_no, nblks);
+       hammer_modify_cluster(buffer->cluster);
 }
 
 void
@@ -1578,6 +1628,7 @@ hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec)
        elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
        live = &buffer->cluster->alist_record;
        hammer_alist_free(live, elm_no, 1);
+       hammer_modify_cluster(buffer->cluster);
 }
 
 void
@@ -1594,6 +1645,7 @@ hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset)
        KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
        elm_no += fsbuf_offset / blksize;
        hammer_alist_free(live, elm_no, 1);
+       hammer_modify_cluster(cluster);
 }
 
 void
@@ -1603,8 +1655,18 @@ hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
        int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
        hammer_alist_t live;
        int32_t elm_no;
+       int32_t buf_no;
        int32_t nblks;
 
+       if ((bytes & HAMMER_BUFMASK) == 0) {
+               nblks = bytes / HAMMER_BUFSIZE;
+               KKASSERT(nblks == 1 && (bclu_offset & HAMMER_BUFMASK) == 0);
+               buf_no = bclu_offset / HAMMER_BUFSIZE;
+               hammer_alist_free(&cluster->alist_master, buf_no, nblks);
+               hammer_modify_cluster(cluster);
+               return;
+       }
+
        elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
        fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]);
        live = &cluster->alist_mdata;
@@ -1612,6 +1674,7 @@ hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
        KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
        elm_no += fsbuf_offset / blksize;
        hammer_alist_free(live, elm_no, nblks);
+       hammer_modify_cluster(cluster);
 }
 
 void
@@ -1628,6 +1691,7 @@ hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset)
        KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
        elm_no += fsbuf_offset / blksize;
        hammer_alist_free(live, elm_no, 1);
+       hammer_modify_cluster(cluster);
 }
 
 
@@ -1662,6 +1726,7 @@ alloc_new_buffer(hammer_cluster_t cluster, hammer_alist_t live,
                }
        }
        KKASSERT(buf_no != HAMMER_ALIST_BLOCK_NONE); /* XXX */
+       hammer_modify_cluster(cluster);
 
        /*
         * The new buffer must be initialized (type != 0) regardless of
@@ -1870,6 +1935,7 @@ buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
                if (r != HAMMER_ALIST_BLOCK_NONE)
                        r += blk;
                *fullp = hammer_alist_isfull(&buffer->alist);
+               hammer_modify_buffer(buffer);
                hammer_rel_buffer(buffer, 0);
        } else {
                r = HAMMER_ALIST_BLOCK_NONE;
@@ -1896,6 +1962,7 @@ buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
                if (r != HAMMER_ALIST_BLOCK_NONE)
                        r += blk;
                *fullp = hammer_alist_isfull(&buffer->alist);
+               hammer_modify_buffer(buffer);
                hammer_rel_buffer(buffer, 0);
        } else {
                r = HAMMER_ALIST_BLOCK_NONE;
@@ -1919,6 +1986,7 @@ buffer_alist_free(void *info, int32_t blk, int32_t radix,
                KKASSERT(buffer->ondisk->head.buf_type != 0);
                hammer_alist_free(&buffer->alist, base_blk, count);
                *emptyp = hammer_alist_isempty(&buffer->alist);
+               hammer_modify_buffer(buffer);
                hammer_rel_buffer(buffer, 0);
        } else {
                *emptyp = 0;
@@ -1980,6 +2048,7 @@ super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
                if (r != HAMMER_ALIST_BLOCK_NONE)
                        r += blk;
                *fullp = hammer_alist_isfull(&supercl->alist);
+               hammer_modify_supercl(supercl);
                hammer_rel_supercl(supercl, 0);
        } else {
                r = HAMMER_ALIST_BLOCK_NONE;
@@ -2005,6 +2074,7 @@ super_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
                if (r != HAMMER_ALIST_BLOCK_NONE)
                        r += blk;
                *fullp = hammer_alist_isfull(&supercl->alist);
+               hammer_modify_supercl(supercl);
                hammer_rel_supercl(supercl, 0);
        } else { 
                r = HAMMER_ALIST_BLOCK_NONE;
@@ -2027,6 +2097,7 @@ super_alist_free(void *info, int32_t blk, int32_t radix,
        if (supercl) {
                hammer_alist_free(&supercl->alist, base_blk, count);
                *emptyp = hammer_alist_isempty(&supercl->alist);
+               hammer_modify_supercl(supercl);
                hammer_rel_supercl(supercl, 0);
        } else {
                *emptyp = 0;
diff --git a/sys/vfs/hammer/hammer_subs.c b/sys/vfs/hammer/hammer_subs.c
index 9a7f3e3..d4a5799 100644
--- a/sys/vfs/hammer/hammer_subs.c
+++ b/sys/vfs/hammer/hammer_subs.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.5 2007/11/20 22:55:40 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.6 2007/11/26 05:03:11 dillon Exp $
  */
 /*
  * HAMMER structural locking
@@ -48,9 +48,11 @@ hammer_lock_ex(struct hammer_lock *lock)
        KKASSERT(lock->refs > 0);
        crit_enter();
        if (lock->locktd != td) {
-               while (lock->locktd != NULL) {
+               while (lock->locktd != NULL || lock->lockcount) {
                        lock->wanted = 1;
+                       kprintf("hammer_lock_ex: held by %p\n", lock->locktd);
                        tsleep(lock, 0, "hmrlck", 0);
+                       kprintf("hammer_lock_ex: try again\n");
                }
                lock->locktd = td;
        }
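
The hammer_lock_ex() change above makes an exclusive acquirer wait on
shared holders as well.  The rule restated, under the assumption that
lockcount tracks outstanding shared holds (a reading of this diff, not
a verified invariant):

    struct thread;

    /*
     * An exclusive lock is available only when no other thread owns it
     * (locktd) and no shared holds remain (lockcount).  A thread that
     * already owns the exclusive lock may recurse.
     */
    static int
    ex_lock_available(struct thread *locktd, struct thread *td,
                      int lockcount)
    {
            return (locktd == td || (locktd == NULL && lockcount == 0));
    }
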
@@ -69,7 +71,7 @@ hammer_lock_ex_try(struct hammer_lock *lock)
        KKASSERT(lock->refs > 0);
        crit_enter();
        if (lock->locktd != td) {
-               if (lock->locktd != NULL)
+               if (lock->locktd != NULL || lock->lockcount)
                        return(EAGAIN);
                lock->locktd = td;
        }
diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c
index f39c627..74628c1 100644
--- a/sys/vfs/hammer/hammer_vfsops.c
+++ b/sys/vfs/hammer/hammer_vfsops.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.5 2007/11/20 07:16:28 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.6 2007/11/26 05:03:11 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -150,6 +150,12 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
        mp->mnt_kern_flag |= MNTK_FSMID;
        mp->mnt_stat.f_fsid.val[0] = 0; /* XXX */
        mp->mnt_stat.f_fsid.val[1] = 0; /* XXX */
+
+       /* 
+        * note: f_iosize is used by vnode_pager_haspage() when constructing
+        * its VOP_BMAP call.
+        */
+       mp->mnt_stat.f_iosize = HAMMER_BUFSIZE;
        vfs_getnewfsid(mp);             /* XXX */
        mp->mnt_maxsymlinklen = 255;
        mp->mnt_flag |= MNT_LOCAL;
diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c
index 09b55b0..ee37683 100644
--- a/sys/vfs/hammer/hammer_vnops.c
+++ b/sys/vfs/hammer/hammer_vnops.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.5 2007/11/20 22:55:40 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.6 2007/11/26 05:03:11 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -43,6 +43,7 @@
 #include <sys/lockf.h>
 #include <sys/event.h>
 #include <sys/stat.h>
+#include <vm/vm_extern.h>
 #include "hammer.h"
 
 /*
@@ -78,6 +79,8 @@ static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *);
 struct vop_ops hammer_vnode_vops = {
        .vop_default =          vop_defaultop,
        .vop_fsync =            hammer_vop_fsync,
+       .vop_getpages =         vop_stdgetpages,
+       .vop_putpages =         vop_stdputpages,
        .vop_read =             hammer_vop_read,
        .vop_write =            hammer_vop_write,
        .vop_access =           hammer_vop_access,
@@ -127,7 +130,12 @@ static
 int
 hammer_vop_fsync(struct vop_fsync_args *ap)
 {
-       return EOPNOTSUPP;
+       hammer_inode_t ip;
+       int error;
+
+       ip = VTOI(ap->a_vp);
+       error = hammer_sync_inode(ip, ap->a_waitfor, 0);
+       return (error);
 }
 
 /*
@@ -138,7 +146,7 @@ int
 hammer_vop_read(struct vop_read_args *ap)
 {
        struct hammer_transaction trans;
-       struct hammer_inode *ip;
+       hammer_inode_t ip;
        off_t offset;
        struct buf *bp;
        struct uio *uio;
@@ -160,14 +168,18 @@ hammer_vop_read(struct vop_read_args *ap)
        uio = ap->a_uio;
        while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_rec.ino_size) {
                offset = uio->uio_offset & HAMMER_BUFMASK;
+#if 0
                error = cluster_read(ap->a_vp, ip->ino_rec.ino_size,
                                     uio->uio_offset - offset, HAMMER_BUFSIZE,
                                     MAXBSIZE, seqcount, &bp);
+#endif
+               error = bread(ap->a_vp, uio->uio_offset - offset,
+                             HAMMER_BUFSIZE, &bp);
                if (error) {
                        brelse(bp);
                        break;
                }
-               bp->b_flags |= B_CLUSTEROK;
+               /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
                n = HAMMER_BUFSIZE - offset;
                if (n > uio->uio_resid)
                        n = uio->uio_resid;
@@ -200,6 +212,7 @@ hammer_vop_write(struct vop_write_args *ap)
        struct buf *bp;
        int error;
        int n;
+       int flags;
 
        if (ap->a_vp->v_type != VREG)
                return (EINVAL);
@@ -229,14 +242,44 @@ hammer_vop_write(struct vop_write_args *ap)
         */
        while (uio->uio_resid > 0) {
                offset = uio->uio_offset & HAMMER_BUFMASK;
-               if (offset == 0 && uio->uio_resid >= HAMMER_BUFSIZE) {
+               if (uio->uio_segflg == UIO_NOCOPY) {
+                       /*
+                        * Issuing a write with the same data backing the
+                        * buffer.  Instantiate the buffer to collect the
+                        * backing vm pages, then read in any missing bits.
+                        *
+                        * This case is used by vop_stdputpages().
+                        */
+                       bp = getblk(ap->a_vp, uio->uio_offset, HAMMER_BUFSIZE,
+                                   0, 0);
+                       if ((bp->b_flags & B_CACHE) == 0) {
+                               bqrelse(bp);
+                               error = bread(ap->a_vp,
+                                             uio->uio_offset - offset,
+                                             HAMMER_BUFSIZE, &bp);
+                               if (error) {
+                                       brelse(bp);
+                                       break;
+                               }
+                       }
+               } else if (offset == 0 && uio->uio_resid >= HAMMER_BUFSIZE) {
+                       /*
+                        * entirely overwrite the buffer
+                        */
                        bp = getblk(ap->a_vp, uio->uio_offset, HAMMER_BUFSIZE,
                                    0, 0);
                } else if (offset == 0 && uio->uio_offset >= ip->ino_rec.ino_size) {
+                       /*
+                        * XXX
+                        */
                        bp = getblk(ap->a_vp, uio->uio_offset, HAMMER_BUFSIZE,
                                    0, 0);
                        vfs_bio_clrbuf(bp);
                } else {
+                       /*
+                        * Partial overwrite: read in any missing bits, then
+                        * replace the portion being written.
+                        */
                        error = bread(ap->a_vp, uio->uio_offset - offset,
                                      HAMMER_BUFSIZE, &bp);
                        if (error) {
@@ -252,13 +295,17 @@ hammer_vop_write(struct vop_write_args *ap)
                        brelse(bp);
                        break;
                }
-               bp->b_flags |= B_CLUSTEROK;
+               /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
                if (ip->ino_rec.ino_size < uio->uio_offset) {
                        ip->ino_rec.ino_size = uio->uio_offset;
                        ip->ino_rec.ino_mtime = trans.tid;
-                       hammer_modify_inode(&trans, ip,
-                               HAMMER_INODE_RDIRTY | HAMMER_INODE_ITIMES);
+                       flags = HAMMER_INODE_RDIRTY | HAMMER_INODE_ITIMES |
+                               HAMMER_INODE_TID;
+                       vnode_pager_setsize(ap->a_vp, ip->ino_rec.ino_size);
+               } else {
+                       flags = HAMMER_INODE_TID;
                }
+               hammer_modify_inode(&trans, ip, flags);
                if (ap->a_ioflag & IO_SYNC) {
                        bwrite(bp);
                } else if (ap->a_ioflag & IO_DIRECT) {
@@ -920,12 +967,12 @@ hammer_vop_nrename(struct vop_nrename_args *ap)
                cache_setvp(ap->a_tnch, ip->vp);
        }
 done:
+        hammer_done_cursor(&cursor);
        if (error == 0) {
                hammer_commit_transaction(&trans);
        } else {
                hammer_abort_transaction(&trans);
        }
-        hammer_done_cursor(&cursor);
        return (error);
 }
 
@@ -1001,13 +1048,22 @@ hammer_vop_setattr(struct vop_setattr_args *ap)
                        modflags |= HAMMER_INODE_DDIRTY;
                }
        }
-       if (vap->va_size != VNOVAL) {
+       if (vap->va_size != VNOVAL && ip->ino_rec.ino_size != vap->va_size) {
                switch(ap->a_vp->v_type) {
                case VREG:
+                       if (vap->va_size < ip->ino_rec.ino_size) {
+                               vtruncbuf(ap->a_vp, vap->va_size,
+                                         HAMMER_BUFSIZE);
+                       } else if (vap->va_size > ip->ino_rec.ino_size) {
+                               vnode_pager_setsize(ap->a_vp, vap->va_size);
+                       }
+                       /* fall through */
                case VDATABASE:
                        error = hammer_ip_delete_range(&trans, ip,
                                                    vap->va_size,
                                                    0x7FFFFFFFFFFFFFFFLL);
+                       ip->ino_rec.ino_size = vap->va_size;
+                       modflags |= HAMMER_INODE_RDIRTY;
                        break;
                default:
                        error = EINVAL;
@@ -1034,8 +1090,9 @@ done:
        if (error) {
                hammer_abort_transaction(&trans);
        } else {
-               if (modflags)
-                       hammer_modify_inode(&trans, ip, modflags);
+               if (modflags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY))
+                       modflags |= HAMMER_INODE_TID;
+               hammer_modify_inode(&trans, ip, modflags);
                hammer_commit_transaction(&trans);
        }
        return (error);
@@ -1131,13 +1188,15 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap)
 
        /*
         * Key range (begin and end inclusive) to scan.  Note that the keys
-        * stored in the actual records represent the 
+        * stored in the actual records represent BASE+LEN, not BASE.  The
+        * first record containing bio_offset will have a key > bio_offset.
         */
        cursor.key_beg.obj_id = ip->obj_id;
        cursor.key_beg.create_tid = ip->obj_asof;
        cursor.key_beg.delete_tid = 0;
        cursor.key_beg.obj_type = 0;
-       cursor.key_beg.key = bio->bio_offset;
+       cursor.key_beg.key = bio->bio_offset + 1;
+       kprintf("READ AT OFFSET %lld\n", bio->bio_offset);
 
        cursor.key_end = cursor.key_beg;
        if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) {
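
The key_beg adjustment above follows from HAMMER's record keying: a
data record's key is its ending offset (BASE+LEN), not its base.  A
worked example with an assumed 16K record (illustrative numbers only):

    #include <stdint.h>

    /*
     * A record covering file offsets [0, 16384) is stored under key
     * 16384.  A strategy read at bio_offset 0 probes with key 0 + 1 so
     * the range scan finds that record, and the record's base offset is
     * recovered exactly as the code further down does.
     */
    int64_t
    example_rec_offset(void)
    {
            int64_t key      = 16384;       /* record key: BASE + LEN */
            int32_t data_len = 16384;       /* record length */
            return (key - data_len);        /* == 0, the record's BASE */
    }
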
@@ -1145,7 +1204,7 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap)
                cursor.key_end.rec_type = HAMMER_RECTYPE_DB;
                cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
        } else {
-               ran_end = bio->bio_offset + bp->b_bufsize - 1;
+               ran_end = bio->bio_offset + bp->b_bufsize;
                cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
                cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
                if (ran_end + MAXPHYS < ran_end)
@@ -1164,7 +1223,8 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap)
                rec = cursor.record;
                base = &rec->base.base;
 
-               rec_offset = base->key - rec->data.base.data_len + 1;
+               rec_offset = base->key - rec->data.base.data_len;
+               kprintf("record offset %lld\n", rec_offset);
 
                /*
                 * Calculate the gap, if any, and zero-fill it.
@@ -1188,6 +1248,7 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap)
                 */
                roff = -n;
                n = rec->data.base.data_len - roff;
+               kprintf("roff = %d datalen %d\n", roff, rec->data.base.data_len);
                KKASSERT(n > 0);
                if (n > bp->b_bufsize - boff)
                        n = bp->b_bufsize - boff;
@@ -1232,6 +1293,8 @@ hammer_vop_strategy_write(struct vop_strategy_args *ap)
        struct buf *bp;
        int error;
 
+       kprintf("vop_strategy_write\n");
+
        bio = ap->a_bio;
        bp = bio->bio_buf;
        ip = ap->a_vp->v_data;
@@ -1248,13 +1311,14 @@ hammer_vop_strategy_write(struct vop_strategy_args *ap)
                error = hammer_ip_delete_range(&trans, ip, bio->bio_offset,
                                       bio->bio_offset + bp->b_bufsize - 1);
        }
+       kprintf("delete_range %d\n", error);
 
        /*
         * Add a single record to cover the write
         */
        if (error == 0) {
-               error = hammer_ip_add_data(&trans, ip, bio->bio_offset,
-                                          bp->b_data, bp->b_bufsize);
+               error = hammer_ip_sync_data(&trans, ip, bio->bio_offset,
+                                           bp->b_data, bp->b_bufsize);
        }
 
        /*