HAMMER 43/Many: Remove records from the media format, plus other stuff
author Matthew Dillon <dillon@dragonflybsd.org>
Mon, 12 May 2008 21:17:18 +0000 (21:17 +0000)
committer Matthew Dillon <dillon@dragonflybsd.org>
Mon, 12 May 2008 21:17:18 +0000 (21:17 +0000)
* Get rid of hammer_record_ondisk.  As HAMMER has evolved, the need for
  a separate record structure has devolved into trivialities.  Originally
  the idea was to have B-Tree nodes referencing records and data.  The
  B-Tree elements were originally intended to be throw-away and the on-media
  records were originally intended to be the official representation of
  the data and contained additional meta-information such as the obj_id
  of a directory entry and a few additional fields related to the inode.

  But once the UNDO code went in and it became obvious that the B-Tree needed
  to be tracked (undo-wise) along with everything else, the need for an
  official representation of the record as a separate media structure
  essentially disappeared.

  Move the directory-record meta-data into the directory-entry data and move
  the inode-record meta-data into the inode-record data.  As a single
  exception, move the atime field to the B-Tree element itself (it replaces
  what used to be the record offset), in order to continue to allow atime
  updates to occur without requiring record rewrites.  With these changes
  records are no longer needed at all, so remove the on-media record structure
  and all the related code.

* The removal of the on-media record structure also greatly improves
  performance.

* B-Tree elements are now the official on-media record.

* Fix a race in the extraction of the root of the B-Tree.

* Clean up the in-memory record handling API.  Instead of having to
  construct B-Tree leaf elements we can simply embed one in the in-memory
  record structure (struct hammer_record), and in the inode.

20 files changed:
sbin/hammer/cmd_show.c
sbin/hammer/cycle.c
sbin/hammer/hammer_util.h
sbin/hammer/ondisk.c
sbin/newfs_hammer/newfs_hammer.c
sys/conf/files
sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_btree.c
sys/vfs/hammer/hammer_btree.h
sys/vfs/hammer/hammer_cursor.c
sys/vfs/hammer/hammer_cursor.h
sys/vfs/hammer/hammer_disk.h
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_ioctl.c
sys/vfs/hammer/hammer_object.c
sys/vfs/hammer/hammer_ondisk.c
sys/vfs/hammer/hammer_prune.c [copied from sys/vfs/hammer/hammer_ioctl.c with 57% similarity]
sys/vfs/hammer/hammer_reblock.c
sys/vfs/hammer/hammer_subs.c
sys/vfs/hammer/hammer_vnops.c

index 4363667..cb0f297 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sbin/hammer/cmd_show.c,v 1.8 2008/05/05 20:34:52 dillon Exp $
+ * $DragonFly: src/sbin/hammer/cmd_show.c,v 1.9 2008/05/12 21:17:16 dillon Exp $
  */
 
 #include "hammer.h"
@@ -44,6 +44,7 @@
 static void print_btree_node(hammer_off_t node_offset, int depth, int spike,
                        hammer_base_elm_t left_bound,
                        hammer_base_elm_t right_bound);
+static void print_record(hammer_btree_elm_t elm);
 static void print_btree_elm(hammer_btree_elm_t elm, int i, u_int8_t type,
                        int flags, const char *label);
 static int print_elm_flags(hammer_node_ondisk_t node, hammer_off_t node_offset,
@@ -51,7 +52,6 @@ static int print_elm_flags(hammer_node_ondisk_t node, hammer_off_t node_offset,
                        hammer_base_elm_t left_bound,
                        hammer_base_elm_t right_bound);
 static void print_bigblock_fill(hammer_off_t offset);
-static void print_record(hammer_btree_elm_t elm);
 
 void
 hammer_cmd_show(hammer_off_t node_offset, int depth,
@@ -167,13 +167,10 @@ print_btree_elm(hammer_btree_elm_t elm, int i, u_int8_t type,
                switch(elm->base.btype) {
                case HAMMER_BTREE_TYPE_RECORD:
                        printf("\n\t         ");
-                       printf("recoff=%016llx", elm->leaf.rec_offset);
                        printf(" dataoff=%016llx/%d",
                                elm->leaf.data_offset, elm->leaf.data_len);
                        if (VerboseOpt) {
                                printf("\n\t         fills=");
-                               print_bigblock_fill(elm->leaf.rec_offset);
-                               printf(", ");
                                print_bigblock_fill(elm->leaf.data_offset);
                        }
                        if (VerboseOpt > 1)
@@ -275,48 +272,38 @@ static
 void
 print_record(hammer_btree_elm_t elm)
 {
-       struct buffer_info *rec_buffer;
        struct buffer_info *data_buffer;
-       hammer_record_ondisk_t rec;
-       hammer_off_t rec_offset;
        hammer_off_t data_offset;
-       hammer_crc_t crc;
        int32_t data_len;
-       char *data;
+       hammer_data_ondisk_t data;
 
-       rec_offset = elm->leaf.rec_offset;
        data_offset = elm->leaf.data_offset;
        data_len = elm->leaf.data_len;
-       rec_buffer = NULL;
        data_buffer = NULL;
 
-       rec = get_buffer_data(rec_offset, &rec_buffer, 0);
        if (data_offset)
                data = get_buffer_data(data_offset, &data_buffer, 0);
        else
                data = NULL;
 
-       if (rec == NULL) {
-               printf("record FAILED\n");
-               return;
-       }
-       switch(rec->base.base.rec_type) {
+       switch(elm->leaf.base.rec_type) {
        case HAMMER_RECTYPE_INODE:
                printf("\n%17s", "");
                printf("size=%lld nlinks=%lld",
-                      rec->inode.ino_size, rec->inode.ino_nlinks);
+                      data->inode.size, data->inode.nlinks);
                break;
        case HAMMER_RECTYPE_DIRENTRY:
                printf("\n%17s", "");
                printf("dir-entry ino=%016llx name=\"%*.*s\"",
-                      rec->entry.obj_id,
-                      data_len, data_len, data);
+                      data->entry.obj_id,
+                      data_len, data_len, data->entry.name);
                break;
        case HAMMER_RECTYPE_FIX:
-               switch(rec->base.base.key) {
+               switch(elm->leaf.base.key) {
                case HAMMER_FIXKEY_SYMLINK:
                        printf("\n%17s", "");
-                       printf("symlink=\"%*.*s\"", data_len, data_len, data);
+                       printf("symlink=\"%*.*s\"", data_len, data_len,
+                               data->symlink.name);
                        break;
                default:
                        break;
@@ -325,18 +312,6 @@ print_record(hammer_btree_elm_t elm)
        default:
                break;
        }
-       if (rec->base.signature != HAMMER_RECORD_SIGNATURE_GOOD) {
-               printf("\n%17s", "");
-               printf("BAD SIGNATURE: %08x\n", rec->base.signature);
-       }
-       crc = crc32(&rec->base.rec_crc + 1, HAMMER_RECORD_CRCSIZE);
-       if (crc != rec->base.rec_crc) {
-               printf("\n%17s", "");
-               printf("BAD CRC: %08x v %08x\n", rec->base.rec_crc, crc);
-       }
-
-       if (rec_buffer)
-               rel_buffer(rec_buffer);
        if (data_buffer)
                rel_buffer(data_buffer);
 }
index fcd4fd9..01b8b43 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sbin/hammer/cycle.c,v 1.1 2008/05/11 20:44:44 dillon Exp $
+ * $DragonFly: src/sbin/hammer/cycle.c,v 1.2 2008/05/12 21:17:16 dillon Exp $
  */
 
 #include "hammer.h"
@@ -42,7 +42,7 @@ hammer_get_cycle(int64_t default_obj_id)
        int64_t obj_id;
        FILE *fp;
 
-       if ((fp = fopen(CyclePath, "r")) != NULL) {
+       if (CyclePath && (fp = fopen(CyclePath, "r")) != NULL) {
                if (fscanf(fp, "%llx\n", &obj_id) != 1) {
                        obj_id = default_obj_id;
                        fprintf(stderr, "Warning: malformed obj_id in %s\n",
index ecab2f9..36d2745 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sbin/hammer/hammer_util.h,v 1.14 2008/05/12 05:13:48 dillon Exp $
+ * $DragonFly: src/sbin/hammer/hammer_util.h,v 1.15 2008/05/12 21:17:16 dillon Exp $
  */
 
 #include <sys/types.h>
@@ -127,8 +127,9 @@ void format_blockmap(hammer_blockmap_t blockmap, hammer_off_t zone_off);
 void format_undomap(hammer_volume_ondisk_t ondisk);
 
 void *alloc_btree_element(hammer_off_t *offp);
-hammer_record_ondisk_t alloc_record_element(hammer_off_t *offp,
-                               int32_t data_len, void **datap);
+void *alloc_data_element(hammer_off_t *offp, int32_t data_len, 
+                        struct buffer_info **data_bufferp);
+
 int hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2);
 
 
index 0f378f6..5342dbb 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sbin/hammer/ondisk.c,v 1.18 2008/05/12 05:13:48 dillon Exp $
+ * $DragonFly: src/sbin/hammer/ondisk.c,v 1.19 2008/05/12 21:17:16 dillon Exp $
  */
 
 #include <sys/types.h>
@@ -326,33 +326,25 @@ alloc_btree_element(hammer_off_t *offp)
        return(node);
 }
 
-hammer_record_ondisk_t
-alloc_record_element(hammer_off_t *offp, int32_t data_len, void **datap)
+void *
+alloc_data_element(hammer_off_t *offp, int32_t data_len,
+                  struct buffer_info **data_bufferp)
 {
-       struct buffer_info *record_buffer = NULL;
-       struct buffer_info *data_buffer = NULL;
-       hammer_record_ondisk_t rec;
-
-       rec = alloc_blockmap(HAMMER_ZONE_RECORD_INDEX, sizeof(*rec),
-                            offp, &record_buffer);
-       bzero(rec, sizeof(*rec));
+       void *data;
 
        if (data_len >= HAMMER_BUFSIZE) {
                assert(data_len <= HAMMER_BUFSIZE); /* just one buffer */
-               *datap = alloc_blockmap(HAMMER_ZONE_LARGE_DATA_INDEX, data_len,
-                                       &rec->base.data_off, &data_buffer);
-               rec->base.data_len = data_len;
-               bzero(*datap, data_len);
+               data = alloc_blockmap(HAMMER_ZONE_LARGE_DATA_INDEX, data_len,
+                                     offp, data_bufferp);
+               bzero(data, data_len);
        } else if (data_len) {
-               *datap = alloc_blockmap(HAMMER_ZONE_SMALL_DATA_INDEX, data_len,
-                                       &rec->base.data_off, &data_buffer);
-               rec->base.data_len = data_len;
-               bzero(*datap, data_len);
+               data = alloc_blockmap(HAMMER_ZONE_SMALL_DATA_INDEX, data_len,
+                                     offp, data_bufferp);
+               bzero(data, data_len);
        } else {
-               *datap = NULL;
+               data = NULL;
        }
-       /* XXX buf not released, ptr remains valid */
-       return(rec);
+       return (data);
 }
 
 /*
index c07f95c..3de42a1 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sbin/newfs_hammer/newfs_hammer.c,v 1.24 2008/05/06 00:15:35 dillon Exp $
+ * $DragonFly: src/sbin/newfs_hammer/newfs_hammer.c,v 1.25 2008/05/12 21:17:17 dillon Exp $
  */
 
 #include "newfs_hammer.h"
@@ -59,7 +59,6 @@ main(int ac, char **av)
         * if it gets broken!
         */
        assert(sizeof(struct hammer_volume_ondisk) <= HAMMER_BUFSIZE);
-       assert(sizeof(union hammer_record_ondisk) == HAMMER_RECORD_SIZE);
        assert(sizeof(struct hammer_blockmap_layer1) == 32);
        assert(sizeof(struct hammer_blockmap_layer2) == 16);
 
@@ -427,9 +426,11 @@ format_volume(struct volume_info *vol, int nvols, const char *label,
                format_blockmap(
                        &ondisk->vol0_blockmap[HAMMER_ZONE_BTREE_INDEX],
                        HAMMER_ZONE_BTREE);
+#if 0
                format_blockmap(
                        &ondisk->vol0_blockmap[HAMMER_ZONE_RECORD_INDEX],
                        HAMMER_ZONE_RECORD);
+#endif
                format_blockmap(
                        &ondisk->vol0_blockmap[HAMMER_ZONE_LARGE_DATA_INDEX],
                        HAMMER_ZONE_LARGE_DATA);
@@ -459,38 +460,26 @@ hammer_off_t
 format_root(void)
 {
        hammer_off_t btree_off;
-       hammer_off_t rec_off;
+       hammer_off_t data_off;
+       hammer_tid_t create_tid;
        hammer_node_ondisk_t bnode;
-       hammer_record_ondisk_t rec;
        struct hammer_inode_data *idata;
+       struct buffer_info *data_buffer = NULL;
        hammer_btree_elm_t elm;
 
        bnode = alloc_btree_element(&btree_off);
-       rec = alloc_record_element(&rec_off, sizeof(*idata), (void **)&idata);
+       idata = alloc_data_element(&data_off, sizeof(*idata), &data_buffer);
+       create_tid = createtid();
 
        /*
         * Populate the inode data and inode record for the root directory.
         */
        idata->version = HAMMER_INODE_DATA_VERSION;
        idata->mode = 0755;
-
-       rec->base.base.btype = HAMMER_BTREE_TYPE_RECORD;
-       rec->base.base.obj_id = HAMMER_OBJID_ROOT;
-       rec->base.base.key = 0;
-       rec->base.base.create_tid = createtid();
-       rec->base.base.delete_tid = 0;
-       rec->base.base.rec_type = HAMMER_RECTYPE_INODE;
-       rec->base.base.obj_type = HAMMER_OBJTYPE_DIRECTORY;
-       /* rec->base.data_offset - initialized by alloc_record_element */
-       /* rec->base.data_len    - initialized by alloc_record_element */
-       rec->base.data_crc = crc32(idata, sizeof(*idata));
-       rec->inode.ino_atime  = rec->base.base.create_tid;
-       rec->inode.ino_mtime  = rec->base.base.create_tid;
-       rec->inode.ino_size   = 0;
-       rec->inode.ino_nlinks = 1;
-       rec->base.signature = HAMMER_RECORD_SIGNATURE_GOOD;
-       rec->base.rec_crc = crc32(&rec->base.rec_crc + 1,
-                               HAMMER_RECORD_SIZE - sizeof(rec->base.rec_crc));
+       idata->mtime = create_tid;
+       idata->obj_type = HAMMER_OBJTYPE_DIRECTORY;
+       idata->size = 0;
+       idata->nlinks = 1;
 
        /*
         * Create the root of the B-Tree.  The root is a leaf node so we
@@ -501,14 +490,20 @@ format_root(void)
        bnode->type = HAMMER_BTREE_TYPE_LEAF;
 
        elm = &bnode->elms[0];
-       elm->base = rec->base.base;
-       elm->leaf.rec_offset = rec_off;
-       elm->leaf.data_offset = rec->base.data_off;
-       elm->leaf.data_len = rec->base.data_len;
-       elm->leaf.data_crc = rec->base.data_crc;
-
-       bnode->crc = crc32(&bnode->crc + 1,
-                          sizeof(*bnode) - sizeof(bnode->crc));
+       elm->leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
+       elm->leaf.base.obj_id = HAMMER_OBJID_ROOT;
+       elm->leaf.base.key = 0;
+       elm->leaf.base.create_tid = create_tid;
+       elm->leaf.base.delete_tid = 0;
+       elm->leaf.base.rec_type = HAMMER_RECTYPE_INODE;
+       elm->leaf.base.obj_type = HAMMER_OBJTYPE_DIRECTORY;
+
+       elm->leaf.atime = create_tid;
+       elm->leaf.data_offset = data_off;
+       elm->leaf.data_len = sizeof(*idata);
+       elm->leaf.data_crc = crc32(idata, sizeof(*idata));
+
+       bnode->crc = crc32(&bnode->crc + 1, HAMMER_BTREE_CRCSIZE);
 
        return(btree_off);
 }
index e78274c..9819144 100644 (file)
@@ -1,5 +1,5 @@
 # $FreeBSD: src/sys/conf/files,v 1.340.2.137 2003/06/04 17:10:30 sam Exp $
-# $DragonFly: src/sys/conf/files,v 1.215 2008/04/23 21:06:19 thomas Exp $
+# $DragonFly: src/sys/conf/files,v 1.216 2008/05/12 21:17:15 dillon Exp $
 #
 # The long compile-with and dependency lines are required because of
 # limitations in config: backslash-newline doesn't work in strings, and
@@ -1152,6 +1152,7 @@ vfs/hammer/hammer_blockmap.c      optional hammer
 vfs/hammer/hammer_freemap.c    optional hammer
 vfs/hammer/hammer_undo.c       optional hammer
 vfs/hammer/hammer_reblock.c    optional hammer
+vfs/hammer/hammer_prune.c      optional hammer
 vfs/hammer/hammer_flusher.c    optional hammer
 vm/default_pager.c             standard
 vm/device_pager.c              standard
index 46d9869..10c9fb6 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.63 2008/05/09 07:26:51 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.64 2008/05/12 21:17:18 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -212,7 +212,7 @@ struct hammer_inode {
        TAILQ_HEAD(, bio)       bio_list;       /* BIOs to flush out */
        TAILQ_HEAD(, bio)       bio_alt_list;   /* BIOs to flush out */
        off_t                   trunc_off;
-       struct hammer_inode_record ino_rec;     /* in-memory cache */
+       struct hammer_btree_leaf_elm ino_leaf;  /* in-memory cache */
        struct hammer_inode_data ino_data;      /* in-memory cache */
        struct hammer_rec_rb_tree rec_tree;     /* in-memory cache */
        struct hammer_node      *cache[2];      /* search initiate cache */
@@ -225,7 +225,7 @@ struct hammer_inode {
         */
        int             sync_flags;             /* to-sync flags cache */
        off_t           sync_trunc_off;         /* to-sync truncation */
-       struct hammer_inode_record sync_ino_rec;/* to-sync cache */
+       struct hammer_btree_leaf_elm sync_ino_leaf; /* to-sync cache */
        struct hammer_inode_data sync_ino_data; /* to-sync cache */
 };
 
@@ -234,7 +234,7 @@ typedef struct hammer_inode *hammer_inode_t;
 #define VTOI(vp)       ((struct hammer_inode *)(vp)->v_data)
 
 #define HAMMER_INODE_DDIRTY    0x0001  /* in-memory ino_data is dirty */
-#define HAMMER_INODE_RDIRTY    0x0002  /* in-memory ino_rec is dirty */
+#define HAMMER_INODE_UNUSED0002        0x0002
 #define HAMMER_INODE_ITIMES    0x0004  /* in-memory mtime/atime modified */
 #define HAMMER_INODE_XDIRTY    0x0008  /* in-memory records */
 #define HAMMER_INODE_ONDISK    0x0010  /* inode is on-disk (else not yet) */
@@ -254,8 +254,8 @@ typedef struct hammer_inode *hammer_inode_t;
 #define HAMMER_INODE_RESIGNAL  0x00040000 /* re-signal on re-flush */
 #define HAMMER_INODE_RESIGNAL  0x00040000 /* re-signal on re-flush */
 
-#define HAMMER_INODE_MODMASK   (HAMMER_INODE_DDIRTY|HAMMER_INODE_RDIRTY| \
-                                HAMMER_INODE_XDIRTY|HAMMER_INODE_BUFS|   \
+#define HAMMER_INODE_MODMASK   (HAMMER_INODE_DDIRTY|                       \
+                                HAMMER_INODE_XDIRTY|HAMMER_INODE_BUFS|     \
                                 HAMMER_INODE_ITIMES|HAMMER_INODE_TRUNCATED|\
                                 HAMMER_INODE_DELETING)
 
@@ -299,7 +299,7 @@ struct hammer_record {
        struct hammer_lock              lock;
        struct hammer_inode             *ip;
        struct hammer_inode             *target_ip;
-       union hammer_record_ondisk      rec;
+       struct hammer_btree_leaf_elm    leaf;
        union hammer_data_ondisk        *data;
        int                             flags;
 };
@@ -314,7 +314,7 @@ typedef struct hammer_record *hammer_record_t;
 #define HAMMER_RECF_ONRBTREE           0x0002
 #define HAMMER_RECF_DELETED_FE         0x0004  /* deleted (frontend) */
 #define HAMMER_RECF_DELETED_BE         0x0008  /* deleted (backend) */
-#define HAMMER_RECF_INBAND             0x0010
+#define HAMMER_RECF_UNUSED0010         0x0010
 #define HAMMER_RECF_INTERLOCK_BE       0x0020  /* backend interlock */
 #define HAMMER_RECF_WANTED             0x0040
 #define HAMMER_RECF_CONVERT_DELETE     0x0100 /* special case */
@@ -628,7 +628,6 @@ int hammer_install_volume(hammer_mount_t hmp, const char *volname);
 int    hammer_ip_lookup(hammer_cursor_t cursor);
 int    hammer_ip_first(hammer_cursor_t cursor);
 int    hammer_ip_next(hammer_cursor_t cursor);
-int    hammer_ip_resolve_record_and_data(hammer_cursor_t cursor);
 int    hammer_ip_resolve_data(hammer_cursor_t cursor);
 int    hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid);
 int    hammer_delete_at_cursor(hammer_cursor_t cursor, int64_t *stat_bytes);
@@ -637,7 +636,7 @@ int hammer_ip_check_directory_empty(hammer_transaction_t trans,
 int    hammer_sync_hmp(hammer_mount_t hmp, int waitfor);
 
 hammer_record_t
-       hammer_alloc_mem_record(hammer_inode_t ip);
+       hammer_alloc_mem_record(hammer_inode_t ip, int data_len);
 void   hammer_flush_record_done(hammer_record_t record, int error);
 void   hammer_wait_mem_record(hammer_record_t record);
 void   hammer_rel_mem_record(hammer_record_t record);
@@ -688,7 +687,8 @@ int hammer_btree_last(hammer_cursor_t cursor);
 int    hammer_btree_extract(hammer_cursor_t cursor, int flags);
 int    hammer_btree_iterate(hammer_cursor_t cursor);
 int    hammer_btree_iterate_reverse(hammer_cursor_t cursor);
-int    hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm);
+int    hammer_btree_insert(hammer_cursor_t cursor,
+                           hammer_btree_leaf_elm_t elm);
 int    hammer_btree_delete(hammer_cursor_t cursor);
 int    hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2);
 int    hammer_btree_chkts(hammer_tid_t ts, hammer_base_elm_t key);
@@ -839,6 +839,8 @@ void hammer_modify_buffer_done(hammer_buffer_t buffer);
 
 int hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip,
                        struct hammer_ioc_reblock *reblock);
+int hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip,
+                       struct hammer_ioc_prune *prune);
 
 void hammer_init_holes(hammer_mount_t hmp, hammer_holes_t holes);
 void hammer_free_holes(hammer_mount_t hmp, hammer_holes_t holes);
@@ -856,7 +858,6 @@ void hammer_crc_set_volume(hammer_volume_ondisk_t ondisk);
 
 int hammer_crc_test_blockmap(hammer_blockmap_t blockmap);
 int hammer_crc_test_volume(hammer_volume_ondisk_t ondisk);
-int hammer_crc_test_record(hammer_record_ondisk_t ondisk);
 int hammer_crc_test_btree(hammer_node_ondisk_t ondisk);
 void hkprintf(const char *ctl, ...);
 
@@ -897,58 +898,6 @@ hammer_modify_node_done(hammer_node_t node)
        hammer_modify_buffer_done(node->buffer);
 }
 
-static __inline void
-hammer_modify_record_noundo(hammer_transaction_t trans, hammer_buffer_t buffer,
-                           hammer_record_ondisk_t rec __unused)
-{
-       hammer_modify_buffer(trans, buffer, NULL, 0);
-}
-
-static __inline void
-hammer_modify_record_all(hammer_transaction_t trans, hammer_buffer_t buffer,
-                        hammer_record_ondisk_t rec)
-{
-       KKASSERT((char *)rec >= (char *)buffer->ondisk &&
-                (char *)(rec + 1) <= (char *)buffer->ondisk + HAMMER_BUFSIZE);
-       hammer_modify_buffer(trans, buffer, rec, sizeof(*rec));
-}
-
-static __inline void
-hammer_modify_record(hammer_transaction_t trans, hammer_buffer_t buffer,
-                    hammer_record_ondisk_t rec, void *base, int len,
-                    int dodelete)
-{
-       KKASSERT((char *)base >= (char *)buffer->ondisk &&
-                (char *)base + len <= (char *)buffer->ondisk + HAMMER_BUFSIZE);
-       KKASSERT((char *)rec >= (char *)buffer->ondisk &&
-                (char *)(rec + 1) <= (char *)buffer->ondisk + HAMMER_BUFSIZE);
-
-       /*
-        * Due to undo overheads it is more efficient to just undo the whole
-        * record.
-        */
-       hammer_modify_buffer(trans, buffer, rec, sizeof(*rec));
-#if 0
-       hammer_modify_buffer(trans, buffer, base, len);
-       hammer_modify_buffer(trans, buffer, &rec->base.rec_crc,
-                                    sizeof(rec->base.rec_crc));
-       --node->buffer->io.modify_refs; /* only want one ref */
-       if (dodelete) {
-               hammer_modify_buffer(trans, buffer, &rec->base.signature,
-                                    sizeof(rec->base.signature));
-               --node->buffer->io.modify_refs; /* only want one ref */
-       }
-#endif
-}
-
-static __inline void
-hammer_modify_record_done(hammer_buffer_t buffer, hammer_record_ondisk_t rec)
-{
-       rec->base.rec_crc = crc32(&rec->base.rec_crc + 1,
-                                 HAMMER_RECORD_CRCSIZE);
-       hammer_modify_buffer_done(buffer);
-}
-
 #define hammer_modify_volume_field(trans, vol, field)          \
        hammer_modify_volume(trans, vol, &(vol)->ondisk->field, \
                             sizeof((vol)->ondisk->field))
@@ -957,7 +906,3 @@ hammer_modify_record_done(hammer_buffer_t buffer, hammer_record_ondisk_t rec)
        hammer_modify_node(trans, node, &(node)->ondisk->field, \
                             sizeof((node)->ondisk->field))
 
-#define hammer_modify_record_field(trans, buffer, rec, field, dodelete) \
-       hammer_modify_record(trans, buffer, rec, &(rec)->field,         \
-                            sizeof((rec)->field), dodelete)
-
index 93f3176..da83573 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.45 2008/05/12 05:13:11 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.46 2008/05/12 21:17:18 dillon Exp $
  */
 
 /*
@@ -584,7 +584,6 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags)
        hammer_mount_t hmp;
        hammer_node_ondisk_t node;
        hammer_btree_elm_t elm;
-       hammer_off_t rec_off;
        hammer_off_t data_off;
        int32_t data_len;
        int error;
@@ -609,45 +608,21 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags)
         * Only record types have data.
         */
        KKASSERT(node->type == HAMMER_BTREE_TYPE_LEAF);
+       cursor->leaf = &elm->leaf;
        if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD)
                flags &= ~HAMMER_CURSOR_GET_DATA;
        data_off = elm->leaf.data_offset;
        data_len = elm->leaf.data_len;
        if (data_off == 0)
                flags &= ~HAMMER_CURSOR_GET_DATA;
-       rec_off = elm->leaf.rec_offset;
 
-       /*
-        * Extract the record if the record was requested or the data
-        * resides in the record buf.
-        */
-       if ((flags & HAMMER_CURSOR_GET_RECORD) ||
-           ((flags & HAMMER_CURSOR_GET_DATA) &&
-            ((rec_off ^ data_off) & ~HAMMER_BUFMASK64) == 0)) {
-               cursor->record = hammer_bread(hmp, rec_off, &error,
-                                             &cursor->record_buffer);
-               if (hammer_crc_test_record(cursor->record) == 0) {
-                       Debugger("CRC FAILED: RECORD");
-               }
-       } else {
-               rec_off = 0;
-               error = 0;
-       }
-       if ((flags & HAMMER_CURSOR_GET_DATA) && error == 0) {
-               if ((rec_off ^ data_off) & ~HAMMER_BUFMASK64) {
-                       /*
-                        * Data and record are in different buffers.
-                        */
-                       cursor->data = hammer_bread(hmp, data_off, &error,
+       error = 0;
+       if ((flags & HAMMER_CURSOR_GET_DATA)) {
+               /*
+                * Data and record are in different buffers.
+                */
+               cursor->data = hammer_bread(hmp, data_off, &error,
                                                    &cursor->data_buffer);
-               } else {
-                       /*
-                        * Data resides in same buffer as record.
-                        */
-                       cursor->data = (void *)
-                               ((char *)cursor->record_buffer->ondisk +
-                               ((int32_t)data_off & HAMMER_BUFMASK));
-               }
                KKASSERT(data_len >= 0 && data_len <= HAMMER_BUFSIZE);
                if (data_len && 
                    crc32(cursor->data, data_len) != elm->leaf.data_crc) {
@@ -673,7 +648,7 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags)
  * ENOSPC is returned if there is no room to insert a new record.
  */
 int
-hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm)
+hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_leaf_elm_t elm)
 {
        hammer_node_ondisk_t node;
        int i;
@@ -702,20 +677,20 @@ hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm)
                bcopy(&node->elms[i], &node->elms[i+1],
                      (node->count - i) * sizeof(*elm));
        }
-       node->elms[i] = *elm;
+       node->elms[i].leaf = *elm;
        ++node->count;
        hammer_modify_node_done(cursor->node);
 
        /*
         * Debugging sanity checks.
         */
-       KKASSERT(hammer_btree_cmp(cursor->left_bound, &elm->leaf.base) <= 0);
-       KKASSERT(hammer_btree_cmp(cursor->right_bound, &elm->leaf.base) > 0);
+       KKASSERT(hammer_btree_cmp(cursor->left_bound, &elm->base) <= 0);
+       KKASSERT(hammer_btree_cmp(cursor->right_bound, &elm->base) > 0);
        if (i) {
-               KKASSERT(hammer_btree_cmp(&node->elms[i-1].leaf.base, &elm->leaf.base) < 0);
+               KKASSERT(hammer_btree_cmp(&node->elms[i-1].leaf.base, &elm->base) < 0);
        }
        if (i != node->count - 1)
-               KKASSERT(hammer_btree_cmp(&node->elms[i+1].leaf.base, &elm->leaf.base) > 0);
+               KKASSERT(hammer_btree_cmp(&node->elms[i+1].leaf.base, &elm->base) > 0);
 
        return(0);
 }
@@ -1006,7 +981,8 @@ re_search:
                        /*
                         * If we aren't inserting we can stop here.
                         */
-                       if ((flags & HAMMER_CURSOR_INSERT) == 0) {
+                       if ((flags & (HAMMER_CURSOR_INSERT |
+                                     HAMMER_CURSOR_PRUNING)) == 0) {
                                cursor->index = 0;
                                return(ENOENT);
                        }
@@ -1039,7 +1015,8 @@ re_search:
                         * elms[i-2] prior to adjustments to 'i'.
                         */
                        --i;
-                       if ((flags & HAMMER_CURSOR_INSERT) == 0) {
+                       if ((flags & (HAMMER_CURSOR_INSERT |
+                                     HAMMER_CURSOR_PRUNING)) == 0) {
                                cursor->index = i;
                                return (ENOENT);
                        }
@@ -2385,7 +2362,7 @@ hammer_print_btree_elm(hammer_btree_elm_t elm, u_int8_t type, int i)
                        elm->internal.subtree_offset);
                break;
        case HAMMER_BTREE_TYPE_RECORD:
-               kprintf("\trec_offset   = %016llx\n", elm->leaf.rec_offset);
+               kprintf("\tatime        = %016llx\n", elm->leaf.atime);
                kprintf("\tdata_offset  = %016llx\n", elm->leaf.data_offset);
                kprintf("\tdata_len     = %08x\n", elm->leaf.data_len);
                kprintf("\tdata_crc     = %08x\n", elm->leaf.data_crc);
index f30b6fc..fdc0472 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.13 2008/05/05 20:34:47 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.14 2008/05/12 21:17:18 dillon Exp $
  */
 
 /*
@@ -65,7 +65,7 @@
  * from any point in the tree without revisting nodes.  It is also possible
  * to terminate searches early and make minor adjustments to the boundaries
  * (within the confines of the parent's boundaries) on the fly.  This greatly
- * improves the efficiency of many operations, most especially record appends.
+ * improves the efficiency of many operations.
  *
  * HAMMER B-Trees are per-cluster.  The global multi-cluster B-Tree is
  * constructed by allowing internal nodes to link to the roots of other
@@ -132,18 +132,20 @@ struct hammer_btree_internal_elm {
  */
 struct hammer_btree_leaf_elm {
        struct hammer_base_elm base;
-       hammer_off_t    rec_offset;
+       hammer_off_t    atime;          /* access time */
        hammer_off_t    data_offset;
        int32_t         data_len;
        hammer_crc_t    data_crc;
 };
 
+typedef struct hammer_btree_leaf_elm *hammer_btree_leaf_elm_t;
+
 /*
  * Rollup btree leaf element types - 64 byte structure
  */
 union hammer_btree_elm {
-       struct hammer_base_elm base;
-       struct hammer_btree_leaf_elm leaf;
+       struct hammer_base_elm          base;
+       struct hammer_btree_leaf_elm    leaf;
        struct hammer_btree_internal_elm internal;
 };
 
index 210c6a5..9de9f74 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.24 2008/05/05 20:34:47 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.25 2008/05/12 21:17:18 dillon Exp $
  */
 
 /*
@@ -101,10 +101,21 @@ hammer_init_cursor(hammer_transaction_t trans, hammer_cursor_t cursor,
                if (error)
                        break;
                hammer_lock_sh(&node->lock);
+
+               /*
+                * If someone got in before we could lock the node, retry.
+                */
                if (node->flags & HAMMER_NODE_DELETED) {
                        hammer_unlock(&node->lock);
                        hammer_rel_node(node);
                        node = NULL;
+                       continue;
+               }
+               if (volume->ondisk->vol0_btree_root != node->node_offset) {
+                       hammer_unlock(&node->lock);
+                       hammer_rel_node(node);
+                       node = NULL;
+                       continue;
                }
        }
 
@@ -204,7 +215,7 @@ hammer_done_cursor(hammer_cursor_t cursor)
        }
 
        cursor->data = NULL;
-       cursor->record = NULL;
+       cursor->leaf = NULL;
        cursor->left_bound = NULL;
        cursor->right_bound = NULL;
        cursor->trans = NULL;
index 1f843b4..b2c632a 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.17 2008/05/03 05:28:55 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.18 2008/05/12 21:17:18 dillon Exp $
  */
 
 /*
@@ -96,7 +96,7 @@ struct hammer_cursor {
         */
        struct hammer_buffer *record_buffer;    /* record (+ built-in data) */
        struct hammer_buffer *data_buffer;      /* extended data */
-       union hammer_record_ondisk *record;
+       struct hammer_btree_leaf_elm *leaf;
        union hammer_data_ondisk *data;
 
        /*
@@ -113,7 +113,7 @@ struct hammer_cursor {
 
 typedef struct hammer_cursor *hammer_cursor_t;
 
-#define HAMMER_CURSOR_GET_RECORD       0x0001
+#define HAMMER_CURSOR_GET_LEAF         0x0001
 #define HAMMER_CURSOR_GET_DATA         0x0002
 #define HAMMER_CURSOR_BACKEND          0x0004  /* cursor run by backend */
 #define HAMMER_CURSOR_INSERT           0x0008  /* adjust for insert */
@@ -130,6 +130,8 @@ typedef struct hammer_cursor *hammer_cursor_t;
 #define HAMMER_CURSOR_ASOF             0x4000  /* as-of lookup */
 #define HAMMER_CURSOR_CREATE_CHECK     0x8000  /* as-of lookup */
 
+#define HAMMER_CURSOR_PRUNING          0x00010000
+
 /*
  * Flags we can clear when reusing a cursor (we can clear all of them)
  */
index 6120d69..7c21dc2 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.31 2008/05/05 20:34:47 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.32 2008/05/12 21:17:18 dillon Exp $
  */
 
 #ifndef VFS_HAMMER_DISK_H_
@@ -506,28 +506,6 @@ typedef struct hammer_volume_ondisk *hammer_volume_ondisk_t;
        (sizeof(struct hammer_volume_ondisk) - HAMMER_VOL_CRCSIZE1 -    \
         sizeof(hammer_crc_t))
 
-/*
- * All HAMMER records have a common 64-byte base and a 32 byte extension,
- * plus a possible data reference.  The data reference can be in-band or
- * out-of-band.
- */
-
-#define HAMMER_RECORD_SIZE             (64+32)
-
-struct hammer_base_record {
-       hammer_crc_t    rec_crc;        /* record crc (full 64-4 bytes) */
-                                       /* MUST BE FIRST FIELD OF STRUCTURE */
-       hammer_crc_t    data_crc;       /* data crc */
-                                       /* MUST BE SECOND FIELD OF STRUCTURE */
-       struct hammer_base_elm base;    /* 40 byte base element */
-       hammer_off_t    data_off;       /* in-band or out-of-band */
-       int32_t         data_len;       /* size of data in bytes */
-       u_int32_t       signature;      /* record signature */
-};
-
-#define HAMMER_RECORD_SIGNATURE_GOOD           0xA7B6C5D4
-#define HAMMER_RECORD_SIGNATURE_DESTROYED      0xF8071625
-
 /*
  * Record types are fairly straightforward.  The B-Tree includes the record
  * type in its index sort.
@@ -539,17 +517,6 @@ struct hammer_base_record {
  * get their own obj_id space (and thus can serve as a replication target)
  * and look like a mount point to the system.
  *
- * Inter-cluster records are special-cased in the B-Tree.  These records
- * are referenced from a B-Tree INTERNAL node, NOT A LEAF.  This means
- * that the element in the B-Tree node is actually a boundary element whos
- * base element fields, including rec_type, reflect the boundary, NOT
- * the inter-cluster record type.
- *
- * HAMMER_RECTYPE_CLUSTER - only set in the actual inter-cluster record,
- * not set in the left or right boundary elements around the inter-cluster
- * reference of an internal node in the B-Tree (because doing so would
- * interfere with the boundary tests).
- *
  * NOTE: hammer_ip_delete_range_all() deletes all record types greater
  * then HAMMER_RECTYPE_INODE.
  */
@@ -578,61 +545,45 @@ struct hammer_base_record {
 #define HAMMER_OBJTYPE_PSEUDOFS                8       /* pseudo filesystem obj */
 
 /*
- * A HAMMER inode record.
+ * HAMMER inode attribute data
  *
- * This forms the basis for a filesystem object.  obj_id is the inode number,
- * key1 represents the pseudo filesystem id for security partitioning
- * (preventing cross-links and/or restricting a NFS export and specifying the
- * security policy), and key2 represents the data retention policy id.
- *
- * Inode numbers are 64 bit quantities which uniquely identify a filesystem
- * object for the ENTIRE life of the filesystem, even after the object has
- * been deleted.  For all intents and purposes inode numbers are simply 
- * allocated by incrementing a sequence space.
- *
- * There is an important distinction between the data stored in the inode
- * record and the record's data reference.  The record references a
- * hammer_inode_data structure but the filesystem object size and hard link
- * count is stored in the inode record itself.  This allows multiple inodes
- * to share the same hammer_inode_data structure.  This is possible because
- * any modifications will lay out new data.  The HAMMER implementation need
- * not use the data-sharing ability when laying down new records.
- *
- * A HAMMER inode is subject to the same historical storage requirements
- * as any other record.  In particular any change in filesystem or hard link
- * count will lay down a new inode record when the filesystem is synced to
- * disk.  This can lead to a lot of junk records which get cleaned up by
- * the data retention policy.
+ * The data reference for a HAMMER inode points to this structure.  Any
+ * modifications to the contents of this structure will result in a
+ * replacement operation.
  *
- * The ino_atime and ino_mtime fields are a special case.  Modifications to
- * these fields do NOT lay down a new record by default, though the values
- * are effectively frozen for snapshots which access historical versions
- * of the inode record due to other operations.  This means that atime will
- * not necessarily be accurate in snapshots, backups, or mirrors.  mtime
- * will be accurate in backups and mirrors since it can be regenerated from
- * the mirroring stream.
+ * parent_obj_id is only valid for directories (which cannot be hard-linked),
+ * and specifies the parent directory obj_id.  This field will also be set
+ * for non-directory inodes as a recovery aid, but can wind up specifying
+ * stale information.  However, since object id's are not reused, the worst
+ * that happens is that the recovery code is unable to use it.
  *
- * Because nlinks is historically retained the hardlink count will be
- * accurate when accessing a HAMMER filesystem snapshot.
+ * NOTE: atime is stored in the inode's B-Tree element and not in the inode
+ * data.  This allows the atime to be updated without having to lay down a
+ * new record.
  */
-struct hammer_inode_record {
-       struct hammer_base_record base;
-       u_int64_t ino_atime;    /* last access time (not historical) */
-       u_int64_t ino_mtime;    /* last modified time (not historical) */
-       u_int64_t ino_size;     /* filesystem object size */
-       u_int64_t ino_nlinks;   /* hard links */
+struct hammer_inode_data {
+       u_int16_t version;      /* inode data version */
+       u_int16_t mode;         /* basic unix permissions */
+       u_int32_t uflags;       /* chflags */
+       u_int32_t rmajor;       /* used by device nodes */
+       u_int32_t rminor;       /* used by device nodes */
+       u_int64_t ctime;
+       u_int64_t parent_obj_id;/* parent directory obj_id */
+       uuid_t    uid;
+       uuid_t    gid;
+
+       u_int8_t  obj_type;
+       u_int8_t  reserved01;
+       u_int16_t reserved02;
+       u_int32_t reserved03;
+       u_int64_t mtime;
+       u_int64_t size;         /* filesystem object size */
+       u_int64_t nlinks;       /* hard links */
+       char    reserved04[32];
 };
 
-/*
- * Data records specify the entire contents of a regular file object,
- * including attributes.  Small amounts of data can theoretically be
- * embedded in the record itself but the use of this ability verses using
- * an out-of-band data reference depends on the implementation.
- */
-struct hammer_data_record {
-       struct hammer_base_record base;
-       char    data[32];
-};
+#define HAMMER_INODE_DATA_VERSION      1
+#define HAMMER_OBJID_ROOT              1
 
 /*
  * A directory entry specifies the HAMMER filesystem object id, a copy of
@@ -652,68 +603,30 @@ struct hammer_data_record {
  * NOTE: den_name / the filename data reference is NOT terminated with \0.
  *
  */
-struct hammer_entry_record {
-       struct hammer_base_record base;
+struct hammer_entry_data {
        u_int64_t obj_id;               /* object being referenced */
        u_int64_t reserved01;
-       char    name[16];
-};
-
-/*
- * Hammer rollup record
- */
-union hammer_record_ondisk {
-       struct hammer_base_record       base;
-       struct hammer_inode_record      inode;
-       struct hammer_data_record       data;
-       struct hammer_entry_record      entry;
+       char    name[16];               /* name (extended) */
 };
 
-typedef union hammer_record_ondisk *hammer_record_ondisk_t;
-
-#define HAMMER_RECORD_CRCSIZE  (HAMMER_RECORD_SIZE - sizeof(hammer_crc_t))
+#define HAMMER_ENTRY_NAME_OFF  offsetof(struct hammer_entry_data, name[0])
+#define HAMMER_ENTRY_SIZE(nlen)        offsetof(struct hammer_entry_data, name[nlen])
 
-/*
- * HAMMER UNIX Attribute data
- *
- * The data reference in a HAMMER inode record points to this structure.  Any
- * modifications to the contents of this structure will result in a record
- * replacement operation.
- *
- * short_data_off allows a small amount of data to be embedded in the
- * hammer_inode_data structure.  HAMMER typically uses this to represent
- * up to 64 bytes of data, or to hold symlinks.  Remember that allocations
- * are in powers of 2 so 64, 192, 448, or 960 bytes of embedded data is
- * support (64+64, 64+192, 64+448 64+960).
- *
- * parent_obj_id is only valid for directories (which cannot be hard-linked),
- * and specifies the parent directory obj_id.  This field will also be set
- * for non-directory inodes as a recovery aid, but can wind up specifying
- * stale information.  However, since object id's are not reused, the worse
- * that happens is that the recovery code is unable to use it.
- */
-struct hammer_inode_data {
-       u_int16_t version;      /* inode data version */
-       u_int16_t mode;         /* basic unix permissions */
-       u_int32_t uflags;       /* chflags */
-       u_int32_t rmajor;       /* used by device nodes */
-       u_int32_t rminor;       /* used by device nodes */
-       u_int64_t ctime;
-       u_int64_t parent_obj_id;/* parent directory obj_id */
-       uuid_t  uid;
-       uuid_t  gid;
-       /* XXX device, softlink extension */
+struct hammer_symlink_data {
+       char    name[16];
 };
 
-#define HAMMER_INODE_DATA_VERSION      1
-
-#define HAMMER_OBJID_ROOT              1
+#define HAMMER_SYMLINK_NAME_OFF        offsetof(struct hammer_symlink_data, name[0])
 
 /*
  * Rollup various structures embedded as record data
  */
 union hammer_data_ondisk {
+       struct hammer_entry_data entry;
        struct hammer_inode_data inode;
+       struct hammer_symlink_data symlink;
 };
 
+typedef union hammer_data_ondisk *hammer_data_ondisk_t;
+
 #endif
index 4d9098a..7d1da72 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.54 2008/05/09 07:26:51 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.55 2008/05/12 21:17:18 dillon Exp $
  */
 
 #include "hammer.h"
@@ -71,7 +71,7 @@ hammer_vop_inactive(struct vop_inactive_args *ap)
        hammer_inode_unloadable_check(ip, 0);
        if (ip->flags & HAMMER_INODE_MODMASK)
                hammer_flush_inode(ip, 0);
-       else if (ip->ino_rec.ino_nlinks == 0)
+       else if (ip->ino_data.nlinks == 0)
                vrecycle(ap->a_vp);
        return(0);
 }
@@ -127,10 +127,10 @@ hammer_get_vnode(struct hammer_inode *ip, struct vnode **vpp)
                        hammer_ref(&ip->lock);
                        vp = *vpp;
                        ip->vp = vp;
-                       vp->v_type = hammer_get_vnode_type(
-                                           ip->ino_rec.base.base.obj_type);
+                       vp->v_type =
+                               hammer_get_vnode_type(ip->ino_data.obj_type);
 
-                       switch(ip->ino_rec.base.base.obj_type) {
+                       switch(ip->ino_data.obj_type) {
                        case HAMMER_OBJTYPE_CDEV:
                        case HAMMER_OBJTYPE_BDEV:
                                vp->v_ops = &ip->hmp->mp->mnt_vn_spec_ops;
@@ -160,7 +160,7 @@ hammer_get_vnode(struct hammer_inode *ip, struct vnode **vpp)
                        /* make related vnode dirty if inode dirty? */
                        hammer_unlock(&ip->lock);
                        if (vp->v_type == VREG)
-                               vinitvmio(vp, ip->ino_rec.ino_size);
+                               vinitvmio(vp, ip->ino_data.size);
                        break;
                }
 
@@ -237,7 +237,7 @@ retry:
        cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
        cursor.key_beg.obj_type = 0;
        cursor.asof = iinfo.obj_asof;
-       cursor.flags = HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_GET_DATA |
+       cursor.flags = HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_GET_DATA |
                       HAMMER_CURSOR_ASOF;
 
        *errorp = hammer_btree_lookup(&cursor);
@@ -253,7 +253,7 @@ retry:
         * and cache the B-Tree node to improve future operations.
         */
        if (*errorp == 0) {
-               ip->ino_rec = cursor.record->inode;
+               ip->ino_leaf = cursor.node->ondisk->elms[cursor.index].leaf;
                ip->ino_data = cursor.data->inode;
                hammer_cache_node(cursor.node, &ip->cache[0]);
                if (cache)
@@ -322,8 +322,7 @@ hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
        ip->obj_asof = hmp->asof;
        ip->hmp = hmp;
        ip->flush_state = HAMMER_FST_IDLE;
-       ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_RDIRTY |
-                   HAMMER_INODE_ITIMES;
+       ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES;
 
        ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
        RB_INIT(&ip->rec_tree);
@@ -331,25 +330,26 @@ hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
        TAILQ_INIT(&ip->bio_alt_list);
        TAILQ_INIT(&ip->target_list);
 
-       ip->ino_rec.ino_atime = trans->time;
-       ip->ino_rec.ino_mtime = trans->time;
-       ip->ino_rec.ino_size = 0;
-       ip->ino_rec.ino_nlinks = 0;
+       ip->ino_leaf.atime = trans->time;
+       ip->ino_data.mtime = trans->time;
+       ip->ino_data.size = 0;
+       ip->ino_data.nlinks = 0;
        /* XXX */
-       ip->ino_rec.base.base.btype = HAMMER_BTREE_TYPE_RECORD;
-       ip->ino_rec.base.base.obj_id = ip->obj_id;
-       ip->ino_rec.base.base.key = 0;
-       ip->ino_rec.base.base.create_tid = 0;
-       ip->ino_rec.base.base.delete_tid = 0;
-       ip->ino_rec.base.base.rec_type = HAMMER_RECTYPE_INODE;
-       ip->ino_rec.base.base.obj_type = hammer_get_obj_type(vap->va_type);
-
+       ip->ino_leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
+       ip->ino_leaf.base.obj_id = ip->obj_id;
+       ip->ino_leaf.base.key = 0;
+       ip->ino_leaf.base.create_tid = 0;
+       ip->ino_leaf.base.delete_tid = 0;
+       ip->ino_leaf.base.rec_type = HAMMER_RECTYPE_INODE;
+       ip->ino_leaf.base.obj_type = hammer_get_obj_type(vap->va_type);
+
+       ip->ino_data.obj_type = ip->ino_leaf.base.obj_type;
        ip->ino_data.version = HAMMER_INODE_DATA_VERSION;
        ip->ino_data.mode = vap->va_mode;
        ip->ino_data.ctime = trans->time;
-       ip->ino_data.parent_obj_id = (dip) ? dip->ino_rec.base.base.obj_id : 0;
+       ip->ino_data.parent_obj_id = (dip) ? dip->ino_leaf.base.obj_id : 0;
 
-       switch(ip->ino_rec.base.base.obj_type) {
+       switch(ip->ino_leaf.base.obj_type) {
        case HAMMER_OBJTYPE_CDEV:
        case HAMMER_OBJTYPE_BDEV:
                ip->ino_data.rmajor = vap->va_rmajor;
@@ -418,7 +418,7 @@ retry:
                cursor->key_beg.obj_type = 0;
                cursor->asof = ip->obj_asof;
                cursor->flags &= ~HAMMER_CURSOR_INITMASK;
-               cursor->flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF;
+               cursor->flags |= HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_ASOF;
                cursor->flags |= HAMMER_CURSOR_BACKEND;
 
                error = hammer_btree_lookup(cursor);
@@ -466,13 +466,12 @@ retry:
                /*
                 * Generate a record and write it to the media
                 */
-               record = hammer_alloc_mem_record(ip);
+               record = hammer_alloc_mem_record(ip, 0);
                record->type = HAMMER_MEM_RECORD_GENERAL;
                record->flush_state = HAMMER_FST_FLUSH;
-               record->rec.inode = ip->sync_ino_rec;
-               record->rec.inode.base.base.create_tid = trans->tid;
-               record->rec.inode.base.data_len = sizeof(ip->sync_ino_data);
-               record->rec.base.signature = HAMMER_RECORD_SIGNATURE_GOOD;
+               record->leaf = ip->sync_ino_leaf;
+               record->leaf.base.create_tid = trans->tid;
+               record->leaf.data_len = sizeof(ip->sync_ino_data);
                record->data = (void *)&ip->sync_ino_data;
                record->flags |= HAMMER_RECF_INTERLOCK_BE;
                for (;;) {
@@ -510,8 +509,7 @@ retry:
                if (error == 0) {
                        if (hammer_debug_inode)
                                kprintf("CLEANDELOND %p %08x\n", ip, ip->flags);
-                       ip->sync_flags &= ~(HAMMER_INODE_RDIRTY |
-                                           HAMMER_INODE_DDIRTY |
+                       ip->sync_flags &= ~(HAMMER_INODE_DDIRTY |
                                            HAMMER_INODE_ITIMES);
                        ip->flags &= ~HAMMER_INODE_DELONDISK;
 
@@ -536,8 +534,7 @@ retry:
         * that may have been set by the frontend.
         */
        if (error == 0 && (ip->flags & HAMMER_INODE_DELETED)) { 
-               ip->sync_flags &= ~(HAMMER_INODE_RDIRTY |
-                                   HAMMER_INODE_DDIRTY |
+               ip->sync_flags &= ~(HAMMER_INODE_DDIRTY |
                                    HAMMER_INODE_ITIMES);
        }
        return(error);
@@ -551,7 +548,7 @@ static int
 hammer_update_itimes(hammer_cursor_t cursor, hammer_inode_t ip)
 {
        hammer_transaction_t trans = cursor->trans;
-       struct hammer_inode_record *rec;
+       struct hammer_btree_leaf_elm *leaf;
        int error;
 
 retry:
@@ -567,7 +564,7 @@ retry:
                cursor->key_beg.obj_type = 0;
                cursor->asof = ip->obj_asof;
                cursor->flags &= ~HAMMER_CURSOR_INITMASK;
-               cursor->flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF;
+               cursor->flags |= HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_ASOF;
                cursor->flags |= HAMMER_CURSOR_BACKEND;
 
                error = hammer_btree_lookup(cursor);
@@ -577,17 +574,14 @@ retry:
                }
                if (error == 0) {
                        /*
-                        * Do not generate UNDO records for atime/mtime
-                        * updates.
+                        * Do not generate UNDO records for atime updates.
                         */
-                       rec = &cursor->record->inode;
-                       hammer_modify_record_noundo(trans,
-                                                   cursor->record_buffer,
-                                                   cursor->record);
-                       rec->ino_atime = ip->sync_ino_rec.ino_atime;
-                       rec->ino_mtime = ip->sync_ino_rec.ino_mtime;
-                       hammer_modify_record_done(cursor->record_buffer,
-                                                 cursor->record);
+                       leaf = cursor->leaf;
+                       hammer_modify_node(trans, cursor->node, 
+                                          &leaf->atime, sizeof(leaf->atime));
+                       leaf->atime = ip->sync_ino_leaf.atime;
+                       hammer_modify_node_done(cursor->node);
+                       /* mtime is in ino_data now, not updated here */
                        ip->sync_flags &= ~HAMMER_INODE_ITIMES;
                        /* XXX recalculate crc */
                        hammer_cache_node(cursor->node, &ip->cache[0]);
@@ -697,7 +691,6 @@ hammer_unload_inode(struct hammer_inode *ip)
  * A transaction has modified an inode, requiring updates as specified by
  * the passed flags.
  *
- * HAMMER_INODE_RDIRTY:        Inode record has been updated
  * HAMMER_INODE_DDIRTY: Inode data has been updated
  * HAMMER_INODE_XDIRTY: Dirty in-memory records
  * HAMMER_INODE_BUFS:   Dirty buffer cache buffers
@@ -708,9 +701,9 @@ void
 hammer_modify_inode(hammer_transaction_t trans, hammer_inode_t ip, int flags)
 {
        KKASSERT ((ip->flags & HAMMER_INODE_RO) == 0 ||
-                 (flags & (HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
-                  HAMMER_INODE_XDIRTY|HAMMER_INODE_BUFS|
-                  HAMMER_INODE_DELETED|HAMMER_INODE_ITIMES)) == 0);
+                 (flags & (HAMMER_INODE_DDIRTY |
+                           HAMMER_INODE_XDIRTY | HAMMER_INODE_BUFS |
+                           HAMMER_INODE_DELETED | HAMMER_INODE_ITIMES)) == 0);
 
        ip->flags |= flags;
 }
@@ -996,7 +989,7 @@ hammer_flush_inode_core(hammer_inode_t ip, int flags)
         */
        ip->sync_flags = (ip->flags & HAMMER_INODE_MODMASK);
        ip->sync_trunc_off = ip->trunc_off;
-       ip->sync_ino_rec = ip->ino_rec;
+       ip->sync_ino_leaf = ip->ino_leaf;
        ip->sync_ino_data = ip->ino_data;
        ip->flags &= ~HAMMER_INODE_MODMASK | HAMMER_INODE_TRUNCATED;
 
@@ -1164,8 +1157,8 @@ hammer_flush_inode_done(hammer_inode_t ip)
         * The backend may have adjusted nlinks, so if the adjusted nlinks
         * does not match the fronttend set the frontend's RDIRTY flag again.
         */
-       if (ip->ino_rec.ino_nlinks != ip->sync_ino_rec.ino_nlinks)
-               ip->flags |= HAMMER_INODE_RDIRTY;
+       if (ip->ino_data.nlinks != ip->sync_ino_data.nlinks)
+               ip->flags |= HAMMER_INODE_DDIRTY;
 
        /*
         * Reflush any BIOs that wound up in the alt list.  Our inode will
@@ -1304,7 +1297,7 @@ hammer_sync_record_callback(hammer_record_t record, void *data)
         * have the record's entire key properly set up.
         */
        if (record->type != HAMMER_MEM_RECORD_DEL)
-               record->rec.inode.base.base.create_tid = trans->tid;
+               record->leaf.base.create_tid = trans->tid;
        for (;;) {
                error = hammer_ip_sync_record_cursor(cursor, record);
                if (error != EDEADLK)
@@ -1360,7 +1353,7 @@ hammer_sync_inode(hammer_inode_t ip)
         * inode now, potentially allowing the inode to be physically
         * deleted.
         */
-       nlinks = ip->ino_rec.ino_nlinks;
+       nlinks = ip->ino_data.nlinks;
        next = TAILQ_FIRST(&ip->target_list);
        while ((depend = next) != NULL) {
                next = TAILQ_NEXT(depend, target_entry);
@@ -1405,10 +1398,10 @@ hammer_sync_inode(hammer_inode_t ip)
        /*
         * Set dirty if we had to modify the link count.
         */
-       if (ip->sync_ino_rec.ino_nlinks != nlinks) {
+       if (ip->sync_ino_data.nlinks != nlinks) {
                KKASSERT((int64_t)nlinks >= 0);
-               ip->sync_ino_rec.ino_nlinks = nlinks;
-               ip->sync_flags |= HAMMER_INODE_RDIRTY;
+               ip->sync_ino_data.nlinks = nlinks;
+               ip->sync_flags |= HAMMER_INODE_DDIRTY;
        }
 
        /*
@@ -1434,9 +1427,9 @@ hammer_sync_inode(hammer_inode_t ip)
                ip->sync_trunc_off = ip->trunc_off;
                ip->sync_flags |= HAMMER_INODE_TRUNCATED;
        }
-       if (ip->sync_ino_rec.ino_size != ip->ino_rec.ino_size) {
-               ip->sync_ino_rec.ino_size = ip->ino_rec.ino_size;
-               ip->sync_flags |= HAMMER_INODE_RDIRTY;
+       if (ip->sync_ino_data.size != ip->ino_data.size) {
+               ip->sync_ino_data.size = ip->ino_data.size;
+               ip->sync_flags |= HAMMER_INODE_DDIRTY;
        }
 
        /*
@@ -1505,7 +1498,7 @@ hammer_sync_inode(hammer_inode_t ip)
         * If we are deleting the inode the frontend had better not have
         * any active references on elements making up the inode.
         */
-       if (error == 0 && ip->sync_ino_rec.ino_nlinks == 0 &&
+       if (error == 0 && ip->sync_ino_data.nlinks == 0 &&
                RB_EMPTY(&ip->rec_tree)  &&
            (ip->sync_flags & HAMMER_INODE_DELETING) &&
            (ip->flags & HAMMER_INODE_DELETED) == 0) {
@@ -1524,8 +1517,8 @@ hammer_sync_inode(hammer_inode_t ip)
                         * copy of the inode record.  The DELETED flag handles
                         * this, do not set RDIRTY.
                         */
-                       ip->ino_rec.base.base.delete_tid = trans.tid;
-                       ip->sync_ino_rec.base.base.delete_tid = trans.tid;
+                       ip->ino_leaf.base.delete_tid = trans.tid;
+                       ip->sync_ino_leaf.base.delete_tid = trans.tid;
 
                        /*
                         * Adjust the inode count in the volume header
@@ -1567,7 +1560,7 @@ hammer_sync_inode(hammer_inode_t ip)
                 *
                 * Clear flags which may have been set by the frontend.
                 */
-               ip->sync_flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
+               ip->sync_flags &= ~(HAMMER_INODE_DDIRTY|
                                    HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
                                    HAMMER_INODE_DELETING);
                break;
@@ -1578,7 +1571,7 @@ hammer_sync_inode(hammer_inode_t ip)
                 *
                 * Clear flags which may have been set by the frontend.
                 */
-               ip->sync_flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
+               ip->sync_flags &= ~(HAMMER_INODE_DDIRTY|
                                    HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
                                    HAMMER_INODE_DELETING);
                while (RB_ROOT(&ip->rec_tree)) {
@@ -1604,9 +1597,9 @@ hammer_sync_inode(hammer_inode_t ip)
                 * Set create_tid in both the frontend and backend
                 * copy of the inode record.
                 */
-               ip->ino_rec.base.base.create_tid = trans.tid;
-               ip->sync_ino_rec.base.base.create_tid = trans.tid;
-               ip->sync_flags |= HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY;
+               ip->ino_leaf.base.create_tid = trans.tid;
+               ip->sync_ino_leaf.base.create_tid = trans.tid;
+               ip->sync_flags |= HAMMER_INODE_DDIRTY;
                break;
        }
 
@@ -1622,12 +1615,11 @@ hammer_sync_inode(hammer_inode_t ip)
        if (ip->flags & HAMMER_INODE_DELETED) {
                error = hammer_update_inode(&cursor, ip);
        } else 
-       if ((ip->sync_flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
-                              HAMMER_INODE_ITIMES)) == HAMMER_INODE_ITIMES) {
+       if ((ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) ==
+           HAMMER_INODE_ITIMES) {
                error = hammer_update_itimes(&cursor, ip);
        } else
-       if (ip->sync_flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
-                             HAMMER_INODE_ITIMES)) {
+       if (ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) {
                error = hammer_update_inode(&cursor, ip);
        }
        if (error)
@@ -1663,7 +1655,7 @@ hammer_inode_unloadable_check(hammer_inode_t ip, int getvp)
         * (a state flag) when it is actually able to perform the
         * operation.
         */
-       if (ip->ino_rec.ino_nlinks == 0 &&
+       if (ip->ino_data.nlinks == 0 &&
            (ip->flags & (HAMMER_INODE_DELETING|HAMMER_INODE_DELETED)) == 0) {
                ip->flags |= HAMMER_INODE_DELETING;
                ip->flags |= HAMMER_INODE_TRUNCATED;
index 0eb7c87..9ae0f21 100644 (file)
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.16 2008/05/12 05:13:11 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.17 2008/05/12 21:17:18 dillon Exp $
  */
 
 #include "hammer.h"
 
-static int hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip,
-                               struct hammer_ioc_prune *prune);
 static int hammer_ioc_gethistory(hammer_transaction_t trans, hammer_inode_t ip,
                                struct hammer_ioc_history *hist);
 
@@ -75,323 +73,6 @@ hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag,
        return (error);
 }
 
-/*
- * Iterate through the specified range of object ids and remove any
- * deleted records that fall entirely within a prune modulo.
- *
- * A reverse iteration is used to prevent overlapping records from being
- * created during the iteration due to alignments.  This also allows us
- * to adjust alignments without blowing up the B-Tree.
- */
-static int check_prune(struct hammer_ioc_prune *prune, hammer_btree_elm_t elm,
-                       int *realign_cre, int *realign_del);
-static int realign_prune(struct hammer_ioc_prune *prune, hammer_cursor_t cursor,
-                       int realign_cre, int realign_del);
-
-static int
-hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip,
-                struct hammer_ioc_prune *prune)
-{
-       struct hammer_cursor cursor;
-       hammer_btree_elm_t elm;
-       int error;
-       int isdir;
-       int realign_cre;
-       int realign_del;
-
-       if (prune->nelms < 0 || prune->nelms > HAMMER_MAX_PRUNE_ELMS)
-               return(EINVAL);
-       if (prune->beg_obj_id >= prune->end_obj_id)
-               return(EINVAL);
-       if ((prune->head.flags & HAMMER_IOC_PRUNE_ALL) && prune->nelms)
-               return(EINVAL);
-
-       prune->cur_obj_id = cursor.key_end.obj_id;
-       prune->cur_key = HAMMER_MAX_KEY;
-
-retry:
-       error = hammer_init_cursor(trans, &cursor, NULL, NULL);
-       if (error) {
-               hammer_done_cursor(&cursor);
-               return(error);
-       }
-       cursor.key_beg.obj_id = prune->beg_obj_id;
-       cursor.key_beg.key = HAMMER_MIN_KEY;
-       cursor.key_beg.create_tid = 1;
-       cursor.key_beg.delete_tid = 0;
-       cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
-       cursor.key_beg.obj_type = 0;
-
-       cursor.key_end.obj_id = prune->cur_obj_id;
-       cursor.key_end.key = prune->cur_key;
-       cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
-       cursor.key_end.delete_tid = 0;
-       cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
-       cursor.key_end.obj_type = 0;
-
-       cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
-       cursor.flags |= HAMMER_CURSOR_BACKEND;
-
-       error = hammer_btree_last(&cursor);
-       while (error == 0) {
-               elm = &cursor.node->ondisk->elms[cursor.index];
-               prune->cur_obj_id = elm->base.obj_id;
-               prune->cur_key = elm->base.key;
-
-               if (prune->stat_oldest_tid > elm->leaf.base.create_tid)
-                       prune->stat_oldest_tid = elm->leaf.base.create_tid;
-
-               if (check_prune(prune, elm, &realign_cre, &realign_del) == 0) {
-                       if (hammer_debug_general & 0x0200) {
-                               kprintf("check %016llx %016llx: DELETE\n",
-                                       elm->base.obj_id, elm->base.key);
-                       }
-
-                       /*
-                        * NOTE: This can return EDEADLK
-                        *
-                        * Acquiring the sync lock guarantees that the
-                        * operation will not cross a synchronization
-                        * boundary (see the flusher).
-                        */
-                       isdir = (elm->base.rec_type == HAMMER_RECTYPE_DIRENTRY);
-
-                       hammer_lock_ex(&trans->hmp->sync_lock);
-                       error = hammer_delete_at_cursor(&cursor,
-                                                       &prune->stat_bytes);
-                       hammer_unlock(&trans->hmp->sync_lock);
-                       if (error)
-                               break;
-
-                       if (isdir)
-                               ++prune->stat_dirrecords;
-                       else
-                               ++prune->stat_rawrecords;
-
-                       /*
-                        * The current record might now be the one after
-                        * the one we deleted, set ATEDISK to force us
-                        * to skip it (since we are iterating backwards).
-                        */
-                       cursor.flags |= HAMMER_CURSOR_ATEDISK;
-               } else if (realign_cre >= 0 || realign_del >= 0) {
-                       hammer_lock_ex(&trans->hmp->sync_lock);
-                       error = realign_prune(prune, &cursor,
-                                             realign_cre, realign_del);
-                       hammer_unlock(&trans->hmp->sync_lock);
-                       if (error == 0) {
-                               cursor.flags |= HAMMER_CURSOR_ATEDISK;
-                               if (hammer_debug_general & 0x0200) {
-                                       kprintf("check %016llx %016llx: "
-                                               "REALIGN\n",
-                                               elm->base.obj_id,
-                                               elm->base.key);
-                               }
-                       }
-               } else {
-                       cursor.flags |= HAMMER_CURSOR_ATEDISK;
-                       if (hammer_debug_general & 0x0100) {
-                               kprintf("check %016llx %016llx: SKIP\n",
-                                       elm->base.obj_id, elm->base.key);
-                       }
-               }
-               ++prune->stat_scanrecords;
-
-               /*
-                * Bad hack for now, don't blow out the kernel's buffer
-                * cache.  NOTE: We still hold locks on the cursor, we
-                * cannot call the flusher synchronously.
-                */
-               if (trans->hmp->locked_dirty_count > hammer_limit_dirtybufs) {
-                       hammer_flusher_async(trans->hmp);
-                       tsleep(trans, 0, "hmrslo", hz / 10);
-               }
-               error = hammer_signal_check(trans->hmp);
-               if (error == 0)
-                       error = hammer_btree_iterate_reverse(&cursor);
-       }
-       if (error == ENOENT)
-               error = 0;
-       hammer_done_cursor(&cursor);
-       if (error == EDEADLK)
-               goto retry;
-       if (error == EINTR) {
-               prune->head.flags |= HAMMER_IOC_HEAD_INTR;
-               error = 0;
-       }
-       return(error);
-}
-
-/*
- * Check pruning list.  The list must be sorted in descending order.
- */
-static int
-check_prune(struct hammer_ioc_prune *prune, hammer_btree_elm_t elm,
-           int *realign_cre, int *realign_del)
-{
-       struct hammer_ioc_prune_elm *scan;
-       int i;
-
-       *realign_cre = -1;
-       *realign_del = -1;
-
-       /*
-        * If pruning everything remove all records with a non-zero
-        * delete_tid.
-        */
-       if (prune->head.flags & HAMMER_IOC_PRUNE_ALL) {
-               if (elm->base.delete_tid != 0)
-                       return(0);
-               return(-1);
-       }
-
-       for (i = 0; i < prune->nelms; ++i) {
-               scan = &prune->elms[i];
-
-               /*
-                * Locate the scan index covering the create and delete TIDs.
-                */
-               if (*realign_cre < 0 &&
-                   elm->base.create_tid >= scan->beg_tid &&
-                   elm->base.create_tid < scan->end_tid) {
-                       *realign_cre = i;
-               }
-               if (*realign_del < 0 && elm->base.delete_tid &&
-                   elm->base.delete_tid > scan->beg_tid &&
-                   elm->base.delete_tid <= scan->end_tid) {
-                       *realign_del = i;
-               }
-
-               /*
-                * Now check for loop termination.
-                */
-               if (elm->base.create_tid >= scan->end_tid ||
-                   elm->base.delete_tid > scan->end_tid) {
-                       break;
-               }
-
-               /*
-                * Now determine if we can delete the record.
-                */
-               if (elm->base.delete_tid &&
-                   elm->base.create_tid >= scan->beg_tid &&
-                   elm->base.delete_tid <= scan->end_tid &&
-                   elm->base.create_tid / scan->mod_tid ==
-                   elm->base.delete_tid / scan->mod_tid) {
-                       return(0);
-               }
-       }
-       return(-1);
-}
-
-/*
- * Align the record to cover any gaps created through the deletion of
- * records within the pruning space.  If we were to just delete the records
- * there would be gaps which in turn would cause a snapshot that is NOT on
- * a pruning boundary to appear corrupt to the user.  Forcing alignment
- * of the create_tid and delete_tid for retained records 'reconnects'
- * the previously contiguous space, making it contiguous again after the
- * deletions.
- *
- * The use of a reverse iteration allows us to safely align the records and
- * related elements without creating temporary overlaps.  XXX we should
- * add ordering dependancies for record buffers to guarantee consistency
- * during recovery.
- */
-static int
-realign_prune(struct hammer_ioc_prune *prune,
-             hammer_cursor_t cursor, int realign_cre, int realign_del)
-{
-       hammer_btree_elm_t elm;
-       hammer_tid_t delta;
-       hammer_tid_t mod;
-       hammer_tid_t tid;
-       int error;
-
-       hammer_cursor_downgrade(cursor);
-
-       elm = &cursor->node->ondisk->elms[cursor->index];
-       ++prune->stat_realignments;
-
-       /*
-        * Align the create_tid.  By doing a reverse iteration we guarantee
-        * that all records after our current record have already been
-        * aligned, allowing us to safely correct the right-hand-boundary
-        * (because no record to our right if otherwise exactly matching
-        * will have a create_tid to the left of our aligned create_tid).
-        *
-        * Ordering is important here XXX but disk write ordering for
-        * inter-cluster corrections is not currently guaranteed.
-        */
-       error = 0;
-       if (realign_cre >= 0) {
-               mod = prune->elms[realign_cre].mod_tid;
-               delta = elm->leaf.base.create_tid % mod;
-               if (delta) {
-                       tid = elm->leaf.base.create_tid - delta + mod;
-
-                       /* can EDEADLK */
-                       error = hammer_btree_correct_rhb(cursor, tid + 1);
-                       if (error == 0) {
-                               error = hammer_btree_extract(cursor,
-                                                    HAMMER_CURSOR_GET_RECORD);
-                       }
-                       if (error == 0) {
-                               /* can EDEADLK */
-                               error = hammer_cursor_upgrade(cursor);
-                       }
-                       if (error == 0) {
-                               hammer_modify_record_field(cursor->trans,
-                                           cursor->record_buffer,
-                                           cursor->record,
-                                           base.base.create_tid, 0);
-                               cursor->record->base.base.create_tid = tid;
-                               hammer_modify_record_done(
-                                           cursor->record_buffer,
-                                           cursor->record);
-                               hammer_modify_node(cursor->trans, cursor->node,
-                                           &elm->leaf.base.create_tid,
-                                           sizeof(elm->leaf.base.create_tid));
-                               elm->leaf.base.create_tid = tid;
-                               hammer_modify_node_done(cursor->node);
-                       }
-               }
-       }
-
-       /*
-        * Align the delete_tid.  This only occurs if the record is historical
-        * was deleted at some point.  Realigning the delete_tid does not
-        * move the record within the B-Tree but may cause it to temporarily
-        * overlap a record that has not yet been pruned.
-        */
-       if (error == 0 && realign_del >= 0) {
-               mod = prune->elms[realign_del].mod_tid;
-               delta = elm->leaf.base.delete_tid % mod;
-               if (delta) {
-                       error = hammer_btree_extract(cursor,
-                                                    HAMMER_CURSOR_GET_RECORD);
-                       if (error == 0) {
-                               hammer_modify_node(cursor->trans, cursor->node,
-                                           &elm->leaf.base.delete_tid,
-                                           sizeof(elm->leaf.base.delete_tid));
-                               elm->leaf.base.delete_tid =
-                                           elm->leaf.base.delete_tid -
-                                           delta + mod;
-                               hammer_modify_node_done(cursor->node);
-                               hammer_modify_record_field(cursor->trans,
-                                           cursor->record_buffer,
-                                           cursor->record,
-                                           base.base.delete_tid, 0);
-                               cursor->record->base.base.delete_tid =
-                                           elm->leaf.base.delete_tid;
-                               hammer_modify_record_done(cursor->record_buffer,
-                                           cursor->record);
-                       }
-               }
-       }
-       return (error);
-}
-
 /*
  * Iterate through an object's inode or an object's records and record
  * modification TIDs.
@@ -463,7 +144,7 @@ hammer_ioc_gethistory(hammer_transaction_t trans, hammer_inode_t ip,
                cursor.key_beg.key = hist->key;
                cursor.key_end.key = HAMMER_MAX_KEY;
 
-               switch(ip->ino_rec.base.base.obj_type) {
+               switch(ip->ino_data.obj_type) {
                case HAMMER_OBJTYPE_REGFILE:
                        ++cursor.key_beg.key;
                        cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
@@ -521,7 +202,7 @@ add_history(hammer_inode_t ip, struct hammer_ioc_history *hist,
        if (elm->base.btype != HAMMER_BTREE_TYPE_RECORD)
                return;
        if ((hist->head.flags & HAMMER_IOC_HISTORY_ATKEY) &&
-           ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_REGFILE) {
+           ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE) {
                /*
                 * Adjust nxt_key
                 */
index 393aaf3..60cb768 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.56 2008/05/09 07:26:51 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.57 2008/05/12 21:17:18 dillon Exp $
  */
 
 #include "hammer.h"
@@ -46,27 +46,27 @@ static int hammer_mem_first(hammer_cursor_t cursor);
 static int
 hammer_rec_rb_compare(hammer_record_t rec1, hammer_record_t rec2)
 {
-       if (rec1->rec.base.base.rec_type < rec2->rec.base.base.rec_type)
+       if (rec1->leaf.base.rec_type < rec2->leaf.base.rec_type)
                return(-1);
-       if (rec1->rec.base.base.rec_type > rec2->rec.base.base.rec_type)
+       if (rec1->leaf.base.rec_type > rec2->leaf.base.rec_type)
                return(1);
 
-       if (rec1->rec.base.base.key < rec2->rec.base.base.key)
+       if (rec1->leaf.base.key < rec2->leaf.base.key)
                return(-1);
-       if (rec1->rec.base.base.key > rec2->rec.base.base.key)
+       if (rec1->leaf.base.key > rec2->leaf.base.key)
                return(1);
 
-       if (rec1->rec.base.base.create_tid == 0) {
-               if (rec2->rec.base.base.create_tid == 0)
+       if (rec1->leaf.base.create_tid == 0) {
+               if (rec2->leaf.base.create_tid == 0)
                        return(0);
                return(1);
        }
-       if (rec2->rec.base.base.create_tid == 0)
+       if (rec2->leaf.base.create_tid == 0)
                return(-1);
 
-       if (rec1->rec.base.base.create_tid < rec2->rec.base.base.create_tid)
+       if (rec1->leaf.base.create_tid < rec2->leaf.base.create_tid)
                return(-1);
-       if (rec1->rec.base.base.create_tid > rec2->rec.base.base.create_tid)
+       if (rec1->leaf.base.create_tid > rec2->leaf.base.create_tid)
                return(1);
 
        /*
@@ -83,26 +83,26 @@ hammer_rec_rb_compare(hammer_record_t rec1, hammer_record_t rec2)
 static int
 hammer_rec_compare(hammer_base_elm_t info, hammer_record_t rec)
 {
-       if (info->rec_type < rec->rec.base.base.rec_type)
+       if (info->rec_type < rec->leaf.base.rec_type)
                return(-3);
-       if (info->rec_type > rec->rec.base.base.rec_type)
+       if (info->rec_type > rec->leaf.base.rec_type)
                return(3);
 
-        if (info->key < rec->rec.base.base.key)
+        if (info->key < rec->leaf.base.key)
                 return(-2);
-        if (info->key > rec->rec.base.base.key)
+        if (info->key > rec->leaf.base.key)
                 return(2);
 
        if (info->create_tid == 0) {
-               if (rec->rec.base.base.create_tid == 0)
+               if (rec->leaf.base.create_tid == 0)
                        return(0);
                return(1);
        }
-       if (rec->rec.base.base.create_tid == 0)
+       if (rec->leaf.base.create_tid == 0)
                return(-1);
-       if (info->create_tid < rec->rec.base.base.create_tid)
+       if (info->create_tid < rec->leaf.base.create_tid)
                return(-1);
-       if (info->create_tid > rec->rec.base.base.create_tid)
+       if (info->create_tid > rec->leaf.base.create_tid)
                return(1);
         return(0);
 }
@@ -160,16 +160,24 @@ RB_GENERATE_XLOOKUP(hammer_rec_rb_tree, INFO, hammer_record, rb_node,
  * returned referenced.
  */
 hammer_record_t
-hammer_alloc_mem_record(hammer_inode_t ip)
+hammer_alloc_mem_record(hammer_inode_t ip, int data_len)
 {
        hammer_record_t record;
 
        ++hammer_count_records;
-       record = kmalloc(sizeof(*record), M_HAMMER, M_WAITOK|M_ZERO);
+       record = kmalloc(sizeof(*record), M_HAMMER, M_WAITOK | M_ZERO);
        record->flush_state = HAMMER_FST_IDLE;
        record->ip = ip;
-       record->rec.base.base.btype = HAMMER_BTREE_TYPE_RECORD;
+       record->leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
+       record->leaf.data_len = data_len;
        hammer_ref(&record->lock);
+
+       if (data_len) {
+               record->data = kmalloc(data_len, M_HAMMER, M_WAITOK | M_ZERO);
+               record->flags |= HAMMER_RECF_ALLOCDATA;
+               ++hammer_count_record_datas;
+       }
+
        return (record);
 }
 
@@ -333,10 +341,10 @@ hammer_rec_scan_callback(hammer_record_t rec, void *data)
         * Skip if not visible due to our as-of TID
         */
         if (cursor->flags & HAMMER_CURSOR_ASOF) {
-                if (cursor->asof < rec->rec.base.base.create_tid)
+                if (cursor->asof < rec->leaf.base.create_tid)
                         return(0);
-                if (rec->rec.base.base.delete_tid &&
-                   cursor->asof >= rec->rec.base.base.delete_tid) {
+                if (rec->leaf.base.delete_tid &&
+                   cursor->asof >= rec->leaf.base.delete_tid) {
                         return(0);
                }
         }
@@ -462,25 +470,22 @@ hammer_ip_add_directory(struct hammer_transaction *trans,
        int error;
        int bytes;
 
-       record = hammer_alloc_mem_record(dip);
-
        bytes = ncp->nc_nlen;   /* NOTE: terminating \0 is NOT included */
+       record = hammer_alloc_mem_record(dip, HAMMER_ENTRY_SIZE(bytes));
        if (++trans->hmp->namekey_iterator == 0)
                ++trans->hmp->namekey_iterator;
 
        record->type = HAMMER_MEM_RECORD_ADD;
-       record->rec.entry.base.signature = HAMMER_RECORD_SIGNATURE_GOOD;
-       record->rec.entry.base.base.obj_id = dip->obj_id;
-       record->rec.entry.base.base.key =
-               hammer_directory_namekey(ncp->nc_name, bytes);
-       record->rec.entry.base.base.key += trans->hmp->namekey_iterator;
-       record->rec.entry.base.base.rec_type = HAMMER_RECTYPE_DIRENTRY;
-       record->rec.entry.base.base.obj_type = ip->ino_rec.base.base.obj_type;
-       record->rec.entry.obj_id = ip->obj_id;
-       record->data = (void *)ncp->nc_name;
-       record->rec.entry.base.data_len = bytes;
-       ++ip->ino_rec.ino_nlinks;
-       hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY);
+       record->leaf.base.obj_id = dip->obj_id;
+       record->leaf.base.key = hammer_directory_namekey(ncp->nc_name, bytes);
+       record->leaf.base.key += trans->hmp->namekey_iterator;
+       record->leaf.base.rec_type = HAMMER_RECTYPE_DIRENTRY;
+       record->leaf.base.obj_type = ip->ino_leaf.base.obj_type;
+       record->data->entry.obj_id = ip->obj_id;
+       bcopy(ncp->nc_name, record->data->entry.name, bytes);
+
+       ++ip->ino_data.nlinks;
+       hammer_modify_inode(trans, ip, HAMMER_INODE_DDIRTY);
 
        /*
         * The target inode and the directory entry are bound together.
@@ -521,7 +526,7 @@ hammer_ip_del_directory(struct hammer_transaction *trans,
        hammer_record_t record;
        int error;
 
-       if (cursor->record == &cursor->iprec->rec) {
+       if (cursor->leaf == &cursor->iprec->leaf) {
                /*
                 * In-memory (unsynchronized) records can simply be freed.
                 * Even though the HAMMER_RECF_DELETED_FE flag is ignored
@@ -548,11 +553,9 @@ hammer_ip_del_directory(struct hammer_transaction *trans,
                 * the record's key.  This also causes lookups to skip the
                 * record.
                 */
-               record = hammer_alloc_mem_record(dip);
+               record = hammer_alloc_mem_record(dip, 0);
                record->type = HAMMER_MEM_RECORD_DEL;
-               record->rec.entry.base.signature = HAMMER_RECORD_SIGNATURE_GOOD;
-               record->rec.entry.base.base = cursor->record->base.base;
-               hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY);
+               record->leaf.base = cursor->leaf->base;
 
                record->target_ip = ip;
                record->flush_state = HAMMER_FST_SETUP;
@@ -585,9 +588,9 @@ hammer_ip_del_directory(struct hammer_transaction *trans,
         * on-media until we unmount.
         */
        if (error == 0) {
-               --ip->ino_rec.ino_nlinks;
-               hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY);
-               if (ip->ino_rec.ino_nlinks == 0 &&
+               --ip->ino_data.nlinks;
+               hammer_modify_inode(trans, ip, HAMMER_INODE_DDIRTY);
+               if (ip->ino_data.nlinks == 0 &&
                    (ip->vp == NULL || (ip->vp->v_flag & VINACTIVE))) {
                        hammer_done_cursor(cursor);
                        hammer_inode_unloadable_check(ip, 1);
@@ -618,12 +621,8 @@ hammer_ip_add_record(struct hammer_transaction *trans, hammer_record_t record)
        hammer_inode_t ip = record->ip;
        int error;
 
-       record->rec.base.base.obj_id = ip->obj_id;
-       record->rec.base.base.obj_type = ip->ino_rec.base.base.obj_type;
-
-       hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY);
-
-       /* NOTE: copies record->data */
+       record->leaf.base.obj_id = ip->obj_id;
+       record->leaf.base.obj_type = ip->ino_leaf.base.obj_type;
        error = hammer_mem_add(trans, record);
        return(error);
 }
@@ -644,9 +643,8 @@ hammer_ip_sync_data(hammer_cursor_t cursor, hammer_inode_t ip,
                       int64_t offset, void *data, int bytes)
 {
        hammer_transaction_t trans = cursor->trans;
-       hammer_record_ondisk_t rec;
-       union hammer_btree_elm elm;
-       hammer_off_t rec_offset, data_offset;
+       struct hammer_btree_leaf_elm elm;
+       hammer_off_t data_offset;
        void *bdata;
        int error;
 
@@ -683,49 +681,36 @@ retry:
                goto done;
 
        /*
-        * Allocate record and data space.  HAMMER_RECTYPE_DATA records
-        * can cross buffer boundaries so we may have to split our bcopy.
+        * Allocate our data.  The data buffer is not marked modified (yet)
         */
-       rec = hammer_alloc_record(trans, &rec_offset, HAMMER_RECTYPE_DATA,
-                                 &cursor->record_buffer,
-                                 bytes, &bdata,
-                                 &data_offset, &cursor->data_buffer, &error);
-       if (rec == NULL)
+       bdata = hammer_alloc_data(trans, bytes, &data_offset,
+                                 &cursor->data_buffer, &error);
+
+       if (bdata == NULL)
                goto done;
-       if (hammer_debug_general & 0x1000)
-               kprintf("OOB RECOR2 DATA REC %016llx DATA %016llx LEN=%d\n", rec_offset, rec->base.data_off, rec->base.data_len);
 
        /*
         * Fill everything in and insert our B-Tree node.
         *
-        * NOTE: hammer_alloc_record() has already marked the related
-        * buffers as modified.  If we do it again we will generate
-        * unnecessary undo elements.
+        * NOTE: hammer_alloc_data() leaves the data buffer unmodified.
+        * The buffer is marked modified further below, bracketing the
+        * bcopy() of the caller's data into it.
         */
-       hammer_modify_buffer(trans, cursor->record_buffer, NULL, 0);
-       rec->base.base.btype = HAMMER_BTREE_TYPE_RECORD;
-       rec->base.base.obj_id = ip->obj_id;
-       rec->base.base.key = offset + bytes;
-       rec->base.base.create_tid = trans->tid;
-       rec->base.base.delete_tid = 0;
-       rec->base.base.rec_type = HAMMER_RECTYPE_DATA;
-       rec->base.data_crc = crc32(data, bytes);
-       rec->base.signature = HAMMER_RECORD_SIGNATURE_GOOD;
-       rec->base.rec_crc = crc32(&rec->base.data_crc,
-                                 HAMMER_RECORD_CRCSIZE);
-       hammer_modify_buffer_done(cursor->record_buffer);
-       KKASSERT(rec->base.data_len == bytes);
+       elm.base.btype = HAMMER_BTREE_TYPE_RECORD;
+       elm.base.obj_id = ip->obj_id;
+       elm.base.key = offset + bytes;
+       elm.base.create_tid = trans->tid;
+       elm.base.delete_tid = 0;
+       elm.base.rec_type = HAMMER_RECTYPE_DATA;
+       elm.atime = 0;
+       elm.data_offset = data_offset;
+       elm.data_len = bytes;
+       elm.data_crc = crc32(data, bytes);
 
        hammer_modify_buffer(trans, cursor->data_buffer, NULL, 0);
        bcopy(data, bdata, bytes);
        hammer_modify_buffer_done(cursor->data_buffer);
 
-       elm.leaf.base = rec->base.base;
-       elm.leaf.rec_offset = rec_offset;
-       elm.leaf.data_offset = rec->base.data_off;
-       elm.leaf.data_len = bytes;
-       elm.leaf.data_crc = rec->base.data_crc;
-
        /*
         * Data records can wind up on-disk before the inode itself is
         * on-disk.  One must assume data records may be on-disk if either
@@ -737,11 +722,6 @@ retry:
        if (error == 0)
                goto done;
 
-       hammer_modify_buffer(trans, cursor->record_buffer, NULL, 0);
-       rec->base.signature = HAMMER_RECORD_SIGNATURE_DESTROYED;
-       hammer_modify_buffer_done(cursor->record_buffer);
-
-       hammer_blockmap_free(trans, rec_offset, HAMMER_RECORD_SIZE);
        hammer_blockmap_free(trans, data_offset, bytes);
 done:
        if (error == EDEADLK) {
@@ -793,19 +773,14 @@ int
 hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record)
 {
        hammer_transaction_t trans = cursor->trans;
-       hammer_record_ondisk_t rec;
-       union hammer_btree_elm elm;
-       hammer_off_t rec_offset;
-       hammer_off_t data_offset;
        void *bdata;
        int error;
 
        KKASSERT(record->flush_state == HAMMER_FST_FLUSH);
        KKASSERT(record->flags & HAMMER_RECF_INTERLOCK_BE);
-       KKASSERT(record->rec.base.signature == HAMMER_RECORD_SIGNATURE_GOOD);
 
        hammer_normalize_cursor(cursor);
-       cursor->key_beg = record->rec.base.base;
+       cursor->key_beg = record->leaf.base;
        cursor->flags &= ~HAMMER_CURSOR_INITMASK;
        cursor->flags |= HAMMER_CURSOR_BACKEND;
        cursor->flags &= ~HAMMER_CURSOR_INSERT;
@@ -841,18 +816,18 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record)
                        kprintf("DOINSERT LOOKUP %d\n", error);
                if (error)
                        break;
-               if (record->rec.base.base.rec_type != HAMMER_RECTYPE_DIRENTRY) {
+               if (record->leaf.base.rec_type != HAMMER_RECTYPE_DIRENTRY) {
                        kprintf("hammer_ip_sync_record: duplicate rec "
-                               "at (%016llx)\n", record->rec.base.base.key);
+                               "at (%016llx)\n", record->leaf.base.key);
                        Debugger("duplicate record1");
                        error = EIO;
                        break;
                }
                if (++trans->hmp->namekey_iterator == 0)
                        ++trans->hmp->namekey_iterator;
-               record->rec.base.base.key &= ~(0xFFFFFFFFLL);
-               record->rec.base.base.key |= trans->hmp->namekey_iterator;
-               cursor->key_beg.key = record->rec.base.base.key;
+               record->leaf.base.key &= ~(0xFFFFFFFFLL);
+               record->leaf.base.key |= trans->hmp->namekey_iterator;
+               cursor->key_beg.key = record->leaf.base.key;
        }
        if (error != ENOENT)
                goto done;
@@ -864,72 +839,24 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record)
         *
         * Support zero-fill records (data == NULL and data_len != 0)
         */
-       if (record->data == NULL) {
-               rec = hammer_alloc_record(trans, &rec_offset,
-                                         record->rec.base.base.rec_type,
-                                         &cursor->record_buffer,
-                                         0, &bdata,
-                                         NULL, NULL, &error);
-               if (hammer_debug_general & 0x1000)
-                       kprintf("NULL RECORD DATA\n");
-       } else if (record->flags & HAMMER_RECF_INBAND) {
-               rec = hammer_alloc_record(trans, &rec_offset,
-                                         record->rec.base.base.rec_type,
-                                         &cursor->record_buffer,
-                                         record->rec.base.data_len, &bdata,
-                                         NULL, NULL, &error);
-               if (hammer_debug_general & 0x1000)
-                       kprintf("INBAND RECORD DATA %016llx DATA %016llx LEN=%d\n", rec_offset, rec->base.data_off, record->rec.base.data_len);
-       } else {
-               rec = hammer_alloc_record(trans, &rec_offset,
-                                         record->rec.base.base.rec_type,
-                                         &cursor->record_buffer,
-                                         record->rec.base.data_len, &bdata,
-                                         &data_offset,
+       if (record->data && record->leaf.data_len) {
+               bdata = hammer_alloc_data(trans, record->leaf.data_len,
+                                         &record->leaf.data_offset,
                                          &cursor->data_buffer, &error);
-               if (hammer_debug_general & 0x1000)
-                       kprintf("OOB RECORD DATA REC %016llx DATA %016llx LEN=%d\n", rec_offset, rec->base.data_off, record->rec.base.data_len);
-       }
-
-       if (rec == NULL)
-               goto done;
-
-       /*
-        * Fill in the remaining fields and insert our B-Tree node.
-        */
-       if (hammer_debug_inode)
-               kprintf("COPYREC %p\n", rec);
-       hammer_modify_buffer(trans, cursor->record_buffer, NULL, 0);
-       rec->base.base = record->rec.base.base;
-       bcopy(&record->rec.base + 1, &rec->base + 1,
-             HAMMER_RECORD_SIZE - sizeof(record->rec.base));
-
-       /*
-        * Copy the data and deal with zero-fill support.
-        */
-       if (record->data && (record->flags & HAMMER_RECF_INBAND)) {
-               rec->base.data_crc = crc32(record->data, rec->base.data_len);
-               bcopy(record->data, bdata, rec->base.data_len);
-       } else if (record->data) {
-               rec->base.data_crc = crc32(record->data, rec->base.data_len);
+               if (bdata == NULL)
+                       goto done;
+               record->leaf.data_crc = crc32(record->data,
+                                             record->leaf.data_len);
                hammer_modify_buffer(trans, cursor->data_buffer, NULL, 0);
-               bcopy(record->data, bdata, rec->base.data_len);
+               bcopy(record->data, bdata, record->leaf.data_len);
                hammer_modify_buffer_done(cursor->data_buffer);
        } else {
-               rec->base.data_len = record->rec.base.data_len;
+               /* record->leaf.data_len can be non-zero for future zero-fill */
+               record->leaf.data_offset = 0;
+               record->leaf.data_crc = 0;
        }
-       rec->base.signature = HAMMER_RECORD_SIGNATURE_GOOD;
-       rec->base.rec_crc = crc32(&rec->base.data_crc,
-                                 HAMMER_RECORD_CRCSIZE);
-       hammer_modify_buffer_done(cursor->record_buffer);
-
-       elm.leaf.base = record->rec.base.base;
-       elm.leaf.rec_offset = rec_offset;
-       elm.leaf.data_offset = rec->base.data_off;
-       elm.leaf.data_len = rec->base.data_len;
-       elm.leaf.data_crc = rec->base.data_crc;
 
-       error = hammer_btree_insert(cursor, &elm);
+       error = hammer_btree_insert(cursor, &record->leaf);
        if (hammer_debug_inode)
                kprintf("BTREE INSERT error %d @ %016llx:%d\n", error, cursor->node->node_offset, cursor->index);
 
@@ -957,13 +884,9 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record)
                        record->flags |= HAMMER_RECF_DELETED_BE;
                }
        } else {
-               hammer_modify_buffer(trans, cursor->record_buffer, NULL, 0);
-               rec->base.signature = HAMMER_RECORD_SIGNATURE_DESTROYED;
-               hammer_modify_buffer_done(cursor->record_buffer);
-               hammer_blockmap_free(trans, rec_offset, HAMMER_RECORD_SIZE);
-               if (record->data && (record->flags & HAMMER_RECF_INBAND) == 0) {
-                       hammer_blockmap_free(trans, data_offset,
-                                            record->rec.base.data_len);
+               if (record->leaf.data_offset) {
+                       hammer_blockmap_free(trans, record->leaf.data_offset,
+                                            record->leaf.data_len);
                }
        }
 
@@ -987,60 +910,26 @@ static
 int
 hammer_mem_add(struct hammer_transaction *trans, hammer_record_t record)
 {
-       void *data;
-       int bytes;
-       int reclen;
-               
-       KKASSERT(record->rec.base.signature == HAMMER_RECORD_SIGNATURE_GOOD);
-
        /*
         * Make a private copy of record->data
         */
-       if (record->data) {
-               /*
-                * Try to embed the data in extra space in the record
-                * union, otherwise allocate a copy.
-                */
-               bytes = record->rec.base.data_len;
-               switch(record->rec.base.base.rec_type) {
-               case HAMMER_RECTYPE_DIRENTRY:
-                       reclen = offsetof(struct hammer_entry_record, name[0]);
-                       break;
-               case HAMMER_RECTYPE_DATA:
-                       reclen = offsetof(struct hammer_data_record, data[0]);
-                       break;
-               default:
-                       reclen = sizeof(record->rec);
-                       break;
-               }
-               if (reclen + bytes <= HAMMER_RECORD_SIZE) {
-                       bcopy(record->data, (char *)&record->rec + reclen,
-                             bytes);
-                       record->data = (void *)((char *)&record->rec + reclen);
-                       record->flags |= HAMMER_RECF_INBAND;
-               } else {
-                       ++hammer_count_record_datas;
-                       data = kmalloc(bytes, M_HAMMER, M_WAITOK);
-                       record->flags |= HAMMER_RECF_ALLOCDATA;
-                       bcopy(record->data, data, bytes);
-                       record->data = data;
-               }
-       }
+       if (record->data)
+               KKASSERT(record->flags & HAMMER_RECF_ALLOCDATA);
 
        /*
         * Insert into the RB tree, find an unused iterator if this is
         * a directory entry.
         */
        while (RB_INSERT(hammer_rec_rb_tree, &record->ip->rec_tree, record)) {
-               if (record->rec.base.base.rec_type != HAMMER_RECTYPE_DIRENTRY){
+               if (record->leaf.base.rec_type != HAMMER_RECTYPE_DIRENTRY){
                        record->flags |= HAMMER_RECF_DELETED_FE;
                        hammer_rel_mem_record(record);
                        return (EEXIST);
                }
                if (++trans->hmp->namekey_iterator == 0)
                        ++trans->hmp->namekey_iterator;
-               record->rec.base.base.key &= ~(0xFFFFFFFFLL);
-               record->rec.base.base.key |= trans->hmp->namekey_iterator;
+               record->leaf.base.key &= ~(0xFFFFFFFFLL);
+               record->leaf.base.key |= trans->hmp->namekey_iterator;
        }
        record->flags |= HAMMER_RECF_ONRBTREE;
        hammer_modify_inode(trans, record->ip, HAMMER_INODE_XDIRTY);
@@ -1074,7 +963,7 @@ hammer_ip_lookup(hammer_cursor_t cursor)
        KKASSERT(cursor->ip);
        error = hammer_mem_lookup(cursor);
        if (error == 0) {
-               cursor->record = &cursor->iprec->rec;
+               cursor->leaf = &cursor->iprec->leaf;
                return(error);
        }
        if (error != ENOENT)
@@ -1087,7 +976,7 @@ hammer_ip_lookup(hammer_cursor_t cursor)
                return(error);
        error = hammer_btree_lookup(cursor);
        if (error == 0)
-               error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD);
+               error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_LEAF);
        return(error);
 }
 
@@ -1270,10 +1159,10 @@ next_memory:
                 * in front of the memory entry.
                 */
                elm = &cursor->node->ondisk->elms[cursor->index];
-               r = hammer_btree_cmp(&elm->base, &cursor->iprec->rec.base.base);
+               r = hammer_btree_cmp(&elm->base, &cursor->iprec->leaf.base);
                if (r < 0) {
                        error = hammer_btree_extract(cursor,
-                                                    HAMMER_CURSOR_GET_RECORD);
+                                                    HAMMER_CURSOR_GET_LEAF);
                        cursor->flags |= HAMMER_CURSOR_ATEDISK;
                        break;
                }
@@ -1306,7 +1195,7 @@ next_memory:
                /*
                 * Only the memory entry is valid.
                 */
-               cursor->record = &cursor->iprec->rec;
+               cursor->leaf = &cursor->iprec->leaf;
                cursor->flags |= HAMMER_CURSOR_ATEMEM;
 
                /*
@@ -1323,7 +1212,7 @@ next_memory:
                /*
                 * Only the disk entry is valid
                 */
-               error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD);
+               error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_LEAF);
                cursor->flags |= HAMMER_CURSOR_ATEDISK;
                break;
        default:
@@ -1332,7 +1221,7 @@ next_memory:
                 *
                 * XXX error not set properly
                 */
-               cursor->record = NULL;
+               cursor->leaf = NULL;
                error = ENOENT;
                break;
        }
@@ -1348,30 +1237,17 @@ hammer_ip_resolve_data(hammer_cursor_t cursor)
 {
        int error;
 
-       if (cursor->iprec && cursor->record == &cursor->iprec->rec) {
+       if (cursor->iprec && cursor->leaf == &cursor->iprec->leaf) {
+               cursor->leaf = &cursor->iprec->leaf;
                cursor->data = cursor->iprec->data;
                error = 0;
        } else {
+               cursor->leaf = &cursor->node->ondisk->elms[cursor->index].leaf;
                error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA);
        }
        return(error);
 }
 
-int
-hammer_ip_resolve_record_and_data(hammer_cursor_t cursor)
-{
-       int error;
-
-       if (cursor->iprec && cursor->record == &cursor->iprec->rec) {
-               cursor->data = cursor->iprec->data;
-               error = 0;
-       } else {
-               error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA |
-                                                    HAMMER_CURSOR_GET_RECORD);
-       }
-       return(error);
-}
-
 /*
  * Delete all records within the specified range for inode ip.
  *
@@ -1388,8 +1264,7 @@ hammer_ip_delete_range(hammer_cursor_t cursor, hammer_inode_t ip,
                       int64_t ran_beg, int64_t ran_end)
 {
        hammer_transaction_t trans = cursor->trans;
-       hammer_record_ondisk_t rec;
-       hammer_base_elm_t base;
+       hammer_btree_leaf_elm_t leaf;
        int error;
        int64_t off;
 
@@ -1411,7 +1286,7 @@ retry:
        cursor->flags |= HAMMER_CURSOR_BACKEND;
 
        cursor->key_end = cursor->key_beg;
-       if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) {
+       if (ip->ino_data.obj_type == HAMMER_OBJTYPE_DBFILE) {
                cursor->key_beg.key = ran_beg;
                cursor->key_beg.rec_type = HAMMER_RECTYPE_DB;
                cursor->key_end.rec_type = HAMMER_RECTYPE_DB;
@@ -1441,10 +1316,9 @@ retry:
         * Iterate through matching records and mark them as deleted.
         */
        while (error == 0) {
-               rec = cursor->record;
-               base = &rec->base.base;
+               leaf = cursor->leaf;
 
-               KKASSERT(base->delete_tid == 0);
+               KKASSERT(leaf->base.delete_tid == 0);
 
                /*
                 * There may be overlap cases for regular file data.  Also
@@ -1453,21 +1327,22 @@ retry:
                 * base offset.
                 */
 #if 0
-               kprintf("delete_range rec_type %02x\n", base->rec_type);
+               kprintf("delete_range rec_type %02x\n", leaf->base.rec_type);
 #endif
-               if (base->rec_type == HAMMER_RECTYPE_DATA) {
+               if (leaf->base.rec_type == HAMMER_RECTYPE_DATA) {
 #if 0
                        kprintf("delete_range loop key %016llx,%d\n",
-                               base->key - rec->base.data_len, rec->base.data_len);
+                               leaf->base.key - leaf->data_len,
+                               leaf->data_len);
 #endif
-                       off = base->key - rec->base.data_len;
+                       off = leaf->base.key - leaf->data_len;
                        /*
                         * Check the left edge case.  We currently do not
                         * split existing records.
                         */
                        if (off < ran_beg) {
                                panic("hammer left edge case %016llx %d\n",
-                                       base->key, rec->base.data_len);
+                                       leaf->base.key, leaf->data_len);
                        }
 
                        /*
@@ -1483,8 +1358,8 @@ retry:
                         * we missing a + 1 somewhere?  Note that ran_end
                         * could overflow.
                         */
-                       if (base->key - 1 > ran_end) {
-                               if (base->key - rec->base.data_len > ran_end)
+                       if (leaf->base.key - 1 > ran_end) {
+                               if (leaf->base.key - leaf->data_len > ran_end)
                                        break;
                                panic("hammer right edge case\n");
                        }
@@ -1523,8 +1398,7 @@ hammer_ip_delete_range_all(hammer_cursor_t cursor, hammer_inode_t ip,
                           int *countp)
 {
        hammer_transaction_t trans = cursor->trans;
-       hammer_record_ondisk_t rec;
-       hammer_base_elm_t base;
+       hammer_btree_leaf_elm_t leaf;
        int error;
 
        KKASSERT(trans->type == HAMMER_TRANS_FLS);
@@ -1553,10 +1427,9 @@ retry:
         * Iterate through matching records and mark them as deleted.
         */
        while (error == 0) {
-               rec = cursor->record;
-               base = &rec->base.base;
+               leaf = cursor->leaf;
 
-               KKASSERT(base->delete_tid == 0);
+               KKASSERT(leaf->base.delete_tid == 0);
 
                /*
                 * Mark the record and B-Tree entry as deleted.  This will
@@ -1568,7 +1441,7 @@ retry:
                 * Directory entries (and delete-on-disk directory entries)
                 * must be synced and cannot be deleted.
                 */
-               if (rec->base.base.rec_type != HAMMER_RECTYPE_DIRENTRY) {
+               if (leaf->base.rec_type != HAMMER_RECTYPE_DIRENTRY) {
                        error = hammer_ip_delete_record(cursor, trans->tid);
                        ++*countp;
                }
@@ -1613,7 +1486,7 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid)
         * individually synchronized.  Thus there should be no confusion with
         * the interlock.
         */
-       if (cursor->record == &cursor->iprec->rec) {
+       if (cursor->leaf == &cursor->iprec->leaf) {
                KKASSERT((cursor->iprec->flags & HAMMER_RECF_INTERLOCK_BE) ==0);
                cursor->iprec->flags |= HAMMER_RECF_DELETED_FE;
                cursor->iprec->flags |= HAMMER_RECF_DELETED_BE;
@@ -1625,7 +1498,7 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid)
         * This does not effect their position in the B-Tree (which is based
         * on their create_tid).
         */
-       error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD);
+       error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_LEAF);
        elm = NULL;
        hmp = cursor->node->hmp;
 
@@ -1654,15 +1527,6 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid)
                         * this could result in a duplicate record.
                         */
                        KKASSERT(elm->leaf.base.delete_tid != elm->leaf.base.create_tid);
-
-                       hammer_modify_record_field(cursor->trans, cursor->record_buffer, cursor->record, base.base.delete_tid, dodelete);
-                       cursor->record->base.base.delete_tid = tid;
-                       if (dodelete) {
-                               cursor->record->base.signature =
-                                           HAMMER_RECORD_SIGNATURE_DESTROYED;
-                       }
-                       hammer_modify_record_done(cursor->record_buffer,
-                                                 cursor->record);
                }
        }
 
@@ -1680,7 +1544,6 @@ int
 hammer_delete_at_cursor(hammer_cursor_t cursor, int64_t *stat_bytes)
 {
        hammer_btree_elm_t elm;
-       hammer_off_t rec_offset;
        hammer_off_t data_offset;
        int32_t data_len;
        u_int16_t rec_type;
@@ -1689,7 +1552,6 @@ hammer_delete_at_cursor(hammer_cursor_t cursor, int64_t *stat_bytes)
        elm = &cursor->node->ondisk->elms[cursor->index];
        KKASSERT(elm->base.btype == HAMMER_BTREE_TYPE_RECORD);
 
-       rec_offset = elm->leaf.rec_offset;
        data_offset = elm->leaf.data_offset;
        data_len = elm->leaf.data_len;
        rec_type = elm->leaf.base.rec_type;
@@ -1706,10 +1568,6 @@ hammer_delete_at_cursor(hammer_cursor_t cursor, int64_t *stat_bytes)
                        cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
                }
        }
-       if (error == 0) {
-               hammer_blockmap_free(cursor->trans, rec_offset,
-                                    sizeof(union hammer_record_ondisk));
-       }
        if (error == 0) {
                switch(data_offset & HAMMER_OFF_ZONE_MASK) {
                case HAMMER_ZONE_LARGE_DATA:
@@ -1721,13 +1579,6 @@ hammer_delete_at_cursor(hammer_cursor_t cursor, int64_t *stat_bytes)
                        break;
                }
        }
-#if 0
-       kprintf("hammer_delete_at_cursor: %d:%d:%08x %08x/%d "
-               "(%d remain in cluster)\n",
-               cluster->volume->vol_no, cluster->clu_no,
-               rec_offset, data_offset, data_len,
-               cluster->ondisk->stat_records);
-#endif
        return (error);
 }
 
index 4dccece..6b88b54 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.41 2008/05/05 20:34:48 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.42 2008/05/12 21:17:18 dillon Exp $
  */
 /*
  * Manage HAMMER's on-disk structures.  These routines are primarily
@@ -1039,8 +1039,7 @@ hammer_rel_node(hammer_node_t node)
 }
 
 /*
- *
- *
+ * Free space on-media associated with a B-Tree node.
  */
 void
 hammer_delete_node(hammer_transaction_t trans, hammer_node_t node)
@@ -1194,6 +1193,8 @@ hammer_alloc_btree(hammer_transaction_t trans, int *errorp)
        return(node);
 }
 
+#if 0
+
 /*
  * The returned buffers are already appropriately marked as being modified.
  * If the caller marks them again unnecessary undo records may be generated.
@@ -1310,6 +1311,8 @@ hammer_alloc_record(hammer_transaction_t trans,
        return(rec);
 }
 
+#endif
+
 /*
  * Allocate data.  If the address of a data buffer is supplied then
  * any prior non-NULL *data_bufferp will be released and *data_bufferp
similarity index 57%
copy from sys/vfs/hammer/hammer_ioctl.c
copy to sys/vfs/hammer/hammer_prune.c
index 0eb7c87..2be96ff 100644 (file)
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.16 2008/05/12 05:13:11 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_prune.c,v 1.1 2008/05/12 21:17:18 dillon Exp $
  */
 
 #include "hammer.h"
 
-static int hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip,
-                               struct hammer_ioc_prune *prune);
-static int hammer_ioc_gethistory(hammer_transaction_t trans, hammer_inode_t ip,
-                               struct hammer_ioc_history *hist);
-
-int
-hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag,
-            struct ucred *cred)
-{
-       struct hammer_transaction trans;
-       int error;
-
-       error = suser_cred(cred, PRISON_ROOT);
-
-       hammer_start_transaction(&trans, ip->hmp);
-
-       switch(com) {
-       case HAMMERIOC_PRUNE:
-               if (error == 0) {
-                       error = hammer_ioc_prune(&trans, ip,
-                                       (struct hammer_ioc_prune *)data);
-               }
-               break;
-       case HAMMERIOC_GETHISTORY:
-               error = hammer_ioc_gethistory(&trans, ip,
-                                       (struct hammer_ioc_history *)data);
-               break;
-       case HAMMERIOC_REBLOCK:
-               error = hammer_ioc_reblock(&trans, ip,
-                                       (struct hammer_ioc_reblock *)data);
-               break;
-       default:
-               error = EOPNOTSUPP;
-               break;
-       }
-       hammer_done_transaction(&trans);
-       return (error);
-}
-
 /*
  * Iterate through the specified range of object ids and remove any
  * deleted records that fall entirely within a prune modulo.
@@ -88,7 +49,7 @@ static int check_prune(struct hammer_ioc_prune *prune, hammer_btree_elm_t elm,
 static int realign_prune(struct hammer_ioc_prune *prune, hammer_cursor_t cursor,
                        int realign_cre, int realign_del);
 
-static int
+int
 hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip,
                 struct hammer_ioc_prune *prune)
 {
@@ -106,7 +67,7 @@ hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip,
        if ((prune->head.flags & HAMMER_IOC_PRUNE_ALL) && prune->nelms)
                return(EINVAL);
 
-       prune->cur_obj_id = cursor.key_end.obj_id;
+       prune->cur_obj_id = prune->end_obj_id;
        prune->cur_key = HAMMER_MAX_KEY;
 
 retry:
@@ -132,6 +93,12 @@ retry:
        cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
        cursor.flags |= HAMMER_CURSOR_BACKEND;
 
+       /*
+        * This flag allows the B-Tree code to clean up loose ends while
+        * it is scanning.
+        */
+       cursor.flags |= HAMMER_CURSOR_PRUNING;
+
        error = hammer_btree_last(&cursor);
        while (error == 0) {
                elm = &cursor.node->ondisk->elms[cursor.index];
@@ -334,21 +301,13 @@ realign_prune(struct hammer_ioc_prune *prune,
                        error = hammer_btree_correct_rhb(cursor, tid + 1);
                        if (error == 0) {
                                error = hammer_btree_extract(cursor,
-                                                    HAMMER_CURSOR_GET_RECORD);
+                                                    HAMMER_CURSOR_GET_LEAF);
                        }
                        if (error == 0) {
                                /* can EDEADLK */
                                error = hammer_cursor_upgrade(cursor);
                        }
                        if (error == 0) {
-                               hammer_modify_record_field(cursor->trans,
-                                           cursor->record_buffer,
-                                           cursor->record,
-                                           base.base.create_tid, 0);
-                               cursor->record->base.base.create_tid = tid;
-                               hammer_modify_record_done(
-                                           cursor->record_buffer,
-                                           cursor->record);
                                hammer_modify_node(cursor->trans, cursor->node,
                                            &elm->leaf.base.create_tid,
                                            sizeof(elm->leaf.base.create_tid));
@@ -369,7 +328,7 @@ realign_prune(struct hammer_ioc_prune *prune,
                delta = elm->leaf.base.delete_tid % mod;
                if (delta) {
                        error = hammer_btree_extract(cursor,
-                                                    HAMMER_CURSOR_GET_RECORD);
+                                                    HAMMER_CURSOR_GET_LEAF);
                        if (error == 0) {
                                hammer_modify_node(cursor->trans, cursor->node,
                                            &elm->leaf.base.delete_tid,
@@ -378,224 +337,9 @@ realign_prune(struct hammer_ioc_prune *prune,
                                            elm->leaf.base.delete_tid -
                                            delta + mod;
                                hammer_modify_node_done(cursor->node);
-                               hammer_modify_record_field(cursor->trans,
-                                           cursor->record_buffer,
-                                           cursor->record,
-                                           base.base.delete_tid, 0);
-                               cursor->record->base.base.delete_tid =
-                                           elm->leaf.base.delete_tid;
-                               hammer_modify_record_done(cursor->record_buffer,
-                                           cursor->record);
                        }
                }
        }
        return (error);
 }
 
-/*
- * Iterate through an object's inode or an object's records and record
- * modification TIDs.
- */
-static void add_history(hammer_inode_t ip, struct hammer_ioc_history *hist,
-                       hammer_btree_elm_t elm);
-
-static
-int
-hammer_ioc_gethistory(hammer_transaction_t trans, hammer_inode_t ip,
-                     struct hammer_ioc_history *hist)
-{
-       struct hammer_cursor cursor;
-       hammer_btree_elm_t elm;
-       int error;
-
-       /*
-        * Validate the structure and initialize for return.
-        */
-       if (hist->beg_tid > hist->end_tid)
-               return(EINVAL);
-       if (hist->head.flags & HAMMER_IOC_HISTORY_ATKEY) {
-               if (hist->key > hist->nxt_key)
-                       return(EINVAL);
-       }
-
-       hist->obj_id = ip->obj_id;
-       hist->count = 0;
-       hist->nxt_tid = hist->end_tid;
-       hist->head.flags &= ~HAMMER_IOC_HISTORY_NEXT_TID;
-       hist->head.flags &= ~HAMMER_IOC_HISTORY_NEXT_KEY;
-       hist->head.flags &= ~HAMMER_IOC_HISTORY_EOF;
-       hist->head.flags &= ~HAMMER_IOC_HISTORY_UNSYNCED;
-       if ((ip->flags & HAMMER_INODE_MODMASK) & ~HAMMER_INODE_ITIMES)
-               hist->head.flags |= HAMMER_IOC_HISTORY_UNSYNCED;
-
-       /*
-        * Setup the cursor.  We can't handle undeletable records
-        * (create_tid of 0) at the moment.  A create_tid of 0 has
-        * a special meaning and cannot be specified in the cursor.
-        */
-       error = hammer_init_cursor(trans, &cursor, &ip->cache[0], NULL);
-       if (error) {
-               hammer_done_cursor(&cursor);
-               return(error);
-       }
-
-       cursor.key_beg.obj_id = hist->obj_id;
-       cursor.key_beg.create_tid = hist->beg_tid;
-       cursor.key_beg.delete_tid = 0;
-       cursor.key_beg.obj_type = 0;
-       if (cursor.key_beg.create_tid == HAMMER_MIN_TID)
-               cursor.key_beg.create_tid = 1;
-
-       cursor.key_end.obj_id = hist->obj_id;
-       cursor.key_end.create_tid = hist->end_tid;
-       cursor.key_end.delete_tid = 0;
-       cursor.key_end.obj_type = 0;
-
-       cursor.flags |= HAMMER_CURSOR_END_EXCLUSIVE;
-
-       if (hist->head.flags & HAMMER_IOC_HISTORY_ATKEY) {
-               /*
-                * key-range within the file.  For a regular file the
-                * on-disk key represents BASE+LEN, not BASE, so the
-                * first possible record containing the offset 'key'
-                * has an on-disk key of (key + 1).
-                */
-               cursor.key_beg.key = hist->key;
-               cursor.key_end.key = HAMMER_MAX_KEY;
-
-               switch(ip->ino_rec.base.base.obj_type) {
-               case HAMMER_OBJTYPE_REGFILE:
-                       ++cursor.key_beg.key;
-                       cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
-                       break;
-               case HAMMER_OBJTYPE_DIRECTORY:
-                       cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
-                       break;
-               case HAMMER_OBJTYPE_DBFILE:
-                       cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
-                       break;
-               default:
-                       error = EINVAL;
-                       break;
-               }
-               cursor.key_end.rec_type = cursor.key_beg.rec_type;
-       } else {
-               /*
-                * The inode itself.
-                */
-               cursor.key_beg.key = 0;
-               cursor.key_end.key = 0;
-               cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
-               cursor.key_end.rec_type = HAMMER_RECTYPE_INODE;
-       }
-
-       error = hammer_btree_first(&cursor);
-       while (error == 0) {
-               elm = &cursor.node->ondisk->elms[cursor.index];
-
-               add_history(ip, hist, elm);
-               if (hist->head.flags & (HAMMER_IOC_HISTORY_NEXT_TID |
-                                       HAMMER_IOC_HISTORY_NEXT_KEY |
-                                       HAMMER_IOC_HISTORY_EOF)) {
-                       break;
-               }
-               error = hammer_btree_iterate(&cursor);
-       }
-       if (error == ENOENT) {
-               hist->head.flags |= HAMMER_IOC_HISTORY_EOF;
-               error = 0;
-       }
-       hammer_done_cursor(&cursor);
-       return(error);
-}
-
-/*
- * Add the scanned element to the ioctl return structure.  Some special
- * casing is required for regular files to accomodate how data ranges are
- * stored on-disk.
- */
-static void
-add_history(hammer_inode_t ip, struct hammer_ioc_history *hist,
-           hammer_btree_elm_t elm)
-{
-       if (elm->base.btype != HAMMER_BTREE_TYPE_RECORD)
-               return;
-       if ((hist->head.flags & HAMMER_IOC_HISTORY_ATKEY) &&
-           ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_REGFILE) {
-               /*
-                * Adjust nxt_key
-                */
-               if (hist->nxt_key > elm->leaf.base.key - elm->leaf.data_len &&
-                   hist->key < elm->leaf.base.key - elm->leaf.data_len) {
-                       hist->nxt_key = elm->leaf.base.key - elm->leaf.data_len;
-               }
-               if (hist->nxt_key > elm->leaf.base.key)
-                       hist->nxt_key = elm->leaf.base.key;
-
-               /*
-                * Record is beyond MAXPHYS, there won't be any more records
-                * in the iteration covering the requested offset (key).
-                */
-               if (elm->leaf.base.key >= MAXPHYS &&
-                   elm->leaf.base.key - MAXPHYS > hist->key) {
-                       hist->head.flags |= HAMMER_IOC_HISTORY_NEXT_KEY;
-               }
-
-               /*
-                * Data-range of record does not cover the key.
-                */
-               if (elm->leaf.base.key - elm->leaf.data_len > hist->key)
-                       return;
-
-       } else if (hist->head.flags & HAMMER_IOC_HISTORY_ATKEY) {
-               /*
-                * Adjust nxt_key
-                */
-               if (hist->nxt_key > elm->leaf.base.key &&
-                   hist->key < elm->leaf.base.key) {
-                       hist->nxt_key = elm->leaf.base.key;
-               }
-
-               /*
-                * Record is beyond the requested key.
-                */
-               if (elm->leaf.base.key > hist->key)
-                       hist->head.flags |= HAMMER_IOC_HISTORY_NEXT_KEY;
-       }
-
-       /*
-        * Add create_tid if it is in-bounds.
-        */
-       if ((hist->count == 0 ||
-            elm->leaf.base.create_tid != hist->tid_ary[hist->count - 1]) &&
-           elm->leaf.base.create_tid >= hist->beg_tid &&
-           elm->leaf.base.create_tid < hist->end_tid) {
-               if (hist->count == HAMMER_MAX_HISTORY_ELMS) {
-                       hist->nxt_tid = elm->leaf.base.create_tid;
-                       hist->head.flags |= HAMMER_IOC_HISTORY_NEXT_TID;
-                       return;
-               }
-               hist->tid_ary[hist->count++] = elm->leaf.base.create_tid;
-       }
-
-       /*
-        * Add delete_tid if it is in-bounds.  Note that different portions
-        * of the history may have overlapping data ranges with different
-        * delete_tid's.  If this case occurs the delete_tid may match the
-        * create_tid of a following record.  XXX
-        *
-        *      [        ]
-        *            [     ]
-        */
-       if (elm->leaf.base.delete_tid &&
-           elm->leaf.base.delete_tid >= hist->beg_tid &&
-           elm->leaf.base.delete_tid < hist->end_tid) {
-               if (hist->count == HAMMER_MAX_HISTORY_ELMS) {
-                       hist->nxt_tid = elm->leaf.base.delete_tid;
-                       hist->head.flags |= HAMMER_IOC_HISTORY_NEXT_TID;
-                       return;
-               }
-               hist->tid_ary[hist->count++] = elm->leaf.base.delete_tid;
-       }
-}
-
index 8fa5f76..7b3daa9 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.13 2008/05/12 05:13:11 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.14 2008/05/12 21:17:18 dillon Exp $
  */
 /*
  * HAMMER reblocker - This code frees up fragmented physical space
@@ -50,8 +50,6 @@ static int hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
                                 hammer_btree_elm_t elm);
 static int hammer_reblock_data(struct hammer_ioc_reblock *reblock,
                                hammer_cursor_t cursor, hammer_btree_elm_t elm);
-static int hammer_reblock_record(struct hammer_ioc_reblock *reblock,
-                               hammer_cursor_t cursor, hammer_btree_elm_t elm);
 static int hammer_reblock_node(struct hammer_ioc_reblock *reblock,
                                hammer_cursor_t cursor, hammer_btree_elm_t elm);
 
@@ -182,30 +180,6 @@ hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
                }
        }
 
-       /*
-        * Reblock a record
-        */
-       tmp_offset = elm->leaf.rec_offset;
-       zone = HAMMER_ZONE_DECODE(tmp_offset);
-       if (zone == HAMMER_ZONE_RECORD_INDEX &&
-           error == 0 && (reblock->head.flags & HAMMER_IOC_DO_RECS)) {
-               ++reblock->record_count;
-               bytes = hammer_blockmap_getfree(cursor->trans->hmp, tmp_offset,
-                                               &cur, &error);
-               if (error == 0 && cur == 0 && bytes >= reblock->free_level) {
-                       if (hammer_debug_general & 0x4000)
-                               kprintf("%6d ", bytes);
-                       error = hammer_cursor_upgrade(cursor);
-                       if (error == 0) {
-                               error = hammer_reblock_record(reblock,
-                                                             cursor, elm);
-                       }
-                       if (error == 0) {
-                               ++reblock->record_moves;
-                       }
-               }
-       }
-
        /*
         * Reblock a B-Tree node.  Adjust elm to point at the parent's
         * leaf entry.
@@ -253,7 +227,7 @@ hammer_reblock_data(struct hammer_ioc_reblock *reblock,
        void *ndata;
 
        error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA |
-                                            HAMMER_CURSOR_GET_RECORD);
+                                            HAMMER_CURSOR_GET_LEAF);
        if (error)
                return (error);
        ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len,
@@ -271,11 +245,6 @@ hammer_reblock_data(struct hammer_ioc_reblock *reblock,
        hammer_blockmap_free(cursor->trans,
                             elm->leaf.data_offset, elm->leaf.data_len);
 
-       hammer_modify_record_field(cursor->trans, cursor->record_buffer,
-                                  cursor->record, base.data_off, 0);
-       cursor->record->base.data_off = ndata_offset;
-       hammer_modify_record_done(cursor->record_buffer, cursor->record);
-
        hammer_modify_node(cursor->trans, cursor->node,
                           &elm->leaf.data_offset, sizeof(hammer_off_t));
        elm->leaf.data_offset = ndata_offset;
@@ -287,83 +256,6 @@ done:
        return (error);
 }
 
-/*
- * Reblock a record.  The B-Tree must be adjusted to point to the new record
- * and the existing record must be physically destroyed so a FS rebuild
- * does not see two versions of the same record.
- */
-static int
-hammer_reblock_record(struct hammer_ioc_reblock *reblock,
-                     hammer_cursor_t cursor, hammer_btree_elm_t elm)
-{
-       struct hammer_buffer *rec_buffer = NULL;
-       hammer_off_t nrec_offset;
-       hammer_off_t ndata_offset;
-       hammer_record_ondisk_t orec;
-       hammer_record_ondisk_t nrec;
-       int error;
-       int inline_data;
-
-       error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD);
-       if (error)
-               return (error);
-
-       nrec = hammer_alloc_record(cursor->trans, &nrec_offset,
-                                  elm->leaf.base.rec_type, &rec_buffer,
-                                  0, NULL, NULL, NULL, &error);
-       if (error)
-               goto done;
-
-       /*
-        * Move the record.  Check for an inline data reference and move that
-        * too if necessary.
-        */
-       orec = cursor->record;
-       hammer_modify_buffer(cursor->trans, rec_buffer, NULL, 0);
-       bcopy(orec, nrec, sizeof(*nrec));
-
-       if ((orec->base.data_off & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RECORD) {
-               ndata_offset = orec->base.data_off - elm->leaf.rec_offset;
-               KKASSERT(ndata_offset < sizeof(*nrec));
-               ndata_offset += nrec_offset;
-               inline_data = 1;
-       } else {
-               ndata_offset = 0;
-               inline_data = 0;
-       }
-       hammer_modify_record_all(cursor->trans, cursor->record_buffer, orec);
-       orec->base.base.rec_type |= HAMMER_RECTYPE_MOVED;
-       orec->base.signature = HAMMER_RECORD_SIGNATURE_DESTROYED;
-       hammer_modify_record_done(cursor->record_buffer, orec);
-
-       hammer_blockmap_free(cursor->trans,
-                            elm->leaf.rec_offset, sizeof(*nrec));
-
-       if (hammer_debug_general & 0x4000) {
-               kprintf("REBLOCK RECD %016llx -> %016llx\n",
-                       elm->leaf.rec_offset, nrec_offset);
-       }
-
-       hammer_modify_node(cursor->trans, cursor->node,
-                          &elm->leaf.rec_offset, sizeof(hammer_off_t));
-       elm->leaf.rec_offset = nrec_offset;
-       hammer_modify_node_done(cursor->node);
-       if (inline_data) {
-               hammer_modify_node(cursor->trans, cursor->node,
-                                &elm->leaf.data_offset, sizeof(hammer_off_t));
-               elm->leaf.data_offset = ndata_offset;
-               hammer_modify_node_done(cursor->node);
-               nrec->base.data_off = ndata_offset;
-       }
-       nrec->base.rec_crc = crc32(&nrec->base.data_crc, HAMMER_RECORD_CRCSIZE);
-       hammer_modify_buffer_done(rec_buffer);
-
-done:
-       if (rec_buffer)
-               hammer_rel_buffer(rec_buffer, 0);
-       return (error);
-}
-
 /*
  * Reblock a B-Tree (leaf) node.  The parent must be adjusted to point to
  * the new copy of the leaf node.  elm is a pointer to the parent element
index 3d05691..4618508 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.18 2008/05/06 00:21:08 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.19 2008/05/12 21:17:18 dillon Exp $
  */
 /*
  * HAMMER structural locking
@@ -386,16 +386,6 @@ hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
        return (ondisk->vol_crc == crc);
 }
 
-int
-hammer_crc_test_record(hammer_record_ondisk_t ondisk)
-{
-       hammer_crc_t crc;
-
-       crc = crc32(&ondisk->base.rec_crc + 1, HAMMER_RECORD_CRCSIZE);
-       return (ondisk->base.rec_crc == crc &&
-               ondisk->base.signature == HAMMER_RECORD_SIGNATURE_GOOD);
-}
-
 int
 hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
 {
index 6318535..80c6996 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.51 2008/05/09 22:17:43 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.52 2008/05/12 21:17:18 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -207,10 +207,10 @@ hammer_vop_read(struct vop_read_args *ap)
         * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
         */
        uio = ap->a_uio;
-       while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_rec.ino_size) {
+       while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_data.size) {
                offset = uio->uio_offset & HAMMER_BUFMASK;
 #if 0
-               error = cluster_read(ap->a_vp, ip->ino_rec.ino_size,
+               error = cluster_read(ap->a_vp, ip->ino_data.size,
                                     uio->uio_offset - offset, HAMMER_BUFSIZE,
                                     MAXBSIZE, seqcount, &bp);
 #endif
@@ -224,8 +224,8 @@ hammer_vop_read(struct vop_read_args *ap)
                n = HAMMER_BUFSIZE - offset;
                if (n > uio->uio_resid)
                        n = uio->uio_resid;
-               if (n > ip->ino_rec.ino_size - uio->uio_offset)
-                       n = (int)(ip->ino_rec.ino_size - uio->uio_offset);
+               if (n > ip->ino_data.size - uio->uio_offset)
+                       n = (int)(ip->ino_data.size - uio->uio_offset);
                error = uiomove((char *)bp->b_data + offset, n, uio);
                if (error) {
                        bqrelse(bp);
@@ -235,7 +235,7 @@ hammer_vop_read(struct vop_read_args *ap)
        }
        if ((ip->flags & HAMMER_INODE_RO) == 0 &&
            (ip->hmp->mp->mnt_flag & MNT_NOATIME) == 0) {
-               ip->ino_rec.ino_atime = trans.time;
+               ip->ino_leaf.atime = trans.time;
                hammer_modify_inode(&trans, ip, HAMMER_INODE_ITIMES);
        }
        hammer_done_transaction(&trans);
@@ -277,7 +277,7 @@ hammer_vop_write(struct vop_write_args *ap)
         * Check append mode
         */
        if (ap->a_ioflag & IO_APPEND)
-               uio->uio_offset = ip->ino_rec.ino_size;
+               uio->uio_offset = ip->ino_data.size;
 
        /*
         * Check for illegal write offsets.  Valid range is 0...2^63-1
@@ -308,7 +308,7 @@ hammer_vop_write(struct vop_write_args *ap)
                n = HAMMER_BUFSIZE - offset;
                if (n > uio->uio_resid)
                        n = uio->uio_resid;
-               if (uio->uio_offset + n > ip->ino_rec.ino_size) {
+               if (uio->uio_offset + n > ip->ino_data.size) {
                        vnode_pager_setsize(ap->a_vp, uio->uio_offset + n);
                        fixsize = 1;
                }
@@ -339,7 +339,7 @@ hammer_vop_write(struct vop_write_args *ap)
                                    HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
                        if ((bp->b_flags & B_CACHE) == 0)
                                vfs_bio_clrbuf(bp);
-               } else if (uio->uio_offset - offset >= ip->ino_rec.ino_size) {
+               } else if (uio->uio_offset - offset >= ip->ino_data.size) {
                        /*
                         * If the base offset of the buffer is beyond the
                         * file EOF, we don't have to issue a read.
@@ -367,21 +367,22 @@ hammer_vop_write(struct vop_write_args *ap)
                if (error) {
                        brelse(bp);
                        if (fixsize) {
-                               vtruncbuf(ap->a_vp, ip->ino_rec.ino_size,
+                               vtruncbuf(ap->a_vp, ip->ino_data.size,
                                          HAMMER_BUFSIZE);
                        }
                        break;
                }
                /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
-               if (ip->ino_rec.ino_size < uio->uio_offset) {
-                       ip->ino_rec.ino_size = uio->uio_offset;
-                       flags = HAMMER_INODE_RDIRTY;
-                       vnode_pager_setsize(ap->a_vp, ip->ino_rec.ino_size);
+               if (ip->ino_data.size < uio->uio_offset) {
+                       ip->ino_data.size = uio->uio_offset;
+                       flags = HAMMER_INODE_DDIRTY;
+                       vnode_pager_setsize(ap->a_vp, ip->ino_data.size);
                } else {
                        flags = 0;
                }
-               ip->ino_rec.ino_mtime = trans.time;
+               ip->ino_data.mtime = trans.time;
                flags |= HAMMER_INODE_ITIMES | HAMMER_INODE_BUFS;
+               flags |= HAMMER_INODE_DDIRTY;   /* XXX mtime is stored in ino_data, so updating it dirties the inode data */
                hammer_modify_inode(&trans, ip, flags);
 
                if (ap->a_ioflag & IO_SYNC) {
@@ -441,7 +442,7 @@ hammer_vop_advlock(struct vop_advlock_args *ap)
 {
        struct hammer_inode *ip = VTOI(ap->a_vp);
 
-       return (lf_advlock(ap, &ip->advlock, ip->ino_rec.ino_size));
+       return (lf_advlock(ap, &ip->advlock, ip->ino_data.size));
 }
 
 /*
@@ -543,32 +544,32 @@ hammer_vop_getattr(struct vop_getattr_args *ap)
 #endif
 
        vap->va_fsid = ip->hmp->fsid_udev;
-       vap->va_fileid = ip->ino_rec.base.base.obj_id;
+       vap->va_fileid = ip->ino_leaf.base.obj_id;
        vap->va_mode = ip->ino_data.mode;
-       vap->va_nlink = ip->ino_rec.ino_nlinks;
+       vap->va_nlink = ip->ino_data.nlinks;
        vap->va_uid = hammer_to_unix_xid(&ip->ino_data.uid);
        vap->va_gid = hammer_to_unix_xid(&ip->ino_data.gid);
        vap->va_rmajor = 0;
        vap->va_rminor = 0;
-       vap->va_size = ip->ino_rec.ino_size;
-       hammer_to_timespec(ip->ino_rec.ino_atime, &vap->va_atime);
-       hammer_to_timespec(ip->ino_rec.ino_mtime, &vap->va_mtime);
+       vap->va_size = ip->ino_data.size;
+       hammer_to_timespec(ip->ino_leaf.atime, &vap->va_atime);
+       hammer_to_timespec(ip->ino_data.mtime, &vap->va_mtime);
        hammer_to_timespec(ip->ino_data.ctime, &vap->va_ctime);
        vap->va_flags = ip->ino_data.uflags;
        vap->va_gen = 1;        /* hammer inums are unique for all time */
        vap->va_blocksize = HAMMER_BUFSIZE;
-       vap->va_bytes = (ip->ino_rec.ino_size + 63) & ~63;
-       vap->va_type = hammer_get_vnode_type(ip->ino_rec.base.base.obj_type);
+       vap->va_bytes = (ip->ino_data.size + 63) & ~63;
+       vap->va_type = hammer_get_vnode_type(ip->ino_data.obj_type);
        vap->va_filerev = 0;    /* XXX */
        /* mtime uniquely identifies any adjustments made to the file */
-       vap->va_fsmid = ip->ino_rec.ino_mtime;
+       vap->va_fsmid = ip->ino_data.mtime;
        vap->va_uid_uuid = ip->ino_data.uid;
        vap->va_gid_uuid = ip->ino_data.gid;
        vap->va_fsid_uuid = ip->hmp->fsid;
        vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
                          VA_FSID_UUID_VALID;
 
-       switch (ip->ino_rec.base.base.obj_type) {
+       switch (ip->ino_data.obj_type) {
        case HAMMER_OBJTYPE_CDEV:
        case HAMMER_OBJTYPE_BDEV:
                vap->va_rmajor = ip->ino_data.rmajor;
@@ -596,7 +597,6 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap)
        hammer_inode_t ip;
        hammer_tid_t asof;
        struct hammer_cursor cursor;
-       union hammer_record_ondisk *rec;
        struct vnode *vp;
        int64_t namekey;
        int error;
@@ -680,16 +680,14 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap)
        obj_id = 0;
 
        if (error == 0) {
-               rec = NULL;
                error = hammer_ip_first(&cursor);
                while (error == 0) {
                        error = hammer_ip_resolve_data(&cursor);
                        if (error)
                                break;
-                       rec = cursor.record;
-                       if (nlen == rec->entry.base.data_len &&
-                           bcmp(ncp->nc_name, cursor.data, nlen) == 0) {
-                               obj_id = rec->entry.obj_id;
+                       if (nlen == cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF &&
+                           bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) {
+                               obj_id = cursor.data->entry.obj_id;
                                break;
                        }
                        error = hammer_ip_next(&cursor);
@@ -992,7 +990,6 @@ hammer_vop_readdir(struct vop_readdir_args *ap)
        struct hammer_cursor cursor;
        struct hammer_inode *ip;
        struct uio *uio;
-       hammer_record_ondisk_t rec;
        hammer_base_elm_t base;
        int error;
        int cookie_index;
@@ -1073,21 +1070,21 @@ hammer_vop_readdir(struct vop_readdir_args *ap)
        error = hammer_ip_first(&cursor);
 
        while (error == 0) {
-               error = hammer_ip_resolve_record_and_data(&cursor);
+               error = hammer_ip_resolve_data(&cursor);
                if (error)
                        break;
-               rec = cursor.record;
-               base = &rec->base.base;
+               base = &cursor.leaf->base;
                saveoff = base->key;
+               KKASSERT(cursor.leaf->data_len > HAMMER_ENTRY_NAME_OFF);
 
                if (base->obj_id != ip->obj_id)
                        panic("readdir: bad record at %p", cursor.node);
 
                r = vop_write_dirent(
-                            &error, uio, rec->entry.obj_id,
-                            hammer_get_dtype(rec->entry.base.base.obj_type),
-                            rec->entry.base.data_len,
-                            (void *)cursor.data);
+                            &error, uio, cursor.data->entry.obj_id,
+                            hammer_get_dtype(cursor.leaf->base.obj_type),
+                            cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF ,
+                            (void *)cursor.data->entry.name);
                if (r)
                        break;
                ++saveoff;
@@ -1160,8 +1157,11 @@ hammer_vop_readlink(struct vop_readlink_args *ap)
        if (error == 0) {
                error = hammer_ip_resolve_data(&cursor);
                if (error == 0) {
-                       error = uiomove((char *)cursor.data,
-                                       cursor.record->base.data_len,
+                       KKASSERT(cursor.leaf->data_len >=
+                                HAMMER_SYMLINK_NAME_OFF);
+                       error = uiomove(cursor.data->symlink.name,
+                                       cursor.leaf->data_len -
+                                               HAMMER_SYMLINK_NAME_OFF,
                                        ap->a_uio);
                }
        }
@@ -1201,9 +1201,8 @@ hammer_vop_nrename(struct vop_nrename_args *ap)
        struct hammer_inode *tdip;
        struct hammer_inode *ip;
        struct hammer_cursor cursor;
-       union hammer_record_ondisk *rec;
        int64_t namekey;
-       int error;
+       int nlen, error;
 
        fdip = VTOI(ap->a_fdvp);
        tdip = VTOI(ap->a_tdvp);
@@ -1274,9 +1273,10 @@ retry:
        while (error == 0) {
                if (hammer_ip_resolve_data(&cursor) != 0)
                        break;
-               rec = cursor.record;
-               if (fncp->nc_nlen == rec->entry.base.data_len &&
-                   bcmp(fncp->nc_name, cursor.data, fncp->nc_nlen) == 0) {
+               nlen = cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF;
+               KKASSERT(nlen > 0);
+               if (fncp->nc_nlen == nlen &&
+                   bcmp(fncp->nc_name, cursor.data->entry.name, nlen) == 0) {
                        break;
                }
                error = hammer_ip_next(&cursor);
@@ -1394,17 +1394,17 @@ hammer_vop_setattr(struct vop_setattr_args *ap)
                        modflags |= HAMMER_INODE_DDIRTY;
                }
        }
-       while (vap->va_size != VNOVAL && ip->ino_rec.ino_size != vap->va_size) {
+       while (vap->va_size != VNOVAL && ip->ino_data.size != vap->va_size) {
                switch(ap->a_vp->v_type) {
                case VREG:
-                       if (vap->va_size == ip->ino_rec.ino_size)
+                       if (vap->va_size == ip->ino_data.size)
                                break;
                        /*
                         * XXX break atomicy, we can deadlock the backend
                         * if we do not release the lock.  Probably not a
                         * big deal here.
                         */
-                       if (vap->va_size < ip->ino_rec.ino_size) {
+                       if (vap->va_size < ip->ino_data.size) {
                                vtruncbuf(ap->a_vp, vap->va_size,
                                          HAMMER_BUFSIZE);
                                truncating = 1;
@@ -1412,8 +1412,8 @@ hammer_vop_setattr(struct vop_setattr_args *ap)
                                vnode_pager_setsize(ap->a_vp, vap->va_size);
                                truncating = 0;
                        }
-                       ip->ino_rec.ino_size = vap->va_size;
-                       modflags |= HAMMER_INODE_RDIRTY;
+                       ip->ino_data.size = vap->va_size;
+                       modflags |= HAMMER_INODE_DDIRTY;
                        aligned_size = (vap->va_size + HAMMER_BUFMASK) &
                                       ~HAMMER_BUFMASK64;
 
@@ -1459,8 +1459,8 @@ hammer_vop_setattr(struct vop_setattr_args *ap)
                        } else if (ip->trunc_off > vap->va_size) {
                                ip->trunc_off = vap->va_size;
                        }
-                       ip->ino_rec.ino_size = vap->va_size;
-                       modflags |= HAMMER_INODE_RDIRTY;
+                       ip->ino_data.size = vap->va_size;
+                       modflags |= HAMMER_INODE_DDIRTY;
                        break;
                default:
                        error = EINVAL;
@@ -1469,12 +1469,12 @@ hammer_vop_setattr(struct vop_setattr_args *ap)
                break;
        }
        if (vap->va_atime.tv_sec != VNOVAL) {
-               ip->ino_rec.ino_atime =
+               ip->ino_leaf.atime =
                        hammer_timespec_to_transid(&vap->va_atime);
                modflags |= HAMMER_INODE_ITIMES;
        }
        if (vap->va_mtime.tv_sec != VNOVAL) {
-               ip->ino_rec.ino_mtime =
+               ip->ino_data.mtime =
                        hammer_timespec_to_transid(&vap->va_mtime);
                modflags |= HAMMER_INODE_ITIMES;
        }
@@ -1536,24 +1536,23 @@ hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
         * as pure data, not a string, and is no \0 terminated.
         */
        if (error == 0) {
-               record = hammer_alloc_mem_record(nip);
-               record->type = HAMMER_MEM_RECORD_GENERAL;
                bytes = strlen(ap->a_target);
+               record = hammer_alloc_mem_record(nip, bytes);
+               record->type = HAMMER_MEM_RECORD_GENERAL;
 
-               record->rec.base.base.key = HAMMER_FIXKEY_SYMLINK;
-               record->rec.base.base.rec_type = HAMMER_RECTYPE_FIX;
-               record->rec.base.data_len = bytes;
-               record->rec.base.signature = HAMMER_RECORD_SIGNATURE_GOOD;
-               record->data = (void *)ap->a_target;
-               /* will be reallocated by routine below */
+               record->leaf.base.key = HAMMER_FIXKEY_SYMLINK;
+               record->leaf.base.rec_type = HAMMER_RECTYPE_FIX;
+               record->leaf.data_len = bytes;
+               KKASSERT(HAMMER_SYMLINK_NAME_OFF == 0);
+               bcopy(ap->a_target, record->data->symlink.name, bytes);
                error = hammer_ip_add_record(&trans, record);
 
                /*
                 * Set the file size to the length of the link.
                 */
                if (error == 0) {
-                       nip->ino_rec.ino_size = bytes;
-                       hammer_modify_inode(&trans, nip, HAMMER_INODE_RDIRTY);
+                       nip->ino_data.size = bytes;
+                       hammer_modify_inode(&trans, nip, HAMMER_INODE_DDIRTY);
                }
        }
        if (error == 0)
@@ -1682,7 +1681,6 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap)
        struct hammer_transaction trans;
        struct hammer_inode *ip;
        struct hammer_cursor cursor;
-       hammer_record_ondisk_t rec;
        hammer_base_elm_t base;
        struct bio *bio;
        struct buf *bp;
@@ -1715,9 +1713,9 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap)
        cursor.flags |= HAMMER_CURSOR_ASOF | HAMMER_CURSOR_DATAEXTOK;
 
        cursor.key_end = cursor.key_beg;
-       KKASSERT(ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_REGFILE);
+       KKASSERT(ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE);
 #if 0
-       if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) {
+       if (ip->ino_data.obj_type == HAMMER_OBJTYPE_DBFILE) {
                cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
                cursor.key_end.rec_type = HAMMER_RECTYPE_DB;
                cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
@@ -1742,10 +1740,9 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap)
                error = hammer_ip_resolve_data(&cursor);
                if (error)
                        break;
-               rec = cursor.record;
-               base = &rec->base.base;
+               base = &cursor.leaf->base;
 
-               rec_offset = base->key - rec->data.base.data_len;
+               rec_offset = base->key - cursor.leaf->data_len;
 
                /*
                 * Calculate the gap, if any, and zero-fill it.
@@ -1768,7 +1765,7 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap)
                 */
                roff = -n;
                rec_offset += roff;
-               n = rec->data.base.data_len - roff;
+               n = cursor.leaf->data_len - roff;
                KKASSERT(n > 0);
                if (n > bp->b_bufsize - boff)
                        n = bp->b_bufsize - boff;
@@ -1905,7 +1902,7 @@ hammer_dowrite(hammer_cursor_t cursor, hammer_inode_t ip, struct bio *bio)
         * Delete any records overlapping our range.  This function will
         * (eventually) properly truncate partial overlaps.
         */
-       if (ip->sync_ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) {
+       if (ip->sync_ino_data.obj_type == HAMMER_OBJTYPE_DBFILE) {
                error = hammer_ip_delete_range(cursor, ip, bio->bio_offset,
                                               bio->bio_offset);
        } else {
@@ -1925,11 +1922,11 @@ hammer_dowrite(hammer_cursor_t cursor, hammer_inode_t ip, struct bio *bio)
        if (error == 0) {
                int limit_size;
 
-               if (ip->sync_ino_rec.ino_size - bio->bio_offset > 
+               if (ip->sync_ino_data.size - bio->bio_offset > 
                    bp->b_bufsize) {
                            limit_size = bp->b_bufsize;
                } else {
-                       limit_size = (int)(ip->sync_ino_rec.ino_size -
+                       limit_size = (int)(ip->sync_ino_data.size -
                                           bio->bio_offset);
                        KKASSERT(limit_size >= 0);
                        limit_size = (limit_size + 63) & ~63;
@@ -1966,10 +1963,9 @@ hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
        struct namecache *ncp;
        hammer_inode_t dip;
        hammer_inode_t ip;
-       hammer_record_ondisk_t rec;
        struct hammer_cursor cursor;
        int64_t namekey;
-       int error;
+       int nlen, error;
 
        /*
         * Calculate the namekey and setup the key range for the scan.  This
@@ -2008,16 +2004,16 @@ retry:
         * The hammer_ip_*() functions merge in-memory records with on-disk
         * records for the purposes of the search.
         */
-       rec = NULL;
        error = hammer_ip_first(&cursor);
 
        while (error == 0) {
                error = hammer_ip_resolve_data(&cursor);
                if (error)
                        break;
-               rec = cursor.record;
-               if (ncp->nc_nlen == rec->entry.base.data_len &&
-                   bcmp(ncp->nc_name, cursor.data, ncp->nc_nlen) == 0) {
+               nlen = cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF;
+               KKASSERT(nlen > 0);
+               if (ncp->nc_nlen == nlen &&
+                   bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) {
                        break;
                }
                error = hammer_ip_next(&cursor);
@@ -2030,10 +2026,10 @@ retry:
         */
        if (error == 0) {
                ip = hammer_get_inode(trans, &dip->cache[1],
-                                     rec->entry.obj_id,
+                                     cursor.data->entry.obj_id,
                                      dip->hmp->asof, 0, &error);
                if (error == ENOENT) {
-                       kprintf("obj_id %016llx\n", rec->entry.obj_id);
+                       kprintf("obj_id %016llx\n", cursor.data->entry.obj_id);
                        Debugger("ENOENT unlinking object that should exist");
                }
 
@@ -2045,7 +2041,7 @@ retry:
                 * terminate the cursor to avoid a deadlock.  It is ok to
                 * call hammer_done_cursor() twice.
                 */
-               if (error == 0 && ip->ino_rec.base.base.obj_type ==
+               if (error == 0 && ip->ino_data.obj_type ==
                                  HAMMER_OBJTYPE_DIRECTORY) {
                        error = hammer_ip_check_directory_empty(trans, &cursor,
                                                                ip);