HAMMER 28/many: Implement zoned blockmap
author     Matthew Dillon <dillon@dragonflybsd.org>
Sun, 10 Feb 2008 09:51:01 +0000 (09:51 +0000)
committer  Matthew Dillon <dillon@dragonflybsd.org>
Sun, 10 Feb 2008 09:51:01 +0000 (09:51 +0000)
* Implement a zoned blockmap.  Separate B-Tree nodes, records, small blocks
  of data, and large blocks of data into their own zones.  Use 8MB large
  blocks, 32-byte blockmap entry structures, and two layers to support
  59 bits (512 petabytes).
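
  As a rough, standalone illustration (not taken from the diff; the helper
  below is hypothetical, though the constant values mirror
  HAMMER_LARGEBLOCK_BITS, HAMMER_BLOCKMAP_BITS, and HAMMER_ZONE_DECODE added
  in hammer_disk.h), a blockmap-backed zone offset decomposes like this:

        #include <stdint.h>
        #include <stdio.h>

        #define LARGEBLOCK_BITS 23   /* 8MB large blocks (1 << 23 bytes) */
        #define BLOCKMAP_BITS   18   /* 8MB / 32-byte entries per layer */
        #define RADIX_MASK      ((1 << BLOCKMAP_BITS) - 1)

        int
        main(void)
        {
                uint64_t zone_off = 0x8000000000005000ULL; /* made-up zone-8 offset */
                int zone    = (int)(zone_off >> 60);       /* 4-bit zone field */
                int layer2  = (int)((zone_off >> (LARGEBLOCK_BITS +
                                    BLOCKMAP_BITS)) & RADIX_MASK);
                int layer1  = (int)((zone_off >> LARGEBLOCK_BITS) & RADIX_MASK);
                uint64_t off = zone_off & ((1ULL << LARGEBLOCK_BITS) - 1);

                /* 18 + 18 + 23 = 59 addressable bits per zone (512 petabytes) */
                printf("zone=%d layer2=%d layer1=%d in-block=%llu\n",
                       zone, layer2, layer1, (unsigned long long)off);
                return(0);
        }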

* Create a temporary freeblock allocator so the blockmap can be tested.
  It just allocates sequentially and asserts when it hits the end of the
  volume.  This will be replaced with a real freeblock allocator soon.
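
  In sketch form (a minimal illustration assuming the 8MB
  HAMMER_LARGEBLOCK_SIZE; the struct and helper names are made up, but the
  logic mirrors the alloc_bigblock()/vol0_free_off code added below), the
  temporary allocator is just a bump pointer:

        #include <assert.h>
        #include <stdint.h>

        #define LARGEBLOCK_SIZE (8192 * 1024)

        struct vol_sketch {
                uint64_t free_off;   /* next free big-block, cf. vol0_free_off */
                uint64_t buf_bytes;  /* vol_buf_end - vol_buf_beg */
        };

        /* bump-pointer allocation of one 8MB big-block; nothing is ever freed */
        uint64_t
        alloc_bigblock_sketch(struct vol_sketch *v)
        {
                uint64_t off = v->free_off;

                v->free_off += LARGEBLOCK_SIZE;
                assert(v->free_off <= v->buf_bytes); /* out of room: assert */
                return(off);
        }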

* Clean up some of the mess I created with the temporary fifo mechanism
  that had been put in place to test the major rewiring in 27.

* Adjust newfs_hammer.  The 'hammer' utility has not yet been adjusted
  (it can't decode blockmaps yet but will soon).

19 files changed:
sbin/hammer/hammer_util.h
sbin/hammer/ondisk.c
sbin/newfs_hammer/newfs_hammer.c
sys/conf/files
sys/vfs/hammer/Makefile
sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_blockmap.c [new file with mode: 0644]
sys/vfs/hammer/hammer_btree.c
sys/vfs/hammer/hammer_btree.h
sys/vfs/hammer/hammer_cursor.c
sys/vfs/hammer/hammer_cursor.h
sys/vfs/hammer/hammer_disk.h
sys/vfs/hammer/hammer_freemap.c [new file with mode: 0644]
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_object.c
sys/vfs/hammer/hammer_ondisk.c
sys/vfs/hammer/hammer_spike.c [deleted file]
sys/vfs/hammer/hammer_vfsops.c
sys/vfs/hammer/hammer_vnops.c

diff --git a/sbin/hammer/hammer_util.h b/sbin/hammer/hammer_util.h
index e343f1d..81b1c71 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sbin/hammer/hammer_util.h,v 1.8 2008/02/08 08:30:56 dillon Exp $
+ * $DragonFly: src/sbin/hammer/hammer_util.h,v 1.9 2008/02/10 09:50:55 dillon Exp $
  */
 
 #include <sys/types.h>
@@ -105,15 +105,17 @@ struct volume_info *setup_volume(int32_t vol_no, const char *filename,
                                int isnew, int oflags);
 struct volume_info *get_volume(int32_t vol_no);
 struct buffer_info *get_buffer(hammer_off_t buf_offset, int isnew);
+void *get_buffer_data(hammer_off_t buf_offset, struct buffer_info **bufferp,
+                               int isnew);
 hammer_node_ondisk_t get_node(hammer_off_t node_offset,
                                struct buffer_info **bufp);
 
 void rel_volume(struct volume_info *volume);
 void rel_buffer(struct buffer_info *buffer);
 
+void format_blockmap(hammer_blockmap_entry_t blockmap, hammer_off_t zone_off);
 void *alloc_btree_element(hammer_off_t *offp);
 hammer_record_ondisk_t alloc_record_element(hammer_off_t *offp,
-                               u_int8_t rec_type, int32_t rec_len,
                                int32_t data_len, void **datap);
 int hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2);
 
diff --git a/sbin/hammer/ondisk.c b/sbin/hammer/ondisk.c
index a009b41..1a87884 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sbin/hammer/ondisk.c,v 1.10 2008/02/08 08:30:56 dillon Exp $
+ * $DragonFly: src/sbin/hammer/ondisk.c,v 1.11 2008/02/10 09:50:55 dillon Exp $
  */
 
 #include <sys/types.h>
 #include <fcntl.h>
 #include "hammer_util.h"
 
+static void *alloc_blockmap(int zone, int bytes, hammer_off_t *result_offp,
+                      struct buffer_info **bufferp);
+static hammer_off_t alloc_bigblock(void);
+#if 0
 static void init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type);
 static hammer_off_t hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes,
                        struct buffer_info **bufp, u_int16_t hdr_type);
-#if 0
 static void readhammerbuf(struct volume_info *vol, void *data,
                        int64_t offset);
 #endif
@@ -126,7 +129,7 @@ setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags)
        vol->vol_no = vol_no;
 
        if (isnew) {
-               init_fifo_head(&ondisk->head, HAMMER_HEAD_TYPE_VOL);
+               /*init_fifo_head(&ondisk->head, HAMMER_HEAD_TYPE_VOL);*/
                vol->cache.modified = 1;
         }
 
@@ -242,6 +245,19 @@ rel_buffer(struct buffer_info *buffer)
        }
 }
 
+void *
+get_buffer_data(hammer_off_t buf_offset, struct buffer_info **bufferp,
+               int isnew)
+{
+       struct buffer_info *buffer;
+
+       if (*bufferp) {
+               rel_buffer(*bufferp);
+       }
+       buffer = *bufferp = get_buffer(buf_offset, isnew);
+       return((char *)buffer->ondisk + ((int32_t)buf_offset & HAMMER_BUFMASK));
+}
+
 /*
  * Retrieve a pointer to a B-Tree node given a cluster offset.  The underlying
  * bufp is freed if non-NULL and a referenced buffer is loaded into it.
@@ -267,35 +283,38 @@ get_node(hammer_off_t node_offset, struct buffer_info **bufp)
 void *
 alloc_btree_element(hammer_off_t *offp)
 {
-       struct buffer_info *buf;
-       void *item;
-
-       *offp = hammer_alloc_fifo(sizeof(struct hammer_node_ondisk), 0,
-                                 &buf, HAMMER_HEAD_TYPE_BTREE);
-       item = (char *)buf->ondisk + ((int32_t)*offp & HAMMER_BUFMASK);
-       /* XXX buf not released, ptr remains valid */
-       return(item);
+       struct buffer_info *buffer = NULL;
+       hammer_node_ondisk_t node;
+
+       node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node),
+                             offp, &buffer);
+       bzero(node, sizeof(*node));
+       /* XXX buffer not released, pointer remains valid */
+       return(node);
 }
 
 hammer_record_ondisk_t
-alloc_record_element(hammer_off_t *offp, u_int8_t rec_type,
-                    int32_t rec_len, int32_t data_len, void **datap)
+alloc_record_element(hammer_off_t *offp, int32_t data_len, void **datap)
 {
-       struct buffer_info *buf;
+       struct buffer_info *record_buffer = NULL;
+       struct buffer_info *data_buffer = NULL;
        hammer_record_ondisk_t rec;
-       int32_t aligned_rec_len;
 
-       aligned_rec_len = (rec_len + HAMMER_HEAD_ALIGN_MASK) &
-                         ~HAMMER_HEAD_ALIGN_MASK;
+       rec = alloc_blockmap(HAMMER_ZONE_RECORD_INDEX, sizeof(*rec),
+                            offp, &record_buffer);
+       bzero(rec, sizeof(*rec));
 
-       *offp = hammer_alloc_fifo(aligned_rec_len, data_len, &buf,
-                                 HAMMER_HEAD_TYPE_RECORD);
-       rec = (void *)((char *)buf->ondisk + ((int32_t)*offp & HAMMER_BUFMASK));
-       rec->base.base.rec_type = rec_type;
-       if (data_len) {
-               rec->base.data_off = *offp + aligned_rec_len;
+       if (data_len >= HAMMER_BUFSIZE) {
+               assert(data_len <= HAMMER_BUFSIZE); /* just one buffer */
+               *datap = alloc_blockmap(HAMMER_ZONE_LARGE_DATA_INDEX, data_len,
+                                       &rec->base.data_off, &data_buffer);
                rec->base.data_len = data_len;
-               *datap = (char *)rec + aligned_rec_len;
+               bzero(*datap, data_len);
+       } else if (data_len) {
+               *datap = alloc_blockmap(HAMMER_ZONE_SMALL_DATA_INDEX, data_len,
+                                       &rec->base.data_off, &data_buffer);
+               rec->base.data_len = data_len;
+               bzero(*datap, data_len);
        } else {
                *datap = NULL;
        }
@@ -303,14 +322,117 @@ alloc_record_element(hammer_off_t *offp, u_int8_t rec_type,
        return(rec);
 }
 
+/*
+ * Format a new blockmap
+ */
+void
+format_blockmap(hammer_blockmap_entry_t blockmap, hammer_off_t zone_off)
+{
+       blockmap->phys_offset = alloc_bigblock();
+       blockmap->alloc_offset = zone_off;
+}
+
+static
+void *
+alloc_blockmap(int zone, int bytes, hammer_off_t *result_offp,
+              struct buffer_info **bufferp)
+{
+       struct buffer_info *buffer;
+       struct volume_info *volume;
+       hammer_blockmap_entry_t rootmap;
+       hammer_blockmap_entry_t blockmap;
+       void *ptr;
+       int i;
+
+       volume = get_volume(RootVolNo);
+
+       rootmap = &volume->ondisk->vol0_blockmap[zone];
+
+       /*
+        * Alignment and buffer-boundary issues
+        */
+       bytes = (bytes + 7) & ~7;
+       if ((rootmap->phys_offset ^ (rootmap->phys_offset + bytes - 1)) &
+           ~HAMMER_BUFMASK64) {
+               volume->cache.modified = 1;
+               rootmap->phys_offset = (rootmap->phys_offset + bytes) &
+                                      ~HAMMER_BUFMASK64;
+       }
+
+       /*
+        * Dive layer 2
+        */
+       i = (rootmap->alloc_offset >> (HAMMER_LARGEBLOCK_BITS +
+            HAMMER_BLOCKMAP_BITS)) & HAMMER_BLOCKMAP_RADIX_MASK;
+
+       blockmap = get_buffer_data(rootmap->phys_offset + i * sizeof(*blockmap),
+                                  bufferp, 0);
+       buffer = *bufferp;
+       if ((rootmap->alloc_offset & HAMMER_LARGEBLOCK_LAYER1_MASK) == 0) {
+               buffer->cache.modified = 1;
+               bzero(blockmap, sizeof(*blockmap));
+               blockmap->phys_offset = alloc_bigblock();
+       }
+
+       /*
+        * Dive layer 1
+        */
+       i = (rootmap->alloc_offset >> HAMMER_LARGEBLOCK_BITS) &
+           HAMMER_BLOCKMAP_RADIX_MASK;
+
+       blockmap = get_buffer_data(
+               blockmap->phys_offset + i * sizeof(*blockmap), bufferp, 0);
+       buffer = *bufferp;
+
+       if ((rootmap->alloc_offset & HAMMER_LARGEBLOCK_MASK64) == 0) {
+               buffer->cache.modified = 1;
+               bzero(blockmap, sizeof(*blockmap));
+               blockmap->phys_offset = alloc_bigblock();
+               blockmap->bytes_free = HAMMER_LARGEBLOCK_SIZE;
+       }
+
+       buffer->cache.modified = 1;
+       volume->cache.modified = 1;
+       blockmap->bytes_free -= bytes;
+       *result_offp = rootmap->alloc_offset;
+       rootmap->alloc_offset += bytes;
+
+       i = (rootmap->phys_offset >> HAMMER_BUFFER_BITS) &
+           HAMMER_BUFFERS_PER_LARGEBLOCK_MASK;
+       ptr = get_buffer_data(
+               blockmap->phys_offset + i * HAMMER_BUFSIZE +
+                ((int32_t)*result_offp & HAMMER_BUFMASK), bufferp, 0);
+       buffer->cache.modified = 1;
+
+       rel_volume(volume);
+       return(ptr);
+}
+
+static
+hammer_off_t
+alloc_bigblock(void)
+{
+       struct volume_info *volume;
+       hammer_off_t result_offset;
+
+       volume = get_volume(RootVolNo);
+       result_offset = volume->ondisk->vol0_free_off;
+       volume->ondisk->vol0_free_off += HAMMER_LARGEBLOCK_SIZE;
+       if ((volume->ondisk->vol0_free_off & HAMMER_OFF_SHORT_MASK) >
+           (hammer_off_t)(volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg)) {
+               panic("alloc_bigblock: Ran out of room, filesystem too small");
+       }
+       rel_volume(volume);
+       return(result_offset);
+}
+
+#if 0
 /*
  * Reserve space from the FIFO.  Make sure that bytes does not cross a 
  * record boundary.
  *
- * Initialize the fifo header, keep track of the previous entry's size
- * so the reverse poitner can be initialized (using lastBlk), and also
- * store a terminator (used by the recovery code) which will be overwritten
- * by the next allocation.
+ * Zero out base_bytes and initialize the fifo head and tail.  The
+ * data area is not zeroed.
  */
 static
 hammer_off_t
@@ -320,12 +442,12 @@ hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes,
        struct buffer_info *buf;
        struct volume_info *volume;
        hammer_fifo_head_t head;
+       hammer_fifo_tail_t tail;
        hammer_off_t off;
        int32_t aligned_bytes;
-       static u_int32_t lastBlk;
 
-       aligned_bytes = (base_bytes + ext_bytes + HAMMER_HEAD_ALIGN_MASK) &
-                       ~HAMMER_HEAD_ALIGN_MASK;
+       aligned_bytes = (base_bytes + ext_bytes + HAMMER_TAIL_ONDISK_SIZE +
+                        HAMMER_HEAD_ALIGN_MASK) & ~HAMMER_HEAD_ALIGN_MASK;
 
        volume = get_volume(RootVolNo);
        off = volume->ondisk->vol0_fifo_end;
@@ -335,7 +457,7 @@ hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes,
         * only newfs_hammer uses this function.
         */
        assert((off & ~HAMMER_BUFMASK64) ==
-                ((off + aligned_bytes + sizeof(*head)) & ~HAMMER_BUFMASK));
+               ((off + aligned_bytes) & ~HAMMER_BUFMASK));
 
        *bufp = buf = get_buffer(off, 0);
 
@@ -345,27 +467,26 @@ hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes,
        head = (void *)((char *)buf->ondisk + ((int32_t)off & HAMMER_BUFMASK));
        bzero(head, base_bytes);
 
+       head->hdr_signature = HAMMER_HEAD_SIGNATURE;
        head->hdr_type = hdr_type;
-       head->hdr_rev_link = lastBlk;
-       head->hdr_fwd_link = aligned_bytes;
+       head->hdr_size = aligned_bytes;
        head->hdr_seq = volume->ondisk->vol0_next_seq++;
-       lastBlk = head->hdr_fwd_link;
+
+       tail = (void*)((char *)head + aligned_bytes - HAMMER_TAIL_ONDISK_SIZE);
+       tail->tail_signature = HAMMER_TAIL_SIGNATURE;
+       tail->tail_type = hdr_type;
+       tail->tail_size = aligned_bytes;
 
        volume->ondisk->vol0_fifo_end += aligned_bytes;
        volume->cache.modified = 1;
-       head = (void *)((char *)head + aligned_bytes);
-       head->hdr_signature = HAMMER_HEAD_SIGNATURE;
-       head->hdr_type = HAMMER_HEAD_TYPE_TERM;
-       head->hdr_rev_link = lastBlk;
-       head->hdr_fwd_link = 0;
-       head->hdr_crc = 0;
-       head->hdr_seq = volume->ondisk->vol0_next_seq;
 
        rel_volume(volume);
 
        return(off);
 }
 
+#endif
+
 /*
  * Flush various tracking structures to disk
  */
@@ -400,6 +521,7 @@ flush_buffer(struct buffer_info *buffer)
        buffer->cache.modified = 0;
 }
 
+#if 0
 /*
  * Generic buffer initialization
  */
@@ -408,12 +530,13 @@ init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type)
 {
        head->hdr_signature = HAMMER_HEAD_SIGNATURE;
        head->hdr_type = hdr_type;
-       head->hdr_rev_link = 0;
-       head->hdr_fwd_link = 0;
+       head->hdr_size = 0;
        head->hdr_crc = 0;
        head->hdr_seq = 0;
 }
 
+#endif
+
 #if 0
 /*
  * Core I/O operations
diff --git a/sbin/newfs_hammer/newfs_hammer.c b/sbin/newfs_hammer/newfs_hammer.c
index d85d527..72a36b4 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sbin/newfs_hammer/newfs_hammer.c,v 1.17 2008/02/08 08:30:58 dillon Exp $
+ * $DragonFly: src/sbin/newfs_hammer/newfs_hammer.c,v 1.18 2008/02/10 09:50:56 dillon Exp $
  */
 
 #include "newfs_hammer.h"
@@ -57,6 +57,8 @@ main(int ac, char **av)
         * if it gets broken!
         */
        assert(sizeof(struct hammer_volume_ondisk) <= HAMMER_BUFSIZE);
+       assert(sizeof(union hammer_record_ondisk) == HAMMER_RECORD_SIZE);
+       assert(sizeof(struct hammer_blockmap_entry) == 32);
 
        /*
         * Generate a filesysem id and lookup the filesystem type
@@ -346,6 +348,11 @@ format_volume(struct volume_info *vol, int nvols, const char *label)
        vol->vol_alloc += BootAreaSize;
        ondisk->vol_mem_beg = vol->vol_alloc;
        vol->vol_alloc += MemAreaSize;
+
+       /*
+        * The remaining area is the zone 2 buffer allocation area.  These
+        * buffers
+        */
        ondisk->vol_buf_beg = vol->vol_alloc;
        ondisk->vol_buf_end = vol->size & ~(int64_t)HAMMER_BUFMASK;
 
@@ -369,10 +376,21 @@ format_volume(struct volume_info *vol, int nvols, const char *label)
                 * in volume 0.  hammer_off_t must be properly formatted
                 * (see vfs/hammer/hammer_disk.h)
                 */
-               ondisk->vol0_fifo_beg = HAMMER_ENCODE_RAW_BUFFER(0, 0);
-               ondisk->vol0_fifo_end = ondisk->vol0_fifo_beg;
+               ondisk->vol0_free_off = HAMMER_ENCODE_RAW_BUFFER(0, 0);
                ondisk->vol0_next_tid = createtid();
                ondisk->vol0_next_seq = 1;
+               format_blockmap(
+                       &ondisk->vol0_blockmap[HAMMER_ZONE_BTREE_INDEX],
+                       HAMMER_ZONE_BTREE);
+               format_blockmap(
+                       &ondisk->vol0_blockmap[HAMMER_ZONE_RECORD_INDEX],
+                       HAMMER_ZONE_RECORD);
+               format_blockmap(
+                       &ondisk->vol0_blockmap[HAMMER_ZONE_LARGE_DATA_INDEX],
+                       HAMMER_ZONE_LARGE_DATA);
+               format_blockmap(
+                       &ondisk->vol0_blockmap[HAMMER_ZONE_SMALL_DATA_INDEX],
+                       HAMMER_ZONE_SMALL_DATA);
 
                ondisk->vol0_btree_root = format_root();
                ++ondisk->vol0_stat_inodes;     /* root inode */
@@ -395,9 +413,7 @@ format_root(void)
        hammer_btree_elm_t elm;
 
        bnode = alloc_btree_element(&btree_off);
-       rec = alloc_record_element(&rec_off, HAMMER_RECTYPE_INODE,
-                                  sizeof(rec->inode), sizeof(*idata),
-                                  (void **)&idata);
+       rec = alloc_record_element(&rec_off, sizeof(*idata), (void **)&idata);
 
        /*
         * Populate the inode data and inode record for the root directory.
@@ -414,7 +430,7 @@ format_root(void)
        rec->base.base.obj_type = HAMMER_OBJTYPE_DIRECTORY;
        /* rec->base.data_offset - initialized by alloc_record_element */
        /* rec->base.data_len    - initialized by alloc_record_element */
-       rec->base.head.hdr_crc = crc32(idata, sizeof(*idata));
+       rec->base.data_crc = crc32(idata, sizeof(*idata));
        rec->inode.ino_atime  = rec->base.base.create_tid;
        rec->inode.ino_mtime  = rec->base.base.create_tid;
        rec->inode.ino_size   = 0;
@@ -432,7 +448,7 @@ format_root(void)
        elm->leaf.rec_offset = rec_off;
        elm->leaf.data_offset = rec->base.data_off;
        elm->leaf.data_len = rec->base.data_len;
-       elm->leaf.data_crc = rec->base.head.hdr_crc;
+       elm->leaf.data_crc = rec->base.data_crc;
        return(btree_off);
 }
 
diff --git a/sys/conf/files b/sys/conf/files
index 67871b7..e18a2de 100644
@@ -1,5 +1,5 @@
 # $FreeBSD: src/sys/conf/files,v 1.340.2.137 2003/06/04 17:10:30 sam Exp $
-# $DragonFly: src/sys/conf/files,v 1.203 2008/02/08 08:30:55 dillon Exp $
+# $DragonFly: src/sys/conf/files,v 1.204 2008/02/10 09:50:59 dillon Exp $
 #
 # The long compile-with and dependency lines are required because of
 # limitations in config: backslash-newline doesn't work in strings, and
@@ -1139,9 +1139,10 @@ vfs/hammer/hammer_btree.c        optional hammer
 vfs/hammer/hammer_io.c         optional hammer
 vfs/hammer/hammer_transaction.c        optional hammer
 vfs/hammer/hammer_object.c     optional hammer
-vfs/hammer/hammer_spike.c      optional hammer
 vfs/hammer/hammer_recover.c    optional hammer
 vfs/hammer/hammer_ioctl.c      optional hammer
+vfs/hammer/hammer_blockmap.c   optional hammer
+vfs/hammer/hammer_freemap.c    optional hammer
 vm/default_pager.c             standard
 vm/device_pager.c              standard
 vm/phys_pager.c                        standard
diff --git a/sys/vfs/hammer/Makefile b/sys/vfs/hammer/Makefile
index af5b718..afcffb0 100644
@@ -1,12 +1,12 @@
 #
-# $DragonFly: src/sys/vfs/hammer/Makefile,v 1.7 2008/02/08 08:30:59 dillon Exp $
+# $DragonFly: src/sys/vfs/hammer/Makefile,v 1.8 2008/02/10 09:51:01 dillon Exp $
 
 KMOD=  hammer
 SRCS=  hammer_vfsops.c hammer_vnops.c hammer_inode.c \
        hammer_subs.c hammer_ondisk.c hammer_io.c \
        hammer_cursor.c hammer_btree.c hammer_transaction.c \
-       hammer_object.c hammer_spike.c \
-       hammer_recover.c hammer_ioctl.c
+       hammer_object.c hammer_recover.c hammer_ioctl.c \
+       hammer_blockmap.c hammer_freemap.c
 
 NOMAN=
 
diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h
index 2ca209e..811c210 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.35 2008/02/08 08:30:59 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.36 2008/02/10 09:51:01 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -217,7 +217,6 @@ struct hammer_record {
        union hammer_record_ondisk      rec;
        union hammer_data_ondisk        *data;
        int                             flags;
-       int                             rec_len;
        int                             blocked;
 };
 
@@ -226,7 +225,7 @@ typedef struct hammer_record *hammer_record_t;
 #define HAMMER_RECF_ALLOCDATA          0x0001
 #define HAMMER_RECF_ONRBTREE           0x0002
 #define HAMMER_RECF_DELETED            0x0004
-#define HAMMER_RECF_UNUSED0008         0x0008
+#define HAMMER_RECF_INBAND             0x0008
 #define HAMMER_RECF_SYNCING            0x0010
 #define HAMMER_RECF_WANTED             0x0020
 
@@ -287,7 +286,6 @@ typedef struct hammer_io *hammer_io_t;
 struct hammer_volume {
        struct hammer_io io;
        RB_ENTRY(hammer_volume) rb_node;
-       struct hammer_nod_rb_tree rb_nods_root;
        struct hammer_buf_rb_tree rb_bufs_root;
        struct hammer_volume_ondisk *ondisk;
        int32_t vol_no;
@@ -335,8 +333,8 @@ struct hammer_node {
        TAILQ_ENTRY(hammer_node) entry;         /* per-buffer linkage */
        RB_ENTRY(hammer_node)   rb_node;        /* per-cluster linkage */
        hammer_off_t            node_offset;    /* full offset spec */
+       struct hammer_mount     *hmp;
        struct hammer_buffer    *buffer;        /* backing buffer */
-       struct hammer_volume    *volume;        /* backing volume */
        hammer_node_ondisk_t    ondisk;         /* ptr to on-disk structure */
        struct hammer_node      **cache1;       /* passive cache(s) */
        struct hammer_node      **cache2;
@@ -383,6 +381,7 @@ struct hammer_mount {
        /*struct vnode *rootvp;*/
        struct hammer_ino_rb_tree rb_inos_root;
        struct hammer_vol_rb_tree rb_vols_root;
+       struct hammer_nod_rb_tree rb_nods_root;
        struct hammer_volume *rootvol;
        struct hammer_base_elm root_btree_beg;
        struct hammer_base_elm root_btree_end;
@@ -396,6 +395,7 @@ struct hammer_mount {
        hammer_tid_t asof;
        u_int32_t namekey_iterator;
        struct netexport export;
+       struct lock blockmap_lock;
 };
 
 typedef struct hammer_mount    *hammer_mount_t;
@@ -445,6 +445,7 @@ int hammer_install_volume(hammer_mount_t hmp, const char *volname);
 int    hammer_ip_lookup(hammer_cursor_t cursor, hammer_inode_t ip);
 int    hammer_ip_first(hammer_cursor_t cursor, hammer_inode_t ip);
 int    hammer_ip_next(hammer_cursor_t cursor);
+int    hammer_ip_resolve_record_and_data(hammer_cursor_t cursor);
 int    hammer_ip_resolve_data(hammer_cursor_t cursor);
 int    hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid);
 int    hammer_delete_at_cursor(hammer_cursor_t cursor, int64_t *stat_bytes);
@@ -455,7 +456,7 @@ int hammer_sync_volume(hammer_volume_t volume, void *data);
 int    hammer_sync_buffer(hammer_buffer_t buffer, void *data);
 
 hammer_record_t
-       hammer_alloc_mem_record(hammer_inode_t ip, int32_t rec_len);
+       hammer_alloc_mem_record(hammer_inode_t ip);
 void   hammer_rel_mem_record(hammer_record_t record);
 
 int    hammer_cursor_up(hammer_cursor_t cursor);
@@ -550,20 +551,22 @@ void hammer_dup_buffer(struct hammer_buffer **bufferp,
 hammer_node_t hammer_alloc_btree(hammer_mount_t hmp, int *errorp);
 void *hammer_alloc_record(hammer_mount_t hmp,
                        hammer_off_t *rec_offp, u_int8_t rec_type,
-                       int32_t rec_len, struct hammer_buffer **rec_bufferp,
-                       hammer_off_t *data_offp, int32_t data_len,
-                       void **data1p, void **data2p, int32_t *data2_index,
-                       struct hammer_buffer **data2_bufferp,
-                       int *errorp);
-void hammer_free_fifo(hammer_mount_t hmp, hammer_off_t fifo_offset);
-void hammer_unwind_fifo(hammer_mount_t hmp, hammer_off_t rec_offset);
-void hammer_init_fifo(hammer_fifo_head_t head, u_int16_t type);
+                       struct hammer_buffer **rec_bufferp,
+                       int32_t data_len, void **datap,
+                       struct hammer_buffer **data_bufferp, int *errorp);
 int hammer_generate_undo(hammer_mount_t hmp, hammer_off_t undo_offset,
                        void *base, int len);
 
 void hammer_put_volume(struct hammer_volume *volume, int flush);
 void hammer_put_buffer(struct hammer_buffer *buffer, int flush);
 
+hammer_off_t hammer_freemap_alloc(hammer_mount_t hmp, int *errorp);
+hammer_off_t hammer_blockmap_alloc(hammer_mount_t hmp, int zone,
+                       int bytes, int *errorp);
+int hammer_blockmap_free(hammer_mount_t hmp, hammer_off_t bmap_off, int bytes);
+hammer_off_t hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t bmap_off,
+                       int *errorp);
+
 void hammer_start_transaction(struct hammer_transaction *trans,
                              struct hammer_mount *hmp);
 void hammer_start_transaction_tid(struct hammer_transaction *trans,
@@ -595,8 +598,6 @@ int  hammer_ip_sync_data(struct hammer_transaction *trans,
                        hammer_inode_t ip, int64_t offset,
                        void *data, int bytes);
 int  hammer_ip_sync_record(hammer_record_t rec);
-int  hammer_write_record(hammer_cursor_t cursor, hammer_record_ondisk_t rec,
-                       int32_t rec_len, void *data, int cursor_flags);
 
 int hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag,
                        struct ucred *cred);
diff --git a/sys/vfs/hammer/hammer_blockmap.c b/sys/vfs/hammer/hammer_blockmap.c
new file mode 100644
index 0000000..517cb38
--- /dev/null
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
+ * 
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * 
+ * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.1 2008/02/10 09:51:01 dillon Exp $
+ */
+
+/*
+ * HAMMER blockmap
+ */
+#include "hammer.h"
+
+/*
+ * Allocate bytes from a zone
+ */
+hammer_off_t
+hammer_blockmap_alloc(hammer_mount_t hmp, int zone, int bytes, int *errorp)
+{
+       hammer_volume_t root_volume;
+       hammer_blockmap_entry_t rootmap;
+       hammer_blockmap_entry_t blockmap;
+       hammer_buffer_t buffer = NULL;
+       hammer_off_t alloc_offset;
+       hammer_off_t result_offset;
+       int32_t i;
+
+       KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
+       root_volume = hammer_get_root_volume(hmp, errorp);
+       if (*errorp)
+               return(0);
+       rootmap = &root_volume->ondisk->vol0_blockmap[zone];
+       KKASSERT(rootmap->phys_offset != 0);
+       KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
+                HAMMER_ZONE_RAW_BUFFER_INDEX);
+       KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);
+
+       /*
+        * Deal with alignment and buffer-boundary issues.
+        *
+        * Be careful, certain primary alignments are used below to allocate
+        * new blockmap blocks.
+        */
+       bytes = (bytes + 7) & ~7;
+       KKASSERT(bytes <= HAMMER_BUFSIZE);
+
+       lockmgr(&hmp->blockmap_lock, LK_EXCLUSIVE|LK_RETRY);
+       alloc_offset = rootmap->alloc_offset;
+       result_offset = alloc_offset + bytes;
+       if ((alloc_offset ^ (result_offset - 1)) & ~HAMMER_BUFMASK64) {
+               alloc_offset = (result_offset - 1) & ~HAMMER_BUFMASK64;
+       }
+
+       /*
+        * Dive layer 2, each entry is a layer-1 entry.  If we are at the
+        * start of a new entry, allocate a layer 1 large-block
+        */
+       i = (alloc_offset >> (HAMMER_LARGEBLOCK_BITS +
+            HAMMER_BLOCKMAP_BITS)) & HAMMER_BLOCKMAP_RADIX_MASK;
+
+       blockmap = hammer_bread(hmp, rootmap->phys_offset + i * sizeof(*blockmap), errorp, &buffer);
+       KKASSERT(*errorp == 0);
+
+       if ((alloc_offset & HAMMER_LARGEBLOCK_LAYER1_MASK) == 0) {
+               hammer_modify_buffer(buffer, blockmap, sizeof(*blockmap));
+               bzero(blockmap, sizeof(*blockmap));
+               blockmap->phys_offset = hammer_freemap_alloc(hmp, errorp);
+               KKASSERT(*errorp == 0);
+               kprintf("ALLOC LAYER2 %016llx\n", blockmap->phys_offset);
+       }
+#if 0
+       kprintf("blkmap_alloc %016llx [%2d@%016llx]", alloc_offset, i, blockmap->phys_offset);
+#endif
+       KKASSERT(blockmap->phys_offset);
+
+       /*
+        * Dive layer 1, each entry is a large-block.  If we are at the
+        * start of a new entry, allocate a large-block.
+        */
+       i = (alloc_offset >> HAMMER_LARGEBLOCK_BITS) &
+           HAMMER_BLOCKMAP_RADIX_MASK;
+
+       blockmap = hammer_bread(hmp, blockmap->phys_offset + i * sizeof(*blockmap), errorp, &buffer);
+       KKASSERT(*errorp == 0);
+
+       if ((alloc_offset & HAMMER_LARGEBLOCK_MASK64) == 0) {
+               hammer_modify_buffer(buffer, blockmap, sizeof(*blockmap));
+               /* XXX rootmap changed */
+               bzero(blockmap, sizeof(*blockmap));
+               blockmap->phys_offset = hammer_freemap_alloc(hmp, errorp);
+               blockmap->bytes_free = HAMMER_LARGEBLOCK_SIZE;
+               KKASSERT(*errorp == 0);
+               kprintf("ALLOC LAYER1 %016llx\n", blockmap->phys_offset);
+       }
+
+       hammer_modify_buffer(buffer, blockmap, sizeof(*blockmap));
+       blockmap->bytes_free -= bytes;
+#if 0
+       kprintf("[%2d@%016llx] free=%d phys %016llx\n", i, blockmap->phys_offset, blockmap->bytes_free, blockmap->phys_offset + (result_offset & HAMMER_LARGEBLOCK_MASK64));
+#endif
+
+       hammer_modify_volume(root_volume, &rootmap->alloc_offset,
+                            sizeof(rootmap->alloc_offset));
+       result_offset = alloc_offset;
+       rootmap->alloc_offset = alloc_offset + bytes;
+
+       /*
+        * Calling bnew on the buffer backing the allocation gets it into
+        * the system without a disk read.
+        *
+        * XXX This can only be done when appending into a new buffer.
+        */
+       if (((int32_t)result_offset & HAMMER_BUFMASK) == 0) {
+               hammer_bnew(hmp, blockmap->phys_offset + (result_offset & HAMMER_LARGEBLOCK_MASK64), errorp, &buffer);
+       }
+
+       if (buffer)
+               hammer_rel_buffer(buffer, 0);
+       hammer_rel_volume(root_volume, 0);
+       lockmgr(&hmp->blockmap_lock, LK_RELEASE);
+       return(result_offset);
+}
+
+/*
+ * Free (offset,bytes) in a zone
+ */
+int
+hammer_blockmap_free(hammer_mount_t hmp, hammer_off_t bmap_off, int bytes)
+{
+       kprintf("hammer_blockmap_free %016llx %d\n", bmap_off, bytes);
+       return(0);
+}
+
+/*
+ * Lookup a blockmap offset.
+ */
+hammer_off_t
+hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t bmap_off, int *errorp)
+{
+       hammer_volume_t root_volume;
+       hammer_blockmap_entry_t rootmap;
+       hammer_blockmap_entry_t blockmap;
+       hammer_buffer_t buffer = NULL;
+       hammer_off_t result_offset;
+       int zone;
+       int i;
+
+       zone = HAMMER_ZONE_DECODE(bmap_off);
+       KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
+       root_volume = hammer_get_root_volume(hmp, errorp);
+       if (*errorp)
+               return(0);
+       rootmap = &root_volume->ondisk->vol0_blockmap[zone];
+       KKASSERT(rootmap->phys_offset != 0);
+       KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) ==
+                HAMMER_ZONE_RAW_BUFFER_INDEX);
+       KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone);
+
+       if (bmap_off >= rootmap->alloc_offset) {
+               panic("hammer_blockmap_lookup: %016llx beyond EOF %016llx",
+                     bmap_off, rootmap->alloc_offset);
+               result_offset = 0;
+               goto done;
+       }
+
+       /*
+        * Dive layer 2, each entry is a layer-1 entry.  If we are at the
+        * start of a new entry, allocate a layer 1 large-block
+        */
+       i = (bmap_off >> (HAMMER_LARGEBLOCK_BITS +
+            HAMMER_BLOCKMAP_BITS)) & HAMMER_BLOCKMAP_RADIX_MASK;
+
+       blockmap = hammer_bread(hmp, rootmap->phys_offset + i * sizeof(*blockmap), errorp, &buffer);
+       KKASSERT(*errorp == 0);
+       KKASSERT(blockmap->phys_offset);
+
+       /*
+        * Dive layer 1, each entry is a large-block.  If we are at the
+        * start of a new entry, allocate a large-block.
+        */
+       i = (bmap_off >> HAMMER_LARGEBLOCK_BITS) & HAMMER_BLOCKMAP_RADIX_MASK;
+
+       blockmap = hammer_bread(hmp, blockmap->phys_offset + i * sizeof(*blockmap), errorp, &buffer);
+       KKASSERT(*errorp == 0);
+       KKASSERT(blockmap->phys_offset);
+       result_offset = blockmap->phys_offset +
+                       (bmap_off & HAMMER_LARGEBLOCK_MASK64);
+done:
+       if (buffer)
+               hammer_rel_buffer(buffer, 0);
+       hammer_rel_volume(root_volume, 0);
+       if (hammer_debug_general & 0x0800) {
+               kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
+                       bmap_off, result_offset);
+       }
+       return(result_offset);
+}
+
diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c
index 812c044..20318b8 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.29 2008/02/08 08:30:59 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.30 2008/02/10 09:51:01 dillon Exp $
  */
 
 /*
@@ -569,7 +569,6 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags)
        hammer_btree_elm_t elm;
        hammer_off_t rec_off;
        hammer_off_t data_off;
-       hammer_off_t data_end;
        int error;
 
        /*
@@ -578,10 +577,8 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags)
         */
        node = cursor->node->ondisk;
        elm = &node->elms[cursor->index];
-       cursor->data1 = NULL;
-       cursor->data2 = NULL;
-       cursor->data_split = 0;
-       hmp = cursor->node->volume->hmp;
+       cursor->data = NULL;
+       hmp = cursor->node->hmp;
        flags |= cursor->flags & HAMMER_CURSOR_DATAEXTOK;
 
        /*
@@ -597,7 +594,6 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags)
        if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD)
                flags &= ~HAMMER_CURSOR_GET_DATA;
        data_off = elm->leaf.data_offset;
-       data_end = data_off + elm->leaf.data_len - 1;
        if (data_off == 0)
                flags &= ~HAMMER_CURSOR_GET_DATA;
        rec_off = elm->leaf.rec_offset;
@@ -618,44 +614,17 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags)
        if ((flags & HAMMER_CURSOR_GET_DATA) && error == 0) {
                if ((rec_off ^ data_off) & ~HAMMER_BUFMASK64) {
                        /*
-                        * The data is not in the same buffer as the last
-                        * record we cached, but it could still be embedded
-                        * in a record.  Note that we may not have loaded the
-                        * record's buffer above, depending on flags.
-                        *
-                        * Assert that the data does not cross into additional
-                        * buffers.
+                        * Data and record are in different buffers.
                         */
-                       cursor->data_split = 0;
-                       cursor->data2 = hammer_bread(hmp, data_off,
-                                                 &error, &cursor->data_buffer);
-                       KKASSERT(((data_off ^ data_end) &
-                                ~HAMMER_BUFMASK64) == 0);
+                       cursor->data = hammer_bread(hmp, data_off, &error,
+                                                   &cursor->data_buffer);
                } else {
                        /*
-                        * The data starts in same buffer as record.  Check
-                        * to determine if the data extends into another
-                        * buffer.
+                        * Data resides in same buffer as record.
                         */
-                       cursor->data1 = (void *)
+                       cursor->data = (void *)
                                ((char *)cursor->record_buffer->ondisk +
                                ((int32_t)data_off & HAMMER_BUFMASK));
-                       if ((data_off ^ data_end) & ~HAMMER_BUFMASK64) {
-                               cursor->data_split = HAMMER_BUFSIZE -
-                                       ((int32_t)data_off & HAMMER_BUFMASK);
-                               if (flags & HAMMER_CURSOR_DATAEXTOK) {
-                                       /*
-                                        * NOTE: Assumes data buffer does not
-                                        * cross a volume boundary.
-                                        */
-                                       cursor->data2 = hammer_bread(hmp, data_off + cursor->data_split,
-                                                                 &error, &cursor->data_buffer);
-                               } else {
-                                       panic("Illegal data extension");
-                               }
-                       } else {
-                               cursor->data_split = elm->leaf.data_len;
-                       }
                }
        }
        return(error);
@@ -1292,7 +1261,7 @@ btree_split_internal(hammer_cursor_t cursor)
        split = (ondisk->count + 1) / 2;
        if (cursor->index <= split)
                --split;
-       hmp = node->volume->hmp;
+       hmp = node->hmp;
 
        /*
         * If we are at the root of the filesystem, create a new root node
@@ -1506,7 +1475,7 @@ btree_split_leaf(hammer_cursor_t cursor)
        if (cursor->index <= split)
                --split;
        error = 0;
-       hmp = leaf->volume->hmp;
+       hmp = leaf->hmp;
 
        elm = &ondisk->elms[split];
 
@@ -2044,7 +2013,7 @@ btree_set_parent(hammer_node_t node, hammer_btree_elm_t elm)
        switch(elm->base.btype) {
        case HAMMER_BTREE_TYPE_INTERNAL:
        case HAMMER_BTREE_TYPE_LEAF:
-               child = hammer_get_node(node->volume->hmp,
+               child = hammer_get_node(node->hmp,
                                        elm->internal.subtree_offset, &error);
                if (error == 0) {
                        hammer_modify_node(child);
@@ -2090,7 +2059,7 @@ hammer_btree_lock_children(hammer_cursor_t cursor,
                switch(elm->base.btype) {
                case HAMMER_BTREE_TYPE_INTERNAL:
                case HAMMER_BTREE_TYPE_LEAF:
-                       child = hammer_get_node(node->volume->hmp,
+                       child = hammer_get_node(node->hmp,
                                                elm->internal.subtree_offset,
                                                &error);
                        break;
diff --git a/sys/vfs/hammer/hammer_btree.h b/sys/vfs/hammer/hammer_btree.h
index 08b09de..5dc0e91 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.11 2008/02/08 08:30:59 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.12 2008/02/10 09:51:01 dillon Exp $
  */
 
 /*
@@ -150,9 +150,9 @@ union hammer_btree_elm {
 typedef union hammer_btree_elm *hammer_btree_elm_t;
 
 /*
- * B-Tree node (normal or meta)
+ * B-Tree node (normal or meta)        (16x64 = 1K structure)
  *
- * Each node contains 14 elements.  The last element for an internal node
+ * Each node contains 15 elements.  The last element for an internal node
  * is the right-boundary so internal nodes have one fewer logical elements
  * then leaf nodes.
  *
@@ -169,7 +169,7 @@ typedef union hammer_btree_elm *hammer_btree_elm_t;
  * reserved for left/right leaf linkage fields, flags, and other future
  * features.
  */
-#define HAMMER_BTREE_LEAF_ELMS 14
+#define HAMMER_BTREE_LEAF_ELMS 15
 #define HAMMER_BTREE_INT_ELMS  (HAMMER_BTREE_LEAF_ELMS - 1)
 
 /*
@@ -187,7 +187,8 @@ struct hammer_node_ondisk {
        /*
         * B-Tree node header (64 bytes)
         */
-       struct hammer_fifo_head head;
+       u_int32_t       signature;
+       u_int32_t       crc;
        hammer_off_t    parent;         /* 0 if at root of cluster */
        int32_t         count;
        u_int8_t        type;
@@ -197,6 +198,7 @@ struct hammer_node_ondisk {
        hammer_off_t    reserved04;     /* future link_right */
        hammer_off_t    reserved05;
        hammer_off_t    reserved06;
+       hammer_off_t    reserved07;
 
        /*
         * Element array.  Internal nodes have one less logical element
diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c
index db3c603..eddfde2 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.17 2008/02/08 08:30:59 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.18 2008/02/10 09:51:01 dillon Exp $
  */
 
 /*
@@ -142,9 +142,7 @@ hammer_done_cursor(hammer_cursor_t cursor)
                cursor->deadlk_node = NULL;
        }
 
-       cursor->data1 = NULL;
-       cursor->data2 = NULL;
-       cursor->data_split = 0;
+       cursor->data = NULL;
        cursor->record = NULL;
        cursor->left_bound = NULL;
        cursor->right_bound = NULL;
@@ -241,11 +239,11 @@ hammer_load_cursor_parent(hammer_cursor_t cursor)
        int error;
        int i;
 
-       hmp = cursor->node->volume->hmp;
+       hmp = cursor->node->hmp;
 
        if (cursor->node->ondisk->parent) {
                node = cursor->node;
-               parent = hammer_get_node(node->volume->hmp,
+               parent = hammer_get_node(node->hmp,
                                         node->ondisk->parent, &error);
                if (error)
                        return(error);
@@ -354,11 +352,10 @@ hammer_cursor_down(hammer_cursor_t cursor)
                KKASSERT(elm->internal.subtree_offset != 0);
                cursor->left_bound = &elm[0].internal.base;
                cursor->right_bound = &elm[1].internal.base;
-               node = hammer_get_node(node->volume->hmp,
-                                      elm->internal.subtree_offset,
+               node = hammer_get_node(node->hmp, elm->internal.subtree_offset,
                                       &error);
                if (error == 0) {
-                       KKASSERT(elm->base.btype == node->ondisk->type);
+                       KASSERT(elm->base.btype == node->ondisk->type, ("BTYPE MISMATCH %c %c NODE %p\n", elm->base.btype, node->ondisk->type, node));
                        if (node->ondisk->parent != cursor->parent->node_offset)
                                panic("node %p %016llx vs %016llx\n", node, node->ondisk->parent, cursor->parent->node_offset);
                        KKASSERT(node->ondisk->parent == cursor->parent->node_offset);
diff --git a/sys/vfs/hammer/hammer_cursor.h b/sys/vfs/hammer/hammer_cursor.h
index 795d5d3..adedcb6 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.12 2008/02/08 08:30:59 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.13 2008/02/10 09:51:01 dillon Exp $
  */
 
 /*
@@ -92,12 +92,10 @@ struct hammer_cursor {
         * can be NULL when data and/or record is not, typically indicating
         * information referenced via an in-memory record.
         */
-       struct hammer_buffer *record_buffer;    /* record+data */
+       struct hammer_buffer *record_buffer;    /* record (+ built-in data) */
        struct hammer_buffer *data_buffer;      /* extended data */
        union hammer_record_ondisk *record;
-       union hammer_data_ondisk *data1;
-       union hammer_data_ondisk *data2;
-       int     data_split;                     /* data split point if any */
+       union hammer_data_ondisk *data;
 
        /*
         * Iteration and extraction control variables
diff --git a/sys/vfs/hammer/hammer_disk.h b/sys/vfs/hammer/hammer_disk.h
index 3692ad9..d88a556 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.22 2008/02/08 08:30:59 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.23 2008/02/10 09:51:01 dillon Exp $
  */
 
 #ifndef VFS_HAMMER_DISK_H_
  * I/O is done in multiples of 16K.  Most buffer-sized headers such as those
  * used by volumes, super-clusters, clusters, and basic filesystem buffers
  * use fixed-sized A-lists which are heavily dependant on HAMMER_BUFSIZE.
+ *
+ * Per-volume storage limit: 52 bits           4096 TB
+ * Per-Zone storage limit: 59 bits             512 KTB (due to blockmap)
+ * Per-filesystem storage limit: 60 bits       1 MTB
  */
-#define HAMMER_BUFSIZE 16384
-#define HAMMER_BUFMASK (HAMMER_BUFSIZE - 1)
-#define HAMMER_MAXDATA (256*1024)
+#define HAMMER_BUFSIZE         16384
+#define HAMMER_BUFMASK         (HAMMER_BUFSIZE - 1)
+#define HAMMER_MAXDATA         (256*1024)
+#define HAMMER_BUFFER_BITS     14
+
+#if (1 << HAMMER_BUFFER_BITS) != HAMMER_BUFSIZE
+#error "HAMMER_BUFFER_BITS BROKEN"
+#endif
 
 #define HAMMER_BUFSIZE64       ((u_int64_t)HAMMER_BUFSIZE)
 #define HAMMER_BUFMASK64       ((u_int64_t)HAMMER_BUFMASK)
@@ -104,16 +113,45 @@ typedef u_int64_t hammer_off_t;
  * zone 0 (z,v,o):     reserved (for sanity)
  * zone 1 (z,v,o):     raw volume relative (offset 0 is the volume header)
  * zone 2 (z,v,o):     raw buffer relative (offset 0 is the first buffer)
- * zone 3-15     :     reserved
+ * zone 3 (z,o):       undo fifo       - blockmap backed
+ *
+ * zone 8 (z,o):       B-Tree          - blkmap-backed
+ * zone 9 (z,o):       Record          - blkmap-backed
+ * zone 10 (z,o):      Large-data      - blkmap-backed
  */
 
 #define HAMMER_ZONE_RAW_VOLUME         0x1000000000000000ULL
 #define HAMMER_ZONE_RAW_BUFFER         0x2000000000000000ULL
+#define HAMMER_ZONE_UNDO               0x3000000000000000ULL
+#define HAMMER_ZONE_RESERVED04         0x4000000000000000ULL
+#define HAMMER_ZONE_RESERVED05         0x5000000000000000ULL
+#define HAMMER_ZONE_RESERVED06         0x6000000000000000ULL
+#define HAMMER_ZONE_RESERVED07         0x7000000000000000ULL
+#define HAMMER_ZONE_BTREE              0x8000000000000000ULL
+#define HAMMER_ZONE_RECORD             0x9000000000000000ULL
+#define HAMMER_ZONE_LARGE_DATA         0xA000000000000000ULL
+#define HAMMER_ZONE_SMALL_DATA         0xB000000000000000ULL
+#define HAMMER_ZONE_RESERVED0C         0xC000000000000000ULL
+#define HAMMER_ZONE_RESERVED0D         0xD000000000000000ULL
+#define HAMMER_ZONE_RESERVED0E         0xE000000000000000ULL
+#define HAMMER_ZONE_RESERVED0F         0xF000000000000000ULL
+
+#define HAMMER_ZONE_RAW_VOLUME_INDEX   1
+#define HAMMER_ZONE_RAW_BUFFER_INDEX   2
+#define HAMMER_ZONE_UNDO_INDEX         3
+#define HAMMER_ZONE_BTREE_INDEX                8
+#define HAMMER_ZONE_RECORD_INDEX       9
+#define HAMMER_ZONE_LARGE_DATA_INDEX   10
+#define HAMMER_ZONE_SMALL_DATA_INDEX   11
+
+#define HAMMER_MAX_ZONES               16
 
 #define HAMMER_VOL_ENCODE(vol_no)                      \
        ((hammer_off_t)((vol_no) & 255) << 52)
 #define HAMMER_VOL_DECODE(ham_off)                     \
        (int32_t)(((hammer_off_t)(ham_off) >> 52) & 255)
+#define HAMMER_ZONE_DECODE(ham_off)                    \
+       (int32_t)(((hammer_off_t)(ham_off) >> 60))
 #define HAMMER_SHORT_OFF_ENCODE(offset)                        \
        ((hammer_off_t)(offset) & HAMMER_OFF_SHORT_MASK)
 #define HAMMER_LONG_OFF_ENCODE(offset)                 \
@@ -129,12 +167,64 @@ typedef u_int64_t hammer_off_t;
        HAMMER_VOL_ENCODE(vol_no) |                     \
        HAMMER_SHORT_OFF_ENCODE(offset))
 
+/*
+ * Large-Block backing store
+ *
+ * A blockmap is a two-level map which translates a blockmap-backed zone
+ * offset into a raw zone 2 offset.  Each layer handles 18 bits.  The 8M
+ * large-block size is 23 bits so two layers gives us 23+18+18 = 59 bits
+ * of address space.
+ */
+#define HAMMER_LARGEBLOCK_SIZE         (8192 * 1024)
+#define HAMMER_LARGEBLOCK_MASK         (HAMMER_LARGEBLOCK_SIZE - 1)
+#define HAMMER_LARGEBLOCK_MASK64       ((u_int64_t)HAMMER_LARGEBLOCK_SIZE - 1)
+#define HAMMER_LARGEBLOCK_BITS         23
+#if (1 << HAMMER_LARGEBLOCK_BITS) != HAMMER_LARGEBLOCK_SIZE
+#error "HAMMER_LARGEBLOCK_BITS BROKEN"
+#endif
+
+#define HAMMER_BUFFERS_PER_LARGEBLOCK                  \
+       (HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE)
+#define HAMMER_BUFFERS_PER_LARGEBLOCK_MASK             \
+       (HAMMER_BUFFERS_PER_LARGEBLOCK - 1)
+#define HAMMER_BUFFERS_PER_LARGEBLOCK_MASK64           \
+       ((hammer_off_t)HAMMER_BUFFERS_PER_LARGEBLOCK_MASK)
+
+#define HAMMER_BLOCKMAP_RADIX                          \
+       (HAMMER_LARGEBLOCK_SIZE / sizeof(struct hammer_blockmap_entry))
+#define HAMMER_BLOCKMAP_RADIX_MASK                     \
+       (HAMMER_BLOCKMAP_RADIX - 1)
+#define HAMMER_BLOCKMAP_BITS           18
+#if (1 << HAMMER_BLOCKMAP_BITS) != (HAMMER_LARGEBLOCK_SIZE / 32)
+#error "HAMMER_BLOCKMAP_BITS BROKEN"
+#endif
+
+#define HAMMER_LARGEBLOCK_LAYER1                       \
+       ((hammer_off_t)HAMMER_LARGEBLOCK_SIZE * HAMMER_BLOCKMAP_RADIX)
+#define HAMMER_LARGEBLOCK_LAYER2                       \
+       (HAMMER_LARGEBLOCK_LAYER1 * HAMMER_BLOCKMAP_RADIX)
+
+#define HAMMER_LARGEBLOCK_LAYER1_MASK  (HAMMER_LARGEBLOCK_LAYER1 - 1)
+#define HAMMER_LARGEBLOCK_LAYER2_MASK  (HAMMER_LARGEBLOCK_LAYER2 - 1)
+
+struct hammer_blockmap_entry {
+       hammer_off_t    phys_offset;    /* zone-2 physical offset */
+       int32_t         bytes_free;     /* bytes free within the big-block */
+       u_int32_t       entry_crc;
+       u_int32_t       reserved01;
+       u_int32_t       reserved02;
+       hammer_off_t    alloc_offset;   /* zone-X logical offset */
+};
+
+typedef struct hammer_blockmap_entry *hammer_blockmap_entry_t;
 
 /*
  * All on-disk HAMMER structures which make up elements of the FIFO contain
- * a hammer_fifo_head structure.  This structure contains all the information
- * required to validate the fifo element and to scan the fifo in either
- * direction.
+ * a hammer_fifo_head and hammer_fifo_tail structure.  This structure
+ * contains all the information required to validate the fifo element
+ * and to scan the fifo in either direction.  The head is typically embedded
+ * in higher level hammer on-disk structures while the tail is typically
+ * out-of-band.  hdr_size is the size of the whole mess, including the tail.
  *
  * Nearly all such structures are guaranteed to not cross a 16K filesystem
  * buffer boundary.  The one exception is a record, whos related data may
@@ -144,38 +234,49 @@ typedef u_int64_t hammer_off_t;
  * (i.e. the base of the buffer will not be in the middle of a data record).
  * This is used to allow the recovery code to re-sync after hitting corrupted
  * data.
+ *
+ * PAD elements are allowed to take up only 8 bytes of space as a special
+ * case, containing only hdr_signature, hdr_type, and hdr_size fields,
+ * and with the tail overloaded onto the head structure for 8 bytes total.
  */
-#define HAMMER_HEAD_ONDISK_SIZE                32
+#define HAMMER_HEAD_ONDISK_SIZE                24
 #define HAMMER_HEAD_RECOVERY_ALIGNMENT  (16 * 1024 * 1024)
-#define HAMMER_HEAD_ALIGN              32
+#define HAMMER_HEAD_ALIGN              8
 #define HAMMER_HEAD_ALIGN_MASK         (HAMMER_HEAD_ALIGN - 1)
+#define HAMMER_TAIL_ONDISK_SIZE                8
 
 struct hammer_fifo_head {
        u_int16_t hdr_signature;
        u_int16_t hdr_type;
-       u_int32_t hdr_fwd_link;
-       u_int32_t hdr_rev_link;
+       u_int32_t hdr_size;     /* aligned size of the whole mess */
        u_int32_t hdr_crc;
-       hammer_tid_t hdr_seq;
-       hammer_tid_t hdr_tid;
+       u_int32_t hdr_reserved02;
+       hammer_tid_t hdr_seq;   /* related sequence number */
+};
+
+struct hammer_fifo_tail {
+       u_int16_t tail_signature;
+       u_int16_t tail_type;
+       u_int32_t tail_size;    /* aligned size of the whole mess */
 };
 
 typedef struct hammer_fifo_head *hammer_fifo_head_t;
+typedef struct hammer_fifo_tail *hammer_fifo_tail_t;
 
 /*
  * Fifo header types.
  */
-#define HAMMER_HEAD_TYPE_PAD   0xF000U         /* FIFO pad (also FREED) */
-#define HAMMER_HEAD_TYPE_VOL   0x7001U         /* Volume (dummy header) */
-#define HAMMER_HEAD_TYPE_BTREE 0x7002U         /* B-Tree node */
-#define HAMMER_HEAD_TYPE_UNDO  0x7003U         /* random UNDO information */
-#define HAMMER_HEAD_TYPE_DELETE        0x7004U         /* record deletion */
-#define HAMMER_HEAD_TYPE_RECORD        0x7005U         /* Filesystem record */
-#define HAMMER_HEAD_TYPE_TERM  0x7009U         /* Dummy Terminator */
+#define HAMMER_HEAD_TYPE_PAD   (0x0040U|HAMMER_HEAD_FLAG_FREE)
+#define HAMMER_HEAD_TYPE_VOL   0x0041U         /* Volume (dummy header) */
+#define HAMMER_HEAD_TYPE_BTREE 0x0042U         /* B-Tree node */
+#define HAMMER_HEAD_TYPE_UNDO  0x0043U         /* random UNDO information */
+#define HAMMER_HEAD_TYPE_DELETE        0x0044U         /* record deletion */
+#define HAMMER_HEAD_TYPE_RECORD        0x0045U         /* Filesystem record */
 
-#define HAMMER_HEAD_TYPEF_FREED        0x8000U         /* Indicates object freed */
+#define HAMMER_HEAD_FLAG_FREE  0x8000U         /* Indicates object freed */
 
 #define HAMMER_HEAD_SIGNATURE  0xC84EU
+#define HAMMER_TAIL_SIGNATURE  0xC74FU
 
 /*
  * Misc FIFO structures (except for the B-Tree node and hammer record)
@@ -224,11 +325,6 @@ typedef struct hammer_fifo_undo *hammer_fifo_undo_t;
  *     any records remaining in memory can be flushed to the memory log
  *     area.  This allows the kernel to immediately return success.
  */
-#define HAMMER_VOL_MAXCLUSTERS         32768   /* 1-layer */
-#define HAMMER_VOL_MAXSUPERCLUSTERS    4096    /* 2-layer */
-#define HAMMER_VOL_SUPERCLUSTER_GROUP  16
-#define HAMMER_VOL_METAELMS_1LYR       HAMMER_ALIST_METAELMS_32K_1LYR
-#define HAMMER_VOL_METAELMS_2LYR       HAMMER_ALIST_METAELMS_16K_2LYR
 
 #define HAMMER_BOOT_MINBYTES           (32*1024)
 #define HAMMER_BOOT_NOMBYTES           (64LL*1024*1024)
@@ -239,7 +335,8 @@ typedef struct hammer_fifo_undo *hammer_fifo_undo_t;
 #define HAMMER_MEM_MAXBYTES            (64LL*1024*1024*1024)
 
 struct hammer_volume_ondisk {
-       struct hammer_fifo_head head;
+       u_int64_t vol_signature;/* Signature */
+
        int64_t vol_bot_beg;    /* byte offset of boot area or 0 */
        int64_t vol_mem_beg;    /* byte offset of memory log or 0 */
        int64_t vol_buf_beg;    /* byte offset of first buffer in volume */
@@ -250,7 +347,6 @@ struct hammer_volume_ondisk {
        uuid_t    vol_fstype;   /* identify filesystem type */
        char      vol_name[64]; /* Name of volume */
 
-       u_int64_t vol_signature;/* Signature #2 */
        int32_t vol_no;         /* volume number within filesystem */
        int32_t vol_count;      /* number of volumes making up FS */
 
@@ -259,7 +355,7 @@ struct hammer_volume_ondisk {
        u_int32_t vol_flags;    /* volume flags */
        u_int32_t vol_rootvol;  /* which volume is the root volume? */
 
-       int32_t vol_reserved04; /* cluster size (same for all volumes) */
+       int32_t vol_reserved04;
        int32_t vol_reserved05;
        u_int32_t vol_reserved06;
        u_int32_t vol_reserved07;
@@ -268,6 +364,14 @@ struct hammer_volume_ondisk {
        int32_t vol_reserved08;
        int64_t vol_nblocks;            /* total allocatable hammer bufs */
 
+       /*
+        * bigblock freemap. 
+        *
+        * XXX not implemented yet, just use a sequential index at
+        * the moment.
+        */
+       hammer_off_t vol0_free_off;
+
        /*
         * These fields are initialized and space is reserved in every
         * volume making up a HAMMER filesystem, but only the master volume
@@ -276,11 +380,15 @@ struct hammer_volume_ondisk {
        int64_t vol0_stat_bytes;        /* for statfs only */
        int64_t vol0_stat_inodes;       /* for statfs only */
        int64_t vol0_stat_records;      /* total records in filesystem */
-       hammer_off_t vol0_fifo_beg;     /* CIRCULAR FIFO START */
-       hammer_off_t vol0_fifo_end;     /* CIRCULAR FIFO END */
        hammer_off_t vol0_btree_root;   /* B-Tree root */
        hammer_tid_t vol0_next_tid;     /* highest synchronized TID */
-       hammer_tid_t vol0_next_seq;     /* next SEQ no */
+       hammer_tid_t vol0_next_seq;     /* next SEQ no for undo */
+
+       /*
+        * Blockmaps for zones.  Not all zones use a blockmap.
+        */
+       struct hammer_blockmap_entry vol0_blockmap[HAMMER_MAX_ZONES];
+
 };
 
 typedef struct hammer_volume_ondisk *hammer_volume_ondisk_t;
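Each entry in vol0_blockmap[] describes the backing store for one zone, and per the comment above only the master volume's copy is actually consulted.  A minimal sketch of indexing it (lookup_zone_blockmap() is a hypothetical helper; the real lookup is hammer_blockmap_lookup(), whose implementation is not shown in this excerpt):

static struct hammer_blockmap_entry *
lookup_zone_blockmap(hammer_volume_ondisk_t ondisk, int zone)
{
	/* Zones that do not use a blockmap simply leave their entry idle. */
	KKASSERT(zone >= 0 && zone < HAMMER_MAX_ZONES);
	return (&ondisk->vol0_blockmap[zone]);
}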
@@ -289,20 +397,20 @@ typedef struct hammer_volume_ondisk *hammer_volume_ondisk_t;
 #define HAMMER_VOLF_OPEN               0x0002  /* volume is open */
 
 /*
- * All HAMMER records have a common 72-byte base and a variable-length
- * extension, plus a possible data reference.  The data portion of the
- * HAMMER record can cross a filesystem buffer boundary (but not the primary
- * record portion).
- *
- * Current only relative in-band data offsets are supported, but the field
- * is large enough for future out-of-band references.
+ * All HAMMER records have a common 64-byte base and a 32-byte extension,
+ * plus a possible data reference.  The data reference can be in-band or
+ * out-of-band.
  */
+
+#define HAMMER_RECORD_SIZE             (64+32)
+
 struct hammer_base_record {
-       struct hammer_fifo_head head;   /* 16 byte fifo header */
+       u_int32_t       signature;      /* record signature */
+       u_int32_t       data_crc;       /* data crc */
        struct hammer_base_elm base;    /* 40 byte base element */
        hammer_off_t    data_off;       /* in-band or out-of-band */
        int32_t         data_len;       /* size of data in bytes */
-       u_int32_t       reserved03;
+       u_int32_t       reserved02;
 };
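data_off can now point either at in-band data embedded in the fixed-size record itself or at an out-of-band blockmap allocation.  A hedged sketch of telling the two apart given the record's own blockmap offset (record_data_is_inband() is a hypothetical helper, assuming in-band data always lies within the HAMMER_RECORD_SIZE window starting at the record):

static int
record_data_is_inband(hammer_off_t rec_offset, hammer_record_ondisk_t rec)
{
	if (rec->base.data_len == 0)
		return (0);	/* zero-fill record, no data at all */
	return (rec->base.data_off >= rec_offset &&
		rec->base.data_off < rec_offset + HAMMER_RECORD_SIZE);
}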
 
 /*
@@ -407,6 +515,7 @@ struct hammer_inode_record {
  */
 struct hammer_data_record {
        struct hammer_base_record base;
+       char    data[32];
 };
 
 /*
@@ -431,6 +540,7 @@ struct hammer_entry_record {
        struct hammer_base_record base;
        u_int64_t obj_id;               /* object being referenced */
        u_int64_t reserved01;
+       char    name[16];
 };
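With HAMMER_RECORD_SIZE fixed at 96 bytes (64+32), the extensions above bound how much can be stored in-band: 32 bytes of file data or 16 bytes of directory-entry name; anything larger goes out-of-band.  A minimal sketch of that arithmetic (inband_capacity() is a hypothetical helper mirroring the offsetof() calculations the allocator uses later in this diff):

static int
inband_capacity(u_int8_t rec_type)
{
	switch(rec_type) {
	case HAMMER_RECTYPE_DATA:
		/* 96 - 64 == 32 bytes of in-band data */
		return (HAMMER_RECORD_SIZE -
			offsetof(struct hammer_data_record, data[0]));
	case HAMMER_RECTYPE_DIRENTRY:
		/* 96 - 80 == 16 bytes of in-band name */
		return (HAMMER_RECORD_SIZE -
			offsetof(struct hammer_entry_record, name[0]));
	default:
		return (0);	/* other record types embed no data */
	}
}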
 
 /*
diff --git a/sys/vfs/hammer/hammer_freemap.c b/sys/vfs/hammer/hammer_freemap.c
new file mode 100644 (file)
index 0000000..7560d0f
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
+ * 
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * 
+ * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.1 2008/02/10 09:51:01 dillon Exp $
+ */
+
+/*
+ * HAMMER freemap - bigblock allocator
+ */
+#include "hammer.h"
+
+hammer_off_t
+hammer_freemap_alloc(hammer_mount_t hmp, int *errorp)
+{
+       hammer_volume_t root_volume;
+       hammer_volume_ondisk_t ondisk;
+       hammer_off_t raw_offset;
+
+       root_volume = hammer_get_root_volume(hmp, errorp);
+       if (*errorp)
+               return(0);
+       ondisk = root_volume->ondisk;
+
+       hammer_modify_volume(root_volume, &ondisk->vol0_free_off,
+                            sizeof(ondisk->vol0_free_off));
+       raw_offset = ondisk->vol0_free_off;
+       ondisk->vol0_free_off += HAMMER_LARGEBLOCK_SIZE;
+       KKASSERT(ondisk->vol0_free_off <= root_volume->maxbuf_off);
+       hammer_rel_volume(root_volume, 0);
+       return(raw_offset);
+}
+
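This allocator is the temporary stand-in: big blocks are handed out strictly sequentially from vol0_free_off and never reclaimed, and the KKASSERT fires when the volume is exhausted.  A hedged usage sketch, assuming no competing allocations in between (grab_two_bigblocks() is hypothetical; the real consumer is the blockmap code, which is not shown in this excerpt):

static hammer_off_t
grab_two_bigblocks(hammer_mount_t hmp)
{
	hammer_off_t b1;
	hammer_off_t b2;
	int error;

	b1 = hammer_freemap_alloc(hmp, &error);
	if (error)
		return (0);
	b2 = hammer_freemap_alloc(hmp, &error);
	if (error)
		return (0);
	/* Sequential allocation: the two big blocks are adjacent. */
	KKASSERT(b2 == b1 + HAMMER_LARGEBLOCK_SIZE);
	return (b1);
}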
index 458904d..8989564 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.29 2008/02/08 08:30:59 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.30 2008/02/10 09:51:01 dillon Exp $
  */
 
 #include "hammer.h"
@@ -237,7 +237,7 @@ retry:
         */
        if (*errorp == 0) {
                ip->ino_rec = cursor.record->inode;
-               ip->ino_data = cursor.data1->inode;
+               ip->ino_data = cursor.data->inode;
                hammer_cache_node(cursor.node, &ip->cache[0]);
                if (cache)
                        hammer_cache_node(cursor.node, cache);
@@ -417,7 +417,7 @@ retry:
         * will remain set and prevent further updates.
         */
        if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) { 
-               record = hammer_alloc_mem_record(ip, sizeof(struct hammer_inode_record));
+               record = hammer_alloc_mem_record(ip);
                record->rec.inode = ip->ino_rec;
                record->rec.inode.base.base.create_tid = last_tid;
                record->rec.inode.base.data_len = sizeof(ip->ino_data);
index 34e14e5..1bdc0b3 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.29 2008/02/08 08:30:59 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.30 2008/02/10 09:51:01 dillon Exp $
  */
 
 #include "hammer.h"
@@ -133,7 +133,7 @@ RB_GENERATE_XLOOKUP(hammer_rec_rb_tree, INFO, hammer_record, rb_node,
  * returned referenced.
  */
 hammer_record_t
-hammer_alloc_mem_record(hammer_inode_t ip, int32_t rec_len)
+hammer_alloc_mem_record(hammer_inode_t ip)
 {
        hammer_record_t record;
 
@@ -141,7 +141,6 @@ hammer_alloc_mem_record(hammer_inode_t ip, int32_t rec_len)
        record = kmalloc(sizeof(*record), M_HAMMER, M_WAITOK|M_ZERO);
        record->ip = ip;
        record->rec.base.base.btype = HAMMER_BTREE_TYPE_RECORD;
-       record->rec_len = rec_len;
        hammer_ref(&record->lock);
        return (record);
 }
@@ -352,7 +351,7 @@ hammer_ip_add_directory(struct hammer_transaction *trans,
        int error;
        int bytes;
 
-       record = hammer_alloc_mem_record(dip, sizeof(struct hammer_entry_record));
+       record = hammer_alloc_mem_record(dip);
 
        bytes = ncp->nc_nlen;   /* NOTE: terminating \0 is NOT included */
        if (++trans->hmp->namekey_iterator == 0)
@@ -456,9 +455,7 @@ hammer_ip_sync_data(hammer_transaction_t trans, hammer_inode_t ip,
        hammer_record_ondisk_t rec;
        union hammer_btree_elm elm;
        hammer_off_t rec_offset;
-       hammer_off_t data_offset;
-       void *bdata1, *bdata2;
-       int32_t data2_index;
+       void *bdata;
        int error;
 
        KKASSERT((offset & HAMMER_BUFMASK) == 0);
@@ -494,12 +491,13 @@ retry:
         * can cross buffer boundaries so we may have to split our bcopy.
         */
        rec = hammer_alloc_record(ip->hmp, &rec_offset, HAMMER_RECTYPE_DATA,
-                                 sizeof(rec->data), &cursor.record_buffer,
-                                 &data_offset, bytes, 
-                                 &bdata1, &bdata2, &data2_index,
+                                 &cursor.record_buffer,
+                                 bytes, &bdata,
                                  &cursor.data_buffer, &error);
        if (rec == NULL)
                goto done;
+       if (hammer_debug_general & 0x1000)
+               kprintf("OOB RECOR2 DATA REC %016llx DATA %016llx LEN=%d\n", rec_offset, rec->base.data_off, rec->base.data_len);
 
        /*
         * Fill everything in and insert our B-Tree node.
@@ -514,22 +512,16 @@ retry:
        rec->base.base.create_tid = trans->tid;
        rec->base.base.delete_tid = 0;
        rec->base.base.rec_type = HAMMER_RECTYPE_DATA;
-       rec->base.head.hdr_crc = crc32(data, bytes);
-       KKASSERT(rec->base.data_off == data_offset);
+       rec->base.data_crc = crc32(data, bytes);
        KKASSERT(rec->base.data_len == bytes);
 
-       if (data2_index < bytes) {
-               bcopy(data, bdata1, data2_index);
-               bcopy((char *)data + data2_index, bdata2, bytes - data2_index);
-       } else {
-               bcopy(data, bdata1, bytes);
-       }
+       bcopy(data, bdata, bytes);
 
        elm.leaf.base = rec->base.base;
        elm.leaf.rec_offset = rec_offset;
        elm.leaf.data_offset = rec->base.data_off;
        elm.leaf.data_len = bytes;
-       elm.leaf.data_crc = rec->base.head.hdr_crc;
+       elm.leaf.data_crc = rec->base.data_crc;
 
        /*
         * Data records can wind up on-disk before the inode itself is
@@ -542,11 +534,7 @@ retry:
        if (error == 0)
                goto done;
 
-       /*
-        * If we fail we may be able to unwind the allocation.
-        */
-       rec->base.head.hdr_type |= HAMMER_HEAD_TYPEF_FREED;
-       hammer_unwind_fifo(ip->hmp, rec_offset);
+       hammer_blockmap_free(ip->hmp, rec_offset, HAMMER_RECORD_SIZE);
 done:
        hammer_done_cursor(&cursor);
        if (error == EDEADLK)
@@ -555,7 +543,7 @@ done:
 }
 
 /*
- * Sync an in-memory record to the disk.  this is typically called via fsync
+ * Sync an in-memory record to the disk.  This is typically called via fsync
  * from a cached record source.  This code is responsible for actually
  * writing a record out to the disk.
  */
@@ -567,9 +555,7 @@ hammer_ip_sync_record(hammer_record_t record)
        hammer_mount_t hmp;
        union hammer_btree_elm elm;
        hammer_off_t rec_offset;
-       hammer_off_t data_offset;
-       void *bdata1;
-       int32_t alloc_data_len;
+       void *bdata;
        int error;
 
        hmp = record->ip->hmp;
@@ -653,19 +639,33 @@ retry:
         * marked as being modified and further calls to
         * hammer_modify_buffer() will result in unneeded UNDO records.
         *
-        * Support zero-fill records.
+        * Support zero-fill records (data == NULL and data_len != 0)
         */
-       if (record->data == NULL)
-               alloc_data_len = 0;
-       else
-               alloc_data_len = record->rec.base.data_len;
-
-       rec = hammer_alloc_record(hmp, &rec_offset,
-                                 record->rec.base.base.rec_type,
-                                 record->rec_len, &cursor.record_buffer,
-                                 &data_offset, alloc_data_len,
-                                 &bdata1, NULL, NULL,
-                                 NULL, &error);
+       if (record->data == NULL) {
+               rec = hammer_alloc_record(hmp, &rec_offset,
+                                         record->rec.base.base.rec_type,
+                                         &cursor.record_buffer,
+                                         0, &bdata,
+                                         NULL, &error);
+               if (hammer_debug_general & 0x1000)
+                       kprintf("NULL RECORD DATA\n");
+       } else if (record->flags & HAMMER_RECF_INBAND) {
+               rec = hammer_alloc_record(hmp, &rec_offset,
+                                         record->rec.base.base.rec_type,
+                                         &cursor.record_buffer,
+                                         record->rec.base.data_len, &bdata,
+                                         NULL, &error);
+               if (hammer_debug_general & 0x1000)
+                       kprintf("INBAND RECORD DATA %016llx DATA %016llx LEN=%d\n", rec_offset, rec->base.data_off, record->rec.base.data_len);
+       } else {
+               rec = hammer_alloc_record(hmp, &rec_offset,
+                                         record->rec.base.base.rec_type,
+                                         &cursor.record_buffer,
+                                         record->rec.base.data_len, &bdata,
+                                         &cursor.data_buffer, &error);
+               if (hammer_debug_general & 0x1000)
+                       kprintf("OOB RECORD DATA REC %016llx DATA %016llx LEN=%d\n", rec_offset, rec->base.data_off, record->rec.base.data_len);
+       }
 
        if (rec == NULL)
                goto done;
@@ -674,27 +674,24 @@ retry:
         * Fill in the remaining fields and insert our B-Tree node.
         */
        rec->base.base = record->rec.base.base;
-       if (record->rec_len > sizeof(rec->base)) {
-               bcopy(&record->rec.base + 1, &rec->base + 1,
-                     record->rec_len - sizeof(rec->base));
-       }
+       bcopy(&record->rec.base + 1, &rec->base + 1,
+             HAMMER_RECORD_SIZE - sizeof(record->rec.base));
 
        /*
         * Copy the data and deal with zero-fill support.
         */
        if (record->data) {
-               rec->base.head.hdr_crc = crc32(record->data, alloc_data_len);
-               KKASSERT(alloc_data_len == rec->base.data_len);
-               bcopy(record->data, bdata1, alloc_data_len);
+               rec->base.data_crc = crc32(record->data, rec->base.data_len);
+               bcopy(record->data, bdata, rec->base.data_len);
        } else {
                rec->base.data_len = record->rec.base.data_len;
        }
 
        elm.leaf.base = record->rec.base.base;
        elm.leaf.rec_offset = rec_offset;
-       elm.leaf.data_offset = data_offset;
+       elm.leaf.data_offset = rec->base.data_off;
        elm.leaf.data_len = rec->base.data_len;
-       elm.leaf.data_crc = rec->base.head.hdr_crc;
+       elm.leaf.data_crc = rec->base.data_crc;
 
        error = hammer_btree_insert(&cursor, &elm);
 
@@ -709,8 +706,7 @@ retry:
        /*
         * Try to unwind the fifo allocation
         */
-       rec->base.head.hdr_type |= HAMMER_HEAD_TYPEF_FREED;
-       hammer_unwind_fifo(hmp, rec_offset);
+       hammer_blockmap_free(hmp, rec_offset, HAMMER_RECORD_SIZE);
 done:
        record->flags &= ~HAMMER_RECF_SYNCING;
        hammer_done_cursor(&cursor);
@@ -735,8 +731,9 @@ static
 int
 hammer_mem_add(struct hammer_transaction *trans, hammer_record_t record)
 {
-       int bytes;
        void *data;
+       int bytes;
+       int reclen;
                
        /*
         * Make a private copy of record->data
@@ -747,11 +744,22 @@ hammer_mem_add(struct hammer_transaction *trans, hammer_record_t record)
                 * union, otherwise allocate a copy.
                 */
                bytes = record->rec.base.data_len;
-               if (bytes <= (int)sizeof(record->rec) - record->rec_len) {
-                       bcopy(record->data,
-                             (char *)&record->rec + record->rec_len, bytes);
-                       record->data = (void *)((char *)&record->rec +
-                                                       record->rec_len);
+               switch(record->rec.base.base.rec_type) {
+               case HAMMER_RECTYPE_DIRENTRY:
+                       reclen = offsetof(struct hammer_entry_record, name[0]);
+                       break;
+               case HAMMER_RECTYPE_DATA:
+                       reclen = offsetof(struct hammer_data_record, data[0]);
+                       break;
+               default:
+                       reclen = sizeof(record->rec);
+                       break;
+               }
+               if (reclen + bytes <= HAMMER_RECORD_SIZE) {
+                       bcopy(record->data, (char *)&record->rec + reclen,
+                             bytes);
+                       record->data = (void *)((char *)&record->rec + reclen);
+                       record->flags |= HAMMER_RECF_INBAND;
                } else {
                        ++hammer_count_record_datas;
                        data = kmalloc(bytes, M_HAMMER, M_WAITOK);
@@ -1019,7 +1027,7 @@ hammer_ip_next(hammer_cursor_t cursor)
 }
 
 /*
- * Resolve the cursor->data1/2 pointer for the current cursor position in
+ * Resolve the cursor->data pointer for the current cursor position in
  * a merged iteration.
  */
 int
@@ -1028,9 +1036,7 @@ hammer_ip_resolve_data(hammer_cursor_t cursor)
        int error;
 
        if (cursor->iprec && cursor->record == &cursor->iprec->rec) {
-               cursor->data1 = cursor->iprec->data;
-               cursor->data2 = NULL;
-               cursor->data_split = cursor->iprec->rec.base.data_len;
+               cursor->data = cursor->iprec->data;
                error = 0;
        } else {
                error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA);
@@ -1038,6 +1044,21 @@ hammer_ip_resolve_data(hammer_cursor_t cursor)
        return(error);
 }
 
+int
+hammer_ip_resolve_record_and_data(hammer_cursor_t cursor)
+{
+       int error;
+
+       if (cursor->iprec && cursor->record == &cursor->iprec->rec) {
+               cursor->data = cursor->iprec->data;
+               error = 0;
+       } else {
+               error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA |
+                                                    HAMMER_CURSOR_GET_RECORD);
+       }
+       return(error);
+}
+
 /*
  * Delete all records within the specified range for inode ip.
  *
@@ -1260,7 +1281,7 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid)
         */
        error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD);
        elm = NULL;
-       hmp = cursor->node->volume->hmp;
+       hmp = cursor->node->hmp;
 
        dodelete = 0;
        if (error == 0) {
@@ -1320,7 +1341,14 @@ hammer_delete_at_cursor(hammer_cursor_t cursor, int64_t *stat_bytes)
                        cursor->flags |= HAMMER_CURSOR_DELBTREE;
                        cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
                }
-               hammer_free_fifo(cursor->node->volume->hmp, rec_offset);
+       }
+       if (error == 0) {
+               hammer_blockmap_free(cursor->node->hmp, rec_offset,
+                                    sizeof(union hammer_record_ondisk));
+       }
+       if (error == 0 &&
+           (data_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_LARGE_DATA) {
+               hammer_blockmap_free(cursor->node->hmp, data_offset, data_len);
        }
 #if 0
        kprintf("hammer_delete_at_cursor: %d:%d:%08x %08x/%d "
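The deletion path above now releases blockmap space directly instead of flagging fifo heads as freed.  A condensed sketch of that policy (release_deleted_record() is a hypothetical helper mirroring the two hammer_blockmap_free() calls in the hunk above):

static void
release_deleted_record(hammer_mount_t hmp, hammer_off_t rec_offset,
		       hammer_off_t data_offset, int32_t data_len)
{
	/* The record's own allocation is always released. */
	hammer_blockmap_free(hmp, rec_offset,
			     sizeof(union hammer_record_ondisk));
	/*
	 * Separately allocated data is only released here when it lives
	 * in the large-data zone; in-band data needs no separate free.
	 */
	if ((data_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_LARGE_DATA)
		hammer_blockmap_free(hmp, data_offset, data_len);
}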
index 336739d..1022e34 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.28 2008/02/08 08:30:59 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.29 2008/02/10 09:51:01 dillon Exp $
  */
 /*
  * Manage HAMMER's on-disk structures.  These routines are primarily
@@ -49,6 +49,7 @@ static void hammer_free_volume(hammer_volume_t volume);
 static int hammer_load_volume(hammer_volume_t volume);
 static int hammer_load_buffer(hammer_buffer_t buffer, int isnew);
 static int hammer_load_node(hammer_node_t node);
+#if 0
 static hammer_off_t hammer_advance_fifo(hammer_volume_t volume,
                hammer_off_t off, int32_t bytes);
 
@@ -56,6 +57,7 @@ static hammer_off_t hammer_alloc_fifo(hammer_mount_t hmp, int32_t rec_len,
                int32_t data_len, struct hammer_buffer **rec_bufferp,
                u_int16_t hdr_type, int can_cross, 
                struct hammer_buffer **data2_bufferp, int *errorp);
+#endif
 
 /*
  * Red-Black tree support for various structures
@@ -487,8 +489,8 @@ hammer_rel_volume(hammer_volume_t volume, int flush)
  *                             BUFFERS                                 *
  ************************************************************************
  *
- * Manage buffers.  Note that a buffer holds a reference to its associated
- * cluster, and its cluster will hold a reference to the cluster's volume.
+ * Manage buffers.  Currently all blockmap-backed zones are translated
+ * to zone-2 buffer offsets.
  */
 hammer_buffer_t
 hammer_get_buffer(hammer_mount_t hmp, hammer_off_t buf_offset,
@@ -497,13 +499,21 @@ hammer_get_buffer(hammer_mount_t hmp, hammer_off_t buf_offset,
        hammer_buffer_t buffer;
        hammer_volume_t volume;
        int vol_no;
+       int zone;
 
+       zone = HAMMER_ZONE_DECODE(buf_offset);
+       if (zone > HAMMER_ZONE_RAW_BUFFER_INDEX) {
+               buf_offset = hammer_blockmap_lookup(hmp, buf_offset, errorp);
+               KKASSERT(*errorp == 0);
+       }
        buf_offset &= ~HAMMER_BUFMASK64;
-       KKASSERT((buf_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RAW_BUFFER);
+       KKASSERT((buf_offset & HAMMER_ZONE_RAW_BUFFER) ==
+                HAMMER_ZONE_RAW_BUFFER);
        vol_no = HAMMER_VOL_DECODE(buf_offset);
        volume = hammer_get_volume(hmp, vol_no, errorp);
        if (volume == NULL)
                return(NULL);
+
        /*
         * NOTE: buf_offset and maxbuf_off are both full offset
         * specifications.
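hammer_get_buffer() now accepts offsets from any blockmap-backed zone and resolves them to raw zone-2 buffer offsets before consulting the buffer cache.  A minimal sketch of just that resolution step (resolve_buf_offset() is a hypothetical helper extracted from the logic above):

static hammer_off_t
resolve_buf_offset(hammer_mount_t hmp, hammer_off_t buf_offset, int *errorp)
{
	*errorp = 0;
	if (HAMMER_ZONE_DECODE(buf_offset) > HAMMER_ZONE_RAW_BUFFER_INDEX) {
		/* e.g. a B-Tree or record zone offset becomes zone-2. */
		buf_offset = hammer_blockmap_lookup(hmp, buf_offset, errorp);
	}
	/* Truncate to the base of the containing buffer. */
	return (buf_offset & ~HAMMER_BUFMASK64);
}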
@@ -777,30 +787,21 @@ hammer_bnew(hammer_mount_t hmp, hammer_off_t buf_offset, int *errorp,
 hammer_node_t
 hammer_get_node(hammer_mount_t hmp, hammer_off_t node_offset, int *errorp)
 {
-       hammer_volume_t volume;
        hammer_node_t node;
-       int32_t vol_no;
 
-       KKASSERT((node_offset & HAMMER_OFF_ZONE_MASK) ==
-                HAMMER_ZONE_RAW_BUFFER);
-       vol_no = HAMMER_VOL_DECODE(node_offset);
-       volume = hammer_get_volume(hmp, vol_no, errorp);
-       if (volume == NULL)
-               return(NULL);
+       KKASSERT((node_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_BTREE);
 
        /*
         * Locate the structure, allocating one if necessary.
         */
 again:
-       node = RB_LOOKUP(hammer_nod_rb_tree, &volume->rb_nods_root,
-                        node_offset);
+       node = RB_LOOKUP(hammer_nod_rb_tree, &hmp->rb_nods_root, node_offset);
        if (node == NULL) {
                ++hammer_count_nodes;
                node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
                node->node_offset = node_offset;
-               node->volume = volume;  /* not directly referenced */
-               if (RB_INSERT(hammer_nod_rb_tree, &volume->rb_nods_root,
-                             node)) {
+               node->hmp = hmp;
+               if (RB_INSERT(hammer_nod_rb_tree, &hmp->rb_nods_root, node)) {
                        --hammer_count_nodes;
                        kfree(node, M_HAMMER);
                        goto again;
@@ -812,7 +813,6 @@ again:
                hammer_rel_node(node);
                node = NULL;
        }
-       hammer_rel_volume(volume, 0);
        return(node);
 }
 
@@ -854,7 +854,7 @@ hammer_load_node(hammer_node_t node)
                if ((buffer = node->buffer) != NULL) {
                        error = hammer_ref_buffer(buffer);
                } else {
-                       buffer = hammer_get_buffer(node->volume->hmp,
+                       buffer = hammer_get_buffer(node->hmp,
                                                   node->node_offset, 0,
                                                   &error);
                        if (buffer) {
@@ -943,8 +943,10 @@ hammer_rel_node(hammer_node_t node)
         * it as being free.  Note that the disk space is physically
         * freed when the fifo cycles back through the node.
         */
-       if (node->flags & HAMMER_NODE_DELETED)
-               hammer_free_fifo(node->volume->hmp, node->node_offset);
+       if (node->flags & HAMMER_NODE_DELETED) {
+               hammer_blockmap_free(node->hmp, node->node_offset,
+                                    sizeof(*node->ondisk));
+       }
 
        /*
         * Destroy the node.  Record pertinent data because the node
@@ -1031,8 +1033,7 @@ hammer_flush_node(hammer_node_t node)
        if (node->cache2)
                *node->cache2 = NULL;
        if (node->lock.refs == 0 && node->ondisk == NULL) {
-               RB_REMOVE(hammer_nod_rb_tree, &node->volume->rb_nods_root,
-                         node);
+               RB_REMOVE(hammer_nod_rb_tree, &node->hmp->rb_nods_root, node);
                if ((buffer = node->buffer) != NULL) {
                        node->buffer = NULL;
                        TAILQ_REMOVE(&buffer->clist, node, entry);
@@ -1076,12 +1077,14 @@ hammer_alloc_btree(hammer_mount_t hmp, int *errorp)
        hammer_node_t node = NULL;
        hammer_off_t node_offset;
 
-       node_offset = hammer_alloc_fifo(hmp, sizeof(struct hammer_node_ondisk),
-                                       0, &buffer, HAMMER_HEAD_TYPE_BTREE,
-                                       0, NULL,
-                                       errorp);
-       if (*errorp == 0)
+       node_offset = hammer_blockmap_alloc(hmp, HAMMER_ZONE_BTREE_INDEX,
+                                           sizeof(struct hammer_node_ondisk),
+                                           errorp);
+       if (*errorp == 0) {
                node = hammer_get_node(hmp, node_offset, errorp);
+               hammer_modify_node(node);
+               bzero(node->ondisk, sizeof(*node->ondisk));
+       }
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        return(node);
@@ -1091,60 +1094,102 @@ hammer_alloc_btree(hammer_mount_t hmp, int *errorp)
  * The returned buffers are already appropriately marked as being modified.
  * If the caller marks them again, unnecessary undo records may be generated.
  *
- * The core record (rec_len) cannot cross a buffer boundary.  The record + data
- * is only allowed to cross a buffer boundary for HAMMER_RECTYPE_DATA
+ * In-band data is indicated by data_bufferp == NULL.  Pass a data_len of 0
+ * for zero-fill (caller modifies data_len afterwards).
  */
 void *
 hammer_alloc_record(hammer_mount_t hmp, 
-                        hammer_off_t *rec_offp, u_int8_t rec_type, 
-                        int32_t rec_len, struct hammer_buffer **rec_bufferp,
-                        hammer_off_t *data_offp, int32_t data_len, 
-                        void **data1p, void **data2p, int32_t *data2_index,
-                        struct hammer_buffer **data2_bufferp,
-                        int *errorp)
+                   hammer_off_t *rec_offp, u_int8_t rec_type, 
+                   struct hammer_buffer **rec_bufferp,
+                   int32_t data_len, void **datap,
+                   struct hammer_buffer **data_bufferp, int *errorp)
 {
-       int32_t aligned_rec_len, n;
-       hammer_off_t rec_offset;
        hammer_record_ondisk_t rec;
-       int can_cross;
+       hammer_off_t rec_offset;
+       hammer_off_t data_offset;
+       int32_t reclen;
 
-       aligned_rec_len = (rec_len + HAMMER_HEAD_ALIGN_MASK) &
-                         ~HAMMER_HEAD_ALIGN_MASK;
-       can_cross = (rec_type == HAMMER_RECTYPE_DATA);
+       if (datap)
+               *datap = NULL;
 
-       rec_offset = hammer_alloc_fifo(hmp, aligned_rec_len, data_len,
-                                      rec_bufferp, HAMMER_HEAD_TYPE_RECORD,
-                                      can_cross, data2_bufferp, errorp);
+       /*
+        * Allocate the record
+        */
+       rec_offset = hammer_blockmap_alloc(hmp, HAMMER_ZONE_RECORD_INDEX,
+                                          HAMMER_RECORD_SIZE, errorp);
        if (*errorp)
                return(NULL);
 
+       /*
+        * Allocate data
+        */
+       if (data_len) {
+               if (data_bufferp == NULL) {
+                       switch(rec_type) {
+                       case HAMMER_RECTYPE_DATA:
+                               reclen = offsetof(struct hammer_data_record,
+                                                 data[0]);
+                               break;
+                       case HAMMER_RECTYPE_DIRENTRY:
+                               reclen = offsetof(struct hammer_entry_record,
+                                                 name[0]);
+                               break;
+                       default:
+                               panic("hammer_alloc_record: illegal "
+                                     "in-band data");
+                               /* NOT REACHED */
+                               reclen = 0;
+                               break;
+                       }
+                       KKASSERT(reclen + data_len <= HAMMER_RECORD_SIZE);
+                       data_offset = rec_offset + reclen;
+               } else if (data_len < HAMMER_BUFSIZE) {
+                       data_offset = hammer_blockmap_alloc(hmp,
+                                               HAMMER_ZONE_SMALL_DATA_INDEX,
+                                               data_len, errorp);
+               } else {
+                       data_offset = hammer_blockmap_alloc(hmp,
+                                               HAMMER_ZONE_LARGE_DATA_INDEX,
+                                               data_len, errorp);
+               }
+       } else {
+               data_offset = 0;
+       }
+       if (*errorp) {
+               hammer_blockmap_free(hmp, rec_offset, HAMMER_RECORD_SIZE);
+               return(NULL);
+       }
+
        /*
         * Basic return values.
         */
        *rec_offp = rec_offset;
-       if (data_offp)
-               *data_offp = rec_offset + aligned_rec_len;
-       rec = (void *)((char *)(*rec_bufferp)->ondisk +
-                      ((int32_t)rec_offset & HAMMER_BUFMASK));
-       if (data_len)
-               rec->base.data_off = rec_offset + aligned_rec_len;
+       rec = hammer_bread(hmp, rec_offset, errorp, rec_bufferp);
+       KKASSERT(*errorp == 0);
+       rec->base.data_off = data_offset;
        rec->base.data_len = data_len;
-       if (data1p)
-               *data1p = (void *)((char *)rec + aligned_rec_len);
-       if (data2_index) {
-               n = ((int32_t)rec_offset & HAMMER_BUFMASK) +
-                    aligned_rec_len + data_len;
-               if (n > HAMMER_BUFSIZE) {
-                       *data2_index = data_len - (n - HAMMER_BUFSIZE);
-                       KKASSERT(can_cross != 0);
-                       *data2p = (*data2_bufferp)->ondisk;
+       hammer_modify_buffer(*rec_bufferp, NULL, 0);
+
+       if (data_bufferp) {
+               if (data_len) {
+                       *datap = hammer_bread(hmp, data_offset, errorp,
+                                             data_bufferp);
+                       KKASSERT(*errorp == 0);
+                       hammer_modify_buffer(*data_bufferp, NULL, 0);
                } else {
-                       *data2_index = data_len;
-                       *data2p = NULL;
+                       *datap = NULL;
+               }
+       } else if (data_len) {
+               KKASSERT(data_offset + data_len - rec_offset <=
+                        HAMMER_RECORD_SIZE); 
+               if (datap) {
+                       *datap = (void *)((char *)rec +
+                                         (int32_t)(data_offset - rec_offset));
                }
        } else {
-               KKASSERT(data2p == NULL);
+               KKASSERT(datap == NULL);
        }
+       KKASSERT(*errorp == 0);
        return(rec);
 }
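The rewritten hammer_alloc_record() replaces the fifo allocator: the record comes from the record zone, data comes from the small-data or large-data zone, and passing data_bufferp == NULL asks for in-band placement inside the 96-byte record.  A hedged usage sketch for a short directory-entry name (alloc_inband_direntry() is hypothetical; namelen must be small enough to fit in-band or the allocator's KKASSERT fires):

static hammer_record_ondisk_t
alloc_inband_direntry(hammer_mount_t hmp, const char *name, int namelen,
		      struct hammer_buffer **rec_bufferp,
		      hammer_off_t *rec_offp, int *errorp)
{
	hammer_record_ondisk_t rec;
	void *bdata;

	/* data_bufferp == NULL: the name is embedded in the record. */
	rec = hammer_alloc_record(hmp, rec_offp, HAMMER_RECTYPE_DIRENTRY,
				  rec_bufferp, namelen, &bdata,
				  NULL, errorp);
	if (rec)
		bcopy(name, bdata, namelen);
	return (rec);
}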
 
@@ -1156,6 +1201,8 @@ hammer_alloc_record(hammer_mount_t hmp,
 int
 hammer_generate_undo(hammer_mount_t hmp, hammer_off_t off, void *base, int len)
 {
+       return(0);
+#if 0
        hammer_off_t rec_offset;
        hammer_fifo_undo_t undo;
        hammer_buffer_t buffer = NULL;
@@ -1173,8 +1220,11 @@ hammer_generate_undo(hammer_mount_t hmp, hammer_off_t off, void *base, int len)
        if (buffer)
                hammer_rel_buffer(buffer, 0);
        return(error);
+#endif
 }
 
+#if 0
+
 /*
  * Allocate space from the FIFO.  The first rec_len bytes will be zero'd.
  * The entire space is marked modified (the caller should not remark it as
@@ -1191,6 +1241,7 @@ hammer_alloc_fifo(hammer_mount_t hmp, int32_t rec_len, int32_t data_len,
        hammer_volume_t end_volume;
        hammer_volume_ondisk_t ondisk;
        hammer_fifo_head_t head;
+       hammer_fifo_tail_t tail;
        hammer_off_t end_off = 0;
        hammer_off_t tmp_off = 0;
        int32_t end_vol_no;
@@ -1199,12 +1250,14 @@ hammer_alloc_fifo(hammer_mount_t hmp, int32_t rec_len, int32_t data_len,
        int32_t aligned_bytes;
        int must_pad;
 
-       aligned_bytes = (rec_len + data_len + HAMMER_HEAD_ALIGN_MASK) &
-                       ~HAMMER_HEAD_ALIGN_MASK;
+       aligned_bytes = (rec_len + data_len + HAMMER_TAIL_ONDISK_SIZE +
+                        HAMMER_HEAD_ALIGN_MASK) & ~HAMMER_HEAD_ALIGN_MASK;
 
        root_volume = hammer_get_root_volume(hmp, errorp);
-       while (root_volume) {
+       if (root_volume)
                hammer_modify_volume(root_volume, NULL, 0);
+
+       while (root_volume) {
                ondisk = root_volume->ondisk;
 
                end_off = ondisk->vol0_fifo_end;
@@ -1278,6 +1331,10 @@ hammer_alloc_fifo(hammer_mount_t hmp, int32_t rec_len, int32_t data_len,
                 * The entire record cannot cross a buffer boundary if
                 * can_cross is 0.
                 *
+                * The entire record cannot cover more than two whole buffers
+                * regardless.  Even if the data portion is 16K, this case
+                * can occur due to the addition of the fifo_tail.
+                *
                 * It is illegal for a record to cross a volume boundary.
                 *
                 * It is illegal for a record to cross a recovery boundary
@@ -1302,36 +1359,79 @@ hammer_alloc_fifo(hammer_mount_t hmp, int32_t rec_len, int32_t data_len,
                            HAMMER_OFF_SHORT_REC_MASK) {
                                must_pad = 1;
                        }
+                       if (xoff + aligned_bytes - HAMMER_BUFSIZE >
+                           HAMMER_BUFSIZE) {
+                               KKASSERT(xoff != 0);
+                               must_pad = 1;
+                       }
                }
+
+               /*
+                * Pad to end of the buffer if necessary.  PADs can be
+                * squeezed into as little as 8 bytes (hence our alignment
+                * requirement).  The crc, reserved, and sequence number
+                * fields are not used, but initialize them anyway if there
+                * is enough room.
+                */
                if (must_pad) {
-                       must_pad = HAMMER_BUFSIZE - xoff;
+                       xoff = HAMMER_BUFSIZE - xoff;
                        head->hdr_signature = HAMMER_HEAD_SIGNATURE;
                        head->hdr_type = HAMMER_HEAD_TYPE_PAD;
-                       head->hdr_fwd_link = must_pad;
-                       head->hdr_seq = 0; /* XXX seq */
-                       KKASSERT((must_pad & 7) == 0);
+                       head->hdr_size = xoff;
+                       if (xoff >= HAMMER_HEAD_ONDISK_SIZE +
+                                   HAMMER_TAIL_ONDISK_SIZE) {
+                               head->hdr_crc = 0;
+                               head->hdr_reserved02 = 0;
+                               head->hdr_seq = 0;
+                       }
+
+                       tail = (void *)((char *)head + xoff -
+                                       HAMMER_TAIL_ONDISK_SIZE);
+                       if ((void *)head != (void *)tail) {
+                               tail->tail_signature = HAMMER_TAIL_SIGNATURE;
+                               tail->tail_type = HAMMER_HEAD_TYPE_PAD;
+                               tail->tail_size = xoff;
+                       }
+                       KKASSERT((xoff & HAMMER_HEAD_ALIGN_MASK) == 0);
                        ondisk->vol0_fifo_end =
                                hammer_advance_fifo((*rec_bufferp)->volume,
-                                                   end_off, must_pad);
-                       /* XXX rev_link */
+                                                   end_off, xoff);
                        continue;
                }
 
                if (xoff + aligned_bytes > HAMMER_BUFSIZE) {
-                       KKASSERT(xoff + aligned_bytes <= HAMMER_BUFSIZE * 2);
-                       hammer_bnew(hmp, end_off + (HAMMER_BUFSIZE - xoff),
-                                   errorp, data2_bufferp);
+                       xoff = xoff + aligned_bytes - HAMMER_BUFSIZE;
+
+                       KKASSERT(xoff <= HAMMER_BUFSIZE);
+                       tail = hammer_bnew(hmp, end_off + aligned_bytes -
+                                               HAMMER_TAIL_ONDISK_SIZE,
+                                          errorp, data2_bufferp);
                        hammer_modify_buffer(*data2_bufferp, NULL, 0);
                        if (*errorp)
                                goto done;
+
+                       /*
+                        * Retry if someone else appended to the fifo while
+                        * we were blocked.
+                        */
+                       if (ondisk->vol0_fifo_end != end_off)
+                               continue;
+               } else {
+                       tail = (void *)((char *)head + aligned_bytes -
+                                       HAMMER_TAIL_ONDISK_SIZE);
                }
 
+               bzero(head, rec_len);
                head->hdr_signature = HAMMER_HEAD_SIGNATURE;
                head->hdr_type = hdr_type;
-               head->hdr_fwd_link = aligned_bytes / 64;
-               head->hdr_rev_link = -1; /* XXX */
+               head->hdr_size = aligned_bytes;
                head->hdr_crc = 0;
-               head->hdr_seq = 0;      /* XXX */
+               head->hdr_seq = root_volume->ondisk->vol0_next_seq++;
+
+               tail->tail_signature = HAMMER_TAIL_SIGNATURE;
+               tail->tail_type = hdr_type;
+               tail->tail_size = aligned_bytes;
+
                ondisk->vol0_fifo_end =
                        hammer_advance_fifo((*rec_bufferp)->volume,
                                            end_off, aligned_bytes);
@@ -1358,7 +1458,7 @@ hammer_free_fifo(hammer_mount_t hmp, hammer_off_t fifo_offset)
        if (head) {
                hammer_modify_buffer(buffer, &head->hdr_type,
                                     sizeof(head->hdr_type));
-               head->hdr_type |= HAMMER_HEAD_TYPEF_FREED;
+               head->hdr_type |= HAMMER_HEAD_FLAG_FREE;
        }
        if (buffer)
                hammer_rel_buffer(buffer, 0);
@@ -1394,6 +1494,7 @@ hammer_advance_fifo(hammer_volume_t volume, hammer_off_t off, int32_t bytes)
        }
        return(off);
 }
+#endif
 
 /*
  * Sync dirty buffers to the media
@@ -1471,6 +1572,7 @@ hammer_sync_buffer(hammer_buffer_t buffer, void *data __unused)
        return(0);
 }
 
+#if 0
 /*
  * Generic buffer initialization.  Initialize the A-list into an all-allocated
  * state with the free block limit properly set.
@@ -1483,9 +1585,10 @@ hammer_init_fifo(hammer_fifo_head_t head, u_int16_t type)
 {
        head->hdr_signature = HAMMER_HEAD_SIGNATURE;
        head->hdr_type = type;
-       head->hdr_rev_link = 0;
-       head->hdr_fwd_link = 0;
+       head->hdr_size = 0;
        head->hdr_crc = 0;
        head->hdr_seq = 0;
 }
 
+#endif
+
diff --git a/sys/vfs/hammer/hammer_spike.c b/sys/vfs/hammer/hammer_spike.c
deleted file mode 100644 (file)
index 8b9a52d..0000000
+++ /dev/null
@@ -1,392 +0,0 @@
-/*
- * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
- * 
- * This code is derived from software contributed to The DragonFly Project
- * by Matthew Dillon <dillon@backplane.com>
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- * 3. Neither the name of The DragonFly Project nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific, prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
- * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * 
- * $DragonFly: src/sys/vfs/hammer/Attic/hammer_spike.c,v 1.15 2008/02/08 08:31:00 dillon Exp $
- */
-
-#include "hammer.h"
-
-#if 0
-
-/*
- * Load spike info given a cursor.  The cursor must point to the leaf node
- * that needs to be spiked after a failed insertion.
- */
-void
-hammer_load_spike(hammer_cursor_t cursor, struct hammer_cursor **spikep)
-{
-       hammer_cursor_t spike;
-
-       KKASSERT(cursor->node->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
-       KKASSERT(*spikep == NULL);
-       *spikep = spike = kmalloc(sizeof(*spike), M_HAMMER, M_WAITOK|M_ZERO);
-       ++hammer_count_spikes;
-
-       spike->parent = cursor->parent;
-       spike->parent_index = cursor->parent_index;
-       spike->node = cursor->node;
-       spike->index = cursor->index;
-       spike->left_bound = cursor->left_bound;
-       spike->right_bound = cursor->right_bound;
-       spike->key_beg = cursor->key_beg;
-
-       if (spike->parent) {
-               hammer_ref_node(spike->parent);
-               hammer_lock_sh(&spike->parent->lock);
-       }
-       hammer_ref_node(spike->node);
-       hammer_lock_sh(&spike->node->lock);
-       if (hammer_debug_general & 0x40)
-               kprintf("LOAD SPIKE %p\n", spike);
-}
-
-/*
- * Spike code - make room in a cluster by spiking in a new cluster.
- *
- * The spike structure contains a locked and reference B-Tree leaf node.
- * The spike at a minimum must move the contents of the leaf into a
- * new cluster and replace the leaf with two elements representing the
- * SPIKE_BEG and SPIKE_END.
- *
- * Various optimizations are desireable, including merging the spike node
- * with an adjacent node that has already been spiked, if its cluster is
- * not full, or promoting the spike node to the parent cluster of the current
- * cluster when it represents the right hand boundary leaf node in the
- * cluster (to avoid append chains).
- */
-int
-hammer_spike(struct hammer_cursor **spikep)
-{
-       hammer_cursor_t spike;
-       struct hammer_cursor ncursor;
-       hammer_cluster_t ocluster;
-       hammer_cluster_t ncluster;
-       hammer_node_ondisk_t ondisk;
-       hammer_btree_elm_t elm;
-       hammer_node_t onode;
-       hammer_record_ondisk_t rec;
-       hammer_node_locklist_t locklist = NULL;
-       int error;
-       int b, e;
-       const int esize = sizeof(*elm);
-
-       if (hammer_debug_general & 0x40)
-               kprintf("hammer_spike: ENOSPC in cluster, spiking\n");
-       /*Debugger("ENOSPC");*/
-
-       /*
-        * Validate and lock the spike.  If this fails due to a deadlock
-        * we still return 0 since a spike is only called when the
-        * caller intends to retry the operation.
-        */
-       spike = *spikep;
-       KKASSERT(spike != NULL);
-       KKASSERT(spike->parent &&
-                spike->parent->cluster == spike->node->cluster);
-       KKASSERT(spike->node->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
-
-       error = hammer_cursor_upgrade(spike);
-       if (error) {
-               error = 0;
-               goto failed4;
-       }
-
-       /*
-        * Our leaf may contain spikes.  We have to lock the root node
-        * in each target cluster.
-        */
-       error = hammer_btree_lock_children(spike, &locklist);
-       if (error) {
-               error = 0;
-               goto failed4;
-       }
-
-       onode = spike->node;
-       ocluster = onode->cluster;
-       ondisk = onode->ondisk;
-       hammer_lock_ex(&ocluster->io.lock);
-
-       /*
-        * Calculate the range of elements in the leaf that we will push
-        * down into our spike.  For the moment push them all down.
-        */
-       b = 0;
-       e = ondisk->count;
-
-       /*
-        * Use left-bound for spike if b == 0, else use the base element
-        * for the item to the left and adjust it past one unit.
-        */
-       if (b == 0) {
-               spike->key_beg = *spike->left_bound;
-       } else {
-               spike->key_beg = ondisk->elms[b-1].leaf.base;
-               if (spike->key_beg.create_tid != 0) {
-                       ++spike->key_beg.create_tid;
-               } else if (spike->key_beg.key != HAMMER_MAX_KEY) {
-                       ++spike->key_beg.key;
-                       spike->key_beg.create_tid = 1;
-               } else if (spike->key_beg.rec_type != HAMMER_MAX_RECTYPE) {
-                       ++spike->key_beg.rec_type;
-                       spike->key_beg.key = HAMMER_MIN_KEY;
-                       spike->key_beg.create_tid = 1;
-               } else if (spike->key_beg.obj_id != HAMMER_MAX_OBJID) {
-                       ++spike->key_beg.obj_id;
-                       spike->key_beg.key = HAMMER_MIN_KEY;
-                       spike->key_beg.create_tid = 1;
-                       spike->key_beg.rec_type = HAMMER_MIN_RECTYPE;
-               } else {
-                       panic("hammer_spike: illegal key");
-               }
-               KKASSERT(hammer_btree_cmp(&ondisk->elms[b-1].base, &spike->key_beg) < 0);
-       }
-
-       /*
-        * Use the right-bound if e is terminal, otherwise use the element
-        * at [e].  key_end is exclusive for the call to hammer_init_cluster()
-        * and is then made inclusive later to construct the SPIKE_END
-        * element.
-        */
-       if (e == ondisk->count)
-               spike->key_end = *spike->right_bound;
-       else
-               spike->key_end = ondisk->elms[e].leaf.base;
-
-       /*
-        * Heuristic:  Attempt to size the spike range according to
-        * expected traffic.  This is primarily responsible for the
-        * initial layout of the filesystem.
-        */
-       if (e && b != e) {
-               int32_t clsize = ocluster->volume->ondisk->vol_clsize;
-               int64_t delta = 1000000000;
-               int64_t dkey;
-
-               elm = &ondisk->elms[e-1];
-               if (elm->base.obj_id == spike->key_end.obj_id &&
-                   elm->base.rec_type == spike->key_end.rec_type) {
-                       /* 
-                        * NOTE: dkey can overflow.
-                        */
-                       dkey = elm->base.key + clsize;
-                       if (dkey > elm->base.key && dkey < spike->key_end.key)
-                               spike->key_end.key = elm->base.key + clsize;
-               } else if (elm->base.obj_id + delta < spike->key_end.obj_id) {
-                       spike->key_end.obj_id = elm->base.obj_id + delta;
-               }
-       }
-
-       /*
-        * Allocate and lock a new cluster, initialize its bounds.
-        */
-       ncluster = hammer_alloc_cluster(ocluster->volume->hmp, ocluster,
-                                       &error);
-       if (ncluster == NULL)
-               goto failed3;
-       hammer_init_cluster(ncluster, &spike->key_beg, &spike->key_end);
-
-       /*
-        * Get a cursor for the new cluster.  Operations will be limited to
-        * this cluster.  Set HAMMER_CURSOR_RECOVER to force internal
-        * boundary elements in a way that allows us to copy spikes.
-        */
-       error = hammer_init_cursor_cluster(&ncursor, ncluster);
-       if (error)
-               goto failed1;
-       ncursor.flags |= HAMMER_CURSOR_INSERT | HAMMER_CURSOR_RECOVER;
-
-       /*
-        * Copy the elements in the leaf node to the new target cluster.
-        */
-       for (spike->index = b; spike->index < e; ++spike->index) {
-               elm = &onode->ondisk->elms[spike->index];
-
-               if (elm->leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_END)
-                       continue;
-               error = hammer_btree_extract(spike,
-                                            HAMMER_CURSOR_GET_RECORD |
-                                            HAMMER_CURSOR_GET_DATA);
-               if (error == 0) {
-                       ncursor.key_beg = elm->leaf.base;
-                       error = hammer_write_record(&ncursor, spike->record,
-                                                   spike->data, spike->flags);
-               }
-
-               KKASSERT(error != EDEADLK);
-               if (error == ENOSPC) {
-                       kprintf("impossible ENOSPC error on spike\n");
-                       error = EIO;
-               }
-               if (error)
-                       goto failed1;
-       }
-
-       /*
-        * Delete the records and data associated with the old leaf node,
-        * replacing them with the spike elements.
-        *
-        * XXX I/O ordering issue, we're destroying these records too
-        * early, but we need one for the spike allocation.  What to do?
-        */
-       for (spike->index = b; spike->index < e; ++spike->index) {
-               int32_t roff;
-               u_int8_t rec_type;
-
-               elm = &onode->ondisk->elms[spike->index];
-               if (elm->leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_BEG)
-                       continue;
-               KKASSERT(elm->leaf.rec_offset > 0);
-               if (elm->leaf.base.btype == HAMMER_BTREE_TYPE_RECORD)
-                       rec_type = elm->leaf.base.rec_type;
-               else
-                       rec_type = HAMMER_RECTYPE_CLUSTER;
-               hammer_free_record(ocluster, elm->leaf.rec_offset, rec_type);
-               if (elm->leaf.base.btype == HAMMER_BTREE_TYPE_RECORD &&
-                   elm->leaf.data_offset) {
-                       roff = elm->leaf.data_offset - elm->leaf.rec_offset;
-                       if (roff < 0 || roff >= HAMMER_RECORD_SIZE) {
-                               hammer_free_data(ocluster,
-                                                elm->leaf.data_offset,
-                                                elm->leaf.data_len);
-                       }
-               }
-       }
-
-       /*
-        * Add a record representing the spike using space freed up by the
-        * above deletions.
-        */
-       rec = hammer_alloc_record(ocluster, &error,
-                                 HAMMER_RECTYPE_CLUSTER,
-                                 &spike->record_buffer);
-       KKASSERT(error == 0);
-       rec->spike.base.base.btype = HAMMER_BTREE_TYPE_RECORD;
-       rec->spike.base.base.rec_type = HAMMER_RECTYPE_CLUSTER;
-       rec->spike.base.rec_id = hammer_alloc_recid(ocluster);
-       rec->spike.clu_no = ncluster->clu_no;
-       rec->spike.vol_no = ncluster->volume->vol_no;
-       rec->spike.clu_id = 0;
-
-       /*
-        * Construct the spike elements.  Note that the right boundary
-        * is range-exclusive whereas the SPIKE_END must be range-inclusive.
-        */
-       hammer_modify_node(onode);
-       ondisk = onode->ondisk;
-       elm = &ondisk->elms[b];
-
-       if (e - b != 2)
-               bcopy(&elm[e - b], &elm[2], (ondisk->count - e) * esize);
-       ondisk->count = ondisk->count - (e - b) + 2;
-
-       elm[0].leaf.base = spike->key_beg;
-       elm[0].leaf.base.btype = HAMMER_BTREE_TYPE_SPIKE_BEG;
-       elm[0].leaf.rec_offset = hammer_bclu_offset(spike->record_buffer, rec);
-       elm[0].leaf.spike_clu_no = ncluster->clu_no;
-       elm[0].leaf.spike_vol_no = ncluster->volume->vol_no;
-       elm[0].leaf.spike_unused01 = 0;
-
-       elm[1].leaf.base = spike->key_end;
-       elm[1].leaf.base.btype = HAMMER_BTREE_TYPE_SPIKE_END;
-       elm[1].leaf.rec_offset = elm[0].leaf.rec_offset;
-       elm[1].leaf.spike_clu_no = ncluster->clu_no;
-       elm[1].leaf.spike_vol_no = ncluster->volume->vol_no;
-       elm[1].leaf.spike_unused01 = 0;
-
-       /*
-        * Make the SPIKE_END element inclusive.
-        */
-       if (elm[1].leaf.base.create_tid != 1) {
-               --elm[1].leaf.base.create_tid;
-       } else if (elm[0].leaf.base.key != HAMMER_MIN_KEY) {
-               --elm[0].leaf.base.key;
-               elm[0].leaf.base.create_tid = 0; /* max value */
-       } else if (elm[0].leaf.base.rec_type != HAMMER_MIN_RECTYPE) {
-               --elm[0].leaf.base.rec_type;
-               elm[0].leaf.base.key = HAMMER_MAX_KEY;
-               elm[0].leaf.base.create_tid = 0; /* max value */
-       } else if (elm[0].leaf.base.obj_id != HAMMER_MIN_OBJID) {
-               --elm[0].leaf.base.obj_id;
-               elm[0].leaf.base.rec_type = HAMMER_MAX_RECTYPE;
-               elm[0].leaf.base.key = HAMMER_MAX_KEY;
-               elm[0].leaf.base.create_tid = 0; /* max value */
-       } else {
-               panic("hammer_spike: illegal key");
-       }
-
-       /*
-        * Adjust ncluster
-        */
-       {
-               hammer_cluster_ondisk_t ondisk;
-
-               hammer_modify_cluster(ncluster);
-               ondisk = ncluster->ondisk;
-               ondisk->clu_btree_parent_vol_no = ocluster->volume->vol_no;
-               ondisk->clu_btree_parent_clu_no = ocluster->clu_no;
-               ondisk->clu_btree_parent_offset = onode->node_offset;
-               ondisk->clu_btree_parent_clu_gen = ocluster->ondisk->clu_gen;
-       }
-
-       /*
-        * XXX I/O dependancy - new cluster must be flushed before current
-        * cluster can be flushed.
-        */
-       /*Debugger("COPY COMPLETE");*/
-       hammer_done_cursor(&ncursor);
-       goto success;
-
-       /*
-        * Cleanup
-        */
-failed1:
-       hammer_done_cursor(&ncursor);
-       hammer_free_cluster(ncluster);
-success:
-       hammer_unlock(&ncluster->io.lock);
-       hammer_rel_cluster(ncluster, 0);
-failed3:
-       if (hammer_debug_general & 0x40)
-               kprintf("UNLOAD SPIKE %p %d\n", spike, error);
-       hammer_unlock(&ocluster->io.lock);
-failed4:
-       hammer_btree_unlock_children(&locklist);
-       hammer_done_cursor(spike);
-       --hammer_count_spikes;
-       kfree(spike, M_HAMMER);
-       *spikep = NULL;
-       return (error);
-}
-
-
-#endif
index 9ade65a..674a7bf 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.18 2008/02/08 08:31:00 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.19 2008/02/10 09:51:01 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -175,6 +175,7 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
                hmp->root_btree_end.delete_tid = 0;   /* special case */
                hmp->root_btree_end.rec_type = 0xFFFFU;
                hmp->root_btree_end.obj_type = 0;
+               lockinit(&hmp->blockmap_lock, "blkmap", 0, 0);
        }
        hmp->hflags = info.hflags;
        if (info.asof) {
@@ -200,6 +201,7 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
 
        RB_INIT(&hmp->rb_vols_root);
        RB_INIT(&hmp->rb_inos_root);
+       RB_INIT(&hmp->rb_nods_root);
        hmp->ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
 
        /*
@@ -349,6 +351,7 @@ hammer_free_hmp(struct mount *mp)
        mp->mnt_flag &= ~MNT_LOCAL;
        hmp->mp = NULL;
        kfree(hmp->zbuf, M_HAMMER);
+       lockuninit(&hmp->blockmap_lock);
        kfree(hmp, M_HAMMER);
 }
 
@@ -404,9 +407,6 @@ hammer_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
        hammer_volume_ondisk_t ondisk;
        int error;
        int64_t bfree;
-       int32_t vol_no;
-       hammer_off_t fifo_beg;
-       hammer_off_t fifo_end;
 
        volume = hammer_get_root_volume(hmp, &error);
        if (error)
@@ -417,9 +417,11 @@ hammer_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
         * Basic stats
         */
        mp->mnt_stat.f_files = ondisk->vol0_stat_inodes;
+       bfree = 0;
+       hammer_rel_volume(volume, 0);
+#if 0
        fifo_beg = ondisk->vol0_fifo_beg;
        fifo_end = ondisk->vol0_fifo_end;
-       hammer_rel_volume(volume, 0);
 
        /*
         * Calculate how many free blocks we have by counting the
@@ -442,6 +444,7 @@ hammer_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
                fifo_end = HAMMER_ENCODE_RAW_BUFFER(vol_no, 0);
                hammer_rel_volume(volume, 0);
        }
+#endif
        mp->mnt_stat.f_bfree = bfree / HAMMER_BUFSIZE;
        mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree;
        if (mp->mnt_stat.f_files < 0)
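
With the volume FIFO scan disabled above, statfs temporarily has no source for a free byte count and reports zero free space until blockmap-based accounting replaces the old calculation. The fragment below is a minimal sketch of how such a byte count would feed the statfs fields; the helper name is hypothetical and HAMMER_BUFSIZE is simply the buffer size used as the reporting unit.

#include <sys/param.h>
#include <sys/types.h>
#include <sys/mount.h>

/*
 * Hypothetical helper, not in the tree: convert a free byte count into
 * the statfs block fields.  While the old FIFO scan is #if 0'd out the
 * caller effectively passes 0, so df(1) shows no free space for now.
 */
static void
demo_fill_statfs_free(struct statfs *sbp, int64_t free_bytes, int bufsize)
{
	sbp->f_bfree  = free_bytes / bufsize;	/* whole buffers */
	sbp->f_bavail = sbp->f_bfree;		/* no separate reserve */
}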
index 4d76beb..50365fe 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.29 2008/02/08 08:31:00 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.30 2008/02/10 09:51:01 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -666,7 +666,7 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap)
                        break;
                rec = cursor.record;
                if (nlen == rec->entry.base.data_len &&
-                   bcmp(ncp->nc_name, cursor.data1, nlen) == 0) {
+                   bcmp(ncp->nc_name, cursor.data, nlen) == 0) {
                        obj_id = rec->entry.obj_id;
                        break;
                }
@@ -1047,7 +1047,7 @@ hammer_vop_readdir(struct vop_readdir_args *ap)
        error = hammer_ip_first(&cursor, ip);
 
        while (error == 0) {
-               error = hammer_ip_resolve_data(&cursor);
+               error = hammer_ip_resolve_record_and_data(&cursor);
                if (error)
                        break;
                rec = cursor.record;
@@ -1061,7 +1061,7 @@ hammer_vop_readdir(struct vop_readdir_args *ap)
                             &error, uio, rec->entry.obj_id,
                             hammer_get_dtype(rec->entry.base.base.obj_type),
                             rec->entry.base.data_len,
-                            (void *)cursor.data1);
+                            (void *)cursor.data);
                if (r)
                        break;
                ++saveoff;
@@ -1128,7 +1128,7 @@ hammer_vop_readlink(struct vop_readlink_args *ap)
        if (error == 0) {
                error = hammer_ip_resolve_data(&cursor);
                if (error == 0) {
-                       error = uiomove((char *)cursor.data1,
+                       error = uiomove((char *)cursor.data,
                                        cursor.record->base.data_len,
                                        ap->a_uio);
                }
@@ -1237,7 +1237,7 @@ retry:
                        break;
                rec = cursor.record;
                if (fncp->nc_nlen == rec->entry.base.data_len &&
-                   bcmp(fncp->nc_name, cursor.data1, fncp->nc_nlen) == 0) {
+                   bcmp(fncp->nc_name, cursor.data, fncp->nc_nlen) == 0) {
                        break;
                }
                error = hammer_ip_next(&cursor);
@@ -1475,7 +1475,7 @@ hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
         * as pure data, not a string, and is not \0 terminated.
         */
        if (error == 0) {
-               record = hammer_alloc_mem_record(nip, sizeof(struct hammer_base_record));
+               record = hammer_alloc_mem_record(nip);
                bytes = strlen(ap->a_target);
 
                record->rec.base.base.key = HAMMER_FIXKEY_SYMLINK;
@@ -1621,7 +1621,6 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap)
        int error;
        int boff;
        int roff;
-       int x;
        int n;
 
        bio = ap->a_bio;
@@ -1695,25 +1694,8 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap)
                KKASSERT(n > 0);
                if (n > bp->b_bufsize - boff)
                        n = bp->b_bufsize - boff;
-               if (roff + n > cursor.data_split) {
-                       if (roff < cursor.data_split) {
-                               x = cursor.data_split - roff;
-                               bcopy((char *)cursor.data1 + roff,
-                                     (char *)bp->b_data + boff,
-                                     x);
-                               bcopy((char *)cursor.data2,
-                                     (char *)bp->b_data + boff + x,
-                                     n - x);
-                       } else {
-                               bcopy((char *)cursor.data2 + roff -
-                                     cursor.data_split,
-                                     (char *)bp->b_data + boff,
-                                     n);
-                       }
-               } else {
-                       bcopy((char *)cursor.data1 + roff,
-                             (char *)bp->b_data + boff, n);
-               }
+               bcopy((char *)cursor.data + roff,
+                     (char *)bp->b_data + boff, n);
                boff += n;
                if (boff == bp->b_bufsize)
                        break;
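
The strategy_read hunk above drops the handling for record data split across two buffers: a single bcopy from cursor.data now covers the request. For reference, the sketch below isolates the removed two-buffer copy as a hypothetical helper; data1, data2 and split stand in for the old cursor.data1, cursor.data2 and cursor.data_split fields.

#include <sys/types.h>
#include <strings.h>	/* bcopy(); kernel code would use <sys/systm.h> */

/*
 * Hypothetical helper, not part of the tree: the split-buffer copy the
 * hunk above removes.  The first 'split' bytes of the record's data
 * live in one buffer (data1), the remainder in a second (data2); 'roff'
 * is the offset into the logical data and 'n' the byte count to copy.
 */
static void
demo_copy_split_data(char *dst, const char *data1, const char *data2,
		     int split, int roff, int n)
{
	if (roff + n > split) {
		if (roff < split) {
			int x = split - roff;		/* bytes still in data1 */

			bcopy(data1 + roff, dst, x);
			bcopy(data2, dst + x, n - x);
		} else {
			bcopy(data2 + (roff - split), dst, n);
		}
	} else {
		bcopy(data1 + roff, dst, n);		/* entirely within data1 */
	}
}

With cursor.data always pointing at contiguous data, the same request collapses to the single bcopy(cursor.data + roff, bp->b_data + boff, n) shown in the replacement lines.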
@@ -1868,7 +1850,7 @@ retry:
                        break;
                rec = cursor.record;
                if (ncp->nc_nlen == rec->entry.base.data_len &&
-                   bcmp(ncp->nc_name, cursor.data1, ncp->nc_nlen) == 0) {
+                   bcmp(ncp->nc_name, cursor.data, ncp->nc_nlen) == 0) {
                        break;
                }
                error = hammer_ip_next(&cursor);