HAMMER 27/many: Major surgery - change allocation model
authorMatthew Dillon <dillon@dragonflybsd.org>
Fri, 8 Feb 2008 08:31:00 +0000 (08:31 +0000)
committerMatthew Dillon <dillon@dragonflybsd.org>
Fri, 8 Feb 2008 08:31:00 +0000 (08:31 +0000)
After getting stuck on the recovery code and highly suboptimal write
performance issues, remove the super-cluster/cluster and radix tree bitmap
infrastructure and replace it with a circular FIFO.

* Nothing is localized yet with this major surgery commit, which means
  radix nodes, hammer records, file data, and undo fifo elements are
  all being written to a single fifo.  These elements will soon get their
  own abstracted fifos (and in particular, the undo elements will get a
  fixed-sized circular fifo and be considered temporary data).

* No sequence numbers or transaction spaces are generated yet.

* Create a 'hammer_off_t' type (64 bits).  This type reserves 4 bits for
  a zone.  Zones which encode volume numbers reserve another 8 bits,
  giving us a 52 bit byte offset able to represent up to 4096 TB per
  volume.  Zones which do not encode volume numbers have 60 bits available
  for an abstracted offset, resulting in a maximum filesystem size of
  2^60 bytes (1 EiB, i.e. roughly a million TB).  Up to 15 zones can be encoded.

  As of this commit only 2 zones are implemented to wrap up existing
  functionality.

* Adjust the B-Tree to use full 64 bit hammer offsets.  Have one global B-Tree
  for the entire filesystem.  The tree is no longer per-cluster.

* Scrap the recovery and spike code.  Scrap the cluster and super-cluster
  code.  Scrap those portions of the B-Tree code that dealt with spikes.
  Scrap those portions of the IO subsystem that dealt with marking a
  cluster open or closed.

* Expand the hammer_modify_*() functions to include a data range and add
  UNDO record generation.  Do not implement buffer ordering dependencies
  yet (ordering issues are going to change radically with the FIFO model).

31 files changed:
sbin/hammer/Makefile
sbin/hammer/buffer_alist.c [deleted file]
sbin/hammer/cache.c
sbin/hammer/cmd_show.c
sbin/hammer/hammer.c
sbin/hammer/hammer.h
sbin/hammer/hammer_util.h
sbin/hammer/ondisk.c
sbin/hammer/super_alist.c [deleted file]
sbin/newfs_hammer/Makefile
sbin/newfs_hammer/newfs_hammer.c
sys/conf/files
sys/vfs/hammer/Makefile
sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_alist.c [deleted file]
sys/vfs/hammer/hammer_alist.h [deleted file]
sys/vfs/hammer/hammer_btree.c
sys/vfs/hammer/hammer_btree.h
sys/vfs/hammer/hammer_cursor.c
sys/vfs/hammer/hammer_cursor.h
sys/vfs/hammer/hammer_disk.h
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_io.c
sys/vfs/hammer/hammer_ioctl.c
sys/vfs/hammer/hammer_object.c
sys/vfs/hammer/hammer_ondisk.c
sys/vfs/hammer/hammer_recover.c
sys/vfs/hammer/hammer_spike.c
sys/vfs/hammer/hammer_transaction.c
sys/vfs/hammer/hammer_vfsops.c
sys/vfs/hammer/hammer_vnops.c

index 506867b..39aa20b 100644 (file)
@@ -1,8 +1,8 @@
 #
-# $DragonFly: src/sbin/hammer/Makefile,v 1.4 2008/02/04 08:34:22 dillon Exp $
+# $DragonFly: src/sbin/hammer/Makefile,v 1.5 2008/02/08 08:30:56 dillon Exp $
 
 PROG=  hammer
-SRCS=  hammer.c buffer_alist.c ondisk.c cache.c super_alist.c \
+SRCS=  hammer.c ondisk.c cache.c \
        misc.c cmd_show.c cmd_prune.c cmd_history.c
 MAN=   hammer.8
 
@@ -10,7 +10,5 @@ CFLAGS+= -I${.CURDIR}/../../sys -DALIST_NO_DEBUG
 
 .PATH: ${.CURDIR}/../../sys/libkern
 SRCS+= crc32.c
-.PATH: ${.CURDIR}/../../sys/vfs/hammer
-SRCS+= hammer_alist.c
 
 .include <bsd.prog.mk>
diff --git a/sbin/hammer/buffer_alist.c b/sbin/hammer/buffer_alist.c
deleted file mode 100644 (file)
index 9fe610a..0000000
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
- * 
- * This code is derived from software contributed to The DragonFly Project
- * by Matthew Dillon <dillon@backplane.com>
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- * 3. Neither the name of The DragonFly Project nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific, prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
- * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * 
- * $DragonFly: src/sbin/hammer/Attic/buffer_alist.c,v 1.3 2008/01/03 06:48:45 dillon Exp $
- */
-/*
- * Implement the super-cluster A-list recursion for the cluster allocator.
- *
- * Volume A-list -> supercluster A-list -> cluster
- */
-
-#include <sys/types.h>
-#include <assert.h>
-#include "hammer_util.h"
-
-/*
- * Buffers are already initialized by alloc_new_buffer() so our init/destroy
- * code shouldn't do anything.
- */
-static int
-buffer_alist_init(void *info __unused, int32_t blk __unused,
-                 int32_t radix __unused, hammer_alloc_state_t state __unused)
-{
-       return(0);
-}
-
-static int
-buffer_alist_destroy(void *info __unused, int32_t blk __unused,
-                    int32_t radix __unused)
-{
-       return(0);
-}
-
-static int
-buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix __unused,
-                     int32_t count, int32_t atblk, int32_t *fullp)
-{
-       struct cluster_info *cluster = info;
-       struct buffer_info *buf;
-       int32_t buf_no;
-       int32_t r;
-
-       buf_no = blk / HAMMER_FSBUF_MAXBLKS;
-       buf = get_buffer(cluster, buf_no, 0);
-       assert(buf->ondisk->head.buf_type != 0);
-
-       r = hammer_alist_alloc_fwd(&buf->alist, count, atblk - blk);
-       if (r != HAMMER_ALIST_BLOCK_NONE)
-               r += blk;
-       *fullp = hammer_alist_isfull(&buf->alist);
-       rel_buffer(buf);
-       return(r);
-}
-
-static int
-buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix __unused,
-                     int32_t count, int32_t atblk, int32_t *fullp)
-{
-       struct cluster_info *cluster = info;
-       struct buffer_info *buf;
-       int32_t buf_no;
-       int32_t r;
-
-       buf_no = blk / HAMMER_FSBUF_MAXBLKS;
-       buf = get_buffer(cluster, buf_no, 0);
-       assert(buf->ondisk->head.buf_type != 0);
-
-       r = hammer_alist_alloc_rev(&buf->alist, count, atblk - blk);
-       if (r != HAMMER_ALIST_BLOCK_NONE)
-               r += blk;
-       *fullp = hammer_alist_isfull(&buf->alist);
-       rel_buffer(buf);
-       return(r);
-}
-
-static void
-buffer_alist_free(void *info, int32_t blk, int32_t radix __unused,
-                int32_t base_blk, int32_t count, int32_t *emptyp)
-{
-       struct cluster_info *cluster = info;
-       struct buffer_info *buf;
-       int32_t buf_no;
-
-       buf_no = blk / HAMMER_FSBUF_MAXBLKS;
-       buf = get_buffer(cluster, buf_no, 0);
-       assert(buf->ondisk->head.buf_type != 0);
-       hammer_alist_free(&buf->alist, base_blk, count);
-       *emptyp = hammer_alist_isempty(&buf->alist);
-       rel_buffer(buf);
-}
-
-static void
-buffer_alist_print(void *info __unused, int32_t blk __unused,
-                  int32_t radix __unused, int tab __unused)
-{
-}
-
-void
-hammer_buffer_alist_template(hammer_alist_config_t config)
-{
-        config->bl_radix_init = buffer_alist_init;
-        config->bl_radix_destroy = buffer_alist_destroy;
-        config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd;
-        config->bl_radix_alloc_rev = buffer_alist_alloc_rev;
-        config->bl_radix_free = buffer_alist_free;
-        config->bl_radix_print = buffer_alist_print;
-}
-
index e2b5ac2..0346c9c 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sbin/hammer/cache.c,v 1.3 2008/01/21 00:03:31 dillon Exp $
+ * $DragonFly: src/sbin/hammer/cache.c,v 1.4 2008/02/08 08:30:56 dillon Exp $
  */
 
 #include <sys/types.h>
@@ -95,12 +95,6 @@ hammer_cache_flush(void)
                        case ISVOLUME:
                                rel_volume(cache->u.volume);
                                break;
-                       case ISSUPERCL:
-                               rel_supercl(cache->u.supercl);
-                               break;
-                       case ISCLUSTER:
-                               rel_cluster(cache->u.cluster);
-                               break;
                        case ISBUFFER:
                                rel_buffer(cache->u.buffer);
                                break;
index 4e78839..4c25f28 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sbin/hammer/cmd_show.c,v 1.4 2008/01/25 05:53:41 dillon Exp $
+ * $DragonFly: src/sbin/hammer/cmd_show.c,v 1.5 2008/02/08 08:30:56 dillon Exp $
  */
 
 #include "hammer.h"
@@ -40,8 +40,7 @@
 #define FLAG_TOOFARRIGHT       0x0002
 #define FLAG_BADTYPE           0x0004
 
-static void print_btree_node(struct cluster_info *cluster,
-                       int32_t node_offset, int depth, int spike,
+static void print_btree_node(hammer_off_t node_offset, int depth, int spike,
                        hammer_base_elm_t left_bound,
                        hammer_base_elm_t right_bound);
 static void print_btree_elm(hammer_btree_elm_t elm, int i, u_int8_t type,
@@ -51,48 +50,27 @@ static int print_elm_flags(hammer_node_ondisk_t node, hammer_btree_elm_t elm,
                        hammer_base_elm_t right_bound);
 
 void
-hammer_cmd_show(int32_t vol_no, int32_t clu_no, int depth)
+hammer_cmd_show(hammer_off_t node_offset, int depth,
+               hammer_base_elm_t left_bound, hammer_base_elm_t right_bound)
 {
        struct volume_info *volume;
-       struct cluster_info *cluster;
-       int32_t node_offset;
 
-       if (vol_no == -1) {
-               if (RootVolNo < 0)
-                       errx(1, "hammer show: root volume number unknown");
-               vol_no = RootVolNo;
-       }
-       volume = get_volume(vol_no);
-       if (volume == NULL)
-               errx(1, "hammer show: Unable to locate volume %d", vol_no);
-       if (clu_no == -1)
-               clu_no = volume->ondisk->vol0_root_clu_no;
-       cluster = get_cluster(volume, clu_no, 0);
-       printf("show %d:%d root@%08x parent@%d:%d:%08x depth %d\n",
-              vol_no, clu_no,
-              cluster->ondisk->clu_btree_root,
-              cluster->ondisk->clu_btree_parent_vol_no,
-              cluster->ondisk->clu_btree_parent_clu_no,
-              cluster->ondisk->clu_btree_parent_offset,
-              depth);
-       if (VerboseOpt) {
-               printf("\trecords=%d\n",
-                      cluster->ondisk->stat_records);
+       if (node_offset == (hammer_off_t)-1) {
+               volume = get_volume(RootVolNo);
+               node_offset = volume->ondisk->vol0_btree_root;
+               if (VerboseOpt) {
+                       printf("\trecords=%lld\n",
+                              volume->ondisk->vol0_stat_records);
+               }
+               rel_volume(volume);
        }
-       node_offset = cluster->ondisk->clu_btree_root;
-       print_btree_node(cluster, node_offset, depth, 0,
-                        &cluster->ondisk->clu_btree_beg,
-                        &cluster->ondisk->clu_btree_end);
-       print_btree_node(cluster, node_offset, depth, 1,
-                        &cluster->ondisk->clu_btree_beg,
-                        &cluster->ondisk->clu_btree_end);
-       rel_cluster(cluster);
-       rel_volume(volume);
+       printf("show %016llx depth %d\n", node_offset, depth);
+       print_btree_node(node_offset, depth, 0, left_bound, right_bound);
+       print_btree_node(node_offset, depth, 1, left_bound, right_bound);
 }
 
 static void
-print_btree_node(struct cluster_info *cluster, int32_t node_offset, int depth,
-                int spike,
+print_btree_node(hammer_off_t node_offset, int depth, int spike,
                 hammer_base_elm_t left_bound, hammer_base_elm_t right_bound)
 {
        struct buffer_info *buffer = NULL;
@@ -101,16 +79,13 @@ print_btree_node(struct cluster_info *cluster, int32_t node_offset, int depth,
        int i;
        int flags;
 
-       node = get_node(cluster, node_offset, &buffer);
+       node = get_node(node_offset, &buffer);
 
        if (spike == 0) {
-               printf("    NODE %d:%d:%08x count=%d parent=%08x "
+               printf("    NODE %016llx count=%d parent=%016llx "
                       "type=%c depth=%d {\n",
-                      cluster->volume->vol_no,
-                      cluster->clu_no,
-                       node_offset, node->count, node->parent,
-                       (node->type ? node->type : '?'),
-                       depth);
+                      node_offset, node->count, node->parent,
+                      (node->type ? node->type : '?'), depth);
 
                for (i = 0; i < node->count; ++i) {
                        elm = &node->elms[i];
@@ -133,19 +108,12 @@ print_btree_node(struct cluster_info *cluster, int32_t node_offset, int depth,
                switch(node->type) {
                case HAMMER_BTREE_TYPE_INTERNAL:
                        if (elm->internal.subtree_offset) {
-                               print_btree_node(cluster,
-                                                elm->internal.subtree_offset,
+                               print_btree_node(elm->internal.subtree_offset,
                                                 depth + 1, spike,
                                                 &elm[0].base, &elm[1].base);
                        }
                        break;
-               case HAMMER_BTREE_TYPE_LEAF:
-                       if (RecurseOpt && spike &&
-                           elm->leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_END) {
-                               hammer_cmd_show(elm->leaf.spike_vol_no,
-                                               elm->leaf.spike_clu_no,
-                                               depth + 1);
-                       }
+               default:
                        break;
                }
        }
@@ -181,18 +149,12 @@ print_btree_elm(hammer_btree_elm_t elm, int i, u_int8_t type,
 
        switch(type) {
        case HAMMER_BTREE_TYPE_INTERNAL:
-               printf("suboff=%08x", elm->internal.subtree_offset);
+               printf("suboff=%016llx", elm->internal.subtree_offset);
                break;
        case HAMMER_BTREE_TYPE_LEAF:
                switch(elm->base.btype) {
                case HAMMER_BTREE_TYPE_RECORD:
-                       printf("recoff=%08x", elm->leaf.rec_offset);
-                       break;
-               case HAMMER_BTREE_TYPE_SPIKE_BEG:
-               case HAMMER_BTREE_TYPE_SPIKE_END:
-                       printf("spike %d:%d",
-                              elm->leaf.spike_vol_no,
-                              elm->leaf.spike_clu_no);
+                       printf("recoff=%016llx", elm->leaf.rec_offset);
                        break;
                }
                break;
@@ -214,12 +176,16 @@ print_elm_flags(hammer_node_ondisk_t node, hammer_btree_elm_t elm,
        case HAMMER_BTREE_TYPE_INTERNAL:
                switch(btype) {
                case HAMMER_BTREE_TYPE_INTERNAL:
+                       if (left_bound == NULL || right_bound == NULL)
+                               break;
                        if (hammer_btree_cmp(&elm->base, left_bound) < 0)
                                flags |= FLAG_TOOFARLEFT;
                        if (hammer_btree_cmp(&elm->base, right_bound) > 0)
                                flags |= FLAG_TOOFARRIGHT;
                        break;
                case HAMMER_BTREE_TYPE_LEAF:
+                       if (left_bound == NULL || right_bound == NULL)
+                               break;
                        if (hammer_btree_cmp(&elm->base, left_bound) < 0)
                                flags |= FLAG_TOOFARLEFT;
                        if (hammer_btree_cmp(&elm->base, right_bound) >= 0)
@@ -232,14 +198,9 @@ print_elm_flags(hammer_node_ondisk_t node, hammer_btree_elm_t elm,
                break;
        case HAMMER_BTREE_TYPE_LEAF:
                switch(btype) {
-               case HAMMER_BTREE_TYPE_SPIKE_END:
-                       if (hammer_btree_cmp(&elm->base, left_bound) < 0)
-                               flags |= FLAG_TOOFARLEFT;
-                       if (hammer_btree_cmp(&elm->base, right_bound) > 0)
-                               flags |= FLAG_TOOFARRIGHT;
-                       break;
-               case HAMMER_BTREE_TYPE_SPIKE_BEG:
                case HAMMER_BTREE_TYPE_RECORD:
+                       if (left_bound == NULL || right_bound == NULL)
+                               break;
                        if (hammer_btree_cmp(&elm->base, left_bound) < 0)
                                flags |= FLAG_TOOFARLEFT;
                        if (hammer_btree_cmp(&elm->base, right_bound) >= 0)
index 53c1a56..c3dc2ec 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sbin/hammer/hammer.c,v 1.7 2008/02/04 08:34:22 dillon Exp $
+ * $DragonFly: src/sbin/hammer/hammer.c,v 1.8 2008/02/08 08:30:56 dillon Exp $
  */
 
 #include "hammer.h"
@@ -48,7 +48,7 @@ main(int ac, char **av)
 {
        u_int64_t tid;
        int ch;
-       int status;
+       u_int32_t status;
        char *blkdevs = NULL;
 
        while ((ch = getopt(ac, av, "hf:rv")) != -1) {
@@ -127,15 +127,13 @@ main(int ac, char **av)
                        "HAMMER filesystem type");
        }
 
-       init_alist_templates();
        if (strcmp(av[0], "show") == 0) {
-               int32_t vol_no = -1;
-               int32_t clu_no = -1;
+               hammer_off_t node_offset = (hammer_off_t)-1;
 
                hammer_parsedevs(blkdevs);
                if (ac > 1)
-                       sscanf(av[1], "%d:%d", &vol_no, &clu_no);
-               hammer_cmd_show(vol_no, clu_no, 0);
+                       sscanf(av[1], "%llx", &node_offset);
+               hammer_cmd_show(node_offset, 0, NULL, NULL);
                exit(0);
        }
        usage(1);
index 245ff96..a9b8a21 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sbin/hammer/hammer.h,v 1.5 2008/02/04 08:34:22 dillon Exp $
+ * $DragonFly: src/sbin/hammer/hammer.h,v 1.6 2008/02/08 08:30:56 dillon Exp $
  */
 
 #include <sys/types.h>
@@ -58,7 +58,8 @@
 extern int RecurseOpt;
 extern int VerboseOpt;
 
-void hammer_cmd_show(int32_t vol_no, int32_t clu_no, int depth);
+void hammer_cmd_show(hammer_off_t node_offset, int depth,
+               hammer_base_elm_t left_bound, hammer_base_elm_t right_bound);
 void hammer_cmd_prune(char **av, int ac);
 void hammer_cmd_history(const char *offset_str, char **av, int ac);
 
index 651d4ed..e343f1d 100644 (file)
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sbin/hammer/hammer_util.h,v 1.7 2008/01/24 02:16:47 dillon Exp $
+ * $DragonFly: src/sbin/hammer/hammer_util.h,v 1.8 2008/02/08 08:30:56 dillon Exp $
  */
 
 #include <sys/types.h>
 #include <sys/tree.h>
 #include <sys/queue.h>
 
-#include <vfs/hammer/hammer_alist.h>
 #include <vfs/hammer/hammer_disk.h>
 #include <uuid.h>
 
@@ -46,8 +45,6 @@
  * Cache management - so the user code can keep its memory use under control
  */
 struct volume_info;
-struct supercl_info;
-struct cluster_info;
 struct buffer_info;
 
 TAILQ_HEAD(volume_list, volume_info);
@@ -56,11 +53,9 @@ struct cache_info {
        TAILQ_ENTRY(cache_info) entry;
        union {
                struct volume_info *volume;
-               struct supercl_info *supercl;
-               struct cluster_info *cluster;
                struct buffer_info *buffer;
        } u;
-       enum cache_type { ISVOLUME, ISSUPERCL, ISCLUSTER, ISBUFFER } type;
+       enum cache_type { ISVOLUME, ISBUFFER } type;
        int refs;       /* structural references */
        int modified;   /* ondisk modified flag */
        int delete;     /* delete flag - delete on last ref */
@@ -81,71 +76,25 @@ struct volume_info {
        int                     fd;
        off_t                   size;
        const char              *type;
-       int                     using_supercl;
 
-       struct hammer_alist_live clu_alist;     /* cluster allocator */
-       struct hammer_alist_live buf_alist;     /* buffer head only */
        struct hammer_volume_ondisk *ondisk;
 
-       TAILQ_HEAD(, cluster_info) cluster_list;
-       TAILQ_HEAD(, supercl_info) supercl_list;
-};
-
-struct supercl_info {
-       struct cache_info       cache;
-       TAILQ_ENTRY(supercl_info) entry;
-       int32_t                 scl_no;
-       int64_t                 scl_offset;
-
-       struct volume_info      *volume;
-
-       struct hammer_alist_live clu_alist;
-       struct hammer_alist_live buf_alist;
-       struct hammer_supercl_ondisk *ondisk;
-};
-
-struct cluster_info {
-       struct cache_info       cache;
-       TAILQ_ENTRY(cluster_info) entry;
-       int32_t                 clu_no;
-       int64_t                 clu_offset;
-
-       struct supercl_info     *supercl;
-       struct volume_info      *volume;
-
-       struct hammer_alist_live alist_master;
-       struct hammer_alist_live alist_btree;
-       struct hammer_alist_live alist_record;
-       struct hammer_alist_live alist_mdata;
-       struct hammer_cluster_ondisk *ondisk;
-
        TAILQ_HEAD(, buffer_info) buffer_list;
 };
 
 struct buffer_info {
        struct cache_info       cache;
        TAILQ_ENTRY(buffer_info) entry;
-       int32_t                 buf_no;
-       int64_t                 buf_offset;
-
-       struct cluster_info     *cluster;
+       hammer_off_t            buf_offset;     /* full hammer offset spec */
+       int64_t                 buf_disk_offset;/* relative to blkdev */
        struct volume_info      *volume;
-
-       struct hammer_alist_live alist;
-       hammer_fsbuf_ondisk_t   ondisk;
+       void                    *ondisk;
 };
 
-extern struct hammer_alist_config Buf_alist_config;
-extern struct hammer_alist_config Vol_normal_alist_config;
-extern struct hammer_alist_config Vol_super_alist_config;
-extern struct hammer_alist_config Supercl_alist_config;
-extern struct hammer_alist_config Clu_master_alist_config;
-extern struct hammer_alist_config Clu_slave_alist_config;
 extern uuid_t Hammer_FSType;
 extern uuid_t Hammer_FSId;
 extern int64_t BootAreaSize;
 extern int64_t MemAreaSize;
-extern int UsingSuperClusters;
 extern int NumVolumes;
 extern int RootVolNo;
 extern struct volume_list VolList;
@@ -155,38 +104,23 @@ uint32_t crc32(const void *buf, size_t size);
 struct volume_info *setup_volume(int32_t vol_no, const char *filename,
                                int isnew, int oflags);
 struct volume_info *get_volume(int32_t vol_no);
-struct supercl_info *get_supercl(struct volume_info *vol, int32_t scl_no,
-                               hammer_alloc_state_t isnew);
-struct cluster_info *get_cluster(struct volume_info *vol, int32_t clu_no,
-                               hammer_alloc_state_t isnew);
-struct buffer_info *get_buffer(struct cluster_info *cl, int32_t buf_no,
-                               int64_t buf_type);
-hammer_node_ondisk_t get_node(struct cluster_info *cl, int32_t offset,
+struct buffer_info *get_buffer(hammer_off_t buf_offset, int isnew);
+hammer_node_ondisk_t get_node(hammer_off_t node_offset,
                                struct buffer_info **bufp);
 
-void init_alist_templates(void);
 void rel_volume(struct volume_info *volume);
-void rel_supercl(struct supercl_info *supercl);
-void rel_cluster(struct cluster_info *cluster);
 void rel_buffer(struct buffer_info *buffer);
 
-void *alloc_btree_element(struct cluster_info *cluster, int32_t *offp);
-void *alloc_data_element(struct cluster_info *cluster,
-                               int32_t bytes, int32_t *offp);
-void *alloc_record_element(struct cluster_info *cluster, int32_t *offp,
-                               u_int8_t rec_type);
-
+void *alloc_btree_element(hammer_off_t *offp);
+hammer_record_ondisk_t alloc_record_element(hammer_off_t *offp,
+                               u_int8_t rec_type, int32_t rec_len,
+                               int32_t data_len, void **datap);
 int hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2);
 
 void flush_all_volumes(void);
 void flush_volume(struct volume_info *vol);
-void flush_supercl(struct supercl_info *supercl);
-void flush_cluster(struct cluster_info *cl);
 void flush_buffer(struct buffer_info *buf);
 
-void hammer_super_alist_template(struct hammer_alist_config *conf); 
-void hammer_buffer_alist_template(struct hammer_alist_config *conf); 
-
 void hammer_cache_add(struct cache_info *cache, enum cache_type type);
 void hammer_cache_del(struct cache_info *cache);
 void hammer_cache_flush(void);
index c287db1..a009b41 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sbin/hammer/ondisk.c,v 1.9 2008/01/24 02:16:47 dillon Exp $
+ * $DragonFly: src/sbin/hammer/ondisk.c,v 1.10 2008/02/08 08:30:56 dillon Exp $
  */
 
 #include <sys/types.h>
 #include <fcntl.h>
 #include "hammer_util.h"
 
-static void initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head,
-                       u_int64_t type);
-static void alloc_new_buffer(struct cluster_info *cluster, hammer_alist_t live,
-                       u_int64_t type, int32_t nelements);
+static void init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type);
+static hammer_off_t hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes,
+                       struct buffer_info **bufp, u_int16_t hdr_type);
 #if 0
 static void readhammerbuf(struct volume_info *vol, void *data,
                        int64_t offset);
@@ -57,12 +56,6 @@ static void writehammerbuf(struct volume_info *vol, const void *data,
                        int64_t offset);
 
 
-struct hammer_alist_config Buf_alist_config;
-struct hammer_alist_config Vol_normal_alist_config;
-struct hammer_alist_config Vol_super_alist_config;
-struct hammer_alist_config Supercl_alist_config;
-struct hammer_alist_config Clu_master_alist_config;
-struct hammer_alist_config Clu_slave_alist_config;
 uuid_t Hammer_FSType;
 uuid_t Hammer_FSId;
 int64_t BootAreaSize;
@@ -72,37 +65,10 @@ int     NumVolumes;
 int    RootVolNo = -1;
 struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);
 
-void
-init_alist_templates(void)
-{
-       /*
-        * Initialize the alist templates we will be using
-        */
-       hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
-                             1, HAMMER_FSBUF_METAELMS, 0);
-       hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
-                             1, HAMMER_VOL_METAELMS_1LYR, 0);
-       hammer_alist_template(&Vol_super_alist_config,
-                         HAMMER_VOL_MAXSUPERCLUSTERS * HAMMER_SCL_MAXCLUSTERS,
-                             HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR,
-                             0);
-       hammer_super_alist_template(&Vol_super_alist_config);
-       hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
-                             1, HAMMER_SUPERCL_METAELMS, 0);
-       hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
-                             1, HAMMER_CLU_MASTER_METAELMS, 0);
-       hammer_alist_template(&Clu_slave_alist_config,
-                             HAMMER_CLU_MAXBUFFERS * HAMMER_FSBUF_MAXBLKS,
-                             HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS,
-                             1);
-       hammer_buffer_alist_template(&Clu_slave_alist_config);
-}
-
 /*
  * Lookup the requested information structure and related on-disk buffer.
  * Missing structures are created.
  */
-
 struct volume_info *
 setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags)
 {
@@ -116,8 +82,7 @@ setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags)
         */
        vol = malloc(sizeof(*vol));
        bzero(vol, sizeof(*vol));
-       TAILQ_INIT(&vol->cluster_list);
-       TAILQ_INIT(&vol->supercl_list);
+       TAILQ_INIT(&vol->buffer_list);
        vol->name = strdup(filename);
        vol->fd = open(filename, oflags);
        if (vol->fd < 0) {
@@ -132,15 +97,12 @@ setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags)
        vol->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
        if (isnew) {
                bzero(ondisk, HAMMER_BUFSIZE);
-               vol->using_supercl = UsingSuperClusters;
        } else {
                n = pread(vol->fd, ondisk, HAMMER_BUFSIZE, 0);
                if (n != HAMMER_BUFSIZE) {
                        err(1, "setup_volume: %s: Read failed at offset 0",
                            filename);
                }
-               if (ondisk->vol_flags & HAMMER_VOLF_USINGSUPERCL)
-                       vol->using_supercl = 1;
                vol_no = ondisk->vol_no;
                if (RootVolNo < 0) {
                        RootVolNo = ondisk->vol_rootvol;
@@ -162,20 +124,9 @@ setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags)
                }
        }
        vol->vol_no = vol_no;
-       if (vol->using_supercl) {
-               vol->clu_alist.config = &Vol_super_alist_config;
-               vol->clu_alist.meta = ondisk->vol_almeta.super;
-               vol->clu_alist.info = vol;
-       } else {
-               vol->clu_alist.config = &Vol_normal_alist_config;
-               vol->clu_alist.meta = ondisk->vol_almeta.normal;
-       }
-       vol->buf_alist.config = &Buf_alist_config;
-       vol->buf_alist.meta = ondisk->head.buf_almeta;
 
        if (isnew) {
-               hammer_alist_init(&vol->clu_alist, 0, 0, HAMMER_ASTATE_ALLOC);
-               initbuffer(&vol->buf_alist, &ondisk->head, HAMMER_FSBUF_VOLUME);
+               init_fifo_head(&ondisk->head, HAMMER_HEAD_TYPE_VOL);
                vol->cache.modified = 1;
         }
 
@@ -215,252 +166,37 @@ rel_volume(struct volume_info *volume)
        --volume->cache.refs;
 }
 
-struct supercl_info *
-get_supercl(struct volume_info *vol, int32_t scl_no, hammer_alloc_state_t isnew)
-{
-       struct hammer_supercl_ondisk *ondisk;
-       struct supercl_info *scl;
-       int32_t scl_group;
-       int64_t scl_group_size;
-       int64_t clusterSize = vol->ondisk->vol_clsize;
-       int n;
-
-       assert(vol->using_supercl);
-
-       TAILQ_FOREACH(scl, &vol->supercl_list, entry) {
-               if (scl->scl_no == scl_no)
-                       break;
-       }
-       if (scl == NULL) {
-               /*
-                * Allocate the scl
-                */
-               scl = malloc(sizeof(*scl));
-               bzero(scl, sizeof(*scl));
-               scl->scl_no = scl_no;
-               scl->volume = vol;
-               TAILQ_INSERT_TAIL(&vol->supercl_list, scl, entry);
-               ++vol->cache.refs;
-               scl->cache.u.supercl = scl;
-               hammer_cache_add(&scl->cache, ISSUPERCL);
-
-               /*
-                * Calculate the super-cluster's offset in the volume.
-                *
-                * The arrangement is [scl * N][N * 32768 clusters], repeat.
-                * N is typically 16.
-                */
-               scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
-               scl_group_size = ((int64_t)HAMMER_BUFSIZE *
-                                 HAMMER_VOL_SUPERCLUSTER_GROUP) +
-                                 ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
-                                 clusterSize * HAMMER_SCL_MAXCLUSTERS);
-               scl->scl_offset = vol->ondisk->vol_clo_beg +
-                                 scl_group * scl_group_size +
-                                 (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) *
-                                 HAMMER_BUFSIZE;
-       }
-       ++scl->cache.refs;
-       hammer_cache_flush();
-       if ((ondisk = scl->ondisk) == NULL) {
-               scl->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
-               scl->clu_alist.config = &Supercl_alist_config;
-               scl->clu_alist.meta = ondisk->scl_meta;
-               scl->buf_alist.config = &Buf_alist_config;
-               scl->buf_alist.meta = ondisk->head.buf_almeta;
-               if (isnew == 0) {
-                       n = pread(vol->fd, ondisk, HAMMER_BUFSIZE,
-                                 scl->scl_offset);
-                       if (n != HAMMER_BUFSIZE) {
-                               err(1, "get_supercl: %s:%d Read failed "
-                                   "at offset %lld",
-                                   vol->name, scl_no, scl->scl_offset);
-                       }
-               }
-       }
-       if (isnew) {
-               bzero(ondisk, HAMMER_BUFSIZE);
-               hammer_alist_init(&scl->clu_alist, 0, 0, isnew);
-                initbuffer(&scl->buf_alist, &ondisk->head,
-                          HAMMER_FSBUF_SUPERCL);
-               scl->cache.modified = 1;
-       }
-       return(scl);
-}
-
-void
-rel_supercl(struct supercl_info *supercl)
-{
-       struct volume_info *volume;
-
-       assert(supercl->cache.refs > 0);
-       if (--supercl->cache.refs == 0) {
-               if (supercl->cache.delete) {
-                       volume = supercl->volume;
-                       if (supercl->cache.modified)
-                               flush_supercl(supercl);
-                       TAILQ_REMOVE(&volume->supercl_list, supercl, entry);
-                       hammer_cache_del(&supercl->cache);
-                       free(supercl->ondisk);
-                       free(supercl);
-                       rel_volume(volume);
-               }
-       }
-}
-
-struct cluster_info *
-get_cluster(struct volume_info *vol, int32_t clu_no, hammer_alloc_state_t isnew)
-{
-       struct hammer_cluster_ondisk *ondisk;
-       struct cluster_info *cl;
-       int32_t scl_group;
-       int64_t scl_group_size;
-       int64_t clusterSize = vol->ondisk->vol_clsize;
-       int n;
-
-       TAILQ_FOREACH(cl, &vol->cluster_list, entry) {
-               if (cl->clu_no == clu_no)
-                       break;
-       }
-       if (cl == NULL) {
-               /*
-                * Allocate the cluster
-                */
-               cl = malloc(sizeof(*cl));
-               bzero(cl, sizeof(*cl));
-               TAILQ_INIT(&cl->buffer_list);
-               cl->clu_no = clu_no;
-               cl->volume = vol;
-               TAILQ_INSERT_TAIL(&vol->cluster_list, cl, entry);
-               ++vol->cache.refs;
-               cl->cache.u.cluster = cl;
-               hammer_cache_add(&cl->cache, ISCLUSTER);
-               if (vol->using_supercl) {
-                       cl->supercl = get_supercl(vol, clu_no / HAMMER_SCL_MAXCLUSTERS, 0);
-                       ++cl->supercl->cache.refs;
-               }
-
-               /*
-                * Calculate the cluster's offset in the volume
-                *
-                * The arrangement is [scl * N][N * 32768 clusters], repeat.
-                * N is typically 16.
-                *
-                * Note that the cluster offset calculation is slightly
-                * different from the supercluster offset calculation due
-                * to the way the grouping works.
-                */
-               if (vol->using_supercl) {
-                       scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
-                                   HAMMER_SCL_MAXCLUSTERS;
-                       scl_group_size = 
-                               ((int64_t)HAMMER_BUFSIZE *
-                               HAMMER_VOL_SUPERCLUSTER_GROUP) +
-                               ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
-                               clusterSize * HAMMER_SCL_MAXCLUSTERS);
-                       scl_group_size += HAMMER_VOL_SUPERCLUSTER_GROUP *
-                                         HAMMER_BUFSIZE;
-                       cl->clu_offset =
-                               vol->ondisk->vol_clo_beg +
-                               scl_group * scl_group_size +
-                               (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
-                                ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS * HAMMER_VOL_SUPERCLUSTER_GROUP)) *
-                                clusterSize;
-               } else {
-                       cl->clu_offset = vol->ondisk->vol_clo_beg +
-                                        (int64_t)clu_no * clusterSize;
-               }
-       }
-       ++cl->cache.refs;
-       hammer_cache_flush();
-       if ((ondisk = cl->ondisk) == NULL) {
-               cl->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
-               cl->alist_master.config = &Clu_master_alist_config;
-               cl->alist_master.meta = ondisk->clu_master_meta;
-               cl->alist_btree.config = &Clu_slave_alist_config;
-               cl->alist_btree.meta = ondisk->clu_btree_meta;
-               cl->alist_btree.info = cl;
-               cl->alist_record.config = &Clu_slave_alist_config;
-               cl->alist_record.meta = ondisk->clu_record_meta;
-               cl->alist_record.info = cl;
-               cl->alist_mdata.config = &Clu_slave_alist_config;
-               cl->alist_mdata.meta = ondisk->clu_mdata_meta;
-               cl->alist_mdata.info = cl;
-               if (isnew == 0) {
-                       n = pread(vol->fd, ondisk, HAMMER_BUFSIZE,
-                                 cl->clu_offset);
-                       if (n != HAMMER_BUFSIZE) {
-                               err(1, "get_cluster: %s:%d Read failed "
-                                   "at offset %lld",
-                                   vol->name, clu_no, cl->clu_offset);
-                       }
-               }
-       }
-       if (isnew) {
-               bzero(ondisk, HAMMER_BUFSIZE);
-               hammer_alist_init(&cl->alist_master, 0, 0, isnew);
-               hammer_alist_init(&cl->alist_btree, 0, 0, HAMMER_ASTATE_ALLOC);
-               hammer_alist_init(&cl->alist_record, 0, 0, HAMMER_ASTATE_ALLOC);
-               hammer_alist_init(&cl->alist_mdata, 0, 0, HAMMER_ASTATE_ALLOC);
-               cl->cache.modified = 1;
-       }
-       return(cl);
-}
-
-void
-rel_cluster(struct cluster_info *cluster)
-{
-       struct volume_info *volume;
-       struct supercl_info *supercl;
-
-       assert(cluster->cache.refs > 0);
-       if (--cluster->cache.refs == 0) {
-               if (cluster->cache.delete) {
-                       volume = cluster->volume;
-                       supercl = cluster->supercl;
-                       if (cluster->cache.modified)
-                               flush_cluster(cluster);
-                       TAILQ_REMOVE(&volume->cluster_list, cluster, entry);
-                       hammer_cache_del(&cluster->cache);
-                       free(cluster->ondisk);
-                       free(cluster);
-                       rel_volume(volume);
-                       if (supercl)
-                               rel_supercl(supercl);
-               }
-       }
-}
-
 /*
  * Acquire the specified buffer.
- * 
- * We are formatting a new buffer is buf_type != 0
  */
 struct buffer_info *
-get_buffer(struct cluster_info *cl, int32_t buf_no, int64_t buf_type)
+get_buffer(hammer_off_t buf_offset, int isnew)
 {
-       hammer_fsbuf_ondisk_t ondisk;
+       void *ondisk;
        struct buffer_info *buf;
+       struct volume_info *volume;
        int n;
+       int vol_no;
 
-       /*
-        * Find the buffer.  Note that buffer 0 corresponds to the cluster
-        * header and should never be requested.
-        */
-       assert(buf_no != 0);
-       TAILQ_FOREACH(buf, &cl->buffer_list, entry) {
-               if (buf->buf_no == buf_no)
+       assert((buf_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RAW_BUFFER);
+
+       vol_no = HAMMER_VOL_DECODE(buf_offset);
+       volume = get_volume(vol_no);
+       buf_offset &= ~HAMMER_BUFMASK64;
+
+       TAILQ_FOREACH(buf, &volume->buffer_list, entry) {
+               if (buf->buf_offset == buf_offset)
                        break;
        }
        if (buf == NULL) {
                buf = malloc(sizeof(*buf));
                bzero(buf, sizeof(*buf));
-               buf->buf_no = buf_no;
-               buf->buf_offset = cl->clu_offset + buf_no * HAMMER_BUFSIZE;
-               buf->cluster = cl;
-               buf->volume = cl->volume;
-               TAILQ_INSERT_TAIL(&cl->buffer_list, buf, entry);
-               ++cl->cache.refs;
+               buf->buf_offset = buf_offset;
+               buf->buf_disk_offset = volume->ondisk->vol_buf_beg +
+                                       (buf_offset & HAMMER_OFF_SHORT_MASK);
+               buf->volume = volume;
+               TAILQ_INSERT_TAIL(&volume->buffer_list, buf, entry);
+               ++volume->cache.refs;
                buf->cache.u.buffer = buf;
                hammer_cache_add(&buf->cache, ISBUFFER);
        }
@@ -468,22 +204,19 @@ get_buffer(struct cluster_info *cl, int32_t buf_no, int64_t buf_type)
        hammer_cache_flush();
        if ((ondisk = buf->ondisk) == NULL) {
                buf->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
-               buf->alist.config = &Buf_alist_config;
-               buf->alist.meta = ondisk->head.buf_almeta;
-               if (buf_type == 0) {
-                       n = pread(cl->volume->fd, ondisk, HAMMER_BUFSIZE,
-                                 buf->buf_offset);
+               if (isnew == 0) {
+                       n = pread(volume->fd, ondisk, HAMMER_BUFSIZE,
+                                 buf->buf_disk_offset);
                        if (n != HAMMER_BUFSIZE) {
-                               err(1, "get_buffer: %s:%d:%d Read failed at "
+                               err(1, "get_buffer: %s:%016llx Read failed at "
                                       "offset %lld",
-                                   cl->volume->name, buf->cluster->clu_no,
-                                   buf_no, buf->buf_offset);
+                                   volume->name, buf->buf_offset,
+                                   buf->buf_disk_offset);
                        }
                }
        }
-       if (buf_type) {
+       if (isnew) {
                bzero(ondisk, HAMMER_BUFSIZE);
-               initbuffer(&buf->alist, &ondisk->head, buf_type);
                buf->cache.modified = 1;
        }
        return(buf);
@@ -492,19 +225,19 @@ get_buffer(struct cluster_info *cl, int32_t buf_no, int64_t buf_type)
 void
 rel_buffer(struct buffer_info *buffer)
 {
-       struct cluster_info *cluster;
+       struct volume_info *volume;
 
        assert(buffer->cache.refs > 0);
        if (--buffer->cache.refs == 0) {
                if (buffer->cache.delete) {
-                       cluster = buffer->cluster;
+                       volume = buffer->volume;
                        if (buffer->cache.modified)
                                flush_buffer(buffer);
-                       TAILQ_REMOVE(&cluster->buffer_list, buffer, entry);
+                       TAILQ_REMOVE(&volume->buffer_list, buffer, entry);
                        hammer_cache_del(&buffer->cache);
                        free(buffer->ondisk);
                        free(buffer);
-                       rel_cluster(cluster);
+                       rel_volume(volume);
                }
        }
 }
@@ -514,140 +247,123 @@ rel_buffer(struct buffer_info *buffer)
  * bufp is freed if non-NULL and a referenced buffer is loaded into it.
  */
 hammer_node_ondisk_t
-get_node(struct cluster_info *cl, int32_t offset, struct buffer_info **bufp)
+get_node(hammer_off_t node_offset, struct buffer_info **bufp)
 {
        struct buffer_info *buf;
 
        if (*bufp)
                rel_buffer(*bufp);
-       *bufp = buf = get_buffer(cl, offset / HAMMER_BUFSIZE, 0);
-       if (buf->ondisk->head.buf_type != HAMMER_FSBUF_BTREE) {
-               errx(1, "get_node %d:%d:%d - not a B-Tree node buffer!",
-                    cl->volume->vol_no, cl->clu_no, offset);
-       }
-       return((void *)((char *)buf->ondisk + (offset & HAMMER_BUFMASK)));
+       *bufp = buf = get_buffer(node_offset, 0);
+       return((void *)((char *)buf->ondisk +
+                       (int32_t)(node_offset & HAMMER_BUFMASK)));
 }
 
 /*
  * Allocate HAMMER elements - btree nodes, data storage, and record elements
+ *
+ * NOTE: hammer_alloc_fifo() initializes the fifo header for the returned
+ * item and zeroes out the remainder, so don't bzero() it.
  */
 void *
-alloc_btree_element(struct cluster_info *cluster, int32_t *offp)
+alloc_btree_element(hammer_off_t *offp)
 {
        struct buffer_info *buf;
-       hammer_alist_t live;
-       int32_t elm_no;
        void *item;
 
-       live = &cluster->alist_btree;
-       elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
-       if (elm_no == HAMMER_ALIST_BLOCK_NONE)
-               elm_no = hammer_alist_alloc_fwd(live, 1, 0);
-       if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
-               alloc_new_buffer(cluster, live,
-                                HAMMER_FSBUF_BTREE, HAMMER_BTREE_NODES);
-               ++cluster->ondisk->stat_idx_bufs;
-               ++cluster->volume->ondisk->vol_stat_idx_bufs;
-               ++cluster->volume->ondisk->vol0_stat_idx_bufs;
-               elm_no = hammer_alist_alloc(live, 1);
-               assert(elm_no != HAMMER_ALIST_BLOCK_NONE);
-       }
-       cluster->ondisk->idx_index = elm_no;
-       buf = get_buffer(cluster, elm_no / HAMMER_FSBUF_MAXBLKS, 0);
-       assert(buf->ondisk->head.buf_type != 0);
-       item = &buf->ondisk->btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK];
-       *offp = buf->buf_no * HAMMER_BUFSIZE +
-               ((char *)item - (char *)buf->ondisk);
+       *offp = hammer_alloc_fifo(sizeof(struct hammer_node_ondisk), 0,
+                                 &buf, HAMMER_HEAD_TYPE_BTREE);
+       item = (char *)buf->ondisk + ((int32_t)*offp & HAMMER_BUFMASK);
+       /* XXX buf not released, ptr remains valid */
        return(item);
 }
 
-void *
-alloc_data_element(struct cluster_info *cluster, int32_t bytes, int32_t *offp)
+hammer_record_ondisk_t
+alloc_record_element(hammer_off_t *offp, u_int8_t rec_type,
+                    int32_t rec_len, int32_t data_len, void **datap)
 {
        struct buffer_info *buf;
-       hammer_alist_t live;
-       int32_t elm_no;
-       int32_t nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
-       void *item;
-
-       /*
-        * Try to allocate a btree-node.  If elm_no is HAMMER_ALIST_BLOCK_NONE
-        * and buf is non-NULL we have to initialize a new buffer's a-list.
-        */
-       live = &cluster->alist_mdata;
-       elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
-       if (elm_no == HAMMER_ALIST_BLOCK_NONE)
-               elm_no = hammer_alist_alloc_fwd(live, 1, 0);
-       if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
-               alloc_new_buffer(cluster, live,
-                                HAMMER_FSBUF_DATA, HAMMER_DATA_NODES);
-               ++cluster->ondisk->stat_data_bufs;
-               ++cluster->volume->ondisk->vol_stat_data_bufs;
-               ++cluster->volume->ondisk->vol0_stat_data_bufs;
-               elm_no = hammer_alist_alloc(live, nblks);
-               assert(elm_no != HAMMER_ALIST_BLOCK_NONE);
+       hammer_record_ondisk_t rec;
+       int32_t aligned_rec_len;
+
+       aligned_rec_len = (rec_len + HAMMER_HEAD_ALIGN_MASK) &
+                         ~HAMMER_HEAD_ALIGN_MASK;
+
+       *offp = hammer_alloc_fifo(aligned_rec_len, data_len, &buf,
+                                 HAMMER_HEAD_TYPE_RECORD);
+       rec = (void *)((char *)buf->ondisk + ((int32_t)*offp & HAMMER_BUFMASK));
+       rec->base.base.rec_type = rec_type;
+       if (data_len) {
+               rec->base.data_off = *offp + aligned_rec_len;
+               rec->base.data_len = data_len;
+               *datap = (char *)rec + aligned_rec_len;
+       } else {
+               *datap = NULL;
        }
-       cluster->ondisk->idx_index = elm_no;
-       buf = get_buffer(cluster, elm_no / HAMMER_FSBUF_MAXBLKS, 0);
-       assert(buf->ondisk->head.buf_type != 0);
-       item = &buf->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
-       *offp = buf->buf_no * HAMMER_BUFSIZE +
-               ((char *)item - (char *)buf->ondisk);
-       return(item);
+       /* XXX buf not released, ptr remains valid */
+       return(rec);
 }
 
-void *
-alloc_record_element(struct cluster_info *cluster, int32_t *offp,
-                    u_int8_t rec_type)
+/*
+ * Reserve space from the FIFO.  Make sure the allocation does not cross a
+ * record boundary.
+ *
+ * Initialize the fifo header, keep track of the previous entry's size
+ * so the reverse pointer can be initialized (using lastBlk), and also
+ * store a terminator (used by the recovery code) which will be overwritten
+ * by the next allocation.
+ */
+static
+hammer_off_t
+hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes,
+                 struct buffer_info **bufp, u_int16_t hdr_type)
 {
        struct buffer_info *buf;
-       hammer_alist_t live;
-       int32_t elm_no;
-       void *item;
+       struct volume_info *volume;
+       hammer_fifo_head_t head;
+       hammer_off_t off;
+       int32_t aligned_bytes;
+       static u_int32_t lastBlk;
 
-       live = &cluster->alist_record;
-       elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
-       if (elm_no == HAMMER_ALIST_BLOCK_NONE)
-               elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
-       if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
-               alloc_new_buffer(cluster, live,
-                                HAMMER_FSBUF_RECORDS, HAMMER_RECORD_NODES);
-               ++cluster->ondisk->stat_rec_bufs;
-               ++cluster->volume->ondisk->vol_stat_rec_bufs;
-               ++cluster->volume->ondisk->vol0_stat_rec_bufs;
-               elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
-               assert(elm_no != HAMMER_ALIST_BLOCK_NONE);
-       }
-       cluster->ondisk->idx_record = elm_no;
-       buf = get_buffer(cluster, elm_no / HAMMER_FSBUF_MAXBLKS, 0);
-       assert(buf->ondisk->head.buf_type != 0);
-       item = &buf->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
-       *offp = buf->buf_no * HAMMER_BUFSIZE +
-               ((char *)item - (char *)buf->ondisk);
-       ++cluster->ondisk->stat_records;
-       if (rec_type == HAMMER_RECTYPE_CLUSTER)
-               ++cluster->ondisk->stat_records;
-       return(item);
-}
+       aligned_bytes = (base_bytes + ext_bytes + HAMMER_HEAD_ALIGN_MASK) &
+                       ~HAMMER_HEAD_ALIGN_MASK;
 
-static void
-alloc_new_buffer(struct cluster_info *cluster, hammer_alist_t live,
-                u_int64_t type, int32_t nelements)
-{
-       int32_t buf_no;
-       struct buffer_info *buf;
+       volume = get_volume(RootVolNo);
+       off = volume->ondisk->vol0_fifo_end;
 
-       if (type == HAMMER_FSBUF_RECORDS) {
-               buf_no = hammer_alist_alloc_rev(&cluster->alist_master, 1,
-                                               HAMMER_ALIST_BLOCK_MAX);
-       } else {
-               buf_no = hammer_alist_alloc_fwd(&cluster->alist_master, 1, 
-                                               0);
-       }
-       assert(buf_no != HAMMER_ALIST_BLOCK_NONE);
-       buf = get_buffer(cluster, buf_no, type);
-       hammer_alist_free(live, buf_no * HAMMER_FSBUF_MAXBLKS, nelements);
-       /* XXX modified bit for multiple gets/rels */
+       /*
+        * For now don't deal with transitions across buffer boundaries,
+        * only newfs_hammer uses this function.
+        */
+       assert((off & ~HAMMER_BUFMASK64) ==
+                ((off + aligned_bytes + sizeof(*head)) & ~HAMMER_BUFMASK));
+
+       *bufp = buf = get_buffer(off, 0);
+
+       buf->cache.modified = 1;
+       volume->cache.modified = 1;
+
+       head = (void *)((char *)buf->ondisk + ((int32_t)off & HAMMER_BUFMASK));
+       bzero(head, base_bytes);
+
+       head->hdr_type = hdr_type;
+       head->hdr_rev_link = lastBlk;
+       head->hdr_fwd_link = aligned_bytes;
+       head->hdr_seq = volume->ondisk->vol0_next_seq++;
+       lastBlk = head->hdr_fwd_link;
+
+       volume->ondisk->vol0_fifo_end += aligned_bytes;
+       volume->cache.modified = 1;
+       head = (void *)((char *)head + aligned_bytes);
+       head->hdr_signature = HAMMER_HEAD_SIGNATURE;
+       head->hdr_type = HAMMER_HEAD_TYPE_TERM;
+       head->hdr_rev_link = lastBlk;
+       head->hdr_fwd_link = 0;
+       head->hdr_crc = 0;
+       head->hdr_seq = volume->ondisk->vol0_next_seq;
+
+       rel_volume(volume);
+
+       return(off);
 }
 
 /*
@@ -667,57 +383,35 @@ flush_all_volumes(void)
 }
 
 void
-flush_volume(struct volume_info *vol)
-{
-       struct supercl_info *supercl;
-       struct cluster_info *cl;
-
-       TAILQ_FOREACH(supercl, &vol->supercl_list, entry)
-               flush_supercl(supercl);
-       TAILQ_FOREACH(cl, &vol->cluster_list, entry)
-               flush_cluster(cl);
-       writehammerbuf(vol, vol->ondisk, 0);
-       vol->cache.modified = 0;
-}
-
-void
-flush_supercl(struct supercl_info *supercl)
+flush_volume(struct volume_info *volume)
 {
-       int64_t supercl_offset;
-
-       supercl_offset = supercl->scl_offset;
-       writehammerbuf(supercl->volume, supercl->ondisk, supercl_offset);
-       supercl->cache.modified = 0;
-}
-
-void
-flush_cluster(struct cluster_info *cl)
-{
-       struct buffer_info *buf;
-       int64_t cluster_offset;
+       struct buffer_info *buffer;
 
-       TAILQ_FOREACH(buf, &cl->buffer_list, entry)
-               flush_buffer(buf);
-       cluster_offset = cl->clu_offset;
-       writehammerbuf(cl->volume, cl->ondisk, cluster_offset);
-       cl->cache.modified = 0;
+       TAILQ_FOREACH(buffer, &volume->buffer_list, entry)
+               flush_buffer(buffer);
+       writehammerbuf(volume, volume->ondisk, 0);
+       volume->cache.modified = 0;
 }
 
 void
-flush_buffer(struct buffer_info *buf)
+flush_buffer(struct buffer_info *buffer)
 {
-       writehammerbuf(buf->volume, buf->ondisk, buf->buf_offset);
-       buf->cache.modified = 0;
+       writehammerbuf(buffer->volume, buffer->ondisk, buffer->buf_disk_offset);
+       buffer->cache.modified = 0;
 }
 
 /*
  * Generic buffer initialization
  */
 static void
-initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
+init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type)
 {
-       head->buf_type = type;
-       hammer_alist_init(live, 0, 0, HAMMER_ASTATE_ALLOC);
+       head->hdr_signature = HAMMER_HEAD_SIGNATURE;
+       head->hdr_type = hdr_type;
+       head->hdr_rev_link = 0;
+       head->hdr_fwd_link = 0;
+       head->hdr_crc = 0;
+       head->hdr_seq = 0;
 }
 
 #if 0
diff --git a/sbin/hammer/super_alist.c b/sbin/hammer/super_alist.c
deleted file mode 100644 (file)
index efa964a..0000000
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
- * 
- * This code is derived from software contributed to The DragonFly Project
- * by Matthew Dillon <dillon@backplane.com>
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- * 3. Neither the name of The DragonFly Project nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific, prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
- * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * 
- * $DragonFly: src/sbin/hammer/Attic/super_alist.c,v 1.4 2008/01/03 06:48:45 dillon Exp $
- */
-/*
- * Implement the super-cluster A-list recursion for the cluster allocator.
- *
- * Volume A-list -> supercluster A-list -> cluster
- */
-
-#include <sys/types.h>
-#include "hammer_util.h"
-
-static int
-super_alist_init(void *info, int32_t blk, int32_t radix __unused,
-                hammer_alloc_state_t state)
-{
-       struct volume_info *vol = info;
-       struct supercl_info *supercl;
-       int32_t sclno;
-
-       /*
-        * Calculate the super-cluster number containing the cluster (blk)
-        * and obtain the super-cluster buffer.
-        */
-       sclno = blk / HAMMER_SCL_MAXCLUSTERS;
-       supercl = get_supercl(vol, sclno, state);
-       rel_supercl(supercl);
-       return(0);
-}
-
-static int
-super_alist_destroy(void *info __unused, int32_t blk __unused,
-                   int32_t radix __unused)
-{
-       return(0);
-}
-
-static int
-super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix __unused,
-                     int32_t count, int32_t atblk, int32_t *fullp)
-{
-       struct volume_info *vol = info;
-       struct supercl_info *supercl;
-       int32_t sclno;
-       int32_t r;
-
-       sclno = blk / HAMMER_SCL_MAXCLUSTERS;
-       supercl = get_supercl(vol, sclno, 0);
-       r = hammer_alist_alloc_fwd(&supercl->clu_alist, count, atblk - blk);
-       if (r != HAMMER_ALIST_BLOCK_NONE)
-               r += blk;
-       *fullp = hammer_alist_isfull(&supercl->clu_alist);
-       rel_supercl(supercl);
-       return(r);
-}
-
-static int
-super_alist_alloc_rev(void *info, int32_t blk, int32_t radix __unused,
-                     int32_t count, int32_t atblk, int32_t *fullp)
-{
-       struct volume_info *vol = info;
-       struct supercl_info *supercl;
-       int32_t sclno;
-       int32_t r;
-
-       sclno = blk / HAMMER_SCL_MAXCLUSTERS;
-       supercl = get_supercl(vol, sclno, 0);
-       r = hammer_alist_alloc_rev(&supercl->clu_alist, count, atblk - blk);
-       if (r != HAMMER_ALIST_BLOCK_NONE)
-               r += blk;
-       *fullp = hammer_alist_isfull(&supercl->clu_alist);
-       rel_supercl(supercl);
-       return(r);
-}
-
-static void
-super_alist_free(void *info, int32_t blk, int32_t radix __unused,
-                int32_t base_blk, int32_t count, int32_t *emptyp)
-{
-       struct volume_info *vol = info;
-       struct supercl_info *supercl;
-       int32_t sclno;
-
-       sclno = blk / HAMMER_SCL_MAXCLUSTERS;
-       supercl = get_supercl(vol, sclno, 0);
-
-       hammer_alist_free(&supercl->clu_alist, base_blk, count);
-       *emptyp = hammer_alist_isempty(&supercl->clu_alist);
-       rel_supercl(supercl);
-}
-
-static void
-super_alist_print(void *info __unused, int32_t blk __unused,
-                 int32_t radix __unused, int tab __unused)
-{
-}
-
-void
-hammer_super_alist_template(hammer_alist_config_t config)
-{
-        config->bl_radix_init = super_alist_init;
-        config->bl_radix_destroy = super_alist_destroy;
-        config->bl_radix_alloc_fwd = super_alist_alloc_fwd;
-        config->bl_radix_alloc_rev = super_alist_alloc_rev;
-        config->bl_radix_free = super_alist_free;
-        config->bl_radix_print = super_alist_print;
-}
-
index bdb4c4d..96063f7 100644 (file)
@@ -1,5 +1,5 @@
 #
-# $DragonFly: src/sbin/newfs_hammer/Makefile,v 1.3 2008/01/03 06:48:47 dillon Exp $
+# $DragonFly: src/sbin/newfs_hammer/Makefile,v 1.4 2008/02/08 08:30:58 dillon Exp $
 
 PROG=  newfs_hammer
 MAN=   newfs_hammer.8
@@ -8,9 +8,7 @@ SRCS= newfs_hammer.c
 
 .PATH: ${.CURDIR}/../../sys/libkern
 SRCS+= crc32.c
-.PATH: ${.CURDIR}/../../sys/vfs/hammer
-SRCS+= hammer_alist.c
 .PATH: ${.CURDIR}/../hammer
-SRCS+= ondisk.c cache.c super_alist.c buffer_alist.c
+SRCS+= ondisk.c cache.c
 
 .include <bsd.prog.mk>
index 42f0440..d85d527 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sbin/newfs_hammer/newfs_hammer.c,v 1.16 2008/01/25 21:52:10 dillon Exp $
+ * $DragonFly: src/sbin/newfs_hammer/newfs_hammer.c,v 1.17 2008/02/08 08:30:58 dillon Exp $
  */
 
 #include "newfs_hammer.h"
@@ -40,12 +40,9 @@ static int64_t getsize(const char *str, int64_t minval, int64_t maxval, int pw);
 static const char *sizetostr(off_t size);
 static void check_volume(struct volume_info *vol);
 static void format_volume(struct volume_info *vol, int nvols,const char *label);
-static int32_t format_cluster(struct volume_info *vol, int isroot);
-static void format_root(struct cluster_info *cluster);
+static hammer_off_t format_root(void);
 static void usage(void);
 
-static int64_t ClusterSize;
-
 int
 main(int ac, char **av)
 {
@@ -53,21 +50,13 @@ main(int ac, char **av)
        int ch;
        u_int32_t status;
        off_t total;
-       int64_t max_volume_size;
        const char *label = NULL;
 
        /*
         * Sanity check basic filesystem structures.  No cookies for us
         * if it gets broken!
         */
-       assert(sizeof(struct hammer_almeta) == HAMMER_ALMETA_SIZE);
-       assert(sizeof(struct hammer_fsbuf_head) == HAMMER_FSBUF_HEAD_SIZE);
        assert(sizeof(struct hammer_volume_ondisk) <= HAMMER_BUFSIZE);
-       assert(sizeof(struct hammer_cluster_ondisk) <= HAMMER_BUFSIZE);
-       assert(sizeof(struct hammer_fsbuf_data) == HAMMER_BUFSIZE);
-       assert(sizeof(struct hammer_fsbuf_recs) == HAMMER_BUFSIZE);
-       assert(sizeof(struct hammer_fsbuf_btree) == HAMMER_BUFSIZE);
-       assert(sizeof(union hammer_fsbuf_ondisk) == HAMMER_BUFSIZE);
 
        /*
         * Generate a filesysem id and lookup the filesystem type
@@ -79,12 +68,10 @@ main(int ac, char **av)
                        "HAMMER filesystem type");
        }
 
-       init_alist_templates();
-
        /*
         * Parse arguments
         */
-       while ((ch = getopt(ac, av, "L:b:c:m:S")) != -1) {
+       while ((ch = getopt(ac, av, "L:b:m:")) != -1) {
                switch(ch) {
                case 'L':
                        label = optarg;
@@ -94,22 +81,11 @@ main(int ac, char **av)
                                         HAMMER_BUFSIZE,
                                         HAMMER_BOOT_MAXBYTES, 2);
                        break;
-               case 'c':
-                       ClusterSize = getsize(optarg, 
-                                        HAMMER_BUFSIZE * 256LL,
-                                        HAMMER_CLU_MAXBYTES, 1);
-                       break;
                case 'm':
                        MemAreaSize = getsize(optarg,
                                         HAMMER_BUFSIZE,
                                         HAMMER_MEM_MAXBYTES, 2);
                        break;
-               case 'S':
-                       /*
-                        * Force the use of super-clusters
-                        */
-                       UsingSuperClusters = 1;
-                       break;
                default:
                        usage();
                        break;
@@ -128,6 +104,7 @@ main(int ac, char **av)
        ac -= optind;
        av += optind;
        NumVolumes = ac;
+       RootVolNo = 0;
 
        total = 0;
        for (i = 0; i < NumVolumes; ++i) {
@@ -143,19 +120,6 @@ main(int ac, char **av)
                total += vol->size;
        }
 
-       /*
-        * Calculate the size of a cluster.  A cluster is broken
-        * down into 256 chunks which must be at least filesystem buffer
-        * sized.  This gives us a minimum chunk size of around 4MB.
-        */
-       if (ClusterSize == 0) {
-               ClusterSize = HAMMER_BUFSIZE * 256;
-               while (ClusterSize < total / NumVolumes / 256 &&
-                      ClusterSize < HAMMER_CLU_MAXBYTES) {
-                       ClusterSize <<= 1;
-               }
-       }
-
        /*
         * Calculate defaults for the boot and memory area sizes.
         */
@@ -181,20 +145,6 @@ main(int ac, char **av)
        printf("---------------------------------------------\n");
        printf("%d volume%s total size %s\n",
                NumVolumes, (NumVolumes == 1 ? "" : "s"), sizetostr(total));
-       printf("cluster-size:        %s\n", sizetostr(ClusterSize));
-
-       if (UsingSuperClusters) {
-               max_volume_size = (int64_t)HAMMER_VOL_MAXSUPERCLUSTERS * \
-                                 HAMMER_SCL_MAXCLUSTERS * ClusterSize;
-       } else {
-               max_volume_size = HAMMER_VOL_MAXCLUSTERS * ClusterSize;
-       }
-       printf("max-volume-size:     %s\n", sizetostr(max_volume_size));
-
-       printf("max-filesystem-size: %s\n",
-              (max_volume_size * 32768LL < max_volume_size) ?
-              "Unlimited" :
-              sizetostr(max_volume_size * 32768LL));
        printf("boot-area-size:      %s\n", sizetostr(BootAreaSize));
        printf("memory-log-size:     %s\n", sizetostr(MemAreaSize));
        printf("\n");
@@ -365,17 +315,6 @@ check_volume(struct volume_info *vol)
               vol->vol_no, vol->type, vol->name,
               sizetostr(vol->size));
 
-       /*
-        * Strictly speaking we do not need to enable super clusters unless
-        * we have volumes > 2TB, but turning them on doesn't really hurt
-        * and if we don't the user may get confused if he tries to expand
-        * the size of an existing volume.
-        */
-       if (vol->size > 200LL * 1024 * 1024 * 1024 && !UsingSuperClusters) {
-               UsingSuperClusters = 1;
-               printf("Enabling super-clusters\n");
-       }
-
        /*
         * Reserve space for (future) header junk
         */
@@ -390,20 +329,6 @@ void
 format_volume(struct volume_info *vol, int nvols, const char *label)
 {
        struct hammer_volume_ondisk *ondisk;
-       int32_t nclusters;
-       int32_t minclsize;
-       int32_t nscl_groups;
-       int64_t scl_group_size;
-       int64_t scl_header_size;
-       int64_t n64;
-
-       /*
-        * The last cluster in a volume may wind up truncated.  It must be
-        * at least minclsize to really be workable as a cluster.
-        */
-       minclsize = (int32_t)(ClusterSize / 4);
-       if (minclsize < HAMMER_BUFSIZE * 64)
-               minclsize = HAMMER_BUFSIZE * 64;
 
        /*
         * Initialize basic information in the on-disk volume structure.
@@ -416,220 +341,63 @@ format_volume(struct volume_info *vol, int nvols, const char *label)
        ondisk->vol_no = vol->vol_no;
        ondisk->vol_count = nvols;
        ondisk->vol_version = 1;
-       ondisk->vol_clsize = (int32_t)ClusterSize;
-       if (UsingSuperClusters)
-               ondisk->vol_flags = HAMMER_VOLF_USINGSUPERCL;
 
        ondisk->vol_bot_beg = vol->vol_alloc;
        vol->vol_alloc += BootAreaSize;
        ondisk->vol_mem_beg = vol->vol_alloc;
        vol->vol_alloc += MemAreaSize;
-       ondisk->vol_clo_beg = vol->vol_alloc;
-       ondisk->vol_clo_end = vol->size;
+       ondisk->vol_buf_beg = vol->vol_alloc;
+       ondisk->vol_buf_end = vol->size & ~(int64_t)HAMMER_BUFMASK;
 
-       if (ondisk->vol_clo_end < ondisk->vol_clo_beg) {
+       if (ondisk->vol_buf_end < ondisk->vol_buf_beg) {
                errx(1, "volume %d %s is too small to hold the volume header",
                     vol->vol_no, vol->name);
        }
 
-       /*
-        * Our A-lists have been initialized but are marked all-allocated.
-        * Calculate the actual number of clusters in the volume and free
-        * them to get the filesystem ready for work.  The clusters will
-        * be initialized on-demand.
-        *
-        * If using super-clusters we must still calculate nclusters but
-        * we only need to initialize superclusters that are not going
-        * to wind up in the all-free state, which will only be the last
-        * supercluster.  hammer_alist_free() will recurse into the
-        * supercluster infrastructure and create the necessary superclusters.
-        *
-        * NOTE: The nclusters calculation ensures that the volume EOF does
-        * not occur in the middle of a supercluster buffer array.
-        */
-       if (UsingSuperClusters) {
-               /*
-                * Figure out how many full super-cluster groups we will have.
-                * This calculation does not include the partial supercluster
-                * group at the end.
-                */
-               scl_header_size = (int64_t)HAMMER_BUFSIZE *
-                                 HAMMER_VOL_SUPERCLUSTER_GROUP;
-               scl_group_size = scl_header_size +
-                                (int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
-                                ClusterSize * HAMMER_SCL_MAXCLUSTERS;
-               nscl_groups = (ondisk->vol_clo_end - ondisk->vol_clo_beg) /
-                               scl_group_size;
-               nclusters = nscl_groups * HAMMER_SCL_MAXCLUSTERS *
-                               HAMMER_VOL_SUPERCLUSTER_GROUP;
-
-               /*
-                * Figure out how much space we have left and calculate the
-                * remaining number of clusters.
-                */
-               n64 = (ondisk->vol_clo_end - ondisk->vol_clo_beg) -
-                       (nscl_groups * scl_group_size);
-               if (n64 > scl_header_size) {
-                       nclusters += (n64 + minclsize) / ClusterSize;
-               }
-               printf("%d clusters, %d full super-cluster groups\n",
-                       nclusters, nscl_groups);
-               hammer_alist_init(&vol->clu_alist, 0, nclusters,
-                                 HAMMER_ASTATE_FREE);
-       } else {
-               nclusters = (ondisk->vol_clo_end - ondisk->vol_clo_beg +
-                            minclsize) / ClusterSize;
-               if (nclusters > HAMMER_VOL_MAXCLUSTERS) {
-                       errx(1, "Volume is too large, max %s\n",
-                            sizetostr(nclusters * ClusterSize));
-               }
-               hammer_alist_init(&vol->clu_alist, 0, nclusters,
-                                 HAMMER_ASTATE_FREE);
-       }
-       ondisk->vol_nclusters = nclusters;
-       ondisk->vol_nblocks = nclusters * ClusterSize / HAMMER_BUFSIZE -
-                             nclusters;
+       ondisk->vol_nblocks = (ondisk->vol_buf_end - ondisk->vol_buf_beg) /
+                             HAMMER_BUFSIZE;
        ondisk->vol_blocksize = HAMMER_BUFSIZE;
 
        /*
-        * Place the root cluster in volume 0.
+        * Assign the root volume to volume 0.
         */
        ondisk->vol_rootvol = 0;
+       ondisk->vol_signature = HAMMER_FSBUF_VOLUME;
        if (ondisk->vol_no == (int)ondisk->vol_rootvol) {
-               ondisk->vol0_root_clu_id = format_cluster(vol, 1);
-               ondisk->vol0_recid = 1;
-               /* global next TID */
-               ondisk->vol0_nexttid = createtid();
-       }
-}
-
-/*
- * Format a hammer cluster.  Returns byte offset in volume of cluster.
- */
-static
-int32_t
-format_cluster(struct volume_info *vol, int isroot)
-{
-       hammer_tid_t clu_id = createtid();
-       struct cluster_info *cluster;
-       struct hammer_cluster_ondisk *ondisk;
-       int nbuffers;
-       int clno;
-
-       /*
-        * Allocate a cluster
-        */
-       clno = hammer_alist_alloc(&vol->clu_alist, 1);
-       if (clno == HAMMER_ALIST_BLOCK_NONE) {
-               fprintf(stderr, "volume %d %s has insufficient space\n",
-                       vol->vol_no, vol->name);
-               exit(1);
-       }
-       cluster = get_cluster(vol, clno, 1);
-       printf("allocate cluster id=%016llx %d@%08llx\n",
-              clu_id, clno, cluster->clu_offset);
-
-       ondisk = cluster->ondisk;
-
-       ondisk->vol_fsid = vol->ondisk->vol_fsid;
-       ondisk->vol_fstype = vol->ondisk->vol_fstype;
-       ondisk->clu_gen = 1;
-       ondisk->clu_id = clu_id;
-       ondisk->clu_no = clno;
-       ondisk->clu_flags = 0;
-       ondisk->clu_start = HAMMER_BUFSIZE;
-       if (vol->size - cluster->clu_offset > ClusterSize)
-               ondisk->clu_limit = (u_int32_t)ClusterSize;
-       else
-               ondisk->clu_limit = (u_int32_t)(vol->size - cluster->clu_offset);
-
-       /*
-        * In-band filesystem buffer management A-List.  The first filesystem
-        * buffer is the cluster header itself.
-        */
-       nbuffers = ondisk->clu_limit / HAMMER_BUFSIZE;
-       hammer_alist_free(&cluster->alist_master, 1, nbuffers - 1);
-       printf("cluster %d has %d buffers\n", cluster->clu_no, nbuffers);
-
-       /*
-        * Buffer Iterators in elements.  Each buffer has 256 elements.
-        * The data and B-Tree indices are forward allocations while the
-        * record index allocates backwards.
-        */
-       ondisk->idx_data = 1 * HAMMER_FSBUF_MAXBLKS;
-       ondisk->idx_index = 0 * HAMMER_FSBUF_MAXBLKS;
-       ondisk->idx_record = nbuffers * HAMMER_FSBUF_MAXBLKS;
-
-       /*
-        * Iterator for whole-buffer data allocations. The iterator is
-        * the buf_no.
-        */
-       ondisk->idx_ldata = 1;
+               /*
+                * Create an empty FIFO starting at the first buffer
+                * in volume 0.  hammer_off_t must be properly formatted
+                * (see vfs/hammer/hammer_disk.h)
+                */
+               ondisk->vol0_fifo_beg = HAMMER_ENCODE_RAW_BUFFER(0, 0);
+               ondisk->vol0_fifo_end = ondisk->vol0_fifo_beg;
+               ondisk->vol0_next_tid = createtid();
+               ondisk->vol0_next_seq = 1;
 
-       /*
-        * Initialize root cluster's parent cluster info.  -1's
-        * indicate we are the root cluster and no parent exists.
-        */
-       ondisk->clu_btree_parent_vol_no = -1;
-       ondisk->clu_btree_parent_clu_no = -1;
-       ondisk->clu_btree_parent_offset = -1;
-       ondisk->clu_btree_parent_clu_gen = -1;
+               ondisk->vol0_btree_root = format_root();
+               ++ondisk->vol0_stat_inodes;     /* root inode */
 
-       /*
-        * Cluster 0 is the root cluster.  Set the B-Tree range for this
-        * cluster to the entire key space and format the root directory. 
-        *
-        * Note that delete_tid for the ending range must be set to 0,
-        * 0 indicates 'not deleted', aka 'the most recent'.  See
-        * hammer_btree_cmp() in sys/vfs/hammer/hammer_btree.c.
-        *
-        * The root cluster's key space represents the entire key space for
-        * the filesystem.  The btree_end element appears to be inclusive
-        * only because we can't overflow our variables.  It's actually
-        * non-inclusive... that is, it is a right-side boundary element.
-        */
-       if (isroot) {
-               ondisk->clu_btree_beg.obj_id = -0x8000000000000000LL;
-               ondisk->clu_btree_beg.key = -0x8000000000000000LL;
-               ondisk->clu_btree_beg.create_tid = 1;
-               ondisk->clu_btree_beg.delete_tid = 1;
-               ondisk->clu_btree_beg.rec_type = 0;
-               ondisk->clu_btree_beg.obj_type = 0;
-
-               ondisk->clu_btree_end.obj_id = 0x7FFFFFFFFFFFFFFFLL;
-               ondisk->clu_btree_end.key = 0x7FFFFFFFFFFFFFFFLL;
-               ondisk->clu_btree_end.create_tid = 0xFFFFFFFFFFFFFFFFULL;
-               ondisk->clu_btree_end.delete_tid = 0;   /* special case */
-               ondisk->clu_btree_end.rec_type = 0xFFFFU;
-               ondisk->clu_btree_end.obj_type = 0;
-
-               format_root(cluster);
        }
-
-       /*
-        * Write-out and update the index, record, and cluster buffers
-        */
-       return(clno);
 }
 
 /*
  * Format the root directory.
  */
 static
-void
-format_root(struct cluster_info *cluster)
+hammer_off_t
+format_root(void)
 {
-       int32_t btree_off;
-       int32_t rec_off;
-       int32_t data_off;
+       hammer_off_t btree_off;
+       hammer_off_t rec_off;
        hammer_node_ondisk_t bnode;
-       union hammer_record_ondisk *rec;
+       hammer_record_ondisk_t rec;
        struct hammer_inode_data *idata;
        hammer_btree_elm_t elm;
 
-       bnode = alloc_btree_element(cluster, &btree_off);
-       rec = alloc_record_element(cluster, &rec_off, HAMMER_RECTYPE_INODE);
-       idata = alloc_data_element(cluster, sizeof(*idata), &data_off);
+       bnode = alloc_btree_element(&btree_off);
+       rec = alloc_record_element(&rec_off, HAMMER_RECTYPE_INODE,
+                                  sizeof(rec->inode), sizeof(*idata),
+                                  (void **)&idata);
 
        /*
         * Populate the inode data and inode record for the root directory.
@@ -644,22 +412,13 @@ format_root(struct cluster_info *cluster)
        rec->base.base.delete_tid = 0;
        rec->base.base.rec_type = HAMMER_RECTYPE_INODE;
        rec->base.base.obj_type = HAMMER_OBJTYPE_DIRECTORY;
-       rec->base.data_offset = data_off;
-       rec->base.data_len = sizeof(*idata);
-       rec->base.data_crc = crc32(idata, sizeof(*idata));
+       /* rec->base.data_offset - initialized by alloc_record_element */
+       /* rec->base.data_len    - initialized by alloc_record_element */
+       rec->base.head.hdr_crc = crc32(idata, sizeof(*idata));
        rec->inode.ino_atime  = rec->base.base.create_tid;
        rec->inode.ino_mtime  = rec->base.base.create_tid;
        rec->inode.ino_size   = 0;
        rec->inode.ino_nlinks = 1;
-       cluster->ondisk->synchronized_rec_id = 1;
-
-       ++cluster->volume->ondisk->vol0_stat_inodes;
-
-       /*
-        * Assign the cluster's root B-Tree node.
-        */
-       assert(cluster->ondisk->clu_btree_root == 0);
-       cluster->ondisk->clu_btree_root = btree_off;
 
        /*
         * Create the root of the B-Tree.  The root is a leaf node so we
@@ -671,8 +430,9 @@ format_root(struct cluster_info *cluster)
        elm = &bnode->elms[0];
        elm->base = rec->base.base;
        elm->leaf.rec_offset = rec_off;
-       elm->leaf.data_offset = rec->base.data_offset;
+       elm->leaf.data_offset = rec->base.data_off;
        elm->leaf.data_len = rec->base.data_len;
-       elm->leaf.data_crc = rec->base.data_crc;
+       elm->leaf.data_crc = rec->base.head.hdr_crc;
+       return(btree_off);
 }
 
index 8372e83..67871b7 100644 (file)
@@ -1,5 +1,5 @@
 # $FreeBSD: src/sys/conf/files,v 1.340.2.137 2003/06/04 17:10:30 sam Exp $
-# $DragonFly: src/sys/conf/files,v 1.202 2008/02/04 08:33:14 dillon Exp $
+# $DragonFly: src/sys/conf/files,v 1.203 2008/02/08 08:30:55 dillon Exp $
 #
 # The long compile-with and dependency lines are required because of
 # limitations in config: backslash-newline doesn't work in strings, and
@@ -1129,7 +1129,6 @@ vfs/userfs/userfs_vfsops.c        optional userfs
 vfs/userfs/userfs_vnops.c      optional userfs
 vfs/userfs/userfs_inode.c      optional userfs
 vfs/userfs/userfs_elms.c       optional userfs
-vfs/hammer/hammer_alist.c      optional hammer
 vfs/hammer/hammer_inode.c      optional hammer
 vfs/hammer/hammer_vfsops.c     optional hammer
 vfs/hammer/hammer_vnops.c      optional hammer
index e915386..af5b718 100644 (file)
@@ -1,11 +1,11 @@
 #
-# $DragonFly: src/sys/vfs/hammer/Makefile,v 1.6 2008/02/04 08:33:17 dillon Exp $
+# $DragonFly: src/sys/vfs/hammer/Makefile,v 1.7 2008/02/08 08:30:59 dillon Exp $
 
 KMOD=  hammer
 SRCS=  hammer_vfsops.c hammer_vnops.c hammer_inode.c \
        hammer_subs.c hammer_ondisk.c hammer_io.c \
        hammer_cursor.c hammer_btree.c hammer_transaction.c \
-       hammer_alist.c hammer_object.c hammer_spike.c \
+       hammer_object.c hammer_spike.c \
        hammer_recover.c hammer_ioctl.c
 
 NOMAN=
index 836eeb6..2ca209e 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.34 2008/02/06 08:59:28 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.35 2008/02/08 08:30:59 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -56,7 +56,6 @@
 #include <sys/globaldata.h>
 
 #include <sys/buf2.h>
-#include "hammer_alist.h"
 #include "hammer_disk.h"
 #include "hammer_mount.h"
 #include "hammer_ioctl.h"
@@ -218,6 +217,7 @@ struct hammer_record {
        union hammer_record_ondisk      rec;
        union hammer_data_ondisk        *data;
        int                             flags;
+       int                             rec_len;
        int                             blocked;
 };
 
@@ -226,41 +226,31 @@ typedef struct hammer_record *hammer_record_t;
 #define HAMMER_RECF_ALLOCDATA          0x0001
 #define HAMMER_RECF_ONRBTREE           0x0002
 #define HAMMER_RECF_DELETED            0x0004
-#define HAMMER_RECF_EMBEDDED_DATA      0x0008
+#define HAMMER_RECF_UNUSED0008         0x0008
 #define HAMMER_RECF_SYNCING            0x0010
 #define HAMMER_RECF_WANTED             0x0020
 
 /*
- * Structures used to internally represent a volume and a cluster
+ * In-memory structures representing on-disk structures.
  */
 struct hammer_volume;
-struct hammer_cluster;
-struct hammer_supercl;
 struct hammer_buffer;
 struct hammer_node;
 RB_HEAD(hammer_vol_rb_tree, hammer_volume);
-RB_HEAD(hammer_clu_rb_tree, hammer_cluster);
-RB_HEAD(hammer_scl_rb_tree, hammer_supercl);
 RB_HEAD(hammer_buf_rb_tree, hammer_buffer);
 RB_HEAD(hammer_nod_rb_tree, hammer_node);
 
 RB_PROTOTYPE2(hammer_vol_rb_tree, hammer_volume, rb_node,
              hammer_vol_rb_compare, int32_t);
-RB_PROTOTYPE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
-             hammer_clu_rb_compare, int32_t);
-RB_PROTOTYPE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
-             hammer_scl_rb_compare, int32_t);
 RB_PROTOTYPE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
-             hammer_buf_rb_compare, int32_t);
+             hammer_buf_rb_compare, hammer_off_t);
 RB_PROTOTYPE2(hammer_nod_rb_tree, hammer_node, rb_node,
-             hammer_nod_rb_compare, int32_t);
+             hammer_nod_rb_compare, hammer_off_t);
 
 /*
  * IO management - embedded at the head of various in-memory structures
  */
 enum hammer_io_type { HAMMER_STRUCTURE_VOLUME,
-                     HAMMER_STRUCTURE_SUPERCL,
-                     HAMMER_STRUCTURE_CLUSTER,
                      HAMMER_STRUCTURE_BUFFER };
 
 union hammer_io_structure;
@@ -297,15 +287,13 @@ typedef struct hammer_io *hammer_io_t;
 struct hammer_volume {
        struct hammer_io io;
        RB_ENTRY(hammer_volume) rb_node;
-       struct hammer_clu_rb_tree rb_clus_root;
-       struct hammer_scl_rb_tree rb_scls_root;
+       struct hammer_nod_rb_tree rb_nods_root;
+       struct hammer_buf_rb_tree rb_bufs_root;
        struct hammer_volume_ondisk *ondisk;
-       struct hammer_alist_live alist;
        int32_t vol_no;
-       int32_t vol_clsize;
-       int32_t clu_iterator;   /* cluster allocation iterator */
        int64_t nblocks;        /* note: special calculation for statfs */
-       int64_t cluster_base;   /* base offset of cluster 0 */
+       int64_t buffer_base;    /* base offset of buffer 0 */
+       hammer_off_t maxbuf_off; /* Maximum buffer offset */
        char    *vol_name;
        struct vnode *devvp;
        struct hammer_mount *hmp;
@@ -314,54 +302,6 @@ struct hammer_volume {
 
 typedef struct hammer_volume *hammer_volume_t;
 
-/*
- * In-memory super-cluster representing on-disk buffer
- */
-struct hammer_supercl {
-       struct hammer_io io;
-       RB_ENTRY(hammer_supercl) rb_node;
-       struct hammer_supercl_ondisk *ondisk;
-       struct hammer_volume *volume;
-       struct hammer_alist_live alist;
-       int32_t scl_no;
-};
-
-typedef struct hammer_supercl *hammer_supercl_t;
-
-/*
- * In-memory cluster representing on-disk buffer
- *
- * The cluster's indexing range is cached in hammer_cluster, separate
- * from the ondisk info in order to allow cursors to point to it.
- */
-struct hammer_cluster {
-       struct hammer_io io;
-       RB_ENTRY(hammer_cluster) rb_node;
-       struct hammer_buf_rb_tree rb_bufs_root;
-       struct hammer_cluster_ondisk *ondisk;
-       struct hammer_volume *volume;
-       struct hammer_alist_live alist_master;
-       struct hammer_alist_live alist_btree;
-       struct hammer_alist_live alist_record;
-       struct hammer_alist_live alist_mdata;
-       struct hammer_nod_rb_tree rb_nods_root; /* cursors in cluster */
-       struct hammer_base_elm clu_btree_beg;   /* copy of on-disk info */
-       struct hammer_base_elm clu_btree_end;   /* copy of on-disk info */
-       int     flags;
-       int32_t clu_no;
-};
-
-typedef struct hammer_cluster *hammer_cluster_t;
-
-#define HAMMER_CLUSTER_DELETED 0x0001
-#define HAMMER_CLUSTER_EMPTY   0x0002
-
-/*
- * Passed to hammer_get_cluster()
- */
-#define GET_CLUSTER_NEW                0x0001
-#define GET_CLUSTER_NORECOVER  0x0002
-
 /*
  * In-memory buffer (other then volume, super-cluster, or cluster),
  * representing an on-disk buffer.
@@ -369,12 +309,9 @@ typedef struct hammer_cluster *hammer_cluster_t;
 struct hammer_buffer {
        struct hammer_io io;
        RB_ENTRY(hammer_buffer) rb_node;
-       hammer_fsbuf_ondisk_t ondisk;
+       void *ondisk;
        struct hammer_volume *volume;
-       struct hammer_cluster *cluster;
-       int32_t buf_no;
-       u_int64_t buf_type;
-       struct hammer_alist_live alist;
+       hammer_off_t buf_offset;
        struct hammer_node_list clist;
 };
 
@@ -397,9 +334,9 @@ struct hammer_node {
        struct hammer_lock      lock;           /* node-by-node lock */
        TAILQ_ENTRY(hammer_node) entry;         /* per-buffer linkage */
        RB_ENTRY(hammer_node)   rb_node;        /* per-cluster linkage */
-       int32_t                 node_offset;    /* cluster-rel offset */
-       struct hammer_cluster   *cluster;
+       hammer_off_t            node_offset;    /* full offset spec */
        struct hammer_buffer    *buffer;        /* backing buffer */
+       struct hammer_volume    *volume;        /* backing volume */
        hammer_node_ondisk_t    ondisk;         /* ptr to on-disk structure */
        struct hammer_node      **cache1;       /* passive cache(s) */
        struct hammer_node      **cache2;
@@ -429,8 +366,6 @@ typedef struct hammer_node_locklist *hammer_node_locklist_t;
 union hammer_io_structure {
        struct hammer_io        io;
        struct hammer_volume    volume;
-       struct hammer_supercl   supercl;
-       struct hammer_cluster   cluster;
        struct hammer_buffer    buffer;
 };
 
@@ -449,7 +384,8 @@ struct hammer_mount {
        struct hammer_ino_rb_tree rb_inos_root;
        struct hammer_vol_rb_tree rb_vols_root;
        struct hammer_volume *rootvol;
-       struct hammer_cluster *rootcl;
+       struct hammer_base_elm root_btree_beg;
+       struct hammer_base_elm root_btree_end;
        char    *zbuf;  /* HAMMER_BUFSIZE bytes worth of all-zeros */
        int     hflags;
        int     ronly;
@@ -476,12 +412,6 @@ struct hammer_sync_info {
 extern struct vop_ops hammer_vnode_vops;
 extern struct vop_ops hammer_spec_vops;
 extern struct vop_ops hammer_fifo_vops;
-extern struct hammer_alist_config Buf_alist_config;
-extern struct hammer_alist_config Vol_normal_alist_config;
-extern struct hammer_alist_config Vol_super_alist_config;
-extern struct hammer_alist_config Supercl_alist_config;
-extern struct hammer_alist_config Clu_master_alist_config;
-extern struct hammer_alist_config Clu_slave_alist_config;
 extern struct bio_ops hammer_bioops;
 
 extern int hammer_debug_general;
@@ -493,11 +423,8 @@ extern int hammer_count_inodes;
 extern int hammer_count_records;
 extern int hammer_count_record_datas;
 extern int hammer_count_volumes;
-extern int hammer_count_supercls;
-extern int hammer_count_clusters;
 extern int hammer_count_buffers;
 extern int hammer_count_nodes;
-extern int hammer_count_spikes;
 
 int    hammer_vop_inactive(struct vop_inactive_args *);
 int    hammer_vop_reclaim(struct vop_reclaim_args *);
@@ -512,8 +439,6 @@ void        hammer_put_inode_ref(struct hammer_inode *ip);
 
 int    hammer_unload_inode(hammer_inode_t ip, void *data);
 int    hammer_unload_volume(hammer_volume_t volume, void *data __unused);
-int    hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused);
-int    hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused);
 int    hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused);
 int    hammer_install_volume(hammer_mount_t hmp, const char *volname);
 
@@ -527,15 +452,13 @@ int       hammer_ip_check_directory_empty(hammer_transaction_t trans,
                        hammer_inode_t ip);
 int    hammer_sync_hmp(hammer_mount_t hmp, int waitfor);
 int    hammer_sync_volume(hammer_volume_t volume, void *data);
-int    hammer_sync_cluster(hammer_cluster_t cluster, void *data);
 int    hammer_sync_buffer(hammer_buffer_t buffer, void *data);
 
 hammer_record_t
-       hammer_alloc_mem_record(hammer_inode_t ip);
+       hammer_alloc_mem_record(hammer_inode_t ip, int32_t rec_len);
 void   hammer_rel_mem_record(hammer_record_t record);
 
 int    hammer_cursor_up(hammer_cursor_t cursor);
-int    hammer_cursor_toroot(hammer_cursor_t cursor);
 int    hammer_cursor_down(hammer_cursor_t cursor);
 int    hammer_cursor_upgrade(hammer_cursor_t cursor);
 void   hammer_cursor_downgrade(hammer_cursor_t cursor);
@@ -557,15 +480,14 @@ hammer_tid_t hammer_timespec_to_transid(struct timespec *ts);
 hammer_tid_t hammer_alloc_tid(hammer_transaction_t trans);
 hammer_tid_t hammer_now_tid(void);
 hammer_tid_t hammer_str_to_tid(const char *str);
-u_int64_t hammer_alloc_recid(hammer_cluster_t cluster);
 
 enum vtype hammer_get_vnode_type(u_int8_t obj_type);
 int hammer_get_dtype(u_int8_t obj_type);
 u_int8_t hammer_get_obj_type(enum vtype vtype);
 int64_t hammer_directory_namekey(void *name, int len);
 
-int    hammer_init_cursor_hmp(hammer_cursor_t cursor, struct hammer_node **cache, hammer_mount_t hmp);
-int    hammer_init_cursor_cluster(hammer_cursor_t cursor, hammer_cluster_t cluster);
+int    hammer_init_cursor_hmp(hammer_cursor_t cursor,
+                       struct hammer_node **cache, hammer_mount_t hmp);
 
 void   hammer_done_cursor(hammer_cursor_t cursor);
 void   hammer_mem_done(hammer_cursor_t cursor);
@@ -577,8 +499,6 @@ int hammer_btree_extract(hammer_cursor_t cursor, int flags);
 int    hammer_btree_iterate(hammer_cursor_t cursor);
 int    hammer_btree_iterate_reverse(hammer_cursor_t cursor);
 int    hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm);
-int    hammer_btree_insert_cluster(hammer_cursor_t cursor,
-                       hammer_cluster_t cluster, int32_t rec_offset);
 int    hammer_btree_delete(hammer_cursor_t cursor);
 int    hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2);
 int    hammer_btree_chkts(hammer_tid_t ts, hammer_base_elm_t key);
@@ -593,36 +513,29 @@ void      hammer_btree_unlock_children(struct hammer_node_locklist **locklistp);
 void   hammer_print_btree_node(hammer_node_ondisk_t ondisk);
 void   hammer_print_btree_elm(hammer_btree_elm_t elm, u_int8_t type, int i);
 
-void   *hammer_bread(struct hammer_cluster *cluster, int32_t cloff,
-                       u_int64_t buf_type, int *errorp,
-                       struct hammer_buffer **bufferp);
+void   *hammer_bread(struct hammer_mount *hmp, hammer_off_t off,
+                       int *errorp, struct hammer_buffer **bufferp);
+void   *hammer_bnew(struct hammer_mount *hmp, hammer_off_t off,
+                       int *errorp, struct hammer_buffer **bufferp);
 
 hammer_volume_t hammer_get_root_volume(hammer_mount_t hmp, int *errorp);
-hammer_cluster_t hammer_get_root_cluster(hammer_mount_t hmp, int *errorp);
 
 hammer_volume_t        hammer_get_volume(hammer_mount_t hmp,
                        int32_t vol_no, int *errorp);
-hammer_supercl_t hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
-                       int *errorp, hammer_alloc_state_t isnew);
-hammer_cluster_t hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
-                       int *errorp, int getflags);
-hammer_buffer_t        hammer_get_buffer(hammer_cluster_t cluster,
-                       int32_t buf_no, u_int64_t buf_type, int *errorp);
+hammer_buffer_t        hammer_get_buffer(hammer_mount_t hmp,
+                       hammer_off_t buf_offset, int isnew, int *errorp);
 
 int            hammer_ref_volume(hammer_volume_t volume);
-int            hammer_ref_cluster(hammer_cluster_t cluster);
 int            hammer_ref_buffer(hammer_buffer_t buffer);
 void           hammer_flush_buffer_nodes(hammer_buffer_t buffer);
 
 void           hammer_rel_volume(hammer_volume_t volume, int flush);
-void           hammer_rel_supercl(hammer_supercl_t supercl, int flush);
-void           hammer_rel_cluster(hammer_cluster_t cluster, int flush);
 void           hammer_rel_buffer(hammer_buffer_t buffer, int flush);
 
 int            hammer_vfs_export(struct mount *mp, int op,
                        const struct export_args *export);
-hammer_node_t  hammer_get_node(hammer_cluster_t cluster,
-                       int32_t node_offset, int *errorp);
+hammer_node_t  hammer_get_node(hammer_mount_t hmp,
+                       hammer_off_t node_offset, int *errorp);
 int            hammer_ref_node(hammer_node_t node);
 hammer_node_t  hammer_ref_node_safe(struct hammer_mount *hmp,
                        struct hammer_node **cache, int *errorp);
@@ -634,38 +547,23 @@ void              hammer_flush_node(hammer_node_t node);
 
 void hammer_dup_buffer(struct hammer_buffer **bufferp,
                        struct hammer_buffer *buffer);
-void hammer_dup_cluster(struct hammer_cluster **clusterp,
-                       struct hammer_cluster *cluster);
-hammer_cluster_t hammer_alloc_cluster(hammer_mount_t hmp,
-                       hammer_cluster_t cluster_hint, int *errorp);
-void hammer_init_cluster(hammer_cluster_t cluster,
-                       hammer_base_elm_t left_bound,
-                       hammer_base_elm_t right_bound);
-hammer_node_t hammer_alloc_btree(struct hammer_cluster *cluster, int *errorp);
-void *hammer_alloc_data(struct hammer_cluster *cluster, int32_t bytes,
-                       int *errorp, struct hammer_buffer **bufferp);
-void *hammer_alloc_record(struct hammer_cluster *cluster, int *errorp,
-                       u_int8_t rec_type, struct hammer_buffer **bufferp);
-void hammer_initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head,
-                       u_int64_t type);
-void hammer_free_data_ptr(struct hammer_buffer *buffer, 
-                       void *data, int bytes);
-void hammer_free_record_ptr(struct hammer_buffer *buffer,
-                       union hammer_record_ondisk *rec, u_int8_t rec_type);
-void hammer_free_cluster(hammer_cluster_t cluster);
-void hammer_free_btree(struct hammer_cluster *cluster, int32_t bclu_offset);
-void hammer_free_data(struct hammer_cluster *cluster, int32_t bclu_offset,
-                       int32_t bytes);
-void hammer_free_record(struct hammer_cluster *cluster, int32_t bclu_offset,
-                       u_int8_t rec_type);
+hammer_node_t hammer_alloc_btree(hammer_mount_t hmp, int *errorp);
+void *hammer_alloc_record(hammer_mount_t hmp,
+                       hammer_off_t *rec_offp, u_int8_t rec_type,
+                       int32_t rec_len, struct hammer_buffer **rec_bufferp,
+                       hammer_off_t *data_offp, int32_t data_len,
+                       void **data1p, void **data2p, int32_t *data2_index,
+                       struct hammer_buffer **data2_bufferp,
+                       int *errorp);
+void hammer_free_fifo(hammer_mount_t hmp, hammer_off_t fifo_offset);
+void hammer_unwind_fifo(hammer_mount_t hmp, hammer_off_t rec_offset);
+void hammer_init_fifo(hammer_fifo_head_t head, u_int16_t type);
+int hammer_generate_undo(hammer_mount_t hmp, hammer_off_t undo_offset,
+                       void *base, int len);
 
 void hammer_put_volume(struct hammer_volume *volume, int flush);
-void hammer_put_supercl(struct hammer_supercl *supercl, int flush);
-void hammer_put_cluster(struct hammer_cluster *cluster, int flush);
 void hammer_put_buffer(struct hammer_buffer *buffer, int flush);
 
-void hammer_init_alist_config(void);
-
 void hammer_start_transaction(struct hammer_transaction *trans,
                              struct hammer_mount *hmp);
 void hammer_start_transaction_tid(struct hammer_transaction *trans,
@@ -690,21 +588,15 @@ int  hammer_ip_del_directory(struct hammer_transaction *trans,
 int  hammer_ip_add_record(struct hammer_transaction *trans,
                        hammer_record_t record);
 int  hammer_ip_delete_range(struct hammer_transaction *trans,
-                       hammer_inode_t ip, int64_t ran_beg, int64_t ran_end,
-                       struct hammer_cursor **spikep);
+                       hammer_inode_t ip, int64_t ran_beg, int64_t ran_end);
 int  hammer_ip_delete_range_all(struct hammer_transaction *trans,
                        hammer_inode_t ip);
 int  hammer_ip_sync_data(struct hammer_transaction *trans,
                        hammer_inode_t ip, int64_t offset,
-                       void *data, int bytes, struct hammer_cursor **spikep);
-int  hammer_ip_sync_record(hammer_record_t rec, struct hammer_cursor **spikep);
+                       void *data, int bytes);
+int  hammer_ip_sync_record(hammer_record_t rec);
 int  hammer_write_record(hammer_cursor_t cursor, hammer_record_ondisk_t rec,
-                       void *data, int cursor_flags);
-
-void hammer_load_spike(hammer_cursor_t cursor, struct hammer_cursor **spikep);
-int hammer_spike(struct hammer_cursor **spikep);
-int hammer_recover(struct hammer_cluster *cluster);
-int buffer_alist_recover(void *info, int32_t blk, int32_t radix, int32_t count);
+                       int32_t rec_len, void *data, int cursor_flags);
 
 int hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag,
                        struct ucred *cred);
@@ -715,34 +607,17 @@ int hammer_io_new(struct vnode *devvp, struct hammer_io *io);
 void hammer_io_release(struct hammer_io *io, int flush);
 void hammer_io_flush(struct hammer_io *io);
 int hammer_io_checkflush(hammer_io_t io);
-void hammer_io_notify_cluster(hammer_cluster_t cluster);
 void hammer_io_clear_modify(struct hammer_io *io);
 void hammer_io_waitdep(struct hammer_io *io);
 
-void hammer_modify_volume(hammer_volume_t volume);
-void hammer_modify_supercl(hammer_supercl_t supercl);
-void hammer_modify_cluster(hammer_cluster_t cluster);
-void hammer_modify_buffer(hammer_buffer_t buffer);
-void hammer_modify_buffer_nodep(hammer_buffer_t buffer);
+void hammer_modify_volume(hammer_volume_t volume, void *base, int len);
+void hammer_modify_buffer(hammer_buffer_t buffer, void *base, int len);
 
 #endif
 
 static __inline void
 hammer_modify_node(struct hammer_node *node)
 {
-       hammer_modify_buffer(node->buffer);
-}
-
-/*
- * Return the cluster-relative byte offset of an element within a buffer
- */
-static __inline int
-hammer_bclu_offset(struct hammer_buffer *buffer, void *ptr)
-{
-       int bclu_offset;
-
-       bclu_offset = buffer->buf_no * HAMMER_BUFSIZE + 
-                     ((char *)ptr - (char *)buffer->ondisk);
-       return(bclu_offset);
+       hammer_modify_buffer(node->buffer, node->ondisk, sizeof(*node->ondisk));
 }
 
diff --git a/sys/vfs/hammer/hammer_alist.c b/sys/vfs/hammer/hammer_alist.c
deleted file mode 100644 (file)
index a865432..0000000
+++ /dev/null
@@ -1,2261 +0,0 @@
-/*
- * HAMMER_ALIST.C -
- *
- * Bitmap allocator/deallocator, using a radix tree with hinting.
- * Unlimited-size allocations, power-of-2 only, power-of-2 aligned results
- * only.  This module has been separated from the generic kernel module and
- * written specifically for embedding in HAMMER storage structures.
- * 
- * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
- * 
- * This code is derived from software contributed to The DragonFly Project
- * by Matthew Dillon <dillon@backplane.com>
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- * 3. Neither the name of The DragonFly Project nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific, prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
- * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $DragonFly: src/sys/vfs/hammer/Attic/hammer_alist.c,v 1.11 2008/01/24 02:14:45 dillon Exp $
- */
-/*
- * This module implements a generic allocator through the use of a hinted
- * radix tree.  All allocations must be in powers of 2 and will return
- * similarly aligned results.  The radix tree typically recurses within
- * a memory buffer and then continues its recursion by chaining to other
- * memory buffers, creating a seemless whole capable of managing any amount
- * of storage.
- *
- * The radix tree is layed out recursively using a linear array.  Each meta
- * node is immediately followed (layed out sequentially in memory) by
- * HAMMER_ALIST_META_RADIX lower level nodes.  This is a recursive structure
- * but one that can be easily scanned through a very simple 'skip'
- * calculation.
- *
- * The radix tree supports an early-termination optimization which
- * effectively allows us to efficiently mix large and small allocations
- * with a single abstraction.  The address space can be partitioned
- * arbitrarily without adding much in the way of additional meta-storage
- * for the allocator.
- *
- * The radix tree supports allocator layering. By supplying a base_radix > 1
- * the allocator will issue callbacks to recurse into lower layers once 
- * higher layers have been exhausted.
- *
- * ALLOCATIONS IN MULTIPLES OF base_radix WILL BE ENTIRELY RETAINED IN THE
- * HIGHER LEVEL ALLOCATOR AND NEVER RECURSE.  This means the init function
- * will never be called and the A-list will consider the underlying zone
- * as being uninitialized.  If you then do a partial free, the A-list will
- * call the init function before freeing.  Most users of this API, including
- * HAMMER, only allocate and free whole zones, or only allocate and free
- * partial zones, and never mix their metaphors.
- *
- * This code can be compiled stand-alone for debugging.
- */
-
-#ifdef _KERNEL
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/lock.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-#include <vm/vm.h>
-#include <vm/vm_object.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_page.h>
-
-#include "hammer_alist.h"
-#include "hammer_disk.h"
-
-#else
-
-#ifndef ALIST_NO_DEBUG
-#define ALIST_DEBUG
-#endif
-
-#include <sys/types.h>
-#include <sys/errno.h>
-#include <stdio.h>
-#include <assert.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stdarg.h>
-
-#define kmalloc(a,b,c) malloc(a)
-#define kfree(a,b)     free(a)
-#define kprintf                printf
-#define KKASSERT(exp)  assert(exp)
-struct malloc_type;
-
-#include "hammer_alist.h"
-
-void panic(const char *ctl, ...);
-
-#endif
-
-/*
- * static support functions
- */
-static int32_t hammer_alst_leaf_alloc_fwd(hammer_almeta_t scan,
-                                       int32_t blk, int count, int32_t atblk);
-static int32_t hammer_alst_meta_alloc_fwd(hammer_alist_t live,
-                                       hammer_almeta_t scan,
-                                       int32_t blk, int32_t count,
-                                       int32_t radix, int skip, int32_t atblk);
-static int32_t hammer_alst_leaf_alloc_rev(hammer_almeta_t scan,
-                                       int32_t blk, int count, int32_t atblk);
-static int32_t hammer_alst_meta_alloc_rev(hammer_alist_t live,
-                                       hammer_almeta_t scan,
-                                       int32_t blk, int32_t count,
-                                       int32_t radix, int skip, int32_t atblk);
-static int32_t hammer_alst_find(hammer_alist_t live, hammer_almeta_t scan,
-                                       int32_t blk, int32_t radix,
-                                       int32_t skip, int32_t atblk, int flags);
-static void hammer_alst_leaf_free(hammer_almeta_t scan, int32_t relblk,
-                                       int count);
-static void hammer_alst_meta_free(hammer_alist_t live, hammer_almeta_t scan,
-                                       int32_t freeBlk, int32_t count, 
-                                       int32_t radix, int skip, int32_t blk);
-static int32_t hammer_alst_radix_init(hammer_almeta_t scan,
-                                       int32_t radix, int skip, int32_t count);
-static void    hammer_alst_radix_recover(hammer_alist_recover_t info,
-                                       hammer_almeta_t scan, int32_t blk,
-                                       int32_t radix, int skip, int32_t count,
-                                       int32_t a_beg, int32_t a_end);
-#ifdef ALIST_DEBUG
-static void    hammer_alst_radix_print(hammer_alist_t live,
-                                       hammer_almeta_t scan, int32_t blk,
-                                       int32_t radix, int skip, int tab);
-#endif
-
-/*
- * Initialize an a-list config structure for use.  The config structure
- * describes the basic structure of an a-list's topology and may be
- * shared by any number of a-lists which have the same topology.
- *
- * blocks is the total number of blocks, that is the number of blocks
- * handled at this layer multiplied by the base radix.
- *
- * When base_radix != 1 the A-list has only meta elements and does not have
- * any leaves, in order to be able to track partial allocations.
- */
-void
-hammer_alist_template(hammer_alist_config_t bl, int32_t blocks,
-                     int32_t base_radix, int32_t maxmeta, int inverted)
-{
-       int radix;
-       int skip;
-
-       /*
-        * Calculate radix and skip field used for scanning.  The leaf nodes
-        * in our tree are either BMAP or META nodes depending on whether
-        * we chain to a lower level allocation layer or not.
-        */
-       if (base_radix == 1)
-               radix = HAMMER_ALIST_BMAP_RADIX;
-       else
-               radix = HAMMER_ALIST_META_RADIX;
-       skip = 1;
-
-       while (radix < blocks / base_radix) {
-               radix *= HAMMER_ALIST_META_RADIX;
-               skip = skip * HAMMER_ALIST_META_RADIX + 1;
-       }
-
-       /*
-        * Increase the radix based on the number of blocks a lower level
-        * allocator is able to handle at the 'base' of our allocator.
-        * If base_radix != 1 the caller will have to initialize the callback
-        * fields to implement the lower level allocator.
-        */
-       KKASSERT((int64_t)radix * (int64_t)base_radix < 0x80000000LL);
-       radix *= base_radix;
-
-       bzero(bl, sizeof(*bl));
-
-       bl->bl_blocks = blocks;
-       bl->bl_base_radix = base_radix;
-       bl->bl_radix = radix;
-       bl->bl_skip = skip;
-       bl->bl_rootblks = hammer_alst_radix_init(NULL, bl->bl_radix,
-                                                bl->bl_skip, blocks);
-       bl->bl_inverted = inverted;
-       ++bl->bl_rootblks;      /* one more for freeblks header */
-       if (base_radix == 1)
-               bl->bl_terminal = 1;
-       KKASSERT(maxmeta == 0 || bl->bl_rootblks <= maxmeta);
-
-#if defined(ALIST_DEBUG)
-       kprintf(
-               "PRIMARY ALIST LAYER manages %d blocks"
-               ", requiring %dK (%d bytes) of ram\n",
-               bl->bl_blocks / bl->bl_base_radix,
-               (bl->bl_rootblks * sizeof(struct hammer_almeta) + 1023) / 1024,
-               (bl->bl_rootblks * sizeof(struct hammer_almeta))
-       );
-       kprintf("ALIST raw radix tree contains %d records\n", bl->bl_rootblks);
-#endif
-}
-
-/*
- * Initialize a new A-list
- */
-void
-hammer_alist_init(hammer_alist_t live, int32_t start, int32_t count,
-                 enum hammer_alloc_state state)
-{
-       hammer_alist_config_t bl = live->config;
-
-       /*
-        * Note: base_freeblks is a count, not a block number limit.
-        */
-       live->meta->bm_alist_freeblks = 0;
-       live->meta->bm_alist_base_freeblks = count;
-       hammer_alst_radix_init(live->meta + 1, bl->bl_radix,
-                              bl->bl_skip, bl->bl_blocks);
-       if (count && state == HAMMER_ASTATE_FREE)
-               hammer_alist_free(live, start, count);
-}
-
-#if !defined(_KERNEL) && defined(ALIST_DEBUG)
-
-/*
- * hammer_alist_create()       (userland only)
- *
- *     create a alist capable of handling up to the specified number of
- *     blocks.  blocks must be greater then 0
- *
- *     The smallest alist consists of a single leaf node capable of 
- *     managing HAMMER_ALIST_BMAP_RADIX blocks, or (if base_radix != 1)
- *     a single meta node capable of managing HAMMER_ALIST_META_RADIX
- *     blocks which recurses into other storage layers for a total
- *     handling capability of (HAMMER_ALIST_META_RADIX * base_radix) blocks.
- *
- *     Larger a-list's increase their capability exponentially by
- *     HAMMER_ALIST_META_RADIX.
- *
- *     The block count is the total number of blocks inclusive of any
- *     layering.  blocks can be less then base_radix and will result in
- *     a radix tree with a single leaf node which then chains down.
- */
-
-hammer_alist_t 
-hammer_alist_create(int32_t blocks, int32_t base_radix,
-                   struct malloc_type *mtype, enum hammer_alloc_state state)
-{
-       hammer_alist_t live;
-       hammer_alist_config_t bl;
-       size_t metasize;
-
-       live = kmalloc(sizeof(*live), mtype, M_WAITOK);
-       live->config = bl = kmalloc(sizeof(*bl), mtype, M_WAITOK);
-       hammer_alist_template(bl, blocks, base_radix, 0, 0);
-
-       metasize = sizeof(*live->meta) * bl->bl_rootblks;
-       live->meta = kmalloc(metasize, mtype, M_WAITOK);
-       bzero(live->meta, metasize);
-
-#if defined(ALIST_DEBUG)
-       kprintf(
-               "ALIST representing %d blocks (%d MB of swap)"
-               ", requiring %dK (%d bytes) of ram\n",
-               bl->bl_blocks,
-               bl->bl_blocks * 4 / 1024,
-               (bl->bl_rootblks * sizeof(*live->meta) + 1023) / 1024,
-               (bl->bl_rootblks * sizeof(*live->meta))
-       );
-       if (base_radix != 1) {
-               kprintf("ALIST recurses when it reaches a base_radix of %d\n",
-                       base_radix);
-       }
-       kprintf("ALIST raw radix tree contains %d records\n", bl->bl_rootblks);
-#endif
-       hammer_alist_init(live, 0, blocks, state);
-       return(live);
-}
-
-void
-hammer_alist_destroy(hammer_alist_t live, struct malloc_type *mtype)
-{
-       kfree(live->config, mtype);
-       kfree(live->meta, mtype);
-       live->config = NULL;
-       live->meta = NULL;
-       kfree(live, mtype);
-}
-
-#endif
-
-/*
- * hammer_alist_alloc()
- *
- *     Reserve space in the block bitmap.  Return the base of a contiguous
- *     region or HAMMER_ALIST_BLOCK_NONE if space could not be allocated.
- */
-
-int32_t 
-hammer_alist_alloc(hammer_alist_t live, int32_t count)
-{
-       int32_t blk = HAMMER_ALIST_BLOCK_NONE;
-       hammer_alist_config_t bl = live->config;
-
-       KKASSERT((count | (count - 1)) == (count << 1) - 1);
-
-       if (bl && count <= bl->bl_radix) {
-               /*
-                * When skip is 1 we are at a leaf.  If we are the terminal
-                * allocator we use our native leaf functions and radix will
-                * be HAMMER_ALIST_BMAP_RADIX.  Otherwise we are a meta node
-                * which will chain to another allocator.
-                */
-               if (bl->bl_skip == 1 && bl->bl_terminal) {
-#ifndef _KERNEL
-                       KKASSERT(bl->bl_radix == HAMMER_ALIST_BMAP_RADIX);
-#endif
-                       blk = hammer_alst_leaf_alloc_fwd(
-                                   live->meta + 1, 0, count, 0);
-               } else {
-                       blk = hammer_alst_meta_alloc_fwd(
-                                   live, live->meta + 1,
-                                   0, count, bl->bl_radix, bl->bl_skip, 0);
-               }
-               if (blk != HAMMER_ALIST_BLOCK_NONE)
-                       live->meta->bm_alist_freeblks -= count;
-       }
-       return(blk);
-}
-
-int32_t 
-hammer_alist_alloc_fwd(hammer_alist_t live, int32_t count, int32_t atblk)
-{
-       int32_t blk = HAMMER_ALIST_BLOCK_NONE;
-       hammer_alist_config_t bl = live->config;
-
-       KKASSERT((count | (count - 1)) == (count << 1) - 1);
-
-       if (bl && count <= bl->bl_radix) {
-               /*
-                * When skip is 1 we are at a leaf.  If we are the terminal
-                * allocator we use our native leaf functions and radix will
-                * be HAMMER_ALIST_BMAP_RADIX.  Otherwise we are a meta node
-                * which will chain to another allocator.
-                */
-               if (bl->bl_skip == 1 && bl->bl_terminal) {
-#ifndef _KERNEL
-                       KKASSERT(bl->bl_radix == HAMMER_ALIST_BMAP_RADIX);
-#endif
-                       blk = hammer_alst_leaf_alloc_fwd(
-                                   live->meta + 1, 0, count, atblk);
-               } else {
-                       blk = hammer_alst_meta_alloc_fwd(
-                                   live, live->meta + 1,
-                                   0, count, bl->bl_radix, bl->bl_skip, atblk);
-               }
-               if (blk != HAMMER_ALIST_BLOCK_NONE)
-                       live->meta->bm_alist_freeblks -= count;
-       }
-       return(blk);
-}
-
-int32_t 
-hammer_alist_alloc_rev(hammer_alist_t live, int32_t count, int32_t atblk)
-{
-       hammer_alist_config_t bl = live->config;
-       int32_t blk = HAMMER_ALIST_BLOCK_NONE;
-
-       KKASSERT((count | (count - 1)) == (count << 1) - 1);
-
-       if (bl && count < bl->bl_radix) {
-               if (bl->bl_skip == 1 && bl->bl_terminal) {
-#ifndef _KERNEL
-                       KKASSERT(bl->bl_radix == HAMMER_ALIST_BMAP_RADIX);
-#endif
-                       blk = hammer_alst_leaf_alloc_rev(
-                                   live->meta + 1, 0, count, atblk);
-               } else {
-                       blk = hammer_alst_meta_alloc_rev(
-                                   live, live->meta + 1,
-                                   0, count, bl->bl_radix, bl->bl_skip, atblk);
-               }
-               if (blk != HAMMER_ALIST_BLOCK_NONE)
-                       live->meta->bm_alist_freeblks -= count;
-       }
-       return(blk);
-}
-
-/*
- * hammer_alist_find()
- *
- *     Locate the first block >= atblk and < maxblk marked as allocated
- *     in the A-list and return it.  Return HAMMER_ALIST_BLOCK_NONE if
- *     no block could be found.
- *
- *     HAMMER_ALIST_FIND_NOSTACK  - A special search mode which returns
- *     all initialized blocks (whether allocated or free) on bl_base_radix
- *     boundaries.  The all-free (11) state is treated as initialized only
- *     if bl_inverted is set.
- *
- *     HAMMER_ALIST_FIND_INITONLY - only initialized blocks are returned.
- *     Blocks belonging to the all-allocated/uninitialized state are not
- *     returned.
- */
-int32_t
-hammer_alist_find(hammer_alist_t live, int32_t atblk, int32_t maxblk, int flags)
-{
-       hammer_alist_config_t bl = live->config;
-       KKASSERT(live->config != NULL);
-       KKASSERT(atblk >= 0);
-       atblk = hammer_alst_find(live, live->meta + 1, 0, bl->bl_radix,
-                                bl->bl_skip, atblk, flags);
-       if (atblk >= maxblk)
-               atblk = HAMMER_ALIST_BLOCK_NONE;
-       return(atblk);
-}
-
-/*
- * hammer_alist_free()
- *
- *     Free up space in the block bitmap.  Return the base of a contiguous
- *     region.  Panic if an inconsistancy is found.
- *
- *     Unlike allocations, there are no alignment requirements for blkno or
- *     count when freeing blocks.
- */
-
-void 
-hammer_alist_free(hammer_alist_t live, int32_t blkno, int32_t count)
-{
-       hammer_alist_config_t bl = live->config;
-
-       KKASSERT(blkno + count <= bl->bl_blocks);
-       if (bl->bl_skip == 1 && bl->bl_terminal) {
-#ifndef _KERNEL
-               KKASSERT(bl->bl_radix == HAMMER_ALIST_BMAP_RADIX);
-#endif
-               hammer_alst_leaf_free(live->meta + 1, blkno, count);
-       } else {
-               hammer_alst_meta_free(live, live->meta + 1,
-                                     blkno, count,
-                                     bl->bl_radix, bl->bl_skip, 0);
-       }
-       live->meta->bm_alist_freeblks += count;
-}
-
-/*
- * Recover an A-list.  This will dive down to the leaves and regenerate
- * the hints and the freeblks count.  This function will also recurse
- * through any stacked A-lists.  > 0 is returned on success, a negative
- * error code on failure.
- *
- * Since A-lists have no pointers the only thing that can prevent recovery
- * is an I/O error in e.g. a stacked A-list.  This doesn't mean the recovered
- * map will be meaningful, however.
- *
- * blk is usually passed as 0 at the top level and is adjusted as the recovery
- * code scans the A-list.  It is only used when recursing down a stacked
- * A-list.
- *
- * (start,count) describes the region of the A-list which is allowed to contain
- * free blocks.  Any region to the left or right will be marked as allocated.
- */
-int
-hammer_alist_recover(hammer_alist_t live, int32_t blk, int32_t start,
-                    int32_t count)
-{
-       hammer_alist_config_t bl = live->config;
-       struct hammer_alist_recover info;
-
-       info.live = live;
-       info.error = 0;
-
-       live->meta->bm_alist_freeblks = 0;
-       live->meta->bm_alist_base_freeblks = count;
-       hammer_alst_radix_recover(&info, live->meta + 1, blk, bl->bl_radix,
-                                 bl->bl_skip, bl->bl_blocks,
-                                 start, start + count);
-       if (info.error)
-               return(info.error);
-       return(live->meta->bm_alist_freeblks);
-}
-
-int
-hammer_alist_isfull(hammer_alist_t live)
-{
-       return(live->meta->bm_alist_freeblks == 0);
-}
-
-int
-hammer_alist_isempty(hammer_alist_t live)
-{
-       return((int)live->meta->bm_alist_freeblks ==
-              live->meta->bm_alist_base_freeblks);
-}
-
-#ifdef ALIST_DEBUG
-
-/*
- * alist_print()    - dump radix tree
- */
-
-void
-hammer_alist_print(hammer_alist_t live, int tab)
-{
-       hammer_alist_config_t bl = live->config;
-
-       kprintf("%*.*sALIST (%d/%d free blocks) {\n",
-               tab, tab, "",
-               live->meta->bm_alist_freeblks,
-               live->meta->bm_alist_base_freeblks);
-       hammer_alst_radix_print(live, live->meta + 1, 0,
-                               bl->bl_radix, bl->bl_skip, tab + 4);
-       kprintf("%*.*s}\n", tab, tab, "");
-}
-
-#endif
-
-/************************************************************************
- *                       ALLOCATION SUPPORT FUNCTIONS                  *
- ************************************************************************
- *
- *     These support functions do all the actual work.  They may seem 
- *     rather longish, but that's because I've commented them up.  The
- *     actual code is straight forward.
- *
- */
-
-/*
- * hammer_alist_leaf_alloc_fwd()
- *
- *     Allocate at a leaf in the radix tree (a bitmap).
- *
- *     This is the core of the allocator and is optimized for the 1 block
- *     and the HAMMER_ALIST_BMAP_RADIX block allocation cases.  Other cases
- *     are somewhat slower.  The 1 block allocation case is log2 and extremely
- *     quick.
- */
-
-static int32_t
-hammer_alst_leaf_alloc_fwd(hammer_almeta_t scan, int32_t blk,
-                          int count, int32_t atblk)
-{
-       u_int32_t orig = scan->bm_bitmap;
-       int32_t saveblk = blk;
-
-       /*
-        * Optimize bitmap all-allocated case.  Also, count = 1
-        * case assumes at least 1 bit is free in the bitmap, so
-        * we have to take care of this case here.
-        */
-       if (orig == 0) {
-               scan->bm_bighint = 0;
-               return(HAMMER_ALIST_BLOCK_NONE);
-       }
-
-#if 0
-       /*
-        * Optimized code to allocate one bit out of the bitmap
-        *
-        * mask iterates e.g. 00001111 00000011 00000001
-        *
-        * mask starts at 00001111
-        */
-       if (count == 1) {
-               u_int32_t mask;
-               int j = HAMMER_ALIST_BMAP_RADIX/2;
-               int r = 0;
-
-               mask = (u_int32_t)-1 >> (HAMMER_ALIST_BMAP_RADIX/2);
-
-               while (j) {
-                       if ((orig & mask) == 0) {
-                           r += j;
-                           orig >>= j;
-                       }
-                       j >>= 1;
-                       mask >>= j;
-               }
-               scan->bm_bitmap &= ~(1 << r);
-               return(blk + r);
-       }
-#endif
-
-       /*
-        * non-optimized code to allocate N bits out of the bitmap.
-        * The more bits, the faster the code runs.  It will run
-        * the slowest allocating 2 bits, but since there aren't any
-        * memory ops in the core loop (or shouldn't be, anyway),
-        * you probably won't notice the difference.
-        *
-        * Similar to the blist case, the alist code also requires
-        * allocations to be power-of-2 sized and aligned to the
-        * size of the allocation, which simplifies the algorithm.
-        */
-       {
-               int j;
-               int n = HAMMER_ALIST_BMAP_RADIX - count;
-               u_int32_t mask;
-
-               mask = (u_int32_t)-1 >> n;
-
-               for (j = 0; j <= n; j += count) {
-                       if ((orig & mask) == mask && blk >= atblk) {
-                               scan->bm_bitmap &= ~mask;
-                               return(blk);
-                       }
-                       mask = mask << count;
-                       blk += count;
-               }
-       }
-
-       /*
-        * We couldn't allocate count in this subtree, update bighint if
-        * atblk didn't interfere with the hinting mechanism.
-        */
-       if (saveblk >= atblk)
-               scan->bm_bighint = count - 1;
-       return(HAMMER_ALIST_BLOCK_NONE);
-}
-
-/*
- * This version allocates blocks in the reverse direction.
- */
-static int32_t
-hammer_alst_leaf_alloc_rev(hammer_almeta_t scan, int32_t blk,
-                          int count, int32_t atblk)
-{
-       u_int32_t orig = scan->bm_bitmap;
-       int32_t saveblk;
-
-       /*
-        * Optimize bitmap all-allocated case.  Also, count = 1
-        * case assumes at least 1 bit is free in the bitmap, so
-        * we have to take care of this case here.
-        */
-       if (orig == 0) {
-               scan->bm_bighint = 0;
-               return(HAMMER_ALIST_BLOCK_NONE);
-       }
-
-#if 0
-       /*
-        * Optimized code to allocate one bit out of the bitmap
-        */
-       if (count == 1) {
-               u_int32_t mask;
-               int j = HAMMER_ALIST_BMAP_RADIX/2;
-               int r = HAMMER_ALIST_BMAP_RADIX - 1;
-
-               mask = ~((u_int32_t)-1 >> (HAMMER_ALIST_BMAP_RADIX/2));
-
-               while (j) {
-                       if ((orig & mask) == 0) {
-                           r -= j;
-                           orig <<= j;
-                       }
-                       j >>= 1;
-                       mask <<= j;
-               }
-               scan->bm_bitmap &= ~(1 << r);
-               return(blk + r);
-       }
-#endif
-
-       /*
-        * non-optimized code to allocate N bits out of the bitmap.
-        * The more bits, the faster the code runs.  It will run
-        * the slowest allocating 2 bits, but since there aren't any
-        * memory ops in the core loop (or shouldn't be, anyway),
-        * you probably won't notice the difference.
-        *
-        * Similar to the blist case, the alist code also requires
-        * allocations to be power-of-2 sized and aligned to the
-        * size of the allocation, which simplifies the algorithm.
-        *
-        * initial mask if count == 2:  1100....0000
-        */
-       {
-               int j;
-               int n = HAMMER_ALIST_BMAP_RADIX - count;
-               u_int32_t mask;
-
-               mask = ((u_int32_t)-1 >> n) << n;
-               blk += n;
-               saveblk = blk;
-
-               for (j = n; j >= 0; j -= count) {
-                       if ((orig & mask) == mask && blk <= atblk) {
-                               scan->bm_bitmap &= ~mask;
-                               return(blk);
-                       }
-                       mask = mask >> count;
-                       blk -= count;
-               }
-       }
-
-       /*
-        * We couldn't allocate count in this subtree, update bighint if
-        * atblk didn't interfere with it.
-        */
-       if (saveblk <= atblk)
-               scan->bm_bighint = count - 1;
-       return(HAMMER_ALIST_BLOCK_NONE);
-}
-
-/*
- * hammer_alist_meta_alloc_fwd()
- *
- *     Allocate at a meta in the radix tree.
- *
- *     Attempt to allocate at a meta node.  If we can't, we update
- *     bighint and return a failure.  Updating bighint optimize future
- *     calls that hit this node.  We have to check for our collapse cases
- *     and we have a few optimizations strewn in as well.
- */
-static int32_t
-hammer_alst_meta_alloc_fwd(hammer_alist_t live, hammer_almeta_t scan,
-                          int32_t blk, int32_t count,
-                          int32_t radix, int skip, int32_t atblk
-) {
-       hammer_alist_config_t bl;
-       u_int32_t mask;
-       u_int32_t pmask;
-       int32_t saveblk;
-       int next_skip;
-       int i;
-
-       /*
-        * ALL-ALLOCATED special case
-        */
-       if (scan->bm_bitmap == 0 || scan->bm_bitmap == 0xAAAAAAAAU)  {
-               scan->bm_bighint = 0;
-               return(HAMMER_ALIST_BLOCK_NONE);
-       }
-
-       radix /= HAMMER_ALIST_META_RADIX;
-       bl = live->config;
-
-       /*
-        * Radix now represents each bitmap entry for this meta node.  If
-        * the number of blocks being allocated can be fully represented,
-        * we allocate directly out of this meta node.
-        *
-        * Meta node bitmaps use 2 bits per block.
-        *
-        *      00      ALL-ALLOCATED - UNINITIALIZED
-        *      01      PARTIALLY-FREE/PARTIALLY-ALLOCATED
-        *      10      ALL-ALLOCATED - INITIALIZED
-        *      11      ALL-FREE      - UNINITIALIZED
-        */
-       if (count >= radix) {
-               int n = count / radix * 2;      /* number of bits */
-               int nd2 = n / 2;
-               int j;
-
-               mask = (u_int32_t)-1 >> (HAMMER_ALIST_BMAP_RADIX - n);
-               saveblk = blk;
-               for (j = 0; j < (int)HAMMER_ALIST_META_RADIX; j += nd2) {
-                       if ((scan->bm_bitmap & mask) == mask && blk >= atblk) {
-                               /*
-                                * NOTE: Marked all-allocate/uninitialized
-                                * rather then all-allocated/initialized.
-                                * See the warning at the top of the file.
-                                */
-                               scan->bm_bitmap &= ~mask;
-                               return(blk);
-                       }
-                       mask <<= n;
-                       blk += radix * nd2;
-               }
-               if (scan->bm_bighint >= count && saveblk >= atblk)
-                       scan->bm_bighint = count >> 1;
-               return(HAMMER_ALIST_BLOCK_NONE);
-       }
-
-       /*
-        * If the count is too big we couldn't allocate anything from a
-        * recursion even if the sub-tree were entirely free.
-        */
-       saveblk = blk;
-       if (count > radix)
-               goto failed;
-
-       /*
-        * If not we have to recurse.
-        */
-       mask = 0x00000003;
-       pmask = 0x00000001;
-
-       if (skip == 1) {
-               /*
-                * If skip is 1 we are a meta leaf node, which means there
-                * is another allocation layer we have to dive down into.
-                */
-               for (i = 0; i < (int)HAMMER_ALIST_META_RADIX; ++i) {
-                       /*
-                        * If the next layer is completely free then call
-                        * its init function to initialize it.
-                        */
-                       if ((scan->bm_bitmap & mask) == mask &&
-                           blk + radix > atblk) {
-                               if (bl->bl_radix_init(live->info, blk, radix, HAMMER_ASTATE_FREE) == 0) {
-                                       /*
-                                        * NOTE: Marked all-allocate/uninit-
-                                        * ialized rather then all-allocated/
-                                        * initialized.  See the warning at
-                                        * the top of the file.
-                                        */
-                                       scan->bm_bitmap &= ~mask;
-                                       scan->bm_bitmap |= pmask;
-                               }
-                       }
-
-                       /*
-                        * If there may be some free blocks try to allocate
-                        * out of the layer.  If the layer indicates that
-                        * it is completely full then clear both bits to
-                        * propogate the condition.
-                        */
-                       if ((scan->bm_bitmap & mask) == pmask &&
-                           blk + radix > atblk) {
-                               int32_t r;
-                               int32_t full;
-
-                               r = bl->bl_radix_alloc_fwd(live->info, blk,
-                                                          radix, count,
-                                                          atblk, &full);
-                               if (full) {
-                                       scan->bm_bitmap &= ~mask;
-                                       scan->bm_bitmap |= pmask << 1;
-                               }
-                               if (r != HAMMER_ALIST_BLOCK_NONE)
-                                       return(r);
-                       }
-                       blk += radix;
-                       mask <<= 2;
-                       pmask <<= 2;
-               }
-       } else {
-               /*
-                * Otherwise there are sub-records in the current a-list
-                * layer.  We can also peek into the sub-layers to get
-                * more accurate size hints.
-                */
-               next_skip = (skip - 1) / HAMMER_ALIST_META_RADIX;
-               for (i = 1; i < skip; i += next_skip) {
-                       if (scan[i].bm_bighint == (int32_t)-1) {
-                               /* 
-                                * Terminator
-                                */
-                               break;
-                       }
-
-                       /*
-                        * Initialize bitmap if allocating from the all-free
-                        * case.
-                        */
-                       if ((scan->bm_bitmap & mask) == mask) {
-                               scan[i].bm_bitmap = (u_int32_t)-1;
-                               scan[i].bm_bighint = radix;
-                       }
-
-                       if (count <= scan[i].bm_bighint &&
-                           blk + radix > atblk) {
-                               /*
-                                * count fits in object, recurse into the
-                                * next layer.  If the next_skip is 1 it
-                                * will be either a normal leaf or a meta
-                                * leaf.
-                                */
-                               int32_t r;
-
-                               if (next_skip == 1 && bl->bl_terminal) {
-                                       r = hammer_alst_leaf_alloc_fwd(
-                                               &scan[i], blk, count, atblk);
-                               } else {
-                                       r = hammer_alst_meta_alloc_fwd(
-                                               live, &scan[i],
-                                               blk, count,
-                                               radix, next_skip, atblk);
-                               }
-                               if (r != HAMMER_ALIST_BLOCK_NONE) {
-                                       if (scan[i].bm_bitmap == 0) {
-                                               scan->bm_bitmap &= ~mask;
-                                       } else if (scan[i].bm_bitmap == 0xAAAAAAAAU) {
-                                               scan->bm_bitmap &= ~mask;
-                                               scan->bm_bitmap |= pmask << 1;
-                                       } else {
-                                               scan->bm_bitmap &= ~mask;
-                                               scan->bm_bitmap |= pmask;
-                                       }
-                                       return(r);
-                               }
-                       }
-                       blk += radix;
-                       mask <<= 2;
-                       pmask <<= 2;
-               }
-       }
-
-failed:
-       /*
-        * We couldn't allocate count in this subtree, update bighint.
-        */
-       if (scan->bm_bighint >= count && saveblk >= atblk)
-               scan->bm_bighint = count >> 1;
-       return(HAMMER_ALIST_BLOCK_NONE);
-}
-
-/*
- * This version allocates blocks in the reverse direction.
- *
- * This is really nasty code
- */
-static int32_t
-hammer_alst_meta_alloc_rev(hammer_alist_t live, hammer_almeta_t scan,
-                          int32_t blk, int32_t count,
-                          int32_t radix, int skip, int32_t atblk
-) {
-       hammer_alist_config_t bl;
-       int i;
-       int j;
-       u_int32_t mask;
-       u_int32_t pmask;
-       int32_t saveblk;
-       int next_skip;
-
-       /*
-        * ALL-ALLOCATED special case
-        */
-       if (scan->bm_bitmap == 0 || scan->bm_bitmap == 0xAAAAAAAAU)  {
-               scan->bm_bighint = 0;
-               return(HAMMER_ALIST_BLOCK_NONE);
-       }
-
-       radix /= HAMMER_ALIST_META_RADIX;
-       bl = live->config;
-
-       /*
-        * Radix now represents each bitmap entry for this meta node.  If
-        * the number of blocks being allocated can be fully represented,
-        * we allocate directly out of this meta node.
-        *
-        * Meta node bitmaps use 2 bits per block.
-        *
-        *      00      ALL-ALLOCATED (uninitialized)
-        *      01      PARTIALLY-FREE/PARTIALLY-ALLOCATED
-        *      10      ALL-ALLOCATED (initialized)
-        *      11      ALL-FREE
-        */
-       if (count >= radix) {
-               int n = count / radix * 2;      /* number of bits */
-               int nd2 = n / 2;                /* number of radi */
-
-               /*
-                * Initial mask if e.g. n == 2:  1100....0000
-                */
-               mask = (u_int32_t)-1 >> (HAMMER_ALIST_BMAP_RADIX - n) <<
-                       (HAMMER_ALIST_BMAP_RADIX - n);
-               blk += (HAMMER_ALIST_META_RADIX - nd2) * radix;
-               saveblk = blk;
-               for (j = HAMMER_ALIST_META_RADIX - nd2; j >= 0; j -= nd2) {
-                       if ((scan->bm_bitmap & mask) == mask && blk <= atblk) {
-                               scan->bm_bitmap &= ~mask;
-                               return(blk);
-                       }
-                       mask >>= n;
-                       blk -= nd2 * radix;
-               }
-               if (scan->bm_bighint >= count && saveblk <= atblk)
-                       scan->bm_bighint = count >> 1;
-               return(HAMMER_ALIST_BLOCK_NONE);
-       }
-
-       /*
-        * NOTE: saveblk must represent the entire layer, not the base blk
-        * of the last element.  Otherwise an atblk that is inside the
-        * last element can cause bighint to be updated for a failed
-        * allocation when we didn't actually test all available blocks.
-        */
-
-       if (skip == 1) {
-               /*
-                * We need the recurse but we are at a meta node leaf, which
-                * means there is another layer under us.
-                */
-               mask = 0xC0000000;
-               pmask = 0x40000000;
-               blk += radix * HAMMER_ALIST_META_RADIX;
-
-               saveblk = blk;
-               blk -= radix;
-
-               for (i = 0; i < (int)HAMMER_ALIST_META_RADIX; ++i) {
-                       /*
-                        * If the next layer is completely free then call
-                        * its init function to initialize it.  The init
-                        * function is responsible for the initial freeing.
-                        */
-                       if ((scan->bm_bitmap & mask) == mask && blk <= atblk) {
-                               if (bl->bl_radix_init(live->info, blk, radix, HAMMER_ASTATE_FREE) == 0) {
-                                       scan->bm_bitmap &= ~mask;
-                                       scan->bm_bitmap |= pmask;
-                               }
-                       }
-
-                       /*
-                        * If there may be some free blocks try to allocate
-                        * out of the layer.  If the layer indicates that
-                        * it is completely full then clear both bits to
-                        * propogate the condition.
-                        */
-                       if ((scan->bm_bitmap & mask) == pmask && blk <= atblk) {
-                               int32_t r;
-                               int32_t full;
-
-                               r = bl->bl_radix_alloc_rev(live->info, blk,
-                                                          radix, count,
-                                                          atblk, &full);
-                               if (full) {
-                                       scan->bm_bitmap &= ~mask;
-                                       scan->bm_bitmap |= pmask << 1;
-                               }
-                               if (r != HAMMER_ALIST_BLOCK_NONE)
-                                       return(r);
-                       }
-                       mask >>= 2;
-                       pmask >>= 2;
-                       blk -= radix;
-               }
-       } else {
-               /*
-                * Since we are going in the reverse direction we need an
-                * extra loop to determine if there is a terminator, then
-                * run backwards.
-                *
-                * This is a little weird but we do not want to overflow the
-                * mask/pmask in the loop.
-                */
-               next_skip = (skip - 1) / HAMMER_ALIST_META_RADIX;
-               j = 0;
-               for (i = 1; i < skip; i += next_skip) {
-                       if (scan[i].bm_bighint == (int32_t)-1) {
-                               KKASSERT(j != 0);
-                               break;
-                       }
-                       blk += radix;
-                       j += 2;
-               }
-
-               saveblk = blk;
-               blk -= radix;
-               j -= 2;
-               mask = 0x00000003 << j;
-               pmask = 0x00000001 << j;
-               i -= next_skip;
-
-               while (i >= 1) {
-                       /*
-                        * Initialize the bitmap in the child if allocating
-                        * from the all-free case.
-                        */
-                       if ((scan->bm_bitmap & mask) == mask) {
-                               scan[i].bm_bitmap = (u_int32_t)-1;
-                               scan[i].bm_bighint = radix;
-                       }
-
-                       /*
-                        * Handle various count cases.  Bighint may be too
-                        * large but is never too small.
-                        */
-                       if (count <= scan[i].bm_bighint && blk <= atblk) {
-                               /*
-                                * count fits in object
-                                */
-                               int32_t r;
-                               if (next_skip == 1 && bl->bl_terminal) {
-                                       r = hammer_alst_leaf_alloc_rev(
-                                               &scan[i], blk, count, atblk);
-                               } else {
-                                       r = hammer_alst_meta_alloc_rev(
-                                               live, &scan[i],
-                                               blk, count,
-                                               radix, next_skip, atblk);
-                               }
-                               if (r != HAMMER_ALIST_BLOCK_NONE) {
-                                       if (scan[i].bm_bitmap == 0) {
-                                               scan->bm_bitmap &= ~mask;
-                                       } else if (scan[i].bm_bitmap == 0xAAAAAAAAU) {
-                                               scan->bm_bitmap &= ~mask;
-                                               scan->bm_bitmap |= pmask << 1;
-                                       } else {
-                                               scan->bm_bitmap &= ~mask;
-                                               scan->bm_bitmap |= pmask;
-                                       }
-                                       return(r);
-                               }
-                       }
-                       blk -= radix;
-                       mask >>= 2;
-                       pmask >>= 2;
-                       i -= next_skip;
-               }
-       }
-
-       /*
-        * We couldn't allocate count in this subtree, update bighint.
-        * Since we are restricted to powers of 2, the next highest count
-        * we might be able to allocate is (count >> 1).
-        */
-       if (scan->bm_bighint >= count && saveblk <= atblk)
-               scan->bm_bighint = count >> 1;
-       return(HAMMER_ALIST_BLOCK_NONE);
-}
-
-/*
- * HAMMER_ALST_FIND()
- *
- * Locate the first allocated block greater or equal to atblk.
- */
-static int32_t
-hammer_alst_find(hammer_alist_t live, hammer_almeta_t scan, int32_t blk,
-                int32_t radix, int32_t skip, int32_t atblk, int flags)
-{
-       u_int32_t mask;
-       u_int32_t pmask;
-       int32_t next_skip;
-       int32_t tmpblk;
-       int nostack = flags & HAMMER_ALIST_FIND_NOSTACK;
-       int initonly = flags & HAMMER_ALIST_FIND_INITONLY;
-       int i;
-       int j;
-
-       /*
-        * Leaf node (currently hammer_alist_find() only works on terminal
-        * a-list's and the case is asserted in hammer_alist_find()).
-        */
-       if (skip == 1 && live->config->bl_terminal) {
-               if (scan->bm_bitmap == (u_int32_t)-1)
-                       return(HAMMER_ALIST_BLOCK_NONE);
-               for (i = 0; i < (int)HAMMER_ALIST_BMAP_RADIX; ++i) {
-                       if (blk + i < atblk)
-                               continue;
-                       if ((scan->bm_bitmap & (1 << i)) == 0)
-                               return(blk + i);
-               }
-               return(HAMMER_ALIST_BLOCK_NONE);
-       }
-
-       /*
-        * Meta
-        */
-       radix /= HAMMER_ALIST_META_RADIX;
-       next_skip = (skip - 1) / HAMMER_ALIST_META_RADIX;
-       mask = 0x00000003;
-       pmask = 0x00000001;
-       for (j = 0, i = 1; j < (int)HAMMER_ALIST_META_RADIX;
-            (i += next_skip), ++j) {
-               /*
-                * Check Terminator
-                */
-               if (scan[i].bm_bighint == (int32_t)-1) {
-                       break;
-               }
-
-               /*
-                * Recurse if this meta might contain a desired block.
-                */
-               if (blk + radix > atblk) {
-                       if (next_skip == 0 && nostack) {
-                               /*
-                                * At base_radix and nostack was specified.
-                                */
-                               if ((scan->bm_bitmap & mask) == 0) {
-                                       /*
-                                        * 00 - all-allocated, uninitalized
-                                        */
-                                       if (initonly == 0) {
-                                               if (atblk <= blk)
-                                                       return(blk);
-                                       }
-                               } else if ((scan->bm_bitmap & mask) != mask) {
-                                       /*
-                                        * 01 or 10 - partially allocated
-                                        * or all-allocated/initialized.
-                                        */
-                                       if (atblk <= blk)
-                                               return(blk);
-                               } else if ((scan->bm_bitmap & mask) == mask) {
-                                       /*
-                                        * 11 - all free.  If inverted it is
-                                        * initialized, however, and must be
-                                        * returned for the no-stack case.
-                                        */
-                                       if (live->config->bl_inverted) {
-                                               if (atblk <= blk)
-                                                       return(blk);
-                                       }
-                               }
-                       } else if ((scan->bm_bitmap & mask) == 0) {
-                               /*
-                                * 00 - all-allocated, uninitialized
-                                */
-                               if (initonly == 0) {
-                                       goto return_all_meta;
-                               }
-                               /* else skip */
-                       } else if ((scan->bm_bitmap & mask) == (pmask << 1)) {
-                               /*
-                                * 10 - all-allocated, initialized
-                                */
-                               goto return_all_meta;
-                       } else if ((scan->bm_bitmap & mask) == mask) {
-                               /* 
-                                * 11 - all-free (skip if not inverted)
-                                *
-                                * If nostack and inverted we must return
-                                * blocks on the base radix boundary.
-                                */
-                               if (nostack && live->config->bl_inverted) {
-                                       int32_t bradix;
-
-return_all_meta:
-                                       bradix = live->config->bl_base_radix;
-                                       if (atblk <= blk)
-                                               return(blk);
-                                       atblk = (atblk + bradix - 1) & 
-                                               ~(bradix - 1);
-                                       if (atblk < blk + radix)
-                                               return(atblk);
-                               }
-                       } else if (next_skip == 0) {
-                               /*
-                                * Partially allocated but we have to recurse
-                                * into a stacked A-list.
-                                *
-                                * If no-stack is set the caller just wants
-                                * to know if the 
-                                */
-                               if (atblk < blk)
-                                       atblk = blk;
-                               tmpblk = live->config->bl_radix_find(
-                                               live->info, blk, radix,
-                                               atblk, flags);
-                               if (tmpblk != HAMMER_ALIST_BLOCK_NONE)
-                                       return(tmpblk);
-                               /* else skip */
-                       } else if ((scan->bm_bitmap & mask) == pmask) {
-                               /*
-                                * 01 - partially-allocated
-                                */
-                               if (atblk < blk)
-                                       atblk = blk;
-                               tmpblk = hammer_alst_find(live, &scan[i],
-                                                         blk, radix,
-                                                         next_skip, atblk,
-                                                         flags);
-                               if (tmpblk != HAMMER_ALIST_BLOCK_NONE)
-                                       return(tmpblk);
-                       }
-               }
-               mask <<= 2;
-               pmask <<= 2;
-               blk += radix;
-       }
-       return(HAMMER_ALIST_BLOCK_NONE);
-}
-
-/*
- * HAMMER_ALST_LEAF_FREE()
- *
- *     Free allocated blocks from leaf bitmap.  The allocation code is
- *     restricted to powers of 2, the freeing code is not.
- */
-static void
-hammer_alst_leaf_free(hammer_almeta_t scan, int32_t blk, int count) {
-       /*
-        * free some data in this bitmap
-        *
-        * e.g.
-        *      0000111111111110000
-        *          \_________/\__/
-        *              v        n
-        */
-       int n = blk & (HAMMER_ALIST_BMAP_RADIX - 1);
-       u_int32_t mask;
-
-       mask = ((u_int32_t)-1 << n) &
-           ((u_int32_t)-1 >> (HAMMER_ALIST_BMAP_RADIX - count - n));
-
-       if (scan->bm_bitmap & mask)
-               panic("hammer_alst_radix_free: freeing free block");
-       scan->bm_bitmap |= mask;
-
-       /*
-        * We could probably do a better job here.  We are required to make
-        * bighint at least as large as the biggest contiguous block of 
-        * data.  If we just shoehorn it, a little extra overhead will
-        * be incured on the next allocation (but only that one typically).
-        */
-       scan->bm_bighint = HAMMER_ALIST_BMAP_RADIX;
-}
-
-/*
- * BLST_META_FREE()
- *
- *     Free allocated blocks from radix tree meta info.
- *
- *     This support routine frees a range of blocks from the bitmap.
- *     The range must be entirely enclosed by this radix node.  If a
- *     meta node, we break the range down recursively to free blocks
- *     in subnodes (which means that this code can free an arbitrary
- *     range whereas the allocation code cannot allocate an arbitrary
- *     range).
- */
-
-static void 
-hammer_alst_meta_free(hammer_alist_t live, hammer_almeta_t scan, 
-                     int32_t freeBlk, int32_t count,
-                     int32_t radix, int skip, int32_t blk
-) {
-       hammer_alist_config_t bl;
-       int next_skip;
-       u_int32_t mask;
-       u_int32_t pmask;
-       int i;
-
-       /*
-        * Break the free down into its components.  Because it is so easy
-        * to implement, frees are not limited to power-of-2 sizes.
-        *
-        * Each block in a meta-node bitmap takes two bits.
-        */
-       radix /= HAMMER_ALIST_META_RADIX;
-       bl = live->config;
-
-       i = (freeBlk - blk) / radix;
-       blk += i * radix;
-       mask = 0x00000003 << (i * 2);
-       pmask = 0x00000001 << (i * 2);
-
-       if (skip == 1) {
-               /*
-                * Our meta node is a leaf node, which means it must recurse
-                * into another allocator.
-                */
-               while (i < (int)HAMMER_ALIST_META_RADIX &&
-                      blk < freeBlk + count) {
-                       int32_t v;
-
-                       v = blk + radix - freeBlk;
-                       if (v > count)
-                               v = count;
-
-                       if (scan->bm_bighint == (int32_t)-1)
-                               panic("hammer_alst_meta_free: freeing unexpected range");
-                       KKASSERT((scan->bm_bitmap & mask) != mask);
-
-                       if (freeBlk == blk && count >= radix) {
-                               /*
-                                * Freeing an entire zone.  Only recurse if
-                                * the zone was initialized.  A 00 state means
-                                * that the zone is marked all-allocated,
-                                * but was never initialized.
-                                *
-                                * Then set the zone to the all-free state (11).
-                                */
-                               int32_t empty;
-
-                               if (scan->bm_bitmap & mask) {
-                                       bl->bl_radix_free(live->info, blk, radix,
-                                                         freeBlk - blk, v, &empty);
-                                       KKASSERT(empty);
-                                       bl->bl_radix_destroy(live->info, blk, radix);
-                               }
-                               scan->bm_bitmap |= mask;
-                               scan->bm_bighint = radix * HAMMER_ALIST_META_RADIX;
-                               /* XXX bighint not being set properly */
-                       } else {
-                               /*
-                                * Recursion case, partial free.  If 00 the
-                                * zone is marked all allocated but has never
-                                * been initialized, so we init it.
-                                */
-                               int32_t empty;
-
-                               if ((scan->bm_bitmap & mask) == 0)
-                                       bl->bl_radix_init(live->info, blk, radix, HAMMER_ASTATE_ALLOC);
-                               bl->bl_radix_free(live->info, blk, radix,
-                                                 freeBlk - blk, v, &empty);
-                               if (empty) {
-                                       if (scan->bm_bitmap & mask)
-                                               bl->bl_radix_destroy(live->info, blk, radix);
-                                       scan->bm_bitmap |= mask;
-                                       scan->bm_bighint = radix * HAMMER_ALIST_META_RADIX;
-                                       /* XXX bighint not being set properly */
-                               } else {
-                                       scan->bm_bitmap &= ~mask;
-                                       scan->bm_bitmap |= pmask;
-                                       if (scan->bm_bighint < radix / 2)
-                                               scan->bm_bighint = radix / 2;
-                                       /* XXX bighint not being set properly */
-                               }
-                       }
-                       ++i;
-                       mask <<= 2;
-                       pmask <<= 2;
-                       count -= v;
-                       freeBlk += v;
-                       blk += radix;
-               }
-       } else {
-               next_skip = (skip - 1) / HAMMER_ALIST_META_RADIX;
-               i = 1 + i * next_skip;
-
-               while (i <= skip && blk < freeBlk + count) {
-                       int32_t v;
-
-                       KKASSERT(mask != 0);
-                       KKASSERT(count != 0);
-
-                       v = blk + radix - freeBlk;
-                       if (v > count)
-                               v = count;
-
-                       if (scan->bm_bighint == (int32_t)-1)
-                               panic("hammer_alst_meta_free: freeing unexpected range");
-
-                       if (freeBlk == blk && count >= radix) {
-                               /*
-                                * All-free case, no need to update sub-tree
-                                */
-                               scan->bm_bitmap |= mask;
-                               scan->bm_bighint = radix * HAMMER_ALIST_META_RADIX;
-                               /* XXX bighint not being set properly */
-                       } else {
-                               /*
-                                * Recursion case
-                                */
-                               if (next_skip == 1 && bl->bl_terminal) {
-                                       hammer_alst_leaf_free(&scan[i], freeBlk, v);
-                               } else {
-                                       hammer_alst_meta_free(live, &scan[i],
-                                                             freeBlk, v,
-                                                             radix, next_skip,
-                                                             blk);
-                               }
-                               if (scan[i].bm_bitmap == (u_int32_t)-1) {
-                                       scan->bm_bitmap |= mask;
-                                       /* XXX bighint not set properly */
-                                       scan->bm_bighint = radix * HAMMER_ALIST_META_RADIX;
-                               } else {
-                                       scan->bm_bitmap &= ~mask;
-                                       scan->bm_bitmap |= pmask;
-                                       /* XXX bighint not set properly */
-                                       if (scan->bm_bighint < scan[i].bm_bighint)
-                                               scan->bm_bighint = scan[i].bm_bighint;
-                               }
-                       }
-                       mask <<= 2;
-                       pmask <<= 2;
-                       count -= v;
-                       freeBlk += v;
-                       blk += radix;
-                       i += next_skip;
-               }
-       }
-}
-
-/*
- * hammer_alst_radix_init()
- *
- *     Initialize our meta structures and bitmaps and calculate the exact
- *     number of meta-nodes required to manage 'count' blocks.  
- *
- *     The required space may be truncated due to early termination records.
- */
-static int32_t 
-hammer_alst_radix_init(hammer_almeta_t scan, int32_t radix,
-                      int skip, int32_t count)
-{
-       int i;
-       int next_skip;
-       int32_t memindex = 1;
-       u_int32_t mask;
-       u_int32_t pmask;
-
-       /*
-        * Basic initialization of the almeta for meta or leaf nodes.  This
-        * marks the element as all-allocated.
-        */
-       if (scan) {
-               scan->bm_bighint = 0;
-               scan->bm_bitmap = 0;
-       }
-
-       /*
-        * We are at a terminal node, we only eat one meta element.   If
-        * live->config->bl_terminal is set this is a leaf node, otherwise
-        * it is a meta node for a stacked A-list.  We do NOT recurse into
-        * stacked A-lists but simply mark the entire stack as all-free using
-        * code 00 (meaning all-free & uninitialized).
-        */
-       if (skip == 1)
-               return(memindex);
-
-       /*
-        * Meta node.  If allocating the entire object we can special
-        * case it.  However, we need to figure out how much memory
-        * is required to manage 'count' blocks, so we continue on anyway.
-        */
-       radix /= HAMMER_ALIST_META_RADIX;
-       next_skip = (skip - 1) / HAMMER_ALIST_META_RADIX;
-       mask = 0x00000003;
-       pmask = 0x00000001;
-
-       for (i = 1; i < skip; i += next_skip) {
-               /*
-                * We eat up to this record
-                */
-               memindex = i;
-
-               KKASSERT(mask != 0);
-
-               if (count >= radix) {
-                       /*
-                        * Allocate the entire object
-                        */
-                       memindex += hammer_alst_radix_init(
-                           ((scan) ? &scan[i] : NULL),
-                           radix,
-                           next_skip,
-                           radix
-                       );
-                       count -= radix;
-                       /* already marked as wholely allocated */
-               } else if (count > 0) {
-                       /*
-                        * Allocate a partial object
-                        */
-                       memindex += hammer_alst_radix_init(
-                           ((scan) ? &scan[i] : NULL),
-                           radix,
-                           next_skip,
-                           count
-                       );
-                       count = 0;
-
-                       /*
-                        * Mark as partially allocated
-                        */
-                       if (scan) {
-                               scan->bm_bitmap &= ~mask;
-                               scan->bm_bitmap |= pmask;
-                       }
-               } else {
-                       /*
-                        * Add terminator and break out.  The terminal
-                        * eats the meta node at scan[i].
-                        */
-                       ++memindex;
-                       if (scan)
-                               scan[i].bm_bighint = (int32_t)-1;
-                       /* already marked as wholely allocated */
-                       break;
-               }
-               mask <<= 2;
-               pmask <<= 2;
-       }
-       return(memindex);
-}
-
-/*
- * hammer_alst_radix_recover()
- *
- *     This code is basically a duplicate of hammer_alst_radix_init()
- *     except it recovers the a-list instead of initializes it.
- *
- *     (a_beg,a_end) specifies the global allocatable range within
- *     the radix tree.  The recovery code guarentees that blocks
- *     within the tree but outside this range are marked as being
- *     allocated to prevent them from being allocated.
- *
- *
- */
-static void    
-hammer_alst_radix_recover(hammer_alist_recover_t info, hammer_almeta_t scan,
-                         int32_t blk, int32_t radix, int skip, int32_t count,
-                         int32_t a_beg, int32_t a_end)
-{
-       hammer_alist_t live = info->live;
-       u_int32_t mask;
-       u_int32_t pmask;
-       int next_skip;
-       int i;
-       int j;
-       int n;
-
-       /*
-        * Don't try to recover bighint, just set it to its maximum
-        * value and let the A-list allocations reoptimize it.  XXX
-        */
-       scan->bm_bighint = radix;
-
-       /*
-        * If we are at a terminal node (i.e. not stacked on top of another
-        * A-list), just count the free blocks.
-        */
-       if (skip == 1 && live->config->bl_terminal) {
-               for (i = 0; i < (int)HAMMER_ALIST_BMAP_RADIX; ++i) {
-                       if (blk + i < a_beg || blk + i >= a_end)
-                               scan->bm_bitmap &= ~(1 << i);
-                       if (scan->bm_bitmap & (1 << i))
-                               ++info->live->meta->bm_alist_freeblks;
-               }
-               return;
-       }
-
-       /*
-        * Recursive meta node (next_skip != 0) or terminal meta
-        * node (next_skip == 0).
-        */
-       radix /= HAMMER_ALIST_META_RADIX;
-       next_skip = (skip - 1) / HAMMER_ALIST_META_RADIX;
-       mask = 0x00000003;
-       pmask = 0x00000001;
-
-       for (i = 1, j = 0; j < (int)HAMMER_ALIST_META_RADIX;
-             ++j, (i += next_skip)) {
-               /*
-                * Check mask:
-                *
-                *      00      ALL-ALLOCATED - UNINITIALIZED
-                *      01      PARTIALLY-FREE/PARTIALLY-ALLOCATED
-                *      10      ALL-ALLOCATED - INITIALIZED
-                *      11      ALL-FREE      - UNINITIALIZED (INITIALIZED
-                *                              IF CONFIG SAYS INVERTED)
-                */
-               KKASSERT(mask);
-
-               /*
-                * Adjust the bitmap for out of bounds or partially out of
-                * bounds elements.  If we are completely out of bounds
-                * mark as all-allocated.  If we are partially out of
-                * bounds do not allow the area to be marked all-free.
-                */
-               if (blk + radix <= a_beg || blk >= a_end) {
-                       scan->bm_bitmap &= ~mask;
-               } else if (blk < a_beg || blk + radix > a_end) {
-                       if ((scan->bm_bitmap & mask) == mask) {
-                               scan->bm_bitmap &= ~mask;
-                               scan->bm_bitmap |= pmask;
-                       }
-               }
-
-               if (count >= radix) {
-                       /*
-                        * Recover the entire object
-                        */
-                       if ((scan->bm_bitmap & mask) == 0) {
-                               /*
-                                * All-allocated (uninited), do nothing
-                                */
-                       } else if ((scan->bm_bitmap & mask) == mask &&
-                                  live->config->bl_inverted == 0) {
-                               /*
-                                * All-free (uninited), do nothing.
-                                *
-                                * (if inverted all-free is initialized and
-                                * must be recovered).
-                                */
-                               live->meta->bm_alist_freeblks += radix;
-                       } else if (next_skip) {
-                               if ((scan->bm_bitmap & mask) == mask) {
-                                       /*
-                                        * This is a special case.  If we
-                                        * are inverted but in an all-free
-                                        * state, it's actually initialized
-                                        * (sorta) and we still have to dive
-                                        * through to any stacked nodes so
-                                        * we can call bl_radix_recover().
-                                        */
-                                       scan[i].bm_bitmap = (u_int32_t)-1;
-                               }
-                               /*
-                                * Normal meta node, initialized.  Recover and
-                                * adjust to the proper state.
-                                */
-                               hammer_alst_radix_recover(
-                                   info,
-                                   &scan[i],
-                                   blk, radix,
-                                   next_skip, radix,
-                                   a_beg, a_end
-                               );
-                               if (scan[i].bm_bitmap == 0) {
-                                       scan->bm_bitmap &= ~mask;
-                               } else if (scan[i].bm_bitmap == 0xAAAAAAAAU) {
-                                       scan->bm_bitmap &= ~mask;
-                                       scan->bm_bitmap |= pmask << 1;
-                               } else if (scan[i].bm_bitmap == (u_int32_t)-1) {
-                                       scan->bm_bitmap |= mask;
-                               } else {
-                                       scan->bm_bitmap &= ~mask;
-                                       scan->bm_bitmap |= pmask;
-                               }
-                       } else {
-                               /*
-                                * Stacked meta node, recurse.
-                                *
-                                * If no free blocks are present mark the
-                                * meta node as all-allocated/initialized.
-                                * A return code of -EDOM will mark the
-                                * meta node as all-allocated/uninitialized.
-                                *
-                                * This can be really confusing. bl_inverted
-                                * has no function here because we are
-                                * ALREADY known to be in an initialized
-                                * state.
-                                */
-                               n = live->config->bl_radix_recover(
-                                           live->info,
-                                           blk, radix, radix);
-                               if (n == -EDOM) {
-                                       scan->bm_bitmap &= ~mask;
-                               } else if (n >= 0) {
-                                       live->meta->bm_alist_freeblks += n;
-                                       if (n == 0) {
-                                               scan->bm_bitmap =
-                                                   (scan->bm_bitmap & ~mask) |
-                                                   (pmask << 1);
-                                       } else if (n == radix) {
-                                               scan->bm_bitmap |= mask;
-                                       } else {
-                                               scan->bm_bitmap =
-                                                   (scan->bm_bitmap & ~mask) |
-                                                   pmask;
-                                       }
-                               } else {
-                                       info->error = n;
-                               }
-                       }
-                       count -= radix;
-               } else if (count > 0) {
-                       /*
-                        * Recover a partial object.  The object can become
-                        * wholely allocated but never wholely free.
-                        */
-                       if (next_skip) {
-                               hammer_alst_radix_recover(
-                                   info,
-                                   &scan[i],
-                                   blk, radix,
-                                   next_skip, count,
-                                   a_beg, a_end
-                               );
-                               if (scan[i].bm_bitmap == 0) {
-                                       scan->bm_bitmap &= ~mask;
-                               } else if (scan[i].bm_bitmap == 0xAAAAAAAAU) {
-                                       scan->bm_bitmap &= ~mask;
-                                       scan->bm_bitmap |= pmask << 1;
-                               } else {
-                                       scan->bm_bitmap &= ~mask;
-                                       scan->bm_bitmap |= pmask;
-                               }
-                       } else {
-                               /*
-                                * If no free blocks are present mark the
-                                * meta node as all-allocated/initialized.
-                                * A return code of -EDOM will mark the
-                                * meta node as all-allocated/uninitialized.
-                                *
-                                * This can be really confusing. bl_inverted
-                                * has no function here because we are
-                                * ALREADY known to be in an initialized
-                                */
-                               n = live->config->bl_radix_recover(
-                                           live->info,
-                                           blk, radix, count);
-                               if (n == -EDOM) {
-                                       scan->bm_bitmap &= ~mask;
-                               } else if (n >= 0) {
-                                       live->meta->bm_alist_freeblks += n;
-                                       if (n == 0) {
-                                               scan->bm_bitmap &= ~mask;
-                                               scan->bm_bitmap |= pmask << 1;
-                                       } else {
-                                               scan->bm_bitmap &= ~mask;
-                                               scan->bm_bitmap |= pmask;
-                                       }
-                               } else {
-                                       info->error = n;
-                               }
-                       }
-                       count = 0;
-               } else if (next_skip) {
-                       /*
-                        * Add terminator.  The terminator eats the meta
-                        * node at scan[i].  There is only ONE terminator,
-                        * make sure we don't write out any more (set count to
-                        * -1) or we may overflow our allocation.
-                        */
-                       if (count == 0) {
-                               scan[i].bm_bighint = (int32_t)-1;
-                               count = -1;
-                       }
-                       scan->bm_bitmap &= ~mask;       /* all-allocated/uni */
-               } else {
-                       scan->bm_bitmap &= ~mask;       /* all-allocated/uni */
-               }
-               mask <<= 2;
-               pmask <<= 2;
-               blk += radix;
-       }
-}
-
-#ifdef ALIST_DEBUG
-
-static void    
-hammer_alst_radix_print(hammer_alist_t live, hammer_almeta_t scan,
-                       int32_t blk, int32_t radix, int skip, int tab)
-{
-       int i;
-       int next_skip;
-       int lastState = 0;
-       u_int32_t mask;
-
-       if (skip == 1 && live->config->bl_terminal) {
-               kprintf(
-                   "%*.*s(%04x,%d): bitmap %08x big=%d\n", 
-                   tab, tab, "",
-                   blk, radix,
-                   scan->bm_bitmap,
-                   scan->bm_bighint
-               );
-               return;
-       }
-
-       if (scan->bm_bitmap == 0) {
-               kprintf(
-                   "%*.*s(%04x,%d) ALL ALLOCATED\n",
-                   tab, tab, "",
-                   blk,
-                   radix
-               );
-               return;
-       }
-       if (scan->bm_bitmap == (u_int32_t)-1) {
-               kprintf(
-                   "%*.*s(%04x,%d) ALL FREE\n",
-                   tab, tab, "",
-                   blk,
-                   radix
-               );
-               return;
-       }
-
-       kprintf(
-           "%*.*s(%04x,%d): %s (%d) bitmap=%08x big=%d {\n",
-           tab, tab, "",
-           blk, radix,
-           (skip == 1 ? "LAYER" : "subtree"),
-           radix,
-           scan->bm_bitmap,
-           scan->bm_bighint
-       );
-
-       radix /= HAMMER_ALIST_META_RADIX;
-       tab += 4;
-       mask = 0x00000003;
-
-       if (skip == 1) {
-               for (i = 0; i < HAMMER_ALIST_META_RADIX; ++i) {
-                       if ((scan->bm_bitmap & mask) == mask) {
-                               kprintf(
-                                   "%*.*s(%04x,%d): ALL FREE\n",
-                                   tab, tab, "",
-                                   blk, radix
-                               );
-                       } else if ((scan->bm_bitmap & mask) == 0) {
-                               kprintf(
-                                   "%*.*s(%04x,%d): ALL ALLOCATED\n",
-                                   tab, tab, "",
-                                   blk, radix
-                               );
-                       } else {
-                               live->config->bl_radix_print(
-                                               live->info, blk, radix, tab);
-                       }
-                       blk += radix;
-                       mask <<= 2;
-               }
-       } else {
-               next_skip = ((u_int)(skip - 1) / HAMMER_ALIST_META_RADIX);
-
-               for (i = 1; i < skip; i += next_skip) {
-                       KKASSERT(mask != 0);
-                       if (scan[i].bm_bighint == (int32_t)-1) {
-                               kprintf(
-                                   "%*.*s(%04x,%d): Terminator\n",
-                                   tab, tab, "",
-                                   blk, radix
-                               );
-                               lastState = 0;
-                               break;
-                       }
-                       if ((scan->bm_bitmap & mask) == mask) {
-                               kprintf(
-                                   "%*.*s(%04x,%d): ALL FREE\n",
-                                   tab, tab, "",
-                                   blk, radix
-                               );
-                       } else if ((scan->bm_bitmap & mask) == 0) {
-                               kprintf(
-                                   "%*.*s(%04x,%d): ALL ALLOCATED\n",
-                                   tab, tab, "",
-                                   blk, radix
-                               );
-                       } else {
-                               hammer_alst_radix_print(
-                                   live,
-                                   &scan[i],
-                                   blk,
-                                   radix,
-                                   next_skip,
-                                   tab
-                               );
-                       }
-                       blk += radix;
-                       mask <<= 2;
-               }
-       }
-       tab -= 4;
-
-       kprintf(
-           "%*.*s}\n",
-           tab, tab, ""
-       );
-}
-
-#endif
-
-#ifdef ALIST_DEBUG
-
-static struct hammer_alist_live **layers;      /* initialized by main */
-static int32_t layer_radix = -1;
-
-/*
- * Initialize a zone.
- *
- * If allocating is non-zero this init is being called when transitioning out
- * of an all-free state.  Allocate the zone and mark the whole mess as being
- * free so the caller can then allocate out of this zone.
- *
- * If freeing this init is being called when transitioning out of an
- * initial all-allocated (00) state.  Allocate the zone but leave the whole
- * mess left all-allocated.  The caller will then free the appropriate range.
- */
-static
-int
-debug_radix_init(void *info, int32_t blk, int32_t radix,
-                enum hammer_alloc_state state)
-{
-       hammer_alist_t layer;
-       int layer_no = blk / layer_radix;
-
-       printf("lower layer: init (%04x,%d) layer_radix=%d\n",
-              blk, radix, layer_radix);
-       KKASSERT(layer_radix == radix);
-       KKASSERT(layers[layer_no] == NULL);
-       layer = layers[layer_no] = hammer_alist_create(radix, 1, NULL, state); 
-       return(0);
-}
-
-static
-int32_t
-debug_radix_recover(void *info, int32_t blk, int32_t radix, int32_t count)
-{
-       hammer_alist_t layer;
-       int layer_no = blk / layer_radix;
-       int32_t n;
-
-       KKASSERT(layer_radix == radix);
-       KKASSERT(layers[layer_no] != NULL);
-       layer = layers[layer_no];
-       n = hammer_alist_recover(layer, blk, 0, count);
-       printf("Recover layer %d blk %d result %d/%d\n",
-               layer_no, blk, n, count);
-       return(n);
-}
-
-static
-int32_t
-debug_radix_find(void *info, int32_t blk, int32_t radix, int32_t atblk,
-                int flags)
-{
-       hammer_alist_t layer;
-       int layer_no = blk / layer_radix;
-       int32_t res;
-
-       KKASSERT(layer_radix == radix);
-       KKASSERT(layers[layer_no] != NULL);
-       layer = layers[layer_no];
-       res = hammer_alist_find(layer, atblk - blk, radix, flags);
-       if (res != HAMMER_ALIST_BLOCK_NONE)
-               res += blk;
-       return(res);
-}
-
-/*
- * This is called when a zone becomes entirely free, typically after a
- * call to debug_radix_free() has indicated that the entire zone is now
- * free.
- */
-static
-int
-debug_radix_destroy(void *info, int32_t blk, int32_t radix)
-{
-       hammer_alist_t layer;
-       int layer_no = blk / layer_radix;
-
-       printf("lower layer: destroy (%04x,%d)\n", blk, radix);
-       layer = layers[layer_no];
-       KKASSERT(layer != NULL);
-       hammer_alist_destroy(layer, NULL);
-       layers[layer_no] = NULL;
-       return(0);
-}
-
-
-static
-int32_t
-debug_radix_alloc_fwd(void *info, int32_t blk, int32_t radix,
-                     int32_t count, int32_t atblk, int32_t *fullp)
-{
-       hammer_alist_t layer = layers[blk / layer_radix];
-       int32_t r;
-
-       r = hammer_alist_alloc_fwd(layer, count, atblk - blk);
-       *fullp = hammer_alist_isfull(layer);
-       if (r != HAMMER_ALIST_BLOCK_NONE)
-               r += blk;
-       return(r);
-}
-
-static
-int32_t
-debug_radix_alloc_rev(void *info, int32_t blk, int32_t radix,
-                     int32_t count, int32_t atblk, int32_t *fullp)
-{
-       hammer_alist_t layer = layers[blk / layer_radix];
-       int32_t r;
-
-       r = hammer_alist_alloc_rev(layer, count, atblk - blk);
-       *fullp = hammer_alist_isfull(layer);
-       if (r != HAMMER_ALIST_BLOCK_NONE)
-               r += blk;
-       return(r);
-}
-
-static
-void
-debug_radix_free(void *info, int32_t blk, int32_t radix,
-                int32_t base_blk, int32_t count, int32_t *emptyp)
-{
-       int layer_no = blk / layer_radix;
-       hammer_alist_t layer = layers[layer_no];
-
-       KKASSERT(layer);
-       hammer_alist_free(layer, base_blk, count);
-       *emptyp = hammer_alist_isempty(layer);
-}
-
-static
-void
-debug_radix_print(void *info, int32_t blk, int32_t radix, int tab)
-{
-       hammer_alist_t layer = layers[blk / layer_radix];
-
-       hammer_alist_print(layer, tab);
-}
-
-int
-main(int ac, char **av)
-{
-       int32_t size = -1;
-       int i;
-       hammer_alist_t live;
-       hammer_almeta_t meta = NULL;
-
-       for (i = 1; i < ac; ++i) {
-               const char *ptr = av[i];
-               if (*ptr != '-') {
-                       if (size == -1)
-                               size = strtol(ptr, NULL, 0);
-                       else if (layer_radix == -1)
-                               layer_radix = strtol(ptr, NULL, 0);
-                       else
-                               ;
-                       continue;
-               }
-               ptr += 2;
-               fprintf(stderr, "Bad option: %s\n", ptr - 2);
-               exit(1);
-       }
-       if (size == -1)
-               size = 1024;
-       if (layer_radix == -1)
-               layer_radix = 1;        /* no second storage layer */
-       if ((size ^ (size - 1)) != (size << 1) - 1) {
-               fprintf(stderr, "size must be a power of 2\n");
-               exit(1);
-       }
-       if ((layer_radix ^ (layer_radix - 1)) != (layer_radix << 1) - 1) {
-               fprintf(stderr, "the second layer radix must be a power of 2\n");
-               exit(1);
-       }
-
-       live = hammer_alist_create(size, layer_radix, NULL,
-                                  HAMMER_ASTATE_ALLOC);
-       layers = calloc(size, sizeof(hammer_alist_t));
-
-       printf("A-LIST TEST %d blocks, first-layer radix %d, "
-              "second-layer radix %d\n",
-               size, live->config->bl_radix / layer_radix, layer_radix);
-
-       live->config->bl_radix_init = debug_radix_init;
-       live->config->bl_radix_recover = debug_radix_recover;
-       live->config->bl_radix_find = debug_radix_find;
-       live->config->bl_radix_destroy = debug_radix_destroy;
-       live->config->bl_radix_alloc_fwd = debug_radix_alloc_fwd;
-       live->config->bl_radix_alloc_rev = debug_radix_alloc_rev;
-       live->config->bl_radix_free = debug_radix_free;
-       live->config->bl_radix_print = debug_radix_print;
-
-       hammer_alist_free(live, 0, size);
-
-       for (;;) {
-               char buf[1024];
-               int32_t da = 0;
-               int32_t count = 0;
-               int32_t atblk;
-               int32_t blk;
-               int flags;
-
-               kprintf("%d/%d> ",
-                       live->meta->bm_alist_freeblks, size);
-               fflush(stdout);
-               if (fgets(buf, sizeof(buf), stdin) == NULL)
-                       break;
-               switch(buf[0]) {
-               case 'p':
-                       hammer_alist_print(live, 0);
-
-                       flags = 0;
-                       if (buf[1] == 'i' || (buf[1] && buf[2] == 'i'))
-                               flags |= HAMMER_ALIST_FIND_INITONLY;
-                       if (buf[1] == 'm' || (buf[1] && buf[2] == 'm'))
-                               flags |= HAMMER_ALIST_FIND_NOSTACK;
-                       atblk = 0;
-                       kprintf("allocated: ");
-                       while ((atblk = hammer_alist_find(live, atblk, size, flags)) != HAMMER_ALIST_BLOCK_NONE) {
-                               kprintf(" %d", atblk);
-                               ++atblk;
-                       }
-                       kprintf("\n");
-                       break;
-               case 'a':
-                       atblk = 0;
-                       if (sscanf(buf + 1, "%d %d", &count, &atblk) >= 1) {
-                               blk = hammer_alist_alloc_fwd(live, count, atblk);
-                               kprintf("    R=%04x\n", blk);
-                       } else {
-                               kprintf("?\n");
-                       }
-                       break;
-               case 'r':
-                       atblk = HAMMER_ALIST_BLOCK_MAX;
-                       if (sscanf(buf + 1, "%d %d", &count, &atblk) >= 1) {
-                               blk = hammer_alist_alloc_rev(live, count, atblk);
-                               kprintf("    R=%04x\n", blk);
-                       } else {
-                               kprintf("?\n");
-                       }
-                       break;
-               case 'f':
-                       if (sscanf(buf + 1, "%x %d", &da, &count) == 2) {
-                               hammer_alist_free(live, da, count);
-                               if (hammer_alist_isempty(live))
-                                       kprintf("a-list is now 100%% empty\n");
-                       } else {
-                               kprintf("?\n");
-                       }
-                       break;
-               case 'R':
-                       {
-                               int n;
-
-                               n = hammer_alist_recover(live, 0, 0,
-                                          live->meta->bm_alist_base_freeblks);
-                               if (n < 0)
-                                       kprintf("recover: error %d\n", -n);
-                               else
-                                       kprintf("recover: %d free\n", n);
-                       }
-                       break;
-               case '?':
-               case 'h':
-                       puts(
-                           "p[i][m]    -print (initialized only) (meta-only)\n"
-                           "a %d       -allocate\n"
-                           "r %d       -allocate reverse\n"
-                           "f %x %d    -free\n"
-                           "R          -recovery a-list\n"
-                           "h/?        -help"
-                       );
-                       break;
-               default:
-                       kprintf("?\n");
-                       break;
-               }
-       }
-       return(0);
-}
-
-void
-panic(const char *ctl, ...)
-{
-       __va_list va;
-
-       __va_start(va, ctl);
-       vfprintf(stderr, ctl, va);
-       fprintf(stderr, "\n");
-       __va_end(va);
-       exit(1);
-}
-
-#endif
-
diff --git a/sys/vfs/hammer/hammer_alist.h b/sys/vfs/hammer/hammer_alist.h
deleted file mode 100644 (file)
index c7d95d6..0000000
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
- * 
- * This code is derived from software contributed to The DragonFly Project
- * by Matthew Dillon <dillon@backplane.com>
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- * 3. Neither the name of The DragonFly Project nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific, prior written permission.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
- * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * 
- * $DragonFly: src/sys/vfs/hammer/Attic/hammer_alist.h,v 1.7 2008/01/24 02:14:45 dillon Exp $
- */
-
-/*
- * The structures below represent HAMMER's on-disk and in-memory A-List
- * support.  An A-list is a radix tree bitmap allocator with hinting.  It
- * is very fast and efficient.
- *
- * A-lists can be single-layered or multi-layered.  A single-layered
- * A-list uses a radix 32 leaf and radix 16 internal nodes.  A multi-layered
- * A-list uses a radix 16 leaf and radix 16 internal nodes.
- *
- * Multi-layered A-lists allow otherwise independant A-lists to be stacked
- * to form a single A-list.
- */
-
-/*
- * This on-disk structure represents the actual information an A-list needs
- * to store to disk.  Each A-list layer is made up of a linear array of
- * meta-elements.  The number of elements needed is calculated using a
- * recursion.  A-lists contain an early-termination feature which allows
- * an A-list's storage to scale to the actual number of blocks that need
- * to be managed regardless of whether they are on a radix boundary or not.
- *
- * The first almeta structure in the array is used for housekeeping and not
- * part of the topology proper.  The second almeta structure is the 'root'
- * meta.
- */
-typedef struct hammer_almeta {
-       u_int32_t       bm_bitmap;
-       int32_t         bm_bighint;
-} *hammer_almeta_t;
-
-#define bm_alist_freeblks      bm_bitmap       /* housekeeping */
-#define bm_alist_base_freeblks bm_bighint      /* housekeeping */
-
-#define HAMMER_ALMETA_SIZE     8
-
-enum hammer_alloc_state {
-       HAMMER_ASTATE_NONE,
-       HAMMER_ASTATE_ALLOC,
-       HAMMER_ASTATE_FREE
-};
-
-typedef enum hammer_alloc_state hammer_alloc_state_t;
-
-/*
- * This in-memory structure specifies how an a-list is configured and
- * can be shared by multiple live alists.
- */
-typedef struct hammer_alist_config {
-       int32_t bl_blocks;      /* area of coverage */
-       int32_t bl_radix;       /* coverage radix */
-       int32_t bl_base_radix;  /* chain to other allocators */
-       int32_t bl_skip;        /* starting skip for linear layout */
-       int32_t bl_rootblks;    /* meta-blocks allocated for tree */
-       int32_t bl_terminal;    /* terminal alist, else layer recursion */
-       int32_t bl_inverted;    /* all-free case is initialized, not uninit */
-       int     (*bl_radix_init)(void *info, int32_t blk, int32_t radix,
-                                       hammer_alloc_state_t state);
-       int32_t (*bl_radix_recover)(void *info, int32_t blk, int32_t radix,
-                                       int32_t count);
-       int32_t (*bl_radix_find)(void *info, int32_t blk, int32_t radix,
-                                       int32_t atblk, int inited_only);
-       int     (*bl_radix_destroy)(void *info, int32_t blk, int32_t radix);
-       int32_t (*bl_radix_alloc_fwd)(void *info, int32_t blk, int32_t radix,
-                                       int32_t count, int32_t atblk,
-                                       int32_t *fullp);
-       int32_t (*bl_radix_alloc_rev)(void *info, int32_t blk, int32_t radix,
-                                       int32_t count, int32_t atblk,
-                                       int32_t *fullp);
-       void    (*bl_radix_free)(void *info, int32_t blk, int32_t radix,
-                                       int32_t base_blk, int32_t count,
-                                       int32_t *emptyp);
-       void    (*bl_radix_print)(void *info, int32_t blk, int32_t radix,
-                                       int tab);
-} *hammer_alist_config_t;
-
-/*
- * This in-memory structure is needed for each live alist.
- */
-typedef struct hammer_alist_live {
-       hammer_alist_config_t config;   /* a-list configuration */
-       hammer_almeta_t meta;           /* location of meta array */
-       void *info;                     /* chaining call info argument */
-} *hammer_alist_t;
-
-/*
- * In-memory structure used to track A-list recovery operations.
- */
-typedef struct hammer_alist_recover {
-       hammer_alist_t live;
-       int     error;
-} *hammer_alist_recover_t;
-
-
-#define HAMMER_ALIST_META_RADIX        (sizeof(int32_t) * 4)   /* 16 */
-#define HAMMER_ALIST_BMAP_RADIX        (sizeof(int32_t) * 8)   /* 32 */
-#define HAMMER_ALIST_BLOCK_NONE        ((int32_t)-1)
-#define HAMMER_ALIST_BLOCK_MAX ((int32_t)0x7fffffff)
-
-/*
- * Hard-code some pre-calculated constants for managing varying numbers
- * of blocks.  These are the number of meta-elements required.
- */
-#define HAMMER_ALIST_METAELMS_256_1LYR 11
-#define HAMMER_ALIST_METAELMS_32K_1LYR 1095
-#define HAMMER_ALIST_METAELMS_16K_2LYR HAMMER_ALIST_METAELMS_32K_1LYR
-
-#define HAMMER_ALIST_METAELMS_4K_1LYR  139
-#define HAMMER_ALIST_METAELMS_4K_2LYR  275
-
-/*
- * hammer_alist_find() flags
- */
-#define HAMMER_ALIST_FIND_INITONLY             0x0001
-#define HAMMER_ALIST_FIND_NOSTACK              0x0002
-
-/*
- * Function library support available to kernel and userland
- */
-void hammer_alist_template(hammer_alist_config_t bl, int32_t blocks,
-                           int32_t base_radix, int32_t maxmeta, int inverted);
-void hammer_alist_init(hammer_alist_t live, int32_t start, int32_t count,
-                          hammer_alloc_state_t state);
-int32_t hammer_alist_recover(hammer_alist_t live, int32_t blk,
-                          int32_t start, int32_t count);
-int32_t hammer_alist_alloc(hammer_alist_t live, int32_t count);
-int32_t hammer_alist_alloc_fwd(hammer_alist_t live,
-                          int32_t count, int32_t atblk);
-int32_t hammer_alist_alloc_rev(hammer_alist_t live,
-                          int32_t count, int32_t atblk);
-int32_t hammer_alist_find(hammer_alist_t live, int32_t atblk, int32_t maxblk,
-                          int flags);
-int hammer_alist_isfull(hammer_alist_t live);
-int hammer_alist_isempty(hammer_alist_t live);
-void hammer_alist_free(hammer_alist_t live, int32_t blkno, int32_t count);
-
index 6f752c3..812c044 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.28 2008/02/05 07:58:43 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.29 2008/02/08 08:30:59 dillon Exp $
  */
 
 /*
  *
  * B-Trees also make the stacking of trees fairly straightforward.
  *
- * SPIKES: Two leaf elements denoting a sub-range of keys may represent
- * a spike, or a recursion into another cluster.  Most standard B-Tree
- * searches traverse spikes.  The ending spike element is range-inclusive
- * and does not operate quite like a right-bound.
- *
  * INSERTIONS:  A search performed with the intention of doing
  * an insert will guarantee that the terminal leaf node is not full by
  * splitting full nodes.  Splits occur top-down during the dive down the
@@ -94,7 +89,6 @@ static int btree_split_leaf(hammer_cursor_t cursor);
 static int btree_remove(hammer_cursor_t cursor);
 static int btree_remove_deleted_element(hammer_cursor_t cursor);
 static int btree_set_parent(hammer_node_t node, hammer_btree_elm_t elm);
-static int btree_node_is_almost_full(hammer_node_ondisk_t node);
 static int btree_node_is_full(hammer_node_ondisk_t node);
 static void hammer_make_separator(hammer_base_elm_t key1,
                        hammer_base_elm_t key2, hammer_base_elm_t dest);
@@ -146,14 +140,9 @@ hammer_btree_iterate(hammer_cursor_t cursor)
                 * We iterate up the tree and then index over one element
                 * while we are at the last element in the current node.
                 *
-                * NOTE: This can pop us up to another cluster.
-                *
-                * If we are at the root of the root cluster, cursor_up
+                * If we are at the root of the filesystem, cursor_up
                 * returns ENOENT.
                 *
-                * NOTE: hammer_cursor_up() will adjust cursor->key_beg
-                * when told to re-search for the cluster tag.
-                *
                 * XXX this could be optimized by storing the information in
                 * the parent reference.
                 *
@@ -175,18 +164,14 @@ hammer_btree_iterate(hammer_cursor_t cursor)
                 * Check internal or leaf element.  Determine if the record
                 * at the cursor has gone beyond the end of our range.
                 *
-                * Generally we recurse down through internal nodes.  An
-                * internal node can only be returned if INCLUSTER is set
-                * and the node represents a cluster-push record.
+                * We recurse down through internal nodes.
                 */
                if (node->type == HAMMER_BTREE_TYPE_INTERNAL) {
                        elm = &node->elms[cursor->index];
                        r = hammer_btree_cmp(&cursor->key_end, &elm[0].base);
                        s = hammer_btree_cmp(&cursor->key_beg, &elm[1].base);
                        if (hammer_debug_btree) {
-                               kprintf("BRACKETL %d:%d:%08x[%d] %016llx %02x %016llx %d\n",
-                                       cursor->node->cluster->volume->vol_no,
-                                       cursor->node->cluster->clu_no,
+                               kprintf("BRACKETL %016llx[%d] %016llx %02x %016llx %d\n",
                                        cursor->node->node_offset,
                                        cursor->index,
                                        elm[0].internal.base.obj_id,
@@ -194,9 +179,7 @@ hammer_btree_iterate(hammer_cursor_t cursor)
                                        elm[0].internal.base.key,
                                        r
                                );
-                               kprintf("BRACKETR %d:%d:%08x[%d] %016llx %02x %016llx %d\n",
-                                       cursor->node->cluster->volume->vol_no,
-                                       cursor->node->cluster->clu_no,
+                               kprintf("BRACKETR %016llx[%d] %016llx %02x %016llx %d\n",
                                        cursor->node->node_offset,
                                        cursor->index + 1,
                                        elm[1].internal.base.obj_id,
@@ -238,9 +221,7 @@ hammer_btree_iterate(hammer_cursor_t cursor)
                        elm = &node->elms[cursor->index];
                        r = hammer_btree_cmp(&cursor->key_end, &elm->base);
                        if (hammer_debug_btree) {
-                               kprintf("ELEMENT  %d:%d:%08x:%d %c %016llx %02x %016llx %d\n",
-                                       cursor->node->cluster->volume->vol_no,
-                                       cursor->node->cluster->clu_no,
+                               kprintf("ELEMENT  %016llx:%d %c %016llx %02x %016llx %d\n",
                                        cursor->node->node_offset,
                                        cursor->index,
                                        (elm[0].leaf.base.btype ?
@@ -274,59 +255,6 @@ hammer_btree_iterate(hammer_cursor_t cursor)
                                        continue;
                                }
                                break;
-                       case HAMMER_BTREE_TYPE_SPIKE_BEG:
-                               /*
-                                * NOTE: This code assumes that the spike
-                                * ending element immediately follows the
-                                * spike beginning element.
-                                */
-                               /*
-                                * We must cursor-down via the SPIKE_END
-                                * element, otherwise cursor->parent will
-                                * not be set correctly for deletions.
-                                *
-                                * fall-through to avoid an improper
-                                * termination from the conditional above.
-                                */
-                               KKASSERT(cursor->index + 1 < node->count);
-                               ++elm;
-                               KKASSERT(elm->leaf.base.btype ==
-                                        HAMMER_BTREE_TYPE_SPIKE_END);
-                               ++cursor->index;
-                               /* fall through */
-                       case HAMMER_BTREE_TYPE_SPIKE_END:
-                               /*
-                                * The SPIKE_END element is inclusive, NOT
-                                * like a boundary, so be careful with the
-                                * match check.
-                                *
-                                * This code assumes that a preceding SPIKE_BEG
-                                * has already been checked.
-                                */
-                               if (cursor->flags & HAMMER_CURSOR_INCLUSTER)
-                                       break;
-                               error = hammer_cursor_down(cursor);
-                               if (error)
-                                       break;
-                               KKASSERT(cursor->index == 0);
-                               /* reload stale pointer */
-                               node = cursor->node->ondisk;
-
-                               /*
-                                * If the cluster root is empty it and its
-                                * related spike can be deleted.  Ignore
-                                * errors.  Cursor
-                                */
-                               if (node->count == 0) {
-                                       error = hammer_cursor_upgrade(cursor);
-                                       if (error == 0)
-                                               error = btree_remove(cursor);
-                                       hammer_cursor_downgrade(cursor);
-                                       error = 0;
-                                       /* reload stale pointer */
-                                       node = cursor->node->ondisk;
-                               }
-                               continue;
                        default:
                                error = EINVAL;
                                break;
@@ -391,20 +319,6 @@ hammer_btree_iterate_reverse(hammer_cursor_t cursor)
                /*
                 * We iterate up the tree and then index over one element
                 * while we are at the last element in the current node.
-                *
-                * NOTE: This can pop us up to another cluster.
-                *
-                * If we are at the root of the root cluster, cursor_up
-                * returns ENOENT.
-                *
-                * NOTE: hammer_cursor_up() will adjust cursor->key_beg
-                * when told to re-search for the cluster tag.
-                *
-                * XXX this could be optimized by storing the information in
-                * the parent reference.
-                *
-                * XXX we can lose the node lock temporarily, this could mess
-                * up our scan.
                 */
                if (cursor->index == -1) {
                        error = hammer_cursor_up(cursor);
@@ -423,9 +337,7 @@ hammer_btree_iterate_reverse(hammer_cursor_t cursor)
                 * Check internal or leaf element.  Determine if the record
                 * at the cursor has gone beyond the end of our range.
                 *
-                * Generally we recurse down through internal nodes.  An
-                * internal node can only be returned if INCLUSTER is set
-                * and the node represents a cluster-push record.
+                * We recurse down through internal nodes.
                 */
                KKASSERT(cursor->index != node->count);
                if (node->type == HAMMER_BTREE_TYPE_INTERNAL) {
@@ -433,9 +345,7 @@ hammer_btree_iterate_reverse(hammer_cursor_t cursor)
                        r = hammer_btree_cmp(&cursor->key_end, &elm[0].base);
                        s = hammer_btree_cmp(&cursor->key_beg, &elm[1].base);
                        if (hammer_debug_btree) {
-                               kprintf("BRACKETL %d:%d:%08x[%d] %016llx %02x %016llx %d\n",
-                                       cursor->node->cluster->volume->vol_no,
-                                       cursor->node->cluster->clu_no,
+                               kprintf("BRACKETL %016llx[%d] %016llx %02x %016llx %d\n",
                                        cursor->node->node_offset,
                                        cursor->index,
                                        elm[0].internal.base.obj_id,
@@ -443,9 +353,7 @@ hammer_btree_iterate_reverse(hammer_cursor_t cursor)
                                        elm[0].internal.base.key,
                                        r
                                );
-                               kprintf("BRACKETR %d:%d:%08x[%d] %016llx %02x %016llx %d\n",
-                                       cursor->node->cluster->volume->vol_no,
-                                       cursor->node->cluster->clu_no,
+                               kprintf("BRACKETR %016llx[%d] %016llx %02x %016llx %d\n",
                                        cursor->node->node_offset,
                                        cursor->index + 1,
                                        elm[1].internal.base.obj_id,
@@ -483,9 +391,7 @@ hammer_btree_iterate_reverse(hammer_cursor_t cursor)
                        elm = &node->elms[cursor->index];
                        s = hammer_btree_cmp(&cursor->key_beg, &elm->base);
                        if (hammer_debug_btree) {
-                               kprintf("ELEMENT  %d:%d:%08x:%d %c %016llx %02x %016llx %d\n",
-                                       cursor->node->cluster->volume->vol_no,
-                                       cursor->node->cluster->clu_no,
+                               kprintf("ELEMENT  %016llx:%d %c %016llx %02x %016llx %d\n",
                                        cursor->node->node_offset,
                                        cursor->index,
                                        (elm[0].leaf.base.btype ?
@@ -509,48 +415,6 @@ hammer_btree_iterate_reverse(hammer_cursor_t cursor)
                                        continue;
                                }
                                break;
-                       case HAMMER_BTREE_TYPE_SPIKE_BEG:
-                               /*
-                                * Skip the spike BEG record.  We will hit
-                                * the END record first since we are
-                                * iterating backwards.
-                                */
-                               --cursor->index;
-                               continue;
-                       case HAMMER_BTREE_TYPE_SPIKE_END:
-                               /*
-                                * The SPIKE_END element is inclusive, NOT
-                                * like a boundary, so be careful with the
-                                * match check.
-                                *
-                                * This code assumes that a preceding SPIKE_BEG
-                                * has already been checked.
-                                */
-                               if (cursor->flags & HAMMER_CURSOR_INCLUSTER)
-                                       break;
-                               error = hammer_cursor_down(cursor);
-                               if (error)
-                                       break;
-                               KKASSERT(cursor->index == 0);
-                               /* reload stale pointer */
-                               node = cursor->node->ondisk;
-
-                               /*
-                                * If the cluster root is empty it and its
-                                * related spike can be deleted.  Ignore
-                                * errors.  Cursor
-                                */
-                               if (node->count == 0) {
-                                       error = hammer_cursor_upgrade(cursor);
-                                       if (error == 0)
-                                               error = btree_remove(cursor);
-                                       hammer_cursor_downgrade(cursor);
-                                       error = 0;
-                                       /* reload stale pointer */
-                                       node = cursor->node->ondisk;
-                               }
-                               cursor->index = node->count - 1;
-                               continue;
                        default:
                                error = EINVAL;
                                break;
@@ -590,7 +454,7 @@ hammer_btree_iterate_reverse(hammer_cursor_t cursor)
  * positioned for an insertion on ENOENT if HAMMER_CURSOR_INSERT was
  * specified.
  *
- * The cursor may begin anywhere, the search will traverse clusters in
+ * The cursor may begin anywhere, the search will traverse the tree in
  * either direction to locate the requested element.
  *
  * Most of the logic implementing historical searches is handled here.  We
@@ -695,33 +559,30 @@ hammer_btree_last(hammer_cursor_t cursor)
  * position.  Any prior record or data stored in the cursor is replaced.
  * The cursor must be positioned at a leaf node.
  *
- * NOTE: Most extractions occur at the leaf of the B-Tree.  The only
- *      extraction allowed at an internal element is at a cluster-push.
- *      Cluster-push elements have records but no data.
+ * NOTE: All extractions occur at the leaf of the B-Tree.
  */
 int
 hammer_btree_extract(hammer_cursor_t cursor, int flags)
 {
+       hammer_mount_t hmp;
        hammer_node_ondisk_t node;
        hammer_btree_elm_t elm;
-       hammer_cluster_t cluster;
-       u_int64_t buf_type;
-       int32_t cloff;
-       int32_t roff;
+       hammer_off_t rec_off;
+       hammer_off_t data_off;
+       hammer_off_t data_end;
        int error;
 
        /*
-        * A cluster record type has no data reference, the information
-        * is stored directly in the record and B-Tree element.
-        *
         * The case where the data reference resolves to the same buffer
         * as the record reference must be handled.
         */
        node = cursor->node->ondisk;
        elm = &node->elms[cursor->index];
-       cluster = cursor->node->cluster;
-       cursor->flags &= ~HAMMER_CURSOR_DATA_EMBEDDED;
-       cursor->data = NULL;
+       cursor->data1 = NULL;
+       cursor->data2 = NULL;
+       cursor->data_split = 0;
+       hmp = cursor->node->volume->hmp;
+       flags |= cursor->flags & HAMMER_CURSOR_DATAEXTOK;
 
        /*
         * There is nothing to extract for an internal element.
@@ -729,66 +590,72 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags)
        if (node->type == HAMMER_BTREE_TYPE_INTERNAL)
                return(EINVAL);
 
+       /*
+        * Only record types have data.
+        */
        KKASSERT(node->type == HAMMER_BTREE_TYPE_LEAF);
+       if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD)
+               flags &= ~HAMMER_CURSOR_GET_DATA;
+       data_off = elm->leaf.data_offset;
+       data_end = data_off + elm->leaf.data_len - 1;
+       if (data_off == 0)
+               flags &= ~HAMMER_CURSOR_GET_DATA;
+       rec_off = elm->leaf.rec_offset;
 
        /*
-        * Leaf element.
+        * Extract the record if the record was requested or the data
+        * resides in the record buf.
         */
-       if ((flags & HAMMER_CURSOR_GET_RECORD)) {
-               cloff = elm->leaf.rec_offset;
-               cursor->record = hammer_bread(cluster, cloff,
-                                             HAMMER_FSBUF_RECORDS, &error,
+       if ((flags & HAMMER_CURSOR_GET_RECORD) ||
+           ((flags & HAMMER_CURSOR_GET_DATA) &&
+            ((rec_off ^ data_off) & ~HAMMER_BUFMASK64) == 0)) {
+               cursor->record = hammer_bread(hmp, rec_off, &error,
                                              &cursor->record_buffer);
        } else {
-               cloff = 0;
+               rec_off = 0;
                error = 0;
        }
        if ((flags & HAMMER_CURSOR_GET_DATA) && error == 0) {
-               if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD) {
-                       /*
-                        * Only records have data references.  Spike elements
-                        * do not.
-                        */
-                       cursor->data = NULL;
-               } else if ((cloff ^ elm->leaf.data_offset) & ~HAMMER_BUFMASK) {
+               if ((rec_off ^ data_off) & ~HAMMER_BUFMASK64) {
                        /*
                         * The data is not in the same buffer as the last
                         * record we cached, but it could still be embedded
                         * in a record.  Note that we may not have loaded the
                         * record's buffer above, depending on flags.
+                        *
+                        * Assert that the data does not cross into additional
+                        * buffers.
                         */
-                       if ((elm->leaf.rec_offset ^ elm->leaf.data_offset) &
-                           ~HAMMER_BUFMASK) {
-                               if (elm->leaf.data_len & HAMMER_BUFMASK)
-                                       buf_type = HAMMER_FSBUF_DATA;
-                               else
-                                       buf_type = 0;   /* pure data buffer */
-                       } else {
-                               buf_type = HAMMER_FSBUF_RECORDS;
-                       }
-                       cursor->data = hammer_bread(cluster,
-                                                 elm->leaf.data_offset,
-                                                 buf_type, &error,
-                                                 &cursor->data_buffer);
+                       cursor->data_split = 0;
+                       cursor->data2 = hammer_bread(hmp, data_off,
+                                                 &error, &cursor->data_buffer);
+                       KKASSERT(((data_off ^ data_end) &
+                                ~HAMMER_BUFMASK64) == 0);
                } else {
                        /*
-                        * Data in same buffer as record.  Note that we
-                        * leave any existing data_buffer intact, even
-                        * though we don't use it in this case, in case
-                        * other records extracted during an iteration
-                        * go back to it.
-                        *
-                        * The data must be embedded in the record for this
-                        * case to be hit.
-                        *
-                        * Just assume the buffer type is correct.
+                        * The data starts in same buffer as record.  Check
+                        * to determine if the data extends into another
+                        * buffer.
                         */
-                       cursor->data = (void *)
+                       cursor->data1 = (void *)
                                ((char *)cursor->record_buffer->ondisk +
-                                (elm->leaf.data_offset & HAMMER_BUFMASK));
-                       roff = (char *)cursor->data - (char *)cursor->record;
-                       KKASSERT (roff >= 0 && roff < HAMMER_RECORD_SIZE);
-                       cursor->flags |= HAMMER_CURSOR_DATA_EMBEDDED;
+                               ((int32_t)data_off & HAMMER_BUFMASK));
+                       if ((data_off ^ data_end) & ~HAMMER_BUFMASK64) {
+                               cursor->data_split = HAMMER_BUFSIZE -
+                                       ((int32_t)data_off & HAMMER_BUFMASK);
+                               if (flags & HAMMER_CURSOR_DATAEXTOK) {
+                                       /*
+                                        * NOTE: Assumes data buffer does not
+                                        * cross a volume boundary.
+                                        */
+                                       cursor->data2 = hammer_bread(hmp, data_off + cursor->data_split,
+                                                                 &error, &cursor->data_buffer);
+                               } else {
+                                       panic("Illegal data extension");
+                               }
+                       } else {
+                               cursor->data_split = elm->leaf.data_len;
+                       }
                }
        }
        return(error);
@@ -819,8 +686,9 @@ hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm)
        /*
         * Insert the element at the leaf node and update the count in the
         * parent.  It is possible for parent to be NULL, indicating that
-        * the root of the B-Tree in the cluster is a leaf.  It is also
-        * possible for the leaf to be empty.
+        * the filesystem's ROOT B-Tree node is a leaf itself, which is
+        * possible.  The root inode can never be deleted so the leaf should
+        * never be empty.
         *
         * Remember that the right-hand boundary is not included in the
         * count.
@@ -839,9 +707,7 @@ hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm)
        ++node->count;
 
        /*
-        * Debugging sanity checks.  Note that the element to the left
-        * can match the element we are inserting if it is a SPIKE_END,
-        * because spike-end's represent a non-inclusive end to a range.
+        * Debugging sanity checks.
         */
        KKASSERT(hammer_btree_cmp(cursor->left_bound, &elm->leaf.base) <= 0);
        KKASSERT(hammer_btree_cmp(cursor->right_bound, &elm->leaf.base) > 0);
@@ -854,116 +720,6 @@ hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm)
        return(0);
 }
 
-/*
- * Insert a cluster spike into the B-Tree at the current cursor position.
- * The caller pre-positions the insertion cursor at ncluster's
- * left bound in the originating cluster.  Both the originating cluster
- * and the target cluster must be serialized, EDEADLK is fatal.
- *
- * Basically we have to lay down the two spike elements and assert that
- * the leaf's right bound does not bisect the ending element.  The ending
- * spike element is non-inclusive, just like a boundary.  The target cluster's
- * clu_btree_parent_offset may have to adjusted.
- *
- * NOTE: Serialization is usually accoplished by virtue of being the
- * initial accessor of a cluster.
- */
-int
-hammer_btree_insert_cluster(hammer_cursor_t cursor, hammer_cluster_t ncluster,
-                           int32_t rec_offset)
-{
-       hammer_node_ondisk_t node;
-       hammer_btree_elm_t   elm;
-       hammer_cluster_t     ocluster;
-       const int esize = sizeof(*elm);
-       int error;
-       int i;
-       int32_t node_offset;
-
-       if ((error = hammer_cursor_upgrade(cursor)) != 0)
-               return(error);
-       hammer_modify_node(cursor->node);
-       node = cursor->node->ondisk;
-       node_offset = cursor->node->node_offset;
-       i = cursor->index;
-
-       KKASSERT(node->type == HAMMER_BTREE_TYPE_LEAF);
-       KKASSERT(node->count <= HAMMER_BTREE_LEAF_ELMS - 2);
-       KKASSERT(i >= 0 && i <= node->count);
-
-       /*
-        * Make sure the spike is legal or the B-Tree code will get really
-        * confused.
-        *
-        * XXX the right bound my bisect the two spike elements.  We
-        * need code here to 'fix' the right bound going up the tree
-        * instead of an assertion.
-        */
-       KKASSERT(hammer_btree_cmp(&ncluster->ondisk->clu_btree_beg,
-                                 cursor->left_bound) >= 0);
-       KKASSERT(hammer_btree_cmp(&ncluster->ondisk->clu_btree_end,
-                                 cursor->right_bound) <= 0);
-       if (i != node->count) {
-               KKASSERT(hammer_btree_cmp(&ncluster->ondisk->clu_btree_end,
-                                         &node->elms[i].leaf.base) <= 0);
-       }
-
-       elm = &node->elms[i];
-       bcopy(elm, elm + 2, (node->count - i) * esize);
-       bzero(elm, 2 * esize);
-       node->count += 2;
-
-       elm[0].leaf.base = ncluster->ondisk->clu_btree_beg;
-       elm[0].leaf.base.btype = HAMMER_BTREE_TYPE_SPIKE_BEG;
-       elm[0].leaf.rec_offset = rec_offset;
-       elm[0].leaf.spike_clu_no = ncluster->clu_no;
-       elm[0].leaf.spike_vol_no = ncluster->volume->vol_no;
-
-       elm[1].leaf.base = ncluster->ondisk->clu_btree_end;
-       elm[1].leaf.base.btype = HAMMER_BTREE_TYPE_SPIKE_END;
-       elm[1].leaf.rec_offset = rec_offset;
-       elm[1].leaf.spike_clu_no = ncluster->clu_no;
-       elm[1].leaf.spike_vol_no = ncluster->volume->vol_no;
-
-       /*
-        * SPIKE_END must be inclusive, not exclusive.
-        */
-       KKASSERT(elm[1].leaf.base.create_tid != 1);
-       --elm[1].leaf.base.create_tid;
-
-       /*
-        * The target cluster's parent offset may have to be updated.
-        *
-        * NOTE: Modifying a cluster header does not mark it open, and
-        * flushing it will only clear an existing open flag if the cluster
-        * has been validated.
-        */
-       if (hammer_debug_general & 0x40) {
-               kprintf("INSERT CLUSTER %d:%d -> %d:%d ",
-                       ncluster->ondisk->clu_btree_parent_vol_no,
-                       ncluster->ondisk->clu_btree_parent_clu_no,
-                       ncluster->volume->vol_no,
-                       ncluster->clu_no);
-       }
-
-       ocluster = cursor->node->cluster;
-       if (ncluster->ondisk->clu_btree_parent_offset != node_offset ||
-           ncluster->ondisk->clu_btree_parent_clu_no != ocluster->clu_no ||
-           ncluster->ondisk->clu_btree_parent_vol_no != ocluster->volume->vol_no) {
-               hammer_modify_cluster(ncluster);
-               ncluster->ondisk->clu_btree_parent_offset = node_offset;
-               ncluster->ondisk->clu_btree_parent_clu_no = ocluster->clu_no;
-               ncluster->ondisk->clu_btree_parent_vol_no = ocluster->volume->vol_no;
-               if (hammer_debug_general & 0x40)
-                       kprintf("(offset fixup)\n");
-       } else {
-               if (hammer_debug_general & 0x40)
-                       kprintf("(offset unchanged)\n");
-       }
-
-       return(0);
-}
-
 /*
  * Delete a record from the B-Tree at the current cursor position.
  * The cursor is positioned such that the current element is the one
@@ -975,9 +731,9 @@ hammer_btree_insert_cluster(hammer_cursor_t cursor, hammer_cluster_t ncluster,
  *
  * Deletions will attempt to partially rebalance the B-Tree in an upward
  * direction, but will terminate rather then deadlock.  Empty leaves are
- * not allowed except at the root node of a cluster.  An early termination
- * will leave an internal node with an element whos subtree_offset is 0,
- * a case detected and handled by btree_search().
+ * not allowed.  An early termination will leave an internal node with an
+ * element whose subtree_offset is 0, a case detected and handled by
+ * btree_search().
  *
  * This function can return EDEADLK, requiring the caller to retry the
  * operation after clearing the deadlock.
@@ -1020,12 +776,11 @@ hammer_btree_delete(hammer_cursor_t cursor)
 
                KKASSERT(parent != NULL);
                KKASSERT(parent->node_offset == ondisk->parent);
-               KKASSERT(parent->cluster == node->cluster);
        }
 
        /*
         * If the leaf becomes empty it must be detached from the parent,
-        * potentially recursing through to the cluster root.
+        * potentially recursing through to the filesystem root.
         *
         * This may reposition the cursor at one of the parent's of the
         * current node.
@@ -1052,7 +807,7 @@ hammer_btree_delete(hammer_cursor_t cursor)
 /*
  * PRIMAY B-TREE SEARCH SUPPORT PROCEDURE
  *
- * Search a cluster's B-Tree for cursor->key_beg, return the matching node.
+ * Search the filesystem B-Tree for cursor->key_beg, return the matching node.
  *
  * The search can begin ANYWHERE in the B-Tree.  As a first step the search
  * iterates up the tree as necessary to properly position itself prior to
@@ -1066,8 +821,7 @@ hammer_btree_delete(hammer_cursor_t cursor)
  * The search is only guarenteed to end up on a leaf if an error code of 0
  * is returned, or if inserting and an error code of ENOENT is returned.
  * Otherwise it can stop at an internal node.  On success a search returns
- * a leaf node unless INCLUSTER is set and the search located a cluster push
- * node (which is an internal node).
+ * a leaf node.
  *
  * COMPLEXITY WARNING!  This is the core B-Tree search code for the entire
  * filesystem, and it is not simple code.  Please note the following facts:
@@ -1076,15 +830,7 @@ hammer_btree_delete(hammer_cursor_t cursor)
  *   right boundary is non-inclusive.  The create_tid is a generic part
  *   of the key for internal nodes.
  *
- * - Leaf nodes contain terminal elements AND spikes.  A spike recurses into
- *   another cluster and contains two leaf elements.. a beginning and an
- *   ending element.  The SPIKE_END element is RANGE-EXCLUSIVE, just like a
- *   boundary.  This means that it is possible to have two elements 
- *   (a spike ending element and a record) side by side with the same key.
- *
- * - Because the SPIKE_END element is range inclusive, it cannot match the
- *   right boundary of the parent node.  SPIKE_BEG and SPIKE_END elements
- *   always come in pairs, and always exist side by side in the same leaf.
+ * - Leaf nodes contain terminal elements only now.
  *
  * - Filesystem lookups typically set HAMMER_CURSOR_ASOF, indicating a
  *   historical search.  ASOF and INSERT are mutually exclusive.  When
@@ -1100,7 +846,6 @@ int
 btree_search(hammer_cursor_t cursor, int flags)
 {
        hammer_node_ondisk_t node;
-       hammer_cluster_t cluster;
        hammer_btree_elm_t elm;
        int error;
        int enospc = 0;
@@ -1111,9 +856,7 @@ btree_search(hammer_cursor_t cursor, int flags)
        flags |= cursor->flags;
 
        if (hammer_debug_btree) {
-               kprintf("SEARCH   %d:%d:%08x[%d] %016llx %02x key=%016llx cre=%016llx\n",
-                       cursor->node->cluster->volume->vol_no,
-                       cursor->node->cluster->clu_no,
+               kprintf("SEARCH   %016llx[%d] %016llx %02x key=%016llx cre=%016llx\n",
                        cursor->node->node_offset, 
                        cursor->index,
                        cursor->key_beg.obj_id,
@@ -1125,36 +868,11 @@ btree_search(hammer_cursor_t cursor, int flags)
 
        /*
         * Move our cursor up the tree until we find a node whos range covers
-        * the key we are trying to locate.  This may move us between
-        * clusters.
+        * the key we are trying to locate.
         *
         * The left bound is inclusive, the right bound is non-inclusive.
-        * It is ok to cursor up too far so when cursoring across a cluster
-        * boundary.
-        *
-        * First see if we can skip the whole cluster.  hammer_cursor_up()
-        * handles both cases but this way we don't check the cluster
-        * bounds when going up the tree within a cluster.
-        *
-        * NOTE: If INCLUSTER is set and we are at the root of the cluster,
-        * hammer_cursor_up() will return ENOENT.
+        * It is ok to cursor up too far.
         */
-       cluster = cursor->node->cluster;
-       for (;;) {
-               r = hammer_btree_cmp(&cursor->key_beg, &cluster->clu_btree_beg);
-               s = hammer_btree_cmp(&cursor->key_beg, &cluster->clu_btree_end);
-
-               if (r >= 0 && s < 0)
-                       break;
-               error = hammer_cursor_toroot(cursor);
-               if (error)
-                       goto done;
-               KKASSERT(cursor->parent);
-               error = hammer_cursor_up(cursor);
-               if (error)
-                       goto done;
-               cluster = cursor->node->cluster;
-       }
        for (;;) {
                r = hammer_btree_cmp(&cursor->key_beg, cursor->left_bound);
                s = hammer_btree_cmp(&cursor->key_beg, cursor->right_bound);
@@ -1177,16 +895,15 @@ btree_search(hammer_cursor_t cursor, int flags)
        }
 
        /*
-        * We better have ended up with a node somewhere, and our second
-        * while loop had better not have traversed up a cluster.
+        * We better have ended up with a node somewhere.
         */
-       KKASSERT(cursor->node != NULL && cursor->node->cluster == cluster);
+       KKASSERT(cursor->node != NULL);
 
        /*
         * If we are inserting we can't start at a full node if the parent
         * is also full (because there is no way to split the node),
         * continue running up the tree until the requirement is satisfied
-        * or we hit the root of the current cluster.
+        * or we hit the root of the filesystem.
         *
         * (If inserting we aren't doing an as-of search so we don't have
         *  to worry about create_check).
@@ -1196,7 +913,7 @@ btree_search(hammer_cursor_t cursor, int flags)
                        if (btree_node_is_full(cursor->node->ondisk) == 0)
                                break;
                } else {
-                       if (btree_node_is_almost_full(cursor->node->ondisk) ==0)
+                       if (btree_node_is_full(cursor->node->ondisk) ==0)
                                break;
                }
                if (cursor->node->ondisk->parent == 0 ||
@@ -1204,17 +921,15 @@ btree_search(hammer_cursor_t cursor, int flags)
                        break;
                }
                error = hammer_cursor_up(cursor);
-               /* cluster and node are now may become stale */
+               /* node may have become stale */
                if (error)
                        goto done;
        }
-       /* cluster = cursor->node->cluster; not needed until next cluster = */
 
-new_cluster:
+re_search:
        /*
         * Push down through internal nodes to locate the requested key.
         */
-       cluster = cursor->node->cluster;
        node = cursor->node->ondisk;
        while (node->type == HAMMER_BTREE_TYPE_INTERNAL) {
                /*
@@ -1233,9 +948,7 @@ new_cluster:
                 * desired key.  This requires numerous special cases.
                 */
                if (hammer_debug_btree) {
-                       kprintf("SEARCH-I %d:%d:%08x count=%d\n",
-                               cursor->node->cluster->volume->vol_no,
-                               cursor->node->cluster->clu_no,
+                       kprintf("SEARCH-I %016llx count=%d\n",
                                cursor->node->node_offset,
                                node->count);
                }
@@ -1336,10 +1049,8 @@ new_cluster:
                elm = &node->elms[i];
 
                if (hammer_debug_btree) {
-                       kprintf("RESULT-I %d:%d:%08x[%d] %016llx %02x "
+                       kprintf("RESULT-I %016llx[%d] %016llx %02x "
                                "key=%016llx cre=%016llx\n",
-                               cursor->node->cluster->volume->vol_no,
-                               cursor->node->cluster->clu_no,
                                cursor->node->node_offset,
                                i,
                                elm->internal.base.obj_id,
@@ -1361,7 +1072,7 @@ new_cluster:
                if (elm->internal.subtree_offset == 0) {
                        error = btree_remove_deleted_element(cursor);
                        if (error == 0)
-                               goto new_cluster;
+                               goto re_search;
                        if (error == EDEADLK &&
                            (flags & HAMMER_CURSOR_INSERT) == 0) {
                                error = ENOENT;
@@ -1384,7 +1095,7 @@ new_cluster:
                 *
                 * If we run out of space we set enospc and continue on
                 * to a leaf to provide the spike code with a good point
-                * of entry.  Enospc is reset if we cross a cluster boundary.
+                * of entry.
                 */
                if ((flags & HAMMER_CURSOR_INSERT) && enospc == 0) {
                        if (btree_node_is_full(node)) {
@@ -1407,19 +1118,17 @@ new_cluster:
                 * the parent) and continue the search.
                 */
                error = hammer_cursor_down(cursor);
-               /* node and cluster become stale */
+               /* node may have become stale */
                if (error)
                        goto done;
                node = cursor->node->ondisk;
-               cluster = cursor->node->cluster;
        }
 
        /*
         * We are at a leaf, do a linear search of the key array.
         *
         * If we encounter a spike element type within the necessary
-        * range we push into it.  Note that SPIKE_END is non-inclusive
-        * of the spike range.
+        * range we push into it.
         *
         * On success the index is set to the matching element and 0
         * is returned.
@@ -1434,9 +1143,7 @@ new_cluster:
        KKASSERT (node->type == HAMMER_BTREE_TYPE_LEAF);
        KKASSERT(node->count <= HAMMER_BTREE_LEAF_ELMS);
        if (hammer_debug_btree) {
-               kprintf("SEARCH-L %d:%d:%08x count=%d\n",
-                       cursor->node->cluster->volume->vol_no,
-                       cursor->node->cluster->clu_no,
+               kprintf("SEARCH-L %016llx count=%d\n",
                        cursor->node->node_offset,
                        node->count);
        }
@@ -1449,81 +1156,6 @@ new_cluster:
                if (hammer_debug_btree > 1)
                        kprintf("  ELM %p %d r=%d\n", &node->elms[i], i, r);
 
-               if (elm->leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_BEG) {
-                       /*
-                        * SPIKE_BEG.  Stop if we are to the left of the
-                        * spike begin element.
-                        *
-                        * If we are not the last element in the leaf continue
-                        * the loop looking for the SPIKE_END.  If we are
-                        * the last element, however, then push into the
-                        * spike.
-                        *
-                        * If doing an as-of search a Spike demark on a
-                        * create_tid boundary must be pushed into and an
-                        * iteration will be forced if it turned out to be
-                        * the wrong choice.
-                        *
-                        * If not doing an as-of search exact comparisons
-                        * must be used.
-                        *
-                        * enospc must be reset because we have crossed a
-                        * cluster boundary.
-                        */
-                       if (r < 0)
-                               goto failed;
-
-                       /*
-                        * Set the create_check if the spike element
-                        * only differs by its create_tid.
-                        */
-                       if (r == 1) {
-                               cursor->create_check = elm->base.create_tid - 1;
-                               cursor->flags |= HAMMER_CURSOR_CREATE_CHECK;
-                       }
-                       if (i != node->count - 1)
-                               continue;
-                       panic("btree_search: illegal spike, no SPIKE_END "
-                             "in leaf node! %p\n", cursor->node);
-               }
-               if (elm->leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_END) {
-                       /*
-                        * SPIKE_END.  We can only hit this case if we are
-                        * greater or equal to SPIKE_BEG.
-                        *
-                        * If we are <= SPIKE_END we must push into
-                        * it, otherwise continue the search.  The SPIKE_END
-                        * element is range-inclusive.
-                        *
-                        * enospc must be reset because we have crossed a
-                        * cluster boundary.
-                        */
-                       if (r > 0) {
-                               /*
-                                * Continue the search but check for a
-                                * create_tid boundary.  Because the
-                                * SPIKE_END is inclusive we do not have
-                                * to subtract 1 to force an iteration to
-                                * go down the spike.
-                                */
-                               if (r == 1) {
-                                       cursor->create_check =
-                                               elm->base.create_tid - 1;
-                                       cursor->flags |=
-                                               HAMMER_CURSOR_CREATE_CHECK;
-                               }
-                               continue;
-                       }
-                       if (flags & HAMMER_CURSOR_INCLUSTER)
-                               goto success;
-                       cursor->index = i;
-                       error = hammer_cursor_down(cursor);
-                       enospc = 0;
-                       if (error)
-                               goto done;
-                       goto new_cluster;
-               }
-
                /*
                 * We are at a record element.  Stop if we've flipped past
                 * key_beg, not counting the create_tid test.  Allow the
@@ -1551,15 +1183,11 @@ new_cluster:
                                continue;
                        /* success */
                }
-success:
                cursor->index = i;
                error = 0;
                if (hammer_debug_btree) {
-                       kprintf("RESULT-L %d:%d:%08x[%d] (SUCCESS)\n",
-                               cursor->node->cluster->volume->vol_no,
-                               cursor->node->cluster->clu_no,
-                               cursor->node->node_offset,
-                               i);
+                       kprintf("RESULT-L %016llx[%d] (SUCCESS)\n",
+                               cursor->node->node_offset, i);
                }
                goto done;
        }
@@ -1569,11 +1197,8 @@ success:
         */
 failed:
        if (hammer_debug_btree) {
-               kprintf("RESULT-L %d:%d:%08x[%d] (FAILED)\n",
-                       cursor->node->cluster->volume->vol_no,
-                       cursor->node->cluster->clu_no,
-                       cursor->node->node_offset,
-                       i);
+               kprintf("RESULT-L %016llx[%d] (FAILED)\n",
+                       cursor->node->node_offset, i);
        }
 
        /*
@@ -1582,14 +1207,10 @@ failed:
         * If inserting split a full leaf before returning.  This
         * may have the side effect of adjusting cursor->node and
         * cursor->index.
-        *
-        * For now the leaf must have at least 2 free elements to accomodate
-        * the insertion of a spike during recovery.  See the
-        * hammer_btree_insert_cluster() function.
         */
        cursor->index = i;
        if ((flags & HAMMER_CURSOR_INSERT) && enospc == 0 &&
-            btree_node_is_almost_full(node)) {
+            btree_node_is_full(node)) {
                error = btree_split_leaf(cursor);
                if (error) {
                        if (error != ENOSPC)
@@ -1631,21 +1252,16 @@ done:
  * to push into.  We will adjust node and index if that element winds
  * up in the split node.
  *
- * If we are at the root of a cluster a new root must be created with two
- * elements, one pointing to the original root and one pointing to the
+ * If we are at the root of the filesystem a new root must be created with
+ * two elements, one pointing to the original root and one pointing to the
  * newly allocated split node.
- *
- * NOTE! Being at the root of a cluster is different from being at the
- * root of the root cluster.  cursor->parent will not be NULL and
- * cursor->node->ondisk.parent must be tested against 0.  Theoretically
- * we could propogate the algorithm into the parent and deal with multiple
- * 'roots' in the cluster header, but it's easier not to.
  */
 static
 int
 btree_split_internal(hammer_cursor_t cursor)
 {
        hammer_node_ondisk_t ondisk;
+       hammer_mount_t hmp;
        hammer_node_t node;
        hammer_node_t parent;
        hammer_node_t new_node;
@@ -1661,11 +1277,9 @@ btree_split_internal(hammer_cursor_t cursor)
 
        if ((error = hammer_cursor_upgrade(cursor)) != 0)
                return(error);
-       if ((cursor->flags & HAMMER_CURSOR_RECOVER) == 0) {
-               error = hammer_btree_lock_children(cursor, &locklist);
-               if (error)
-                       goto done;
-       }
+       error = hammer_btree_lock_children(cursor, &locklist);
+       if (error)
+               goto done;
 
        /* 
         * We are splitting but elms[split] will be promoted to the parent,
@@ -1678,18 +1292,15 @@ btree_split_internal(hammer_cursor_t cursor)
        split = (ondisk->count + 1) / 2;
        if (cursor->index <= split)
                --split;
+       hmp = node->volume->hmp;
 
        /*
-        * If we are at the root of the cluster, create a new root node with
-        * 1 element and split normally.  Avoid making major modifications
-        * until we know the whole operation will work.
-        *
-        * The root of the cluster is different from the root of the root
-        * cluster.  Use the node's on-disk structure's parent offset to
-        * detect the case.
+        * If we are at the root of the filesystem, create a new root node
+        * with 1 element and split normally.  Avoid making major
+        * modifications until we know the whole operation will work.
         */
        if (ondisk->parent == 0) {
-               parent = hammer_alloc_btree(node->cluster, &error);
+               parent = hammer_alloc_btree(hmp, &error);
                if (parent == NULL)
                        goto done;
                hammer_lock_ex(&parent->lock);
@@ -1698,10 +1309,10 @@ btree_split_internal(hammer_cursor_t cursor)
                ondisk->count = 1;
                ondisk->parent = 0;
                ondisk->type = HAMMER_BTREE_TYPE_INTERNAL;
-               ondisk->elms[0].base = node->cluster->clu_btree_beg;
+               ondisk->elms[0].base = hmp->root_btree_beg;
                ondisk->elms[0].base.btype = node->ondisk->type;
                ondisk->elms[0].internal.subtree_offset = node->node_offset;
-               ondisk->elms[1].base = node->cluster->clu_btree_end;
+               ondisk->elms[1].base = hmp->root_btree_end;
                /* ondisk->elms[1].base.btype - not used */
                made_root = 1;
                parent_index = 0;       /* index of current node in parent */
@@ -1709,7 +1320,6 @@ btree_split_internal(hammer_cursor_t cursor)
                made_root = 0;
                parent = cursor->parent;
                parent_index = cursor->parent_index;
-               KKASSERT(parent->cluster == node->cluster);
        }
 
        /*
@@ -1725,7 +1335,7 @@ btree_split_internal(hammer_cursor_t cursor)
         *   0 1 2 3      4 5 6  
         *
         */
-       new_node = hammer_alloc_btree(node->cluster, &error);
+       new_node = hammer_alloc_btree(hmp, &error);
        if (new_node == NULL) {
                if (made_root) {
                        hammer_unlock(&parent->lock);
@@ -1793,17 +1403,24 @@ btree_split_internal(hammer_cursor_t cursor)
        }
 
        /*
-        * The cluster's root pointer may have to be updated.
+        * The filesystem's root B-Tree pointer may have to be updated.
         */
        if (made_root) {
-               hammer_modify_cluster(node->cluster);
-               node->cluster->ondisk->clu_btree_root = parent->node_offset;
+               hammer_volume_t volume;
+
+               volume = hammer_get_root_volume(hmp, &error);
+               KKASSERT(error == 0);
+
+               hammer_modify_volume(volume, &volume->ondisk->vol0_btree_root,
+                                    sizeof(hammer_off_t));
+               volume->ondisk->vol0_btree_root = parent->node_offset;
                node->ondisk->parent = parent->node_offset;
                if (cursor->parent) {
                        hammer_unlock(&cursor->parent->lock);
                        hammer_rel_node(cursor->parent);
                }
                cursor->parent = parent;        /* lock'd and ref'd */
+               hammer_rel_volume(volume, 0);
        }
 
 
@@ -1861,25 +1478,19 @@ btree_split_leaf(hammer_cursor_t cursor)
        hammer_node_ondisk_t ondisk;
        hammer_node_t parent;
        hammer_node_t leaf;
+       hammer_mount_t hmp;
        hammer_node_t new_leaf;
        hammer_btree_elm_t elm;
        hammer_btree_elm_t parent_elm;
        hammer_base_elm_t mid_boundary;
-       hammer_node_locklist_t locklist = NULL;
        int parent_index;
        int made_root;
        int split;
        int error;
-       int i;
        const size_t esize = sizeof(*elm);
 
        if ((error = hammer_cursor_upgrade(cursor)) != 0)
                return(error);
-       if ((cursor->flags & HAMMER_CURSOR_RECOVER) == 0) {
-               error = hammer_btree_lock_children(cursor, &locklist);
-               if (error)
-                       goto done;
-       }
 
        /* 
         * Calculate the split point.  If the insertion point will be on
@@ -1895,13 +1506,9 @@ btree_split_leaf(hammer_cursor_t cursor)
        if (cursor->index <= split)
                --split;
        error = 0;
+       hmp = leaf->volume->hmp;
 
        elm = &ondisk->elms[split];
-       if (elm->leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_END) {
-               KKASSERT(split &&
-                       elm[-1].leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_BEG);
-               --split;
-       }
 
        /*
         * If we are at the root of the tree, create a new root node with
@@ -1909,7 +1516,7 @@ btree_split_leaf(hammer_cursor_t cursor)
         * until we know the whole operation will work.
         */
        if (ondisk->parent == 0) {
-               parent = hammer_alloc_btree(leaf->cluster, &error);
+               parent = hammer_alloc_btree(hmp, &error);
                if (parent == NULL)
                        goto done;
                hammer_lock_ex(&parent->lock);
@@ -1918,10 +1525,10 @@ btree_split_leaf(hammer_cursor_t cursor)
                ondisk->count = 1;
                ondisk->parent = 0;
                ondisk->type = HAMMER_BTREE_TYPE_INTERNAL;
-               ondisk->elms[0].base = leaf->cluster->clu_btree_beg;
+               ondisk->elms[0].base = hmp->root_btree_beg;
                ondisk->elms[0].base.btype = leaf->ondisk->type;
                ondisk->elms[0].internal.subtree_offset = leaf->node_offset;
-               ondisk->elms[1].base = leaf->cluster->clu_btree_end;
+               ondisk->elms[1].base = hmp->root_btree_end;
                /* ondisk->elms[1].base.btype = not used */
                made_root = 1;
                parent_index = 0;       /* insertion point in parent */
@@ -1929,7 +1536,6 @@ btree_split_leaf(hammer_cursor_t cursor)
                made_root = 0;
                parent = cursor->parent;
                parent_index = cursor->parent_index;
-               KKASSERT(parent->cluster == leaf->cluster);
        }
 
        /*
@@ -1944,7 +1550,7 @@ btree_split_leaf(hammer_cursor_t cursor)
         *         /   \
         *  L L L L     L L L L
         */
-       new_leaf = hammer_alloc_btree(leaf->cluster, &error);
+       new_leaf = hammer_alloc_btree(hmp, &error);
        if (new_leaf == NULL) {
                if (made_root) {
                        hammer_unlock(&parent->lock);
@@ -1992,50 +1598,31 @@ btree_split_leaf(hammer_cursor_t cursor)
        bcopy(parent_elm, parent_elm + 1,
              (ondisk->count - parent_index) * esize);
 
-       /*
-        * Create the separator.  XXX At the moment use exactly the
-        * right-hand element if this is a recovery operation in order
-        * to guarantee that it does not bisect the spike elements in a
-        * later call to hammer_btree_insert_cluster().
-        */
-       if (cursor->flags & HAMMER_CURSOR_RECOVER) {
-               parent_elm->base = elm[0].base;
-       } else {
-               hammer_make_separator(&elm[-1].base, &elm[0].base,
-                                     &parent_elm->base);
-       }
+       hammer_make_separator(&elm[-1].base, &elm[0].base, &parent_elm->base);
        parent_elm->internal.base.btype = new_leaf->ondisk->type;
        parent_elm->internal.subtree_offset = new_leaf->node_offset;
        mid_boundary = &parent_elm->base;
        ++ondisk->count;
 
        /*
-        * The children of new_leaf need their parent pointer set to new_leaf.
-        * The children have already been locked by btree_lock_children().
-        *
-        * The leaf's elements are either TYPE_RECORD or TYPE_SPIKE_*.  Only
-        * elements of BTREE_TYPE_SPIKE_END really requires any action.
-        */
-       for (i = 0; i < new_leaf->ondisk->count; ++i) {
-               elm = &new_leaf->ondisk->elms[i];
-               error = btree_set_parent(new_leaf, elm);
-               if (error) {
-                       panic("btree_split_internal: btree-fixup problem");
-               }
-       }
-
-       /*
-        * The cluster's root pointer may have to be updated.
+        * The filesystem's root B-Tree pointer may have to be updated.
         */
        if (made_root) {
-               hammer_modify_cluster(leaf->cluster);
-               leaf->cluster->ondisk->clu_btree_root = parent->node_offset;
+               hammer_volume_t volume;
+
+               volume = hammer_get_root_volume(hmp, &error);
+               KKASSERT(error == 0);
+
+               hammer_modify_volume(volume, &volume->ondisk->vol0_btree_root,
+                                    sizeof(hammer_off_t));
+               volume->ondisk->vol0_btree_root = parent->node_offset;
                leaf->ondisk->parent = parent->node_offset;
                if (cursor->parent) {
                        hammer_unlock(&cursor->parent->lock);
                        hammer_rel_node(cursor->parent);
                }
                cursor->parent = parent;        /* lock'd and ref'd */
+               hammer_rel_volume(volume, 0);
        }
 
        /*
@@ -2070,19 +1657,14 @@ btree_split_leaf(hammer_cursor_t cursor)
        cursor->right_bound = &parent_elm[1].internal.base;
 
        /*
-        * Note: The right assertion is typically > 0, but if the last element
-        * is a SPIKE_END it can be == 0 because the spike-end is non-inclusive
-        * of the range being spiked.
-        *
-        * This may seem a bit odd but it works.
+        * Assert that the bounds are correct.
         */
        KKASSERT(hammer_btree_cmp(cursor->left_bound,
                 &cursor->node->ondisk->elms[0].leaf.base) <= 0);
        KKASSERT(hammer_btree_cmp(cursor->right_bound,
-                &cursor->node->ondisk->elms[cursor->node->ondisk->count-1].leaf.base) >= 0);
+                &cursor->node->ondisk->elms[cursor->node->ondisk->count-1].leaf.base) > 0);
 
 done:
-       hammer_btree_unlock_children(&locklist);
        hammer_cursor_downgrade(cursor);
        return (error);
 }
@@ -2145,17 +1727,11 @@ hammer_btree_correct_rhb(hammer_cursor_t cursor, hammer_tid_t tid)
 
                /*
                 * Stop if the right-hand bound's create_tid does not
-                * need to be corrected.   Note that if the parent is
-                * a cluster the bound is pointing at the actual bound
-                * in the cluster header, not the SPIKE_END element in
-                * the parent cluster, so we don't have to worry about
-                * the fact that SPIKE_END is range-inclusive.
+                * need to be corrected.
                 */
                if (cursor->right_bound->create_tid >= tid)
                        break;
 
-               KKASSERT(cursor->parent->ondisk->elms[cursor->parent_index].base.btype != HAMMER_BTREE_TYPE_SPIKE_BEG);
-
                rhb = kmalloc(sizeof(*rhb), M_HAMMER, M_WAITOK|M_ZERO);
                rhb->node = cursor->parent;
                rhb->index = cursor->parent_index;
@@ -2174,9 +1750,8 @@ hammer_btree_correct_rhb(hammer_cursor_t cursor, hammer_tid_t tid)
        error = 0;
        while (error == 0 && (rhb = TAILQ_FIRST(&rhb_list)) != NULL) {
                error = hammer_cursor_seek(cursor, rhb->node, rhb->index);
-               kprintf("CORRECT RHB %d:%d:%08x index %d type=%c\n",
-                       rhb->node->cluster->volume->vol_no,
-                       rhb->node->cluster->clu_no, rhb->node->node_offset,
+               kprintf("CORRECT RHB %016llx index %d type=%c\n",
+                       rhb->node->node_offset,
                        rhb->index, cursor->node->ondisk->type);
                if (error)
                        break;
@@ -2197,33 +1772,6 @@ hammer_btree_correct_rhb(hammer_cursor_t cursor, hammer_tid_t tid)
                        ++cursor->index;
                        error = hammer_btree_correct_lhb(cursor, tid);
                        break;
-               case HAMMER_BTREE_TYPE_LEAF:
-                       /*
-                        * Right-boundary for parent at leaf node.  Both
-                        * the SPIKE_END and the cluster header must be
-                        * corrected, but we don't have to traverse down
-                        * (there's nothing TO traverse down other then what
-                        * we've already recorded).
-                        *
-                        * The SPIKE_END is range-inclusive.
-                        */
-                       error = hammer_cursor_down(cursor);
-                       if (error == 0)
-                               error = hammer_lock_upgrade(&cursor->parent->lock);
-                       if (error == 0) {
-                               kprintf("hammer_btree_correct_rhb-X @%d:%d:%08x\n",
-                                       cursor->parent->cluster->volume->vol_no,
-                                       cursor->parent->cluster->clu_no,
-                                       cursor->parent->node_offset);
-                               hammer_modify_node(cursor->parent);
-                               elm = &cursor->parent->ondisk->elms[cursor->parent_index].base;
-                               KKASSERT(elm->btype == HAMMER_BTREE_TYPE_SPIKE_END);
-                               elm->create_tid = tid - 1;
-                               hammer_modify_cluster(cursor->node->cluster);
-                               cursor->node->cluster->ondisk->clu_btree_end.create_tid = tid;
-                               cursor->node->cluster->clu_btree_end.create_tid = tid;
-                       }
-                       break;
                default:
                        panic("hammer_btree_correct_rhb(): Bad node type");
                        error = EINVAL;
@@ -2289,7 +1837,7 @@ hammer_btree_correct_lhb(hammer_cursor_t cursor, hammer_tid_t tid)
                        elm = &cursor->node->ondisk->elms[cursor->index].base;
                        if (elm->btype == HAMMER_BTREE_TYPE_RECORD)
                                break;
-                       KKASSERT(elm->btype == HAMMER_BTREE_TYPE_SPIKE_BEG);
+                       panic("Illegal leaf record type %02x", elm->btype);
                }
                error = hammer_cursor_down(cursor);
                if (error)
@@ -2322,41 +1870,10 @@ hammer_btree_correct_lhb(hammer_cursor_t cursor, hammer_tid_t tid)
 
                elm = &cursor->node->ondisk->elms[cursor->index].base;
                if (cursor->node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL) {
-                       kprintf("hammer_btree_correct_lhb-I @%d:%d:%08x @%d\n",
-                               cursor->node->cluster->volume->vol_no,
-                               cursor->node->cluster->clu_no,
+                       kprintf("hammer_btree_correct_lhb-I @%016llx[%d]\n",
                                cursor->node->node_offset, cursor->index);
                        hammer_modify_node(cursor->node);
                        elm->create_tid = tid;
-               } else if (elm->btype == HAMMER_BTREE_TYPE_SPIKE_BEG) {
-                       /*
-                        * SPIKE_BEG, also correct cluster header.  Occurs
-                        * only while we are traversing the left-hand
-                        * boundary.
-                        */
-                       kprintf("hammer_btree_correct_lhb-B @%d:%d:%08x\n",
-                               cursor->node->cluster->volume->vol_no,
-                               cursor->node->cluster->clu_no,
-                               cursor->node->node_offset);
-                       hammer_modify_node(cursor->node);
-                       elm->create_tid = tid;
-
-                       /*
-                        * We can only cursor down through SPIKE_END.
-                        */
-                       ++cursor->index;
-                       error = hammer_cursor_down(cursor);
-                       if (error == 0)
-                               error = hammer_lock_upgrade(&cursor->parent->lock);
-                       if (error == 0) {
-                               hammer_modify_node(cursor->parent);
-                               elm = &cursor->parent->ondisk->elms[cursor->parent_index - 1].base;
-                               KKASSERT(elm->btype == HAMMER_BTREE_TYPE_SPIKE_BEG);
-                               elm->create_tid = tid;
-                               hammer_modify_cluster(cursor->node->cluster);
-                               cursor->node->cluster->ondisk->clu_btree_end.create_tid = tid;
-                               cursor->node->cluster->clu_btree_end.create_tid = tid;
-                       }
                } else {
                        panic("hammer_btree_correct_lhb(): Bad element type");
                }
@@ -2393,106 +1910,23 @@ btree_remove(hammer_cursor_t cursor)
        hammer_node_ondisk_t ondisk;
        hammer_btree_elm_t elm;
        hammer_node_t node;
-       hammer_node_t save;
        hammer_node_t parent;
        const int esize = sizeof(*elm);
        int error;
 
-       /*
-        * If we are at the root of the cluster we must be able to
-        * successfully delete the HAMMER_BTREE_SPIKE_* leaf elements in
-        * the parent in order to be able to destroy the cluster.
-        */
        node = cursor->node;
 
+       /*
+        * When deleting the root of the filesystem convert it to
+        * an empty leaf node.  Internal nodes cannot be empty.
+        */
        if (node->ondisk->parent == 0) {
                hammer_modify_node(node);
                ondisk = node->ondisk;
                ondisk->type = HAMMER_BTREE_TYPE_LEAF;
                ondisk->count = 0;
                cursor->index = 0;
-               error = 0;
-
-               /*
-                * When trying to delete a cluster we need to exclusively
-                * lock the cluster root, its parent (leaf in parent cluster),
-                * AND the parent of that leaf if it's going to be empty,
-                * because we can't leave around an empty leaf.
-                *
-                * XXX this is messy due to potentially recursive locks.
-                * downgrade the cursor, get a second shared lock on the
-                * node that cannot deadlock because we only own shared locks
-                * then, cursor-up, and re-upgrade everything.  If the
-                * upgrades EDEADLK then don't try to remove the cluster
-                * at this time.
-                */
-               if ((parent = cursor->parent) != NULL) {
-                       hammer_cursor_downgrade(cursor);
-                       save = node;
-                       hammer_ref_node(save);
-                       hammer_lock_sh(&save->lock);
-
-                       /*
-                        * After the cursor up save has the empty root node
-                        * of the target cluster to be deleted, cursor->node
-                        * is at the leaf containing the spikes, and
-                        * cursor->parent is the parent of that leaf.
-                        *
-                        * cursor->node and cursor->parent are both in the
-                        * parent cluster of the cluster being deleted.
-                        */
-                       error = hammer_cursor_up(cursor);
-
-                       if (error == 0)
-                               error = hammer_cursor_upgrade(cursor);
-                       if (error == 0)
-                               error = hammer_lock_upgrade(&save->lock);
-
-                       if (error) {
-                               /* may be EDEADLK */
-                               kprintf("BTREE_REMOVE: Cannot delete cluster\n");
-                               Debugger("BTREE_REMOVE");
-                       } else {
-                               /* 
-                                * cursor->node is now the leaf in the parent
-                                * cluster containing the spike elements.
-                                *
-                                * The cursor should be pointing at the
-                                * SPIKE_END element.
-                                *
-                                * Remove the spike elements and recurse
-                                * if the leaf becomes empty.
-                                */
-                               node = cursor->node;
-                               hammer_modify_node(node);
-                               ondisk = node->ondisk;
-                               KKASSERT(cursor->index > 0);
-                               --cursor->index;
-                               elm = &ondisk->elms[cursor->index];
-                               KKASSERT(elm[0].leaf.base.btype ==
-                                        HAMMER_BTREE_TYPE_SPIKE_BEG);
-                               KKASSERT(elm[1].leaf.base.btype ==
-                                        HAMMER_BTREE_TYPE_SPIKE_END);
-
-                               /*
-                                * Ok, remove it and the underlying record.
-                                */
-                               hammer_free_record(node->cluster,
-                                                  elm->leaf.rec_offset,
-                                                  HAMMER_RECTYPE_CLUSTER);
-                               bcopy(elm + 2, elm, (ondisk->count -
-                                                   cursor->index - 2) * esize);
-                               ondisk->count -= 2;
-                               save->flags |= HAMMER_NODE_DELETED;
-                               save->cluster->flags |= HAMMER_CLUSTER_DELETED;
-                               hammer_flush_node(save);
-                               hammer_unlock(&save->lock);
-                               hammer_rel_node(save);
-                               if (ondisk->count == 0)
-                                       error = EAGAIN;
-                       }
-               }
-               return(error);
+               return(0);
        }
 
        /*
@@ -2596,22 +2030,12 @@ btree_remove_deleted_element(hammer_cursor_t cursor)
  * If the element represents a pointer to an internal node that node's
  * parent must be adjusted to the element's new location.
  *
- * If the element represents a spike the target cluster's header must
- * be adjusted to point to the element's new location.  This only
- * applies to HAMMER_SPIKE_END. 
- *
- * GET_CLUSTER_NORECOVER must be used to avoid a recovery recursion during
- * the rebuild of the recovery cluster's B-Tree, which can blow the kernel
- * stack.
- *
  * XXX deadlock potential here with our exclusive locks
  */
 static
 int
 btree_set_parent(hammer_node_t node, hammer_btree_elm_t elm)
 {
-       hammer_volume_t volume;
-       hammer_cluster_t cluster;
        hammer_node_t child;
        int error;
 
@@ -2620,7 +2044,7 @@ btree_set_parent(hammer_node_t node, hammer_btree_elm_t elm)
        switch(elm->base.btype) {
        case HAMMER_BTREE_TYPE_INTERNAL:
        case HAMMER_BTREE_TYPE_LEAF:
-               child = hammer_get_node(node->cluster,
+               child = hammer_get_node(node->volume->hmp,
                                        elm->internal.subtree_offset, &error);
                if (error == 0) {
                        hammer_modify_node(child);
@@ -2628,24 +2052,6 @@ btree_set_parent(hammer_node_t node, hammer_btree_elm_t elm)
                        hammer_rel_node(child);
                }
                break;
-       case HAMMER_BTREE_TYPE_SPIKE_END:
-               volume = hammer_get_volume(node->cluster->volume->hmp,
-                                          elm->leaf.spike_vol_no, &error);
-               if (error)
-                       break;
-               cluster = hammer_get_cluster(volume, elm->leaf.spike_clu_no,
-                                            &error, GET_CLUSTER_NORECOVER);
-                hammer_rel_volume(volume, 0);
-               if (error)
-                       break;
-               hammer_modify_cluster(cluster);
-               cluster->ondisk->clu_btree_parent_offset = node->node_offset;
-               KKASSERT(cluster->ondisk->clu_btree_parent_clu_no ==
-                        node->cluster->clu_no);
-               KKASSERT(cluster->ondisk->clu_btree_parent_vol_no ==
-                        node->cluster->volume->vol_no);
-               hammer_rel_cluster(cluster, 0);
-               break;
        default:
                break;
        }
@@ -2660,10 +2066,6 @@ btree_set_parent(hammer_node_t node, hammer_btree_elm_t elm)
  * If we don't lock the children we can really mess up cursors which block
  * trying to cursor-up into our node.
  *
- * WARNING: Cannot be used when doing B-tree operations on a recovery
- * cluster because the target cluster may require recovery, resulting
- * in a deep recursion which blows the kernel stack.
- *
  * On failure EDEADLK (or some other error) is returned.  If a deadlock
  * error is returned the cursor is adjusted to block on termination.
  */
@@ -2675,8 +2077,6 @@ hammer_btree_lock_children(hammer_cursor_t cursor,
        hammer_node_locklist_t item;
        hammer_node_ondisk_t ondisk;
        hammer_btree_elm_t elm;
-       hammer_volume_t volume;
-       hammer_cluster_t cluster;
        hammer_node_t child;
        int error;
        int i;
@@ -2687,34 +2087,15 @@ hammer_btree_lock_children(hammer_cursor_t cursor,
        for (i = 0; error == 0 && i < ondisk->count; ++i) {
                elm = &ondisk->elms[i];
 
-               child = NULL;
                switch(elm->base.btype) {
                case HAMMER_BTREE_TYPE_INTERNAL:
                case HAMMER_BTREE_TYPE_LEAF:
-                       child = hammer_get_node(node->cluster,
+                       child = hammer_get_node(node->volume->hmp,
                                                elm->internal.subtree_offset,
                                                &error);
                        break;
-               case HAMMER_BTREE_TYPE_SPIKE_END:
-                       volume = hammer_get_volume(node->cluster->volume->hmp,
-                                                  elm->leaf.spike_vol_no,
-                                                  &error);
-                       if (error)
-                               break;
-                       cluster = hammer_get_cluster(volume,
-                                                    elm->leaf.spike_clu_no,
-                                                    &error,
-                                                    0);
-                       hammer_rel_volume(volume, 0);
-                       if (error)
-                               break;
-                       KKASSERT(cluster->ondisk->clu_btree_root != 0);
-                       child = hammer_get_node(cluster,
-                                               cluster->ondisk->clu_btree_root,
-                                               &error);
-                       hammer_rel_cluster(cluster, 0);
-                       break;
                default:
+                       child = NULL;
                        break;
                }
                if (child) {