From 427e5fc6c45276cf442ea92139f0a177164d2f94 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 1 Nov 2007 20:53:05 +0000 Subject: [PATCH] HAMMER part 1/many. This is a clear-my-plate commit. * Skeleton VFS infrastructure. No VFS ops yet. * Core B-Tree infrastructure - including the delete & rebalance code, but not yet including the cluster extension code. * Core in-memory structures and related locking and tracking primitives. * Core A-List (allocator) and buffer management infrastructure. --- sys/vfs/hammer/Makefile | 5 +- sys/vfs/hammer/hammer.h | 203 +++- sys/vfs/hammer/hammer_alist.c | 3 +- sys/vfs/hammer/hammer_btree.c | 1591 ++++++++++++++++++++++++++++++++ sys/vfs/hammer/hammer_btree.h | 210 ++++- sys/vfs/hammer/hammer_disk.h | 38 +- sys/vfs/hammer/hammer_inode.c | 251 +++++ sys/vfs/hammer/hammer_ondisk.c | 1417 ++++++++++++++++++++++++++++ sys/vfs/hammer/hammer_subs.c | 70 ++ sys/vfs/hammer/hammer_vfsops.c | 264 ++++++ sys/vfs/hammer/hammer_vnops.c | 293 ++++++ 11 files changed, 4269 insertions(+), 76 deletions(-) create mode 100644 sys/vfs/hammer/hammer_btree.c create mode 100644 sys/vfs/hammer/hammer_inode.c create mode 100644 sys/vfs/hammer/hammer_ondisk.c create mode 100644 sys/vfs/hammer/hammer_subs.c create mode 100644 sys/vfs/hammer/hammer_vfsops.c create mode 100644 sys/vfs/hammer/hammer_vnops.c diff --git a/sys/vfs/hammer/Makefile b/sys/vfs/hammer/Makefile index d4a2e5d2a9..d21f269dc5 100644 --- a/sys/vfs/hammer/Makefile +++ b/sys/vfs/hammer/Makefile @@ -1,8 +1,9 @@ # -# $DragonFly: src/sys/vfs/hammer/Makefile,v 1.1 2007/10/10 19:37:25 dillon Exp $ +# $DragonFly: src/sys/vfs/hammer/Makefile,v 1.2 2007/11/01 20:53:05 dillon Exp $ KMOD= hammer -SRCS= hammer_vfsops.c hammer_vnops.c hammer_inode.c hammer_btree.c +SRCS= hammer_vfsops.c hammer_vnops.c hammer_inode.c \ + hammer_subs.c hammer_ondisk.c hammer_btree.c NOMAN= .include diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index d289986ae5..1bd043011c 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,18 +31,24 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.2 2007/10/12 18:57:45 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.3 2007/11/01 20:53:05 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS * implementation. See hammer_disk.h for on-disk structures. */ +#include +#include +#include +#include #include #include -#include -#include "hammer_disk.h" +#include +#include +#include #include "hammer_alist.h" +#include "hammer_disk.h" #include "hammer_mount.h" #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) @@ -58,25 +64,23 @@ typedef struct hammer_inode_info { hammer_tid_t obj_asof; /* (key) snapshot transid or 0 */ } *hammer_inode_info_t; -/* - * Key and caching structure used for HAMMER B-Tree lookups. 
- */ -struct hammer_btree_info { - struct hammer_base_elm key; - struct hammer_cluster *cluster; - struct buf *bp_node; - struct buf *bp2; - struct buf *bp3; - struct hammer_btree_node *node; /* marker for insertion/deletion */ - int index; /* marker for insertion/deletion */ - union hammer_btree_elm *elm; /* marker for insertion/deletion */ - union hammer_record_ondisk *rec;/* returned record pointer */ - union hammer_data_ondisk *data; /* returned data pointer */ +struct hammer_lock { + int refs; + int wanted; + struct thread *locktd; }; -#define HAMMER_BTREE_GET_RECORD 0x0001 -#define HAMMER_BTREE_GET_DATA 0x0002 -#define HAMMER_BTREE_CLUSTER_TAG 0x0004 /* stop at the cluster tag */ +static __inline int +hammer_islocked(struct hammer_lock *lock) +{ + return(lock->refs > 0); +} + +static __inline int +hammer_islastref(struct hammer_lock *lock) +{ + return(lock->refs == 1); +} /* * Structures used to internally represent an inode @@ -94,6 +98,7 @@ struct hammer_inode { struct vnode *vp; struct hammer_inode_record ino_rec; struct hammer_inode_data ino_data; + struct hammer_lock lock; }; /* @@ -102,42 +107,83 @@ struct hammer_inode { struct hammer_volume; struct hammer_cluster; +struct hammer_supercl; +struct hammer_buffer; RB_HEAD(hammer_vol_rb_tree, hammer_volume); RB_HEAD(hammer_clu_rb_tree, hammer_cluster); +RB_HEAD(hammer_scl_rb_tree, hammer_supercl); +RB_HEAD(hammer_buf_rb_tree, hammer_buffer); RB_PROTOTYPE2(hammer_vol_rb_tree, hammer_volume, rb_node, hammer_vol_rb_compare, int32_t); RB_PROTOTYPE2(hammer_clu_rb_tree, hammer_cluster, rb_node, hammer_clu_rb_compare, int32_t); +RB_PROTOTYPE2(hammer_scl_rb_tree, hammer_supercl, rb_node, + hammer_scl_rb_compare, int32_t); +RB_PROTOTYPE2(hammer_buf_rb_tree, hammer_buffer, rb_node, + hammer_buf_rb_compare, int32_t); struct hammer_volume { RB_ENTRY(hammer_volume) rb_node; struct hammer_clu_rb_tree rb_clus_root; + struct hammer_scl_rb_tree rb_scls_root; struct buf *bp; struct hammer_volume_ondisk *ondisk; + struct hammer_alist_live alist; int32_t vol_no; int32_t vol_clsize; int64_t cluster_base; /* base offset of cluster 0 */ char *vol_name; struct vnode *devvp; struct hammer_mount *hmp; - int lock_count; - int ref_count; - int flags; + struct hammer_lock lock; + int vol_flags; + int modified; }; -#define HAMFS_CLUSTER_DIRTY 0x0001 +struct hammer_supercl { + RB_ENTRY(hammer_supercl) rb_node; + struct buf *bp; + struct hammer_supercl_ondisk *ondisk; + struct hammer_volume *volume; + struct hammer_alist_live alist; + int32_t scl_no; + int64_t scl_offset; + struct hammer_lock lock; + int modified; +}; struct hammer_cluster { RB_ENTRY(hammer_cluster) rb_node; + struct hammer_buf_rb_tree rb_bufs_root; struct buf *bp; struct hammer_cluster_ondisk *ondisk; struct hammer_volume *volume; + struct hammer_alist_live alist_master; + struct hammer_alist_live alist_btree; + struct hammer_alist_live alist_record; + struct hammer_alist_live alist_mdata; int32_t clu_no; - int lock_count; - int ref_count; + int64_t clu_offset; + struct hammer_lock lock; + int modified; +}; + +struct hammer_buffer { + RB_ENTRY(hammer_buffer) rb_node; + struct buf *bp; + hammer_fsbuf_ondisk_t ondisk; + struct hammer_cluster *cluster; + struct hammer_volume *volume; + struct hammer_alist_live alist; + int32_t buf_no; + int64_t buf_offset; + struct hammer_lock lock; + int modified; }; +#define HAMFS_CLUSTER_DIRTY 0x0001 + /* * Internal hammer mount data structure */ @@ -148,6 +194,9 @@ struct hammer_mount { struct hammer_vol_rb_tree rb_vols_root; struct hammer_volume *rootvol; 
struct hammer_cluster *rootcl; + /* struct hammer_volume *cache_volume */ + /* struct hammer_cluster *cache_cluster */ + /* struct hammer_buffer *cache_buffer */ uuid_t fsid; }; @@ -157,6 +206,13 @@ struct hammer_mount { #if defined(_KERNEL) extern struct vop_ops hammer_vnode_vops; +extern struct hammer_alist_config Buf_alist_config; +extern struct hammer_alist_config Vol_normal_alist_config; +extern struct hammer_alist_config Vol_super_alist_config; +extern struct hammer_alist_config Supercl_alist_config; +extern struct hammer_alist_config Clu_master_alist_config; +extern struct hammer_alist_config Clu_slave_alist_config; + int hammer_vop_inactive(struct vop_inactive_args *); int hammer_vop_reclaim(struct vop_reclaim_args *); int hammer_vfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp); @@ -167,11 +223,100 @@ int hammer_load_volume(struct hammer_mount *hmp, const char *volname); struct hammer_cluster *hammer_load_cluster(struct hammer_volume *volume, int clu_no, int *errorp); -int hammer_btree_lookup(struct hammer_btree_info *info, int flags); -void hammer_btree_lookup_done(struct hammer_btree_info *info); +void hammer_lock(struct hammer_lock *lock); +void hammer_unlock(struct hammer_lock *lock); + +int hammer_btree_lookup(hammer_btree_info_t info, + hammer_base_elm_t key, int flags); +int hammer_btree_insert(hammer_btree_info_t info, + hammer_btree_leaf_elm_t elm); +int hammer_btree_delete(hammer_btree_info_t info, hammer_base_elm_t key); +int hammer_btree_cursor_init(hammer_btree_cursor_t cursor, + struct hammer_cluster *cluster); +void hammer_btree_cursor_done(hammer_btree_cursor_t cusor); +int hammer_btree_info_init(hammer_btree_info_t info, + struct hammer_cluster *cluster); +void hammer_btree_info_done(hammer_btree_info_t info); void *hammer_bread(struct hammer_cluster *cluster, int32_t cloff, - int *errorp, struct buf **bufp); + u_int64_t buf_type, + int *errorp, struct hammer_buffer **bufferp); + +struct hammer_volume *hammer_get_volume(struct hammer_mount *hmp, + int32_t vol_no, int *errorp); +struct hammer_supercl *hammer_get_supercl(struct hammer_volume *volume, + int32_t scl_no, int *errorp, int isnew); +struct hammer_cluster *hammer_get_cluster(struct hammer_volume *volume, + int32_t clu_no, int *errorp, int isnew); +struct hammer_buffer *hammer_get_buffer(struct hammer_cluster *cluster, + int32_t buf_no, int64_t buf_type, int *errorp); +void hammer_dup_buffer(struct hammer_buffer **bufferp, + struct hammer_buffer *buffer); +void hammer_dup_cluster(struct hammer_cluster **clusterp, + struct hammer_cluster *cluster); +void *hammer_alloc_btree(struct hammer_cluster *cluster, + int *errorp, struct hammer_buffer **bufferp); +void *hammer_alloc_data(struct hammer_cluster *cluster, int32_t bytes, + int *errorp, struct hammer_buffer **bufferp); +void *hammer_alloc_record(struct hammer_cluster *cluster, + int *errorp, struct hammer_buffer **bufferp); +void hammer_free_btree_ptr(struct hammer_buffer *buffer, + hammer_btree_node_t node); +void hammer_free_data_ptr(struct hammer_buffer *buffer, + void *data, int bytes); +void hammer_free_record_ptr(struct hammer_buffer *buffer, + union hammer_record_ondisk *rec); +void hammer_free_btree(struct hammer_cluster *cluster, int32_t bclu_offset); +void hammer_free_data(struct hammer_cluster *cluster, int32_t bclu_offset, + int32_t bytes); +void hammer_free_record(struct hammer_cluster *cluster, int32_t bclu_offset); + +void hammer_put_volume(struct hammer_volume *volume); +void hammer_put_supercl(struct hammer_supercl *supercl); +void 
hammer_put_cluster(struct hammer_cluster *cluster); +void hammer_put_buffer(struct hammer_buffer *buffer); + +void hammer_init_alist_config(void); #endif +/* + * Inline support functions (not kernel specific) + */ +static __inline void +hammer_modify_volume(struct hammer_volume *volume) +{ + volume->modified = 1; +} + +static __inline void +hammer_modify_supercl(struct hammer_supercl *supercl) +{ + supercl->modified = 1; +} + +static __inline void +hammer_modify_cluster(struct hammer_cluster *cluster) +{ + cluster->modified = 1; +} + +static __inline void +hammer_modify_buffer(struct hammer_buffer *buffer) +{ + buffer->modified = 1; +} + +/* + * Return the cluster-relative byte offset of an element within a buffer + */ +static __inline int +hammer_bclu_offset(struct hammer_buffer *buffer, void *ptr) +{ + int bclu_offset; + + bclu_offset = buffer->buf_no * HAMMER_BUFSIZE + + ((char *)ptr - (char *)buffer->ondisk); + return(bclu_offset); +} + diff --git a/sys/vfs/hammer/hammer_alist.c b/sys/vfs/hammer/hammer_alist.c index 4682eccaaa..512a7a3d3c 100644 --- a/sys/vfs/hammer/hammer_alist.c +++ b/sys/vfs/hammer/hammer_alist.c @@ -38,7 +38,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/Attic/hammer_alist.c,v 1.3 2007/10/16 18:16:42 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/Attic/hammer_alist.c,v 1.4 2007/11/01 20:53:05 dillon Exp $ */ /* * This module implements a generic allocator through the use of a hinted @@ -81,6 +81,7 @@ #include #include +#include "hammer_alist.h" #include "hammer_disk.h" #else diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c new file mode 100644 index 0000000000..455c297475 --- /dev/null +++ b/sys/vfs/hammer/hammer_btree.c @@ -0,0 +1,1591 @@ +/* + * Copyright (c) 2007 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.1 2007/11/01 20:53:05 dillon Exp $ + */ + +/* + * HAMMER BH-Tree index + * + * HAMMER implements a modified B+Tree. In documentation this will + * simply be refered to as the HAMMER BH-Tree. Basically a BH-Tree + * looks like a B+Tree (A B-Tree which stores its records only at the leafs + * of the tree), but adds two additional boundary elements which describe + * the left-most and right-most element a node is able to represent. In + * otherwords, we have boundary elements at the two ends of a BH-Tree node + * instead of sub-tree pointers. + * + * A BH-Tree internal node looks like this: + * + * B N N N N N N B <-- boundary and internal elements + * S S S S S S S <-- subtree pointers + * + * A BH-Tree leaf node basically looks like this: + * + * L L L L L L L L <-- leaf elemenets + * + * The recursion radix is reduced by 2 relative to a normal B-Tree but + * we get a number of significant benefits for our troubles. + * + * The big benefit to using a BH-Tree is that it is possible to cache + * pointers into the middle of the tree and not have to start searches, + * insertions, OR deletions at the root node. In particular, searches are + * able to progress in a definitive direction from any point in the tree + * without revisting nodes. This greatly improves the efficiency of many + * operations, most especially record appends. + * + * BH-Trees also make the stacking of trees fairly straightforward. + */ +#include "hammer.h" +#include +#include + +static int btree_search(hammer_btree_cursor_t cursor, hammer_base_elm_t key, + int flags); +static int btree_cursor_set_cluster(hammer_btree_cursor_t cursor, + struct hammer_cluster *cluster); +static int btree_cursor_set_cluster_by_value(hammer_btree_cursor_t cursor, + int32_t vol_no, int32_t clu_no, hammer_tid_t clu_id); +static int btree_cursor_up(hammer_btree_cursor_t cursor); +static int btree_cursor_down(hammer_btree_cursor_t cursor); +static int btree_split_internal(hammer_btree_cursor_t cursor); +static int btree_split_leaf(hammer_btree_cursor_t cursor); +static int btree_rebalance_node(hammer_btree_cursor_t cursor); +static int btree_collapse(hammer_btree_cursor_t cursor); + +/* + * Compare two BH-Tree elements, return -1, 0, or +1 (e.g. similar to strcmp). + */ +static int +hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2) +{ + if (key1->obj_id < key2->obj_id) + return(-1); + if (key1->obj_id > key2->obj_id) + return(1); + + if (key1->rec_type < key2->rec_type) + return(-1); + if (key1->rec_type > key2->rec_type) + return(1); + + if (key1->key < key2->key) + return(-1); + if (key1->key > key2->key) + return(1); + + if (key1->create_tid < key2->create_tid) + return(-1); + if (key1->create_tid > key2->create_tid) + return(1); + + return(0); +} + +/* + * Create a separator half way inbetween key1 and key2. For fields just + * one unit apart, the separator will match key2. 
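+ *
+ * A quick worked example of the arithmetic: for obj_id values 10 and
+ * 17 the separator obj_id is 10 + ((17 - 10 + 1) >> 1) = 14, and for
+ * the adjacent values 10 and 11 it is 10 + ((11 - 10 + 1) >> 1) = 11,
+ * i.e. it matches key2 as noted above.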
+ */ +#define MAKE_SEPARATOR(key1, key2, dest, field) \ + dest->field = key1->field + ((key2->field - key1->field + 1) >> 1); + +static void +hammer_make_separator(hammer_base_elm_t key1, hammer_base_elm_t key2, + hammer_base_elm_t dest) +{ + MAKE_SEPARATOR(key1, key2, dest, obj_id); + MAKE_SEPARATOR(key1, key2, dest, rec_type); + MAKE_SEPARATOR(key1, key2, dest, key); + MAKE_SEPARATOR(key1, key2, dest, create_tid); + dest->delete_tid = 0; + dest->obj_type = 0; + dest->reserved07 = 0; +} + +#undef MAKE_SEPARATOR + +/* + * Return whether a generic internal or leaf node is full + */ +static int +btree_node_is_full(struct hammer_base_node *node) +{ + switch(node->type) { + case HAMMER_BTREE_INTERNAL_NODE: + if (node->count == HAMMER_BTREE_INT_ELMS) + return(1); + break; + case HAMMER_BTREE_LEAF_NODE: + if (node->count == HAMMER_BTREE_LEAF_ELMS) + return(1); + break; + default: + panic("illegal btree subtype"); + } + return(0); +} + +static int +btree_max_elements(u_int8_t type) +{ + if (type == HAMMER_BTREE_LEAF_NODE) + return(HAMMER_BTREE_LEAF_ELMS); + if (type == HAMMER_BTREE_INTERNAL_NODE) + return(HAMMER_BTREE_INT_ELMS); + panic("btree_max_elements: bad type %d\n", type); +} + +/* + * Initialize a cursor, setting the starting point for a BH-Tree search. + * + * The passed cluster must be locked. This function will add a reference + * to it. + */ +int +hammer_btree_cursor_init(hammer_btree_cursor_t cursor, + struct hammer_cluster *cluster) +{ + int error; + + cursor->cluster = NULL; + cursor->node_buffer = NULL; + cursor->parent_buffer = NULL; + cursor->node = NULL; + cursor->parent = NULL; + cursor->index = 0; + cursor->parent_index = 0; + error = btree_cursor_set_cluster(cursor, cluster); + return(error); +} + +/* + * Clean up a HAMMER BH-Tree cursor after we are finished using it. + */ +void +hammer_btree_cursor_done(hammer_btree_cursor_t cursor) +{ + if (cursor->node_buffer) { + hammer_put_buffer(cursor->node_buffer); + cursor->node_buffer = NULL; + } + if (cursor->parent_buffer) { + hammer_put_buffer(cursor->parent_buffer); + cursor->parent_buffer = NULL; + } + if (cursor->cluster) { + hammer_put_cluster(cursor->cluster); + cursor->cluster = NULL; + } + cursor->node = NULL; + cursor->parent = NULL; + cursor->left_bound = NULL; + cursor->right_bound = NULL; + cursor->index = 0; + cursor->parent_index = 0; +} + +/* + * Initialize a btree info structure and its associated cursor prior to + * running a BH-Tree operation. + */ +int +hammer_btree_info_init(hammer_btree_info_t info, struct hammer_cluster *cluster) +{ + int error; + + error = hammer_btree_cursor_init(&info->cursor, cluster); + info->data_buffer = NULL; + info->record_buffer = NULL; + info->data = NULL; + info->rec = NULL; + return (error); +} + +/* + * Clean up a BH-Tree info structure after we are finished using it. + */ +void +hammer_btree_info_done(hammer_btree_info_t info) +{ + hammer_btree_cursor_done(&info->cursor); + if (info->data_buffer) { + hammer_put_buffer(info->data_buffer); + info->data_buffer = NULL; + } + if (info->record_buffer) { + hammer_put_buffer(info->record_buffer); + info->record_buffer = NULL; + } + info->data = NULL; + info->rec = NULL; +} + +/* + * Search a cluster's BH-Tree. Return the matching node. The search + * normally traverses clusters but will terminate at a cluster entry + * in a leaf if HAMMER_BTREE_CLUSTER_TAG is specified. If this flag + * is specified EIO is returned if the search would otherwise have to + * cursor up into a parent cluster. 
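+ *
+ * For reference, a minimal sketch of how higher level code is expected
+ * to drive a lookup through the wrappers in this file (a sketch only,
+ * not code from this patch; assumes 'cluster' is already referenced
+ * and locked, e.g. via hammer_get_cluster()):
+ *
+ *	struct hammer_btree_info info;
+ *	struct hammer_base_elm key;
+ *	int error;
+ *
+ *	... fill in key (obj_id, rec_type, key, create_tid) ...
+ *	error = hammer_btree_info_init(&info, cluster);
+ *	if (error == 0) {
+ *		error = hammer_btree_lookup(&info, &key,
+ *					    HAMMER_BTREE_GET_RECORD |
+ *					    HAMMER_BTREE_GET_DATA);
+ *	}
+ *	hammer_btree_info_done(&info);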
+ * + * The cursor must be completely initialized on entry. If the cursor is + * at the root of a cluster, the parent pointer & buffer may be NULL (in + * that case the bounds point to the bounds in the cluster header). The + * node_buffer and node must always be valid. + * + * The search code may be forced to iterate up the tree if the conditions + * required for an insertion or deletion are not met. This does not occur + * very often. + * + * INSERTIONS: The search will split full nodes and leaves on its way down + * and guarentee that the leaf it ends up on is not full. + * + * DELETIONS: The search will rebalance the tree on its way down. + */ +static +int +btree_search(hammer_btree_cursor_t cursor, hammer_base_elm_t key, int flags) +{ + struct hammer_btree_leaf_node *leaf; + int error; + int i; + int r; + + /* + * Move our cursor up the tree until we find a node whos range covers + * the key we are trying to locate. This may move us between + * clusters. + * + * Since the root cluster always has a root BH-Tree node, info->node + * is always non-NULL if no error occured. The parent field will be + * non-NULL unless we are at the root of a cluster. + */ + while (hammer_btree_cmp(key, cursor->left_bound) < 0 || + hammer_btree_cmp(key, cursor->right_bound) >= 0) { + /* + * Must stay in current cluster if flagged, code should never + * use the flag if it wants us to traverse to the parent + * cluster. + */ + if (cursor->parent == NULL && + (flags & HAMMER_BTREE_CLUSTER_TAG)) { + return(EIO); + } + error = btree_cursor_up(cursor); + if (error) + return(error); + } + KKASSERT(cursor->node != NULL); + + /* + * If we are inserting we can't start at a full node if the parent + * is also full (because there is no way to split the node), + * continue running up the tree until we hit the root of the + * current cluster or until the requirement is satisfied. + */ + while (flags & HAMMER_BTREE_INSERT) { + if (btree_node_is_full(&cursor->node->base) == 0) + break; + if (cursor->parent == NULL) + break; + if (cursor->parent->base.count != HAMMER_BTREE_INT_ELMS) + break; + error = btree_cursor_up(cursor); + if (error) + return (error); + } + + /* + * If we are deleting we can't start at a node with only one element + * unless it is root, because all of our code assumes that nodes + * will never be empty. + * + * This handles the case where the cursor is sitting at a leaf and + * either the leaf or parent contain an insufficient number of + * elements. + */ + while (flags & HAMMER_BTREE_DELETE) { + if (cursor->node->base.count > 1) + break; + if (cursor->parent == NULL) + break; + KKASSERT(cursor->node->base.count != 0); + error = btree_cursor_up(cursor); + if (error) + return (error); + } + +new_cluster: + /* + * Push down through internal nodes to locate the requested key. + */ + while (cursor->node->base.type == HAMMER_BTREE_INTERNAL_NODE) { + struct hammer_btree_internal_node *node; + + /* + * If we are a the root node and deleting, try to collapse + * all of the root's children into the root. This is the + * only point where tree depth is reduced. + */ + if ((flags & HAMMER_BTREE_DELETE) && cursor->parent == NULL) { + error = btree_collapse(cursor); + if (error) + return (error); + } + + /* + * Scan the node (XXX binary search) to find the subtree + * index to push down into. We go one-past, then back-up. + * The key should never be less then the left-hand boundary + * so I should never wind up 0. 
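+ *
+ * As a concrete illustration: if elms[1].base <= key < elms[2].base
+ * the loop below stops at i == 2, and after backing up we push down
+ * into subtree index 1.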
+ */ + node = &cursor->node->internal; + for (i = 0; i < node->base.count; ++i) { + r = hammer_btree_cmp(key, &node->elms[i].base); + if (r < 0) + break; + } + KKASSERT(i != 0); + + /* + * The push-down index is now i - 1. + */ + --i; + cursor->index = i; + + /* + * Handle insertion and deletion requirements. + * + * If inserting split full nodes. The split code will + * adjust cursor->node and cursor->index if the current + * index winds up in the new node. + */ + if (flags & HAMMER_BTREE_INSERT) { + if (node->base.count == HAMMER_BTREE_INT_ELMS) { + error = btree_split_internal(cursor); + if (error) + return(error); + /* + * reload stale pointers + */ + i = cursor->index; + node = &cursor->node->internal; + } + } + + /* + * If deleting rebalance - do not allow the child to have + * just one element or we will not be able to delete it. + * + * Neither internal or leaf nodes (except a root-leaf) are + * allowed to drop to 0 elements. + * + * Our separators may have been reorganized after rebalancing, + * so we have to pop back up and rescan. + */ + if (flags & HAMMER_BTREE_DELETE) { + if (node->elms[i].subtree_count <= 1) { + error = btree_rebalance_node(cursor); + if (error) + return(error); + /* cursor->index is invalid after call */ + goto new_cluster; + } + } + + /* + * Push down (push into new node, existing node becomes + * the parent). + */ + error = btree_cursor_down(cursor); + if (error) + return (error); + } + + + /* + * We are at a leaf, do a linear search of the key array. + * (XXX do a binary search). On success the index is set to the + * matching element, on failure the index is set to the insertion + * point. + * + * Boundaries are not stored in leaf nodes, so the index can wind + * up to the left of element 0 (index == 0) or past the end of + * the array (index == leaf->base.count). + */ + leaf = &cursor->node->leaf; + KKASSERT(leaf->base.count <= HAMMER_BTREE_LEAF_ELMS); + + for (i = 0; i < leaf->base.count; ++i) { + r = hammer_btree_cmp(key, &leaf->elms[i].base); + if (r < 0) + break; + if (r == 0) { + /* + * Return an exact match unless this is a cluster + * element. If it is, and the cluster tag flag has + * not been set, push into the new cluster and + * continue the search. + */ + cursor->index = i; + if ((leaf->elms[i].base.obj_type & + HAMMER_OBJTYPE_CLUSTER_FLAG) && + (flags & HAMMER_BTREE_CLUSTER_TAG) == 0) { + error = btree_cursor_down(cursor); + if (error) + return (error); + goto new_cluster; + } + return(0); + } + } + + /* + * We could not find an exact match. Check for a cluster + * recursion. The cluster's range is bracketed by two + * leaf elements. One of the two must be in this node. + */ + if ((flags & HAMMER_BTREE_CLUSTER_TAG) == 0) { + if (i == leaf->base.count) { + if (leaf->elms[i-1].base.obj_type & + HAMMER_OBJTYPE_CLUSTER_FLAG) { + cursor->index = i - 1; + error = btree_cursor_down(cursor); + if (error) + return (error); + goto new_cluster; + } + } else { + if (leaf->elms[i].base.obj_type & + HAMMER_OBJTYPE_CLUSTER_FLAG) { + cursor->index = i; + error = btree_cursor_down(cursor); + if (error) + return (error); + goto new_cluster; + } + } + } + + /* + * If inserting split a full leaf before returning. This + * may have the side effect of adjusting cursor->node and + * cursor->index. + * + * We delayed the split in order to avoid any unnecessary splits. + * + * XXX parent's parent's subtree_count will be wrong after + * this (keep parent of parent around too? ugh). 
+ */ + cursor->index = i; + if ((flags & HAMMER_BTREE_INSERT) && + leaf->base.count == HAMMER_BTREE_LEAF_ELMS) { + error = btree_split_leaf(cursor); + /* NOT USED + i = cursor->index; + node = &cursor->node->internal; + */ + if (error) + return(error); + } + return(ENOENT); +} + +/* + * Look up the key in the HAMMER BH-Tree and fill in the rest of the + * info structure with the results according to flags. 0 is returned on + * success, non-zero on error. + * + * The caller initializes (key, cluster) and makes the call. The cluster + * must be referenced and locked on call. The function can chain through + * multiple clusters and will replace the passed cluster as it does so, + * dereferencing and unlocking it, and referencing and locking the chain. + * On return info->cluster will still be referenced and locked but may + * represent a different cluster. + */ +int +hammer_btree_lookup(hammer_btree_info_t info, hammer_base_elm_t key, int flags) +{ + hammer_btree_leaf_elm_t elm; + struct hammer_cluster *cluster; + int32_t cloff; + int error; + int iscl; + + error = btree_search(&info->cursor, key, flags); + if (error) + return (error); + + /* + * Extract the record and data reference if requested. + * + * A cluster record type has no data reference, the information + * is stored directly in the record and BH-Tree element. + * + * The case where the data reference resolves to the same buffer + * as the record reference must be handled. + */ + elm = &info->cursor.node->leaf.elms[info->cursor.index]; + iscl = (elm->base.obj_type & HAMMER_OBJTYPE_CLUSTER_FLAG) != 0; + cluster = info->cursor.cluster; + if ((flags & HAMMER_BTREE_GET_RECORD) && error == 0) { + cloff = iscl ? elm->cluster.rec_offset : elm->record.rec_offset; + + + info->rec = hammer_bread(cluster, cloff, HAMMER_FSBUF_RECORDS, + &error, &info->record_buffer); + } else { + cloff = 0; + } + if ((flags & HAMMER_BTREE_GET_DATA) && iscl == 0 && error == 0) { + if ((cloff ^ elm->record.data_offset) & ~HAMMER_BUFMASK) { + info->data = hammer_bread(cluster, + elm->record.data_offset, + HAMMER_FSBUF_DATA, + &error, &info->record_buffer); + } else { + info->data = (void *) + ((char *)info->data_buffer->ondisk + + (elm->record.data_offset & HAMMER_BUFMASK)); + } + } + return(error); +} + + +/* + * Insert a record into a BH-Tree's cluster. The caller has already + * reserved space for the record and data and must handle a ENOSPC + * return. + */ +int +hammer_btree_insert(struct hammer_btree_info *info, hammer_btree_leaf_elm_t elm) +{ + struct hammer_btree_cursor *cursor; + struct hammer_btree_internal_node *parent; + struct hammer_btree_leaf_node *leaf; + int error; + int i; + + /* + * Issue a search to get our cursor at the right place. The search + * will get us to a leaf node. + * + * The search also does some setup for our insert, so there is always + * room in the leaf. + */ + error = btree_search(&info->cursor, &elm->base, HAMMER_BTREE_INSERT); + if (error != ENOENT) { + if (error == 0) + error = EEXIST; + return (error); + } + + /* + * Insert the element at the leaf node and update the count in the + * parent. It is possible for parent to be NULL, indicating that + * the root of the BH-Tree in the cluster is a leaf. It is also + * possible for the leaf to be empty. + * + * Remember that the right-hand boundary is not included in the + * count. 
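+ *
+ * For example, inserting at index 2 of a leaf that currently holds
+ * four elements shifts elements 2 and 3 up one slot; the new element
+ * then occupies index 2 and base.count becomes 5.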
+ */ + cursor = &info->cursor; + leaf = &cursor->node->leaf; + i = cursor->index; + KKASSERT(leaf->base.count < HAMMER_BTREE_LEAF_ELMS); + bcopy(&leaf->elms[i], &leaf->elms[i+1], + (leaf->base.count - i + 1) * sizeof(leaf->elms[0])); + leaf->elms[i] = *elm; + ++leaf->base.count; + + if ((parent = cursor->parent) != NULL) { + i = cursor->parent_index; + ++parent->elms[i].subtree_count; + KKASSERT(parent->elms[i].subtree_count <= leaf->base.count); + hammer_modify_buffer(cursor->parent_buffer); + } + hammer_modify_buffer(cursor->node_buffer); + return(0); +} + +/* + * Delete a record from the BH-Tree. + */ +int +hammer_btree_delete(struct hammer_btree_info *info, hammer_base_elm_t key) +{ + struct hammer_btree_cursor *cursor; + struct hammer_btree_internal_node *parent; + struct hammer_btree_leaf_node *leaf; + int error; + int i; + + /* + * Locate the leaf element to delete. The search is also responsible + * for doing some of the rebalancing work on its way down. + */ + error = btree_search(&info->cursor, key, HAMMER_BTREE_DELETE); + if (error) + return (error); + + /* + * Delete the element from the leaf node. We leave empty leafs alone + * and instead depend on a future search to locate and destroy them. + * Otherwise we would have to recurse back up the tree to adjust + * the parent's subtree_count and we do not want to do that. + * + * Remember that the right-hand boundary is not included in the + * count. + */ + cursor = &info->cursor; + leaf = &cursor->node->leaf; + i = cursor->index; + + KKASSERT(cursor->node->base.type == HAMMER_BTREE_LEAF_NODE); + bcopy(&leaf->elms[i+1], &leaf->elms[i], + (leaf->base.count - i) * sizeof(leaf->elms[0])); + --leaf->base.count; + if ((parent = cursor->parent) != NULL) { + /* + * Adjust parent's notion of the leaf's count. subtree_count + * is only approximately, it is allowed to be too small but + * never allowed to be too large. Make sure we don't drop + * the count below 0. + */ + i = cursor->parent_index; + if (parent->elms[i].subtree_count) + --parent->elms[i].subtree_count; + KKASSERT(parent->elms[i].subtree_count <= leaf->base.count); + hammer_modify_buffer(cursor->parent_buffer); + } + hammer_modify_buffer(cursor->node_buffer); + return(0); +} + +/************************************************************************ + * CURSOR SUPPORT * + ************************************************************************ + * + * These routines do basic cursor operations. This support will probably + * be expanded in the future to add link fields for linear scans. + */ + +/* + * Unconditionally set the cursor to the root of the specified cluster. + * The current cursor node is set to the root node of the cluster (which + * can be an internal node or a degenerate leaf), and the parent info + * is cleaned up and cleared. + * + * The passed cluster must be locked. This function will add a reference + * to it. The cursor must already have a cluster assigned to it, which we + * will replace. 
+ */ +static +int +btree_cursor_set_cluster_by_value(hammer_btree_cursor_t cursor, + int32_t vol_no, int32_t clu_no, hammer_tid_t clu_id) +{ + struct hammer_cluster *cluster; + struct hammer_volume *volume; + int error = 0; + + if (vol_no < 0) + return(EIO); + cluster = cursor->cluster; + KKASSERT(cluster != NULL); + if (vol_no == cluster->volume->vol_no) { + cluster = hammer_get_cluster(cluster->volume, clu_no, + &error, 0); + } else { + volume = hammer_get_volume(cluster->volume->hmp, + vol_no, &error); + if (volume) { + cluster = hammer_get_cluster(volume, clu_no, + &error, 0); + hammer_put_volume(volume); + } else { + cluster = NULL; + } + } + + /* + * Make sure the cluster id matches. XXX At the moment the + * clu_id in the btree cluster element is only 32 bits, so only + * compare the low 32 bits. + */ + if (cluster) { + if ((int32_t)cluster->ondisk->clu_id == (int32_t)clu_id) { + btree_cursor_set_cluster(cursor, cluster); + } else { + error = EIO; + } + hammer_put_cluster(cluster); + } + return (error); +} + +static +int +btree_cursor_set_cluster(hammer_btree_cursor_t cursor, + struct hammer_cluster *cluster) +{ + int error = 0; + + hammer_dup_cluster(&cursor->cluster, cluster); + cursor->node = hammer_bread(cluster, + cluster->ondisk->clu_btree_root, + HAMMER_FSBUF_BTREE, + &error, + &cursor->node_buffer); + cursor->index = 0; + if (cursor->parent) { + hammer_put_buffer(cursor->parent_buffer); + cursor->parent_buffer = NULL; + cursor->parent = NULL; + cursor->parent_index = 0; + } + cursor->left_bound = &cluster->ondisk->clu_btree_beg; + cursor->right_bound = &cluster->ondisk->clu_btree_end; + return (error); +} + +/* + * Cursor the node up to the parent node. If at the root of a cluster, + * cursor to the root of the cluster's parent cluster. Note carefully + * that we do NOT scan the parent cluster to find the leaf that dropped + * into our current cluster. + * + * This function is primarily used by the search code to avoid having + * to search from the root of the filesystem BH-Tree. + */ +static +int +btree_cursor_up(hammer_btree_cursor_t cursor) +{ + struct hammer_cluster_ondisk *ondisk; + struct hammer_btree_internal_node *parent; + int error; + int i; + int r; + + error = 0; + if (cursor->parent == NULL) { + /* + * We are at the root of the cluster, pop up to the root + * of the parent cluster. Return EIO if we are at the + * root cluster of the filesystem. + */ + ondisk = cursor->cluster->ondisk; + error = btree_cursor_set_cluster_by_value( + cursor, + ondisk->clu_btree_parent_vol_no, + ondisk->clu_btree_parent_clu_no, + ondisk->clu_btree_parent_clu_id); + } else { + /* + * Copy the current node's parent info into the node and load + * a new parent. + */ + cursor->index = cursor->parent_index; + cursor->node = (hammer_btree_node_t)cursor->parent; + hammer_dup_buffer(&cursor->node_buffer, cursor->parent_buffer); + + /* + * Load the parent's parent into parent and figure out the + * parent's element index for its child. Just NULL it out + * if we hit the root of the cluster. 
+ */ + if (cursor->parent->base.parent) { + parent = hammer_bread(cursor->cluster, + cursor->node->base.parent, + HAMMER_FSBUF_BTREE, + &error, + &cursor->parent_buffer); + for (i = 0; i < parent->base.count; ++i) { + r = hammer_btree_cmp( + &cursor->node->internal.elms[0].base, + &parent->elms[i].base); + if (r < 0) + break; + } + cursor->parent = parent; + cursor->parent_index = i - 1; + KKASSERT(parent->elms[i].subtree_offset == + hammer_bclu_offset(cursor->node_buffer, + cursor->node)); + } else { + hammer_put_buffer(cursor->parent_buffer); + cursor->parent = NULL; + cursor->parent_buffer = NULL; + cursor->parent_index = 0; + } + } + return(error); +} + +/* + * Cursor down into (node, index) + * + * Push down into the current cursor. The current cursor becomes the parent. + * If the current cursor represents a leaf cluster element this function will + * push into the root of a new cluster and clear the parent fields. + * + * Pushin down at a leaf which is not a cluster element returns EIO. + */ +static +int +btree_cursor_down(hammer_btree_cursor_t cursor) +{ + hammer_btree_node_t node; + int error; + + node = cursor->node; + if (node->base.type == HAMMER_BTREE_LEAF_NODE) { + /* + * Push into another cluster + */ + hammer_btree_leaf_elm_t elm; + + elm = &node->leaf.elms[cursor->index]; + if (elm->base.rec_type == HAMMER_RECTYPE_CLUSTER) { + error = btree_cursor_set_cluster_by_value( + cursor, + elm->cluster.vol_no, + elm->cluster.clu_no, + elm->cluster.verifier); + } else { + error = EIO; + } + } else { + /* + * Push into another BH-Tree node (internal or leaf) + */ + struct hammer_btree_internal_elm *elm; + + KKASSERT(node->base.type == HAMMER_BTREE_INTERNAL_NODE); + elm = &node->internal.elms[cursor->index]; + KKASSERT(elm->subtree_offset != 0); + + cursor->parent_index = cursor->index; + cursor->parent = &cursor->node->internal; + hammer_dup_buffer(&cursor->parent_buffer, cursor->node_buffer); + + cursor->node = hammer_bread(cursor->cluster, + elm->subtree_offset, + HAMMER_FSBUF_BTREE, + &error, + &cursor->node_buffer); + cursor->index = 0; + KKASSERT(cursor->node == NULL || + cursor->node->base.parent == elm->subtree_offset); + } + return(error); +} + +/************************************************************************ + * SPLITTING AND MERGING * + ************************************************************************ + * + * These routines do all the dirty work required to split and merge nodes. + */ + +/* + * Split an internal into two nodes and move the separator at the split + * point to the parent. Note that the parent's parent's element pointing + * to our parent will have an incorrect subtree_count (we don't update it). + * It will be low, which is ok. + * + * Cursor->index indicates the element the caller intends to push into. + * We will adjust cursor->node and cursor->index if that element winds + * up in the split node. + */ +static +int +btree_split_internal(hammer_btree_cursor_t cursor) +{ + struct hammer_btree_internal_node *parent; + struct hammer_btree_internal_node *node; + struct hammer_btree_internal_node *new_node; + struct hammer_btree_internal_elm *elm; + struct hammer_btree_internal_elm *parent_elm; + struct hammer_buffer *new_buffer; + int32_t parent_offset; + int parent_index; + int made_root; + int split; + int error; + const size_t esize = sizeof(struct hammer_btree_internal_elm); + + /* + * We are splitting but elms[split] will be promoted to the parent, + * leaving the right hand node with one less element. 
If the + * insertion point will be on the left-hand side adjust the split + * point to give the right hand side one additional node. + */ + node = &cursor->node->internal; + split = (node->base.count + 1) / 2; + if (cursor->index <= split) + --split; + error = 0; + + /* + * If we are at the root of the tree, create a new root node with + * 1 element and split normally. Avoid making major modifications + * until we know the whole operation will work. + */ + parent = cursor->parent; + if (parent == NULL) { + parent = hammer_alloc_btree(cursor->cluster, &error, + &cursor->parent_buffer); + if (parent == NULL) + return(error); + parent->base.count = 1; + parent->base.parent = 0; + parent->base.type = HAMMER_BTREE_INTERNAL_NODE; + parent->base.subtype = node->base.type; + parent->elms[0].base = cursor->cluster->ondisk->clu_btree_beg; + parent->elms[0].subtree_offset = + hammer_bclu_offset(cursor->node_buffer, node); + parent->elms[1].base = cursor->cluster->ondisk->clu_btree_end; + made_root = 1; + cursor->parent_index = 0; /* insertion point in parent */ + } else { + made_root = 0; + } + parent_offset = hammer_bclu_offset(cursor->parent_buffer, parent); + + /* + * Split node into new_node at the split point. + * + * B O O O P N N B <-- P = node->elms[split] + * 0 1 2 3 4 5 6 <-- subtree indices + * + * x x P x x + * s S S s + * / \ + * B O O O B B N N B <--- inner boundary points are 'P' + * 0 1 2 3 4 5 6 + * + */ + new_buffer = NULL; + new_node = hammer_alloc_btree(cursor->cluster, &error, &new_buffer); + if (new_node == NULL) { + if (made_root) + hammer_free_btree_ptr(cursor->parent_buffer, + (hammer_btree_node_t)parent); + return(error); + } + + /* + * Create the new node. P become the left-hand boundary in the + * new node. Copy the right-hand boundary as well. + * + * elm is the new separator. + */ + elm = &node->elms[split]; + bcopy(elm, &new_node->elms[0], (node->base.count - split + 1) * esize); + new_node->base.count = node->base.count - split; + new_node->base.parent = parent_offset; + new_node->base.type = HAMMER_BTREE_INTERNAL_NODE; + new_node->base.subtype = node->base.subtype; + KKASSERT(node->base.type == new_node->base.type); + + /* + * Cleanup the original node. P becomes the new boundary, its + * subtree_offset was moved to the new node. If we had created + * a new root its parent pointer may have changed. + */ + node->base.parent = parent_offset; + elm->subtree_offset = 0; + + /* + * Insert the separator into the parent, fixup the parent's + * reference to the original node, and reference the new node. + * The separator is P. + * + * Remember that base.count does not include the right-hand boundary. + */ + parent_index = cursor->parent_index; + parent->elms[parent_index].subtree_count = split; + parent_elm = &parent->elms[parent_index+1]; + bcopy(parent_elm, parent_elm + 1, + (parent->base.count - parent_index) * esize); + parent_elm->base = elm->base; /* separator P */ + parent_elm->subtree_offset = hammer_bclu_offset(new_buffer, new_node); + parent_elm->subtree_count = new_node->base.count; + + hammer_modify_buffer(new_buffer); + hammer_modify_buffer(cursor->parent_buffer); + hammer_modify_buffer(cursor->node_buffer); + + /* + * The cluster's root pointer may have to be updated. + */ + if (made_root) { + cursor->cluster->ondisk->clu_btree_root = node->base.parent; + hammer_modify_cluster(cursor->cluster); + } + + /* + * Ok, now adjust the cursor depending on which element the original + * index was pointing at. 
If we are >= the split point the push node + * is now in the new node. + * + * NOTE: If we are at the split point itself we cannot stay with the + * original node because the push index will point at the right-hand + * boundary, which is illegal. + */ + if (cursor->index >= split) { + cursor->index -= split; + cursor->node = (hammer_btree_node_t)new_node; + hammer_put_buffer(cursor->node_buffer); + cursor->node_buffer = new_buffer; + } + + return (0); +} + +/* + * Same as the above, but splits a full leaf node. + */ +static +int +btree_split_leaf(hammer_btree_cursor_t cursor) +{ + struct hammer_btree_internal_node *parent; + struct hammer_btree_leaf_node *leaf; + struct hammer_btree_leaf_node *new_leaf; + union hammer_btree_leaf_elm *elm; + struct hammer_btree_internal_elm *parent_elm; + struct hammer_buffer *new_buffer; + int32_t parent_offset; + int parent_index; + int made_root; + int split; + int error; + const size_t esize = sizeof(struct hammer_btree_internal_elm); + + /* + * We are splitting but elms[split] will be promoted to the parent, + * leaving the right hand node with one less element. If the + * insertion point will be on the left-hand side adjust the split + * point to give the right hand side one additional node. + */ + leaf = &cursor->node->leaf; + split = (leaf->base.count + 1) / 2; + if (cursor->index <= split) + --split; + error = 0; + + /* + * If we are at the root of the tree, create a new root node with + * 1 element and split normally. Avoid making major modifications + * until we know the whole operation will work. + */ + parent = cursor->parent; + if (parent == NULL) { + parent = hammer_alloc_btree(cursor->cluster, &error, + &cursor->parent_buffer); + if (parent == NULL) + return(error); + parent->base.count = 1; + parent->base.parent = 0; + parent->base.type = HAMMER_BTREE_INTERNAL_NODE; + parent->base.subtype = leaf->base.type; + parent->elms[0].base = cursor->cluster->ondisk->clu_btree_beg; + parent->elms[0].subtree_offset = + hammer_bclu_offset(cursor->node_buffer, leaf); + parent->elms[1].base = cursor->cluster->ondisk->clu_btree_end; + made_root = 1; + cursor->parent_index = 0; /* insertion point in parent */ + } else { + made_root = 0; + } + parent_offset = hammer_bclu_offset(cursor->parent_buffer, parent); + + /* + * Split leaf into new_leaf at the split point. Select a separator + * value in-between the two leafs but with a bent towards the right + * leaf since comparisons use an 'elm >= separator' inequality. + * + * L L L L L L L L + * + * x x P x x + * s S S s + * / \ + * L L L L L L L L + */ + new_buffer = NULL; + new_leaf = hammer_alloc_btree(cursor->cluster, &error, &new_buffer); + if (new_leaf == NULL) { + if (made_root) + hammer_free_btree_ptr(cursor->parent_buffer, + (hammer_btree_node_t)parent); + return(error); + } + + /* + * Create the new node. P become the left-hand boundary in the + * new node. Copy the right-hand boundary as well. + */ + elm = &leaf->elms[split]; + bcopy(elm, &new_leaf->elms[0], (leaf->base.count - split) * esize); + new_leaf->base.count = leaf->base.count - split; + new_leaf->base.parent = parent_offset; + new_leaf->base.type = HAMMER_BTREE_LEAF_NODE; + new_leaf->base.subtype = 0; + KKASSERT(leaf->base.type == new_leaf->base.type); + + /* + * Cleanup the original node. P becomes the new boundary, its + * subtree_offset was moved to the new node. If we had created + * a new root its parent pointer may have changed. 
+ */ + leaf->base.parent = parent_offset; + + /* + * Insert the separator into the parent, fixup the parent's + * reference to the original node, and reference the new node. + * The separator is P. + * + * Remember that base.count does not include the right-hand boundary. + * We are copying parent_index+1 to parent_index+2, not +0 to +1. + */ + parent_index = cursor->parent_index; + parent->elms[parent_index].subtree_count = split; + parent_elm = &parent->elms[parent_index+1]; + if (parent_index + 1 != parent->base.count) { + bcopy(parent_elm, parent_elm + 1, + (parent->base.count - parent_index - 1) * esize); + } + hammer_make_separator(&elm[-1].base, &elm[0].base, &parent_elm->base); + parent_elm->subtree_offset = hammer_bclu_offset(new_buffer, new_leaf); + parent_elm->subtree_count = new_leaf->base.count; + + hammer_modify_buffer(new_buffer); + hammer_modify_buffer(cursor->parent_buffer); + hammer_modify_buffer(cursor->node_buffer); + + /* + * The cluster's root pointer may have to be updated. + */ + if (made_root) { + cursor->cluster->ondisk->clu_btree_root = leaf->base.parent; + hammer_modify_cluster(cursor->cluster); + } + + /* + * Ok, now adjust the cursor depending on which element the original + * index was pointing at. If we are >= the split point the push node + * is now in the new node. + * + * NOTE: If we are at the split point itself we cannot stay with the + * original node because the push index will point at the right-hand + * boundary, which is illegal. + */ + if (cursor->index >= split) { + cursor->index -= split; + cursor->node = (hammer_btree_node_t)new_leaf; + hammer_put_buffer(cursor->node_buffer); + cursor->node_buffer = new_buffer; + } + + return (0); +} + +/* + * This routine is called on an internal node prior to recursing down + * through (node, index) when (node, index) MIGHT have too few elements for + * the caller to perform a deletion. + * + * cursor->index is invalid on return because the separators may have gotten + * adjusted, the caller must rescan the node's elements. The caller may set + * cursor->index to -1 if it wants us to do a general rebalancing. + * + * NOTE: Because we do not update the parent's parent in the split code, + * the subtree_count used by the caller may be incorrect. We correct it + * here. Also note that we cannot change the depth of the tree's leaf + * nodes here (see btree_collapse()). + * + * This routine rebalances the children of the node, collapsing children + * together if possible. On return each child will have at least L/2-1 + * elements unless the node only has one child. + */ +static +int +btree_rebalance_node(hammer_btree_cursor_t cursor) +{ + struct hammer_btree_internal_node *node; + hammer_btree_node_t children[HAMMER_BTREE_INT_ELMS]; + struct hammer_buffer *child_buffer[HAMMER_BTREE_INT_ELMS]; + hammer_btree_node_t child; + hammer_btree_elm_inmemory_t elms; + int i, j, n, nelms, goal; + int maxelms, halfelms; + int error; + + /* + * Basic setup + */ + node = &cursor->node->internal; + KKASSERT(node->elms[cursor->index].subtree_offset != 0); + error = 0; + + /* + * Load the children of node and do any necessary corrections + * to subtree_count. subtree_count may be incorrect due to the + * way insertions split nodes. Get a count of the total number + * of elements held by our children. 
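+ *
+ * For example, if an earlier split left elms[2].subtree_count at 3
+ * while that child node actually holds 5 elements, the loop below
+ * corrects the count to 5 before the totals are summed.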
+ */ + error = 0; + + for (i = n = 0; i < node->base.count; ++i) { + struct hammer_btree_internal_elm *elm; + + elm = &node->elms[i]; + children[i] = NULL; + child_buffer[i] = NULL; /* must be preinitialized for bread */ + if (elm->subtree_offset == 0) + continue; + child = hammer_bread(cursor->cluster, elm->subtree_offset, + HAMMER_FSBUF_BTREE, &error, + &child_buffer[i]); + children[i] = child; + if (child == NULL) + continue; + KKASSERT(node->base.subtype == child->base.type); + + /* + * Accumulate n for a good child, update the node's count + * if it was wrong. + */ + if (node->elms[i].subtree_count != child->base.count) { + node->elms[i].subtree_count = child->base.count; + } + n += node->elms[i].subtree_count; + } + if (error) + goto failed; + + /* + * Collect all the children's elements together + */ + nelms = n; + elms = kmalloc(sizeof(*elms) * (nelms + 1), M_HAMMER, M_WAITOK|M_ZERO); + for (i = n = 0; i < node->base.count; ++i) { + child = children[i]; + for (j = 0; j < child->base.count; ++j) { + elms[n].owner = child; + if (node->base.subtype == HAMMER_BTREE_LEAF_NODE) + elms[n].u.leaf = child->leaf.elms[j]; + else + elms[n].u.internal = child->internal.elms[j]; + ++n; + } + } + KKASSERT(n == nelms); + + /* + * Store a boundary in the elms array to ease the code below. This + * is only used if the children are internal nodes. + */ + elms[n].u.internal = node->elms[i]; + + /* + * Calculate the number of elements each child should have (goal) by + * reducing the number of elements until we achieve at least + * halfelms - 1 per child, unless we are a degenerate case. + */ + maxelms = btree_max_elements(node->base.subtype); + halfelms = maxelms / 2; + + goal = halfelms - 1; + while (i && n / i < goal) + --i; + + /* + * Now rebalance using the specified goal + */ + for (i = n = 0; i < node->base.count; ++i) { + struct hammer_buffer *subchild_buffer = NULL; + struct hammer_btree_internal_node *subchild; + + child = children[i]; + for (j = 0; j < goal && n < nelms; ++j) { + if (node->base.subtype == HAMMER_BTREE_LEAF_NODE) { + child->leaf.elms[j] = elms[n].u.leaf; + } else { + child->internal.elms[j] = elms[n].u.internal; + } + + /* + * If the element's parent has changed we have to + * update the parent pointer. This is somewhat + * expensive. + */ + if (elms[n].owner != child && + node->base.subtype == HAMMER_BTREE_INTERNAL_NODE) { + subchild = hammer_bread(cursor->cluster, + elms[n].u.internal.subtree_offset, + HAMMER_FSBUF_BTREE, + &error, + &subchild_buffer); + if (subchild) { + subchild->base.parent = + hammer_bclu_offset(child_buffer[i], + child); + hammer_modify_buffer(subchild_buffer); + } + /* XXX error */ + } + ++n; + } + /* + * Set right boundary if the children are internal nodes. + */ + if (node->base.subtype == HAMMER_BTREE_INTERNAL_NODE) + child->internal.elms[j] = elms[n].u.internal; + child->base.count = j; + hammer_modify_buffer(child_buffer[i]); + if (subchild_buffer) + hammer_put_buffer(subchild_buffer); + + /* + * If we have run out of elements, break out + */ + if (n == nelms) + break; + } + + /* + * Physically destroy any left-over children. These children's + * elements have been packed into prior children. The node's + * right hand boundary and count gets shifted to index i. + * + * The subtree count in the node's parent MUST be updated because + * we are removing elements. The subtree_count field is allowed to + * be too small, but not too large! 
+ */ + if (i != node->base.count) { + n = i; + node->elms[n] = node->elms[node->base.count]; + while (i < node->base.count) { + hammer_free_btree_ptr(child_buffer[i], children[i]); + hammer_put_buffer(child_buffer[i]); + ++i; + } + node->base.count = n; + if (cursor->parent) { + cursor->parent->elms[cursor->parent_index].subtree_count = n; + hammer_modify_buffer(cursor->parent_buffer); + } + } + + kfree(elms, M_HAMMER); +failed: + hammer_modify_buffer(cursor->node_buffer); + for (i = 0; i < node->base.count; ++i) { + if (child_buffer[i]) + hammer_put_buffer(child_buffer[i]); + } + return (error); +} + +/* + * This routine is only called if the cursor is at the root node and the + * root node is an internal node. We attempt to collapse the root node + * by replacing it with all of its children, reducing tree depth by one. + * + * This is the only way to reduce tree depth in a HAMMER filesystem. + * Note that all leaf nodes are at the same depth. + * + * This is a fairly expensive operation because we not only have to load + * the root's children, we also have to scan each child and adjust the + * parent offset for each element in each child. Nasty all around. + */ +static +int +btree_collapse(hammer_btree_cursor_t cursor) +{ + hammer_btree_node_t root, child; + hammer_btree_node_t children[HAMMER_BTREE_INT_ELMS]; + struct hammer_buffer *child_buffer[HAMMER_BTREE_INT_ELMS]; + int count; + int i, j, n; + int root_modified; + int error; + int32_t root_offset; + u_int8_t subsubtype; + + root = cursor->node; + count = root->base.count; + root_offset = hammer_bclu_offset(cursor->node_buffer, root); + + /* + * Sum up the number of children each element has. This value is + * only approximate due to the way the insertion node works. It + * may be too small but it will never be too large. + * + * Quickly terminate the collapse if the elements have too many + * children. + */ + KKASSERT(root->base.parent == 0); /* must be root node */ + KKASSERT(root->base.type == HAMMER_BTREE_INTERNAL_NODE); + KKASSERT(count <= HAMMER_BTREE_INT_ELMS); + + for (i = n = 0; i < count; ++i) { + n += root->internal.elms[i].subtree_count; + } + if (n > btree_max_elements(root->base.subtype)) + return(0); + + /* + * Iterate through the elements again and correct the subtree_count. + * Terminate the collapse if we wind up with too many. + */ + error = 0; + root_modified = 0; + + for (i = n = 0; i < count; ++i) { + struct hammer_btree_internal_elm *elm; + + elm = &root->internal.elms[i]; + child_buffer[i] = NULL; + children[i] = NULL; + if (elm->subtree_offset == 0) + continue; + child = hammer_bread(cursor->cluster, elm->subtree_offset, + HAMMER_FSBUF_BTREE, &error, + &child_buffer[i]); + children[i] = child; + if (child == NULL) + continue; + KKASSERT(root->base.subtype == child->base.type); + + /* + * Accumulate n for a good child, update the root's count + * if it was wrong. + */ + if (root->internal.elms[i].subtree_count != child->base.count) { + root->internal.elms[i].subtree_count = child->base.count; + root_modified = 1; + } + n += root->internal.elms[i].subtree_count; + } + if (error || n > btree_max_elements(root->base.subtype)) + goto done; + + /* + * Ok, we can collapse the root. If the root's children are leafs + * the collapse is really simple. If they are internal nodes the + * collapse is not so simple because we have to fixup the parent + * pointers for the root's children's children. 
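+ *
+ * For example, a root with three leaf children holding 4, 2 and 3
+ * records (and HAMMER_BTREE_LEAF_ELMS of at least 9) collapses into a
+ * single 9 element leaf; the checks above bail out quietly when the
+ * children's elements do not fit.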
+ * + * When collapsing an internal node the far left and far right + * element's boundaries should match the root's left and right + * boundaries. + */ + if (root->base.subtype == HAMMER_BTREE_LEAF_NODE) { + for (i = n = 0; i < count; ++i) { + child = children[i]; + for (j = 0; j < child->base.count; ++j) { + root->leaf.elms[n] = child->leaf.elms[j]; + ++n; + } + } + root->base.type = root->base.subtype; + root->base.subtype = 0; + root->base.count = n; + root->leaf.link_left = 0; + root->leaf.link_right = 0; + } else { + struct hammer_btree_internal_elm *elm; + struct hammer_btree_internal_node *subchild; + struct hammer_buffer *subchild_buffer = NULL; + + if (count) { + child = children[0]; + subsubtype = child->base.subtype; + KKASSERT(child->base.count > 0); + KKASSERT(root->internal.elms[0].base.key == + child->internal.elms[0].base.key); + child = children[count-1]; + KKASSERT(child->base.count > 0); + KKASSERT(root->internal.elms[count].base.key == + child->internal.elms[child->base.count].base.key); + } else { + subsubtype = 0; + } + for (i = n = 0; i < count; ++i) { + child = children[i]; + KKASSERT(child->base.subtype == subsubtype); + for (j = 0; j < child->base.count; ++j) { + elm = &child->internal.elms[j]; + + root->internal.elms[n] = *elm; + subchild = hammer_bread(cursor->cluster, + elm->subtree_offset, + HAMMER_FSBUF_BTREE, + &error, + &subchild_buffer); + if (subchild) { + subchild->base.parent = root_offset; + hammer_modify_buffer(subchild_buffer); + } + ++n; + } + /* make sure the right boundary is correct */ + /* (this gets overwritten when the loop continues) */ + /* XXX generate a new separator? */ + root->internal.elms[n] = child->internal.elms[j]; + } + root->base.type = HAMMER_BTREE_INTERNAL_NODE; + root->base.subtype = subsubtype; + if (subchild_buffer) + hammer_put_buffer(subchild_buffer); + } + root_modified = 1; + + /* + * Cleanup + */ +done: + if (root_modified) + hammer_modify_buffer(cursor->node_buffer); + for (i = 0; i < count; ++i) { + if (child_buffer[i]) + hammer_put_buffer(child_buffer[i]); + } + return(error); +} + diff --git a/sys/vfs/hammer/hammer_btree.h b/sys/vfs/hammer/hammer_btree.h index be6aebe197..76bafd1f3e 100644 --- a/sys/vfs/hammer/hammer_btree.h +++ b/sys/vfs/hammer/hammer_btree.h @@ -31,18 +31,44 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.2 2007/10/12 18:57:45 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.3 2007/11/01 20:53:05 dillon Exp $ */ /* - * HAMMER B-Tree index + * HAMMER BH-Tree index * - * rec_type is the record type for a leaf node or an extended record type - * for internal btree nodes and cluster references. + * HAMMER implements a modified B+Tree. In all documentation this will + * simply be refered to as the HAMMER BH-Tree. Basically la BH-Tree + * looks like a B+Tree (A B-Tree which stores its records only at the leafs + * of the tree), but adds two additional boundary elements which describe + * the left-most and right-most element a node is able to represent. In + * otherwords, we have boundary elements at the two ends of a BH-Tree node + * instead of sub-tree pointers. * - * A B-Tree diving down into a new cluster will have two B-Tree elements - * indicating the full sub-range stored in that cluster. These elements - * will match the elements stored in the cluster header. 
+ * A BH-Tree internal node looks like this: + * + * B N N N N N N B <-- boundary and internal elements + * S S S S S S S <-- subtree pointers + * + * A BH-Tree leaf node looks like this: + * + * L L L L L L L L <-- leaf elemenets + * (there is also a previous and next-leaf pointer) + * + * The recursion radix is reduced by 2 relative to a normal B-Tree but + * as a bonus we treat internal nodes completely differently from leaf nodes, + * which allows us to pack in more elements. + * + * The big benefit to using a BH-Tree is that it is possible to cache + * pointers into the middle of the tree and not have to start searches, + * insertions, OR deletions at the root node. In particular, searches are + * able to progress in a definitive direction from any point in the tree + * without revisting nodes. This greatly improves the efficiency of many + * operations, most especially record appends. + * + * BH-Trees also make stacking of trees fairly straightforward. + * + * Most of the structures below are on-disk structures. */ /* @@ -52,6 +78,9 @@ * an inode or a cluster range. A cluster range is special in that the * B-Tree nodes represent a range within the B-Tree inclusive of rec_type * field, so obj_type must be used to detect the cluster range entries. + * + * The special field is used as a subtree pointer for internal nodes, + * and reserved for leaf nodes. */ struct hammer_base_elm { int64_t obj_id; /* 00 object record is associated with */ @@ -62,14 +91,30 @@ struct hammer_base_elm { u_int16_t rec_type; /* 20 */ u_int16_t obj_type; /* 22 (special) */ - int32_t subtree_offset; /* 24 (B+Tree recursion) */ + int32_t reserved07; /* 24 (future) */ /* 28 */ }; +typedef struct hammer_base_elm *hammer_base_elm_t; + +/* + * Internal element - 48 bytes. Internal elements are independantly + * organized. Note that internal elements are a different size than + * leaf elements. + */ +struct hammer_btree_internal_elm { + struct hammer_base_elm base; + int32_t subtree_offset; + int8_t subtree_count; /* hint: can be too small, but not too big */ + int8_t reserved01; + int8_t reserved02; + int8_t reserved03; +}; + /* * Leaf B-Tree element (40 + 16 = 56 bytes) */ -struct hammer_record_elm { +struct hammer_btree_record_elm { struct hammer_base_elm base; int32_t rec_offset; int32_t data_offset; @@ -80,57 +125,172 @@ struct hammer_record_elm { /* * Leaf Cluster-reference B-Tree element (40 + 16 = 56 bytes) */ -struct hammer_cluster_elm { +struct hammer_btree_cluster_elm { struct hammer_base_elm base; int32_t rec_offset; /* cluster recursion record */ u_int32_t verifier; /* low 32 bits of target clu_id */ int32_t vol_no; - int32_t cluster_no; + int32_t clu_no; }; /* - * Rollup btree element types - 56 byte structure + * Rollup btree leaf element types - 56 byte structure */ -union hammer_btree_elm { +union hammer_btree_leaf_elm { struct hammer_base_elm base; - struct hammer_record_elm record; - struct hammer_cluster_elm cluster; + struct hammer_btree_record_elm record; + struct hammer_btree_cluster_elm cluster; }; +typedef union hammer_btree_leaf_elm *hammer_btree_leaf_elm_t; + /* * B-Tree node (normal or meta) - 56 + 56 * 8 = 504 bytes * * 32 B-Tree nodes fit in a 16K filesystem buffer. Remember, the filesystem * buffer has a 128 byte header so (16384 - 128) / 32 = 508, but we want * our structures to be 8-byte aligned so we use 504. + * + * We store B-Tree records as elements in internal nodes, which means we + * must have a separate index of sub-tree pointers. 
The sub-tree pointers + * interlace the elements (that is, the elements act as separators). Thus + * we have to have one more pointer then we do elements so we can represent + * the gamut - the sub-tree to the left of the first element AND the sub-tree + * to the right of the last element. + * + * subcount represents the number of elements in the subnode (1-8) + * + * 'count' always refers to the number of elements. + */ +#define HAMMER_BTREE_LEAF_ELMS 8 +#define HAMMER_BTREE_INT_ELMS 9 /* +1 more for the right boundary */ + +/* + * It is safe to combine two adjacent nodes if the total number of elements + * is less then or equal to the *_FILL constant. */ -#define HAMMER_BTREE_ELMS 8 +#define HAMMER_BTREE_LEAF_FILL (HAMMER_BTREE_LEAF_ELMS - 3) +#define HAMMER_BTREE_INT_FILL (HAMMER_BTREE_INT_ELMS - 3) + +#define HAMMER_BTREE_INTERNAL_NODE ((u_int32_t)'I') +#define HAMMER_BTREE_LEAF_NODE ((u_int32_t)'L') + +struct hammer_base_node { + int32_t count; + int32_t parent; + u_int8_t type; + u_int8_t subtype; + u_int16_t reserved02; + int32_t reserved03; +}; + +struct hammer_btree_internal_node { + /* + * B-Tree internal node header (24 bytes) + */ + struct hammer_base_node base; + int32_t reserved[2]; -struct hammer_btree_node { /* - * B-Tree node header (56 bytes) + * Internal element array. The subtree fields are logically to + * the right of the element key. These fields are unused and + * must be 0 for the right-hand boundary element. + * + * The left-hand boundary element is at elms[0], the right-hand + * boundary element is at elms[count]. count does not include + * the right-hand boundary (so count at the termination of a + * standard for() loop will point at the right-hand boundary). */ - int32_t count; /* number of elements in B-Tree node */ - int32_t parent; /* parent B-Tree node in current cluster */ - u_int32_t reserved[12]; + struct hammer_btree_internal_elm elms[HAMMER_BTREE_INT_ELMS+1]; +}; +/* + * The hammer leaf node contains a smaller header + */ +struct hammer_btree_leaf_node { /* - * 8 elements making up the B-Tree node 56x8 = 448 bytes + * B-Tree leaf node header */ - union hammer_btree_elm elms[HAMMER_BTREE_ELMS]; + struct hammer_base_node base; + int32_t link_left; + int32_t link_right; + int32_t reserved[8]; + + /* + * Leaf element array + */ + union hammer_btree_leaf_elm elms[HAMMER_BTREE_LEAF_ELMS]; +}; + +union hammer_btree_node { + struct hammer_base_node base; + struct hammer_btree_internal_node internal; + struct hammer_btree_leaf_node leaf; }; +typedef union hammer_btree_node *hammer_btree_node_t; + +/* + * This in-memory structure is used by btree_rebalance_node(). Note + * that internal elements are a different size than record and cluster + * elements. + */ +struct hammer_btree_elm_inmemory { + hammer_btree_node_t owner; + union { + struct hammer_base_elm base; + struct hammer_btree_internal_elm internal; + union hammer_btree_leaf_elm leaf; + } u; +}; + +typedef struct hammer_btree_elm_inmemory *hammer_btree_elm_inmemory_t; + + /* * B-Tree filesystem buffer */ #define HAMMER_BTREE_NODES \ ((HAMMER_BUFSIZE - sizeof(struct hammer_fsbuf_head)) / \ - sizeof(struct hammer_btree_node)) + sizeof(union hammer_btree_node)) struct hammer_fsbuf_btree { struct hammer_fsbuf_head head; char unused[128]; - struct hammer_btree_node nodes[HAMMER_BTREE_NODES]; + union hammer_btree_node nodes[HAMMER_BTREE_NODES]; +}; + +/* + * Key and caching structures used for HAMMER B-Tree lookups. These are + * in-memory structures. 
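+ *
+ * Roughly speaking, the cursor's node/index pair points at the current
+ * element, parent/parent_index points at the element in the parent
+ * internal node which references node, and left_bound/right_bound
+ * frame the key range the current node is allowed to represent.  The
+ * *_buffer fields hold references on the underlying filesystem
+ * buffers.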
+ */ +struct hammer_btree_cursor { + hammer_base_elm_t left_bound; + hammer_base_elm_t right_bound; + struct hammer_cluster *cluster; + struct hammer_buffer *parent_buffer; + struct hammer_btree_internal_node *parent; + struct hammer_buffer *node_buffer; + hammer_btree_node_t node; /* internal or leaf node */ + int index; + int parent_index; +}; + +typedef struct hammer_btree_cursor *hammer_btree_cursor_t; + +struct hammer_btree_info { + struct hammer_btree_cursor cursor; + struct hammer_buffer *data_buffer; + struct hammer_buffer *record_buffer; + union hammer_data_ondisk *data; /* returned data pointer */ + union hammer_record_ondisk *rec;/* returned record pointer */ }; +typedef struct hammer_btree_info *hammer_btree_info_t; + +#define HAMMER_BTREE_GET_RECORD 0x0001 +#define HAMMER_BTREE_GET_DATA 0x0002 +#define HAMMER_BTREE_CLUSTER_TAG 0x0004 /* stop at the cluster tag */ +#define HAMMER_BTREE_INSERT 0x0008 /* adjust for insert */ +#define HAMMER_BTREE_DELETE 0x0010 /* adjust for delete */ diff --git a/sys/vfs/hammer/hammer_disk.h b/sys/vfs/hammer/hammer_disk.h index 21804ebb03..63033f3246 100644 --- a/sys/vfs/hammer/hammer_disk.h +++ b/sys/vfs/hammer/hammer_disk.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.2 2007/10/16 18:16:42 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.3 2007/11/01 20:53:05 dillon Exp $ */ #ifndef _SYS_UUID_H_ @@ -175,9 +175,8 @@ struct hammer_volume_ondisk { * volume making up a HAMMER filesytem, but only the master volume * contains valid data. */ - int32_t vol0_rootcluster; /* root cluster no (index) in rootvol */ - u_int32_t vol0_reserved02; - u_int32_t vol0_reserved03; + int32_t vol0_root_clu_no; /* root cluster no (index) in rootvol */ + hammer_tid_t vol0_root_clu_id; /* root cluster id */ hammer_tid_t vol0_nexttid; /* next TID */ u_int64_t vol0_recid; /* fs-wide record id allocator */ @@ -198,8 +197,7 @@ struct hammer_volume_ondisk { #define HAMMER_VOLF_VALID 0x0001 /* valid entry */ #define HAMMER_VOLF_OPEN 0x0002 /* volume is open */ -#define HAMMER_VOLF_SUPERCL_ENABLE 0x0004 /* enable supercluster layer */ -#define HAMMER_VOLF_SUPERCL_RESERVE 0x0008 /* supercluster layout */ +#define HAMMER_VOLF_USINGSUPERCL 0x0004 /* using superclusters */ /* * HAMMER Super-cluster header @@ -338,20 +336,20 @@ struct hammer_cluster_ondisk { * Key comparison order: obj_id, rec_type, key, create_tid */ struct hammer_base_record { - int64_t obj_id; /* 00 object record is associated with */ - int64_t key; /* 08 indexing key (offset or namekey) */ - - hammer_tid_t create_tid;/* 10 transaction id for record creation */ - hammer_tid_t delete_tid;/* 18 transaction id for record update/delete */ + /* + * 40 byte base element info - same base as used in B-Tree internal + * and leaf node element arrays. + * + * Fields: obj_id, key, create_tid, delete_tid, rec_type, obj_type, + * reserved07. 
+ */ + struct hammer_base_elm base; /* 00 base element info */ - u_int16_t rec_type; /* 20 type of record */ - u_int16_t obj_type; /* 22 type of object (if inode) */ - u_int32_t data_offset; /* 24 intra-cluster data reference */ - /* An offset of 0 indicates zero-fill */ int32_t data_len; /* 28 size of data (remainder zero-fill) */ u_int32_t data_crc; /* 2C data sanity check */ u_int64_t rec_id; /* 30 record id (iterator for recovery) */ - u_int64_t reserved07; /* 38 */ + int32_t data_offset; /* 38 cluster-relative data reference or 0 */ + u_int32_t reserved07; /* 3C */ /* 40 */ }; @@ -369,6 +367,7 @@ struct hammer_base_record { #define HAMMER_RECTYPE_UNKNOWN 0 #define HAMMER_RECTYPE_INODE 1 /* inode in obj_id space */ #define HAMMER_RECTYPE_PSEUDO_INODE 2 /* pseudo filesysem */ +#define HAMMER_RECTYPE_CLUSTER 3 /* cluster reference */ #define HAMMER_RECTYPE_DATA_CREATE 0x10 #define HAMMER_RECTYPE_DATA_ZEROFILL 0x11 #define HAMMER_RECTYPE_DATA_DELETE 0x12 @@ -392,8 +391,9 @@ struct hammer_base_record { #define HAMMER_OBJTYPE_SOFTLINK 7 #define HAMMER_OBJTYPE_PSEUDOFS 8 /* pseudo filesystem obj */ -#define HAMMER_OBJTYPE_CLUSTER_BEG 0x10 -#define HAMMER_OBJTYPE_CLUSTER_END 0x11 +#define HAMMER_OBJTYPE_CLUSTER_FLAG 0x20 +#define HAMMER_OBJTYPE_CLUSTER_BEG 0x20 +#define HAMMER_OBJTYPE_CLUSTER_END 0x21 /* * Generic full-sized record @@ -560,7 +560,7 @@ struct hammer_inode_data { /* * Rollup various structures embedded as record data */ -union hammer_data { +union hammer_data_ondisk { struct hammer_inode_data inode; }; diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c new file mode 100644 index 0000000000..367421811d --- /dev/null +++ b/sys/vfs/hammer/hammer_inode.c @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2007 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.1 2007/11/01 20:53:05 dillon Exp $ + */ + +#include "hammer.h" +#include +#include + +static enum vtype hammer_get_vnode_type(u_int16_t obj_type); + +int +hammer_vop_inactive(struct vop_inactive_args *ap) +{ + return(0); +} + +int +hammer_vop_reclaim(struct vop_reclaim_args *ap) +{ + struct hammer_mount *hmp; + struct hammer_inode *ip; + struct vnode *vp; + + vp = ap->a_vp; + hmp = (void *)vp->v_mount->mnt_data; + if ((ip = vp->v_data) != NULL) { + ip->vp = NULL; + vp->v_data = NULL; + RB_REMOVE(hammer_ino_rb_tree, &hmp->rb_inos_root, ip); + kfree(ip, M_HAMMER); + } + return(0); +} + +/* + * Lookup or create the vnode associated with the specified inode number. + * ino_t in DragonFly is 64 bits which matches the 64 bit HAMMER inode + * number. + */ +int +hammer_vfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) +{ + struct hammer_mount *hmp = (void *)mp->mnt_data; + struct hammer_btree_info binfo; + struct hammer_inode_info iinfo; + struct hammer_base_elm key; + struct hammer_inode *ip; + struct vnode *vp; + int error; + + /* + * Determine if we already have an inode cached. If we do then + * we are golden. + */ + iinfo.obj_id = ino; + iinfo.obj_asof = 0; +loop: + ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo); + if (ip) { + vp = ip->vp; + if (vget(vp, LK_EXCLUSIVE) != 0) + goto loop; + ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, + &iinfo); + if (ip == NULL || ip->vp != vp) { + vput(vp); + goto loop; + } + *vpp = vp; + return(0); + } + + /* + * Lookup failed, instantiate a new vnode and inode in-memory + * structure so we don't block in kmalloc later on when holding + * locked buffer cached buffers. + */ + error = getnewvnode(VT_HAMMER, mp, vpp, 0, 0); + if (error) { + *vpp = NULL; + return (error); + } + vp = *vpp; + ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO); + + /* + * If we do not have an inode cached search the HAMMER on-disk B-Tree + * for it. + */ + hammer_btree_info_init(&binfo, hmp->rootcl); + key.obj_id = ino; + key.key = 0; + key.create_tid = 0; + key.delete_tid = 0; + key.rec_type = HAMMER_RECTYPE_INODE; + key.obj_type = 0; + + error = hammer_btree_lookup(&binfo, &key, HAMMER_BTREE_GET_RECORD | + HAMMER_BTREE_GET_DATA); + + /* + * On success the B-Tree lookup will hold the appropriate + * buffer cache buffers and provide a pointer to the requested + * information. Copy the information to the in-memory inode. + */ + if (error == 0) { + ip->ino_rec = binfo.rec->inode; + ip->ino_data = binfo.data->inode; + } + hammer_btree_info_done(&binfo); + + /* + * On success load the inode's record and data and insert the + * inode into the B-Tree. It is possible to race another lookup + * insertion of the same inode so deal with that condition too. 
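+ *
+ * If the RB_INSERT into the in-memory inode tree below finds that
+ * another thread beat us to it, the freshly allocated vnode and
+ * in-memory inode are thrown away (vx_put/kfree) and the lookup is
+ * retried from the top.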
+ */ + if (error == 0) { + if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) { + vp->v_type = VBAD; + vx_put(vp); + kfree(ip, M_HAMMER); + goto loop; + } + ip->vp = vp; + vp->v_data = (void *)ip; + vp->v_type = + hammer_get_vnode_type(ip->ino_rec.base.base.obj_type); + *vpp = vp; + } else { + *vpp = NULL; + } + return (error); +} + +/* + * Convert a HAMMER filesystem object type to a vnode type + */ +static +enum vtype +hammer_get_vnode_type(u_int16_t obj_type) +{ + switch(obj_type) { + case HAMMER_OBJTYPE_DIRECTORY: + return(VDIR); + case HAMMER_OBJTYPE_REGFILE: + return(VREG); + case HAMMER_OBJTYPE_DBFILE: + return(VDATABASE); + case HAMMER_OBJTYPE_FIFO: + return(VFIFO); + case HAMMER_OBJTYPE_CDEV: + return(VCHR); + case HAMMER_OBJTYPE_BDEV: + return(VBLK); + case HAMMER_OBJTYPE_SOFTLINK: + return(VLNK); + default: + return(VBAD); + } + /* not reached */ +} + +/* + * Access the filesystem buffer containing the cluster-relative byte + * offset, validate the buffer type, load *bufferp and return a + * pointer to the requested data. + * + * If buf_type is 0 the buffer is assumed to be a pure-data buffer and + * no type or crc check is performed. + * + * XXX add a flag for the buffer type and check the CRC here XXX + */ +void * +hammer_bread(struct hammer_cluster *cluster, int32_t cloff, + u_int64_t buf_type, + int *errorp, struct hammer_buffer **bufferp) +{ + struct hammer_buffer *buffer; + int32_t buf_no; + int32_t buf_off; + + /* + * Load the correct filesystem buffer, replacing *bufferp. + */ + buf_no = cloff / HAMMER_BUFSIZE; + buffer = *bufferp; + if (buffer == NULL || buffer->cluster != cluster || + buffer->buf_no != buf_no) { + if (buffer) + hammer_put_buffer(buffer); + buffer = hammer_get_buffer(cluster, buf_no, 0, errorp); + *bufferp = buffer; + if (buffer == NULL) + return(NULL); + } + + /* + * Validate the buffer type and crc XXX + */ + buf_off = cloff & HAMMER_BUFMASK; + if (buf_type) { + if (buf_type != buffer->ondisk->head.buf_type) { + *errorp = EIO; + return(NULL); + } + if (buf_off < sizeof(buffer->ondisk->head)) { + *errorp = EIO; + return(NULL); + } + /* XXX crc */ + } + + /* + * Return a pointer to the buffer data. + */ + *errorp = 0; + return((char *)buffer->ondisk + buf_off); +} + diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c new file mode 100644 index 0000000000..d2069320f6 --- /dev/null +++ b/sys/vfs/hammer/hammer_ondisk.c @@ -0,0 +1,1417 @@ +/* + * Copyright (c) 2007 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.1 2007/11/01 20:53:05 dillon Exp $ + */ +/* + * Manage HAMMER's on-disk structures. These routines are primarily + * responsible for interfacing with the kernel's I/O subsystem and for + * managing in-memory structures. + */ + +#include "hammer.h" +#include +#include +#include +#include + +static void hammer_free_volume(struct hammer_volume *volume); +static void initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, + u_int64_t type); +static void alloc_new_buffer(struct hammer_cluster *cluster, + hammer_alist_t live, u_int64_t type, int32_t nelements, + int32_t start, + int *errorp, struct hammer_buffer **bufferp); +#if 0 +static void readhammerbuf(struct hammer_volume *vol, void *data, + int64_t offset); +static void writehammerbuf(struct hammer_volume *vol, const void *data, + int64_t offset); +#endif +static int64_t calculate_cluster_offset(struct hammer_volume *vol, + int32_t clu_no); +static int64_t calculate_supercl_offset(struct hammer_volume *vol, + int32_t scl_no); + + +struct hammer_alist_config Buf_alist_config; +struct hammer_alist_config Vol_normal_alist_config; +struct hammer_alist_config Vol_super_alist_config; +struct hammer_alist_config Supercl_alist_config; +struct hammer_alist_config Clu_master_alist_config; +struct hammer_alist_config Clu_slave_alist_config; + +/* + * Red-Black tree support for various structures + */ +static int +hammer_ino_rb_compare(struct hammer_inode *ip1, struct hammer_inode *ip2) +{ + if (ip1->obj_id < ip2->obj_id) + return(-1); + if (ip1->obj_id > ip2->obj_id) + return(1); + if (ip1->obj_asof < ip2->obj_asof) + return(-1); + if (ip1->obj_asof > ip2->obj_asof) + return(1); + return(0); +} + +static int +hammer_inode_info_cmp(hammer_inode_info_t info, struct hammer_inode *ip) +{ + if (info->obj_id < ip->obj_id) + return(-1); + if (info->obj_id > ip->obj_id) + return(1); + if (info->obj_asof < ip->obj_asof) + return(-1); + if (info->obj_asof > ip->obj_asof) + return(1); + return(0); +} + +static int +hammer_vol_rb_compare(struct hammer_volume *vol1, struct hammer_volume *vol2) +{ + if (vol1->vol_no < vol2->vol_no) + return(-1); + if (vol1->vol_no > vol2->vol_no) + return(1); + return(0); +} + +static int +hammer_scl_rb_compare(struct hammer_supercl *cl1, struct hammer_supercl *cl2) +{ + if (cl1->scl_no < cl2->scl_no) + return(-1); + if (cl1->scl_no > cl2->scl_no) + return(1); + return(0); +} + +static int +hammer_clu_rb_compare(struct hammer_cluster *cl1, struct hammer_cluster *cl2) +{ + if (cl1->clu_no < cl2->clu_no) + return(-1); + if (cl1->clu_no > cl2->clu_no) + return(1); + return(0); +} + +static int +hammer_buf_rb_compare(struct hammer_buffer *buf1, struct hammer_buffer *buf2) +{ + if 
(buf1->buf_no < buf2->buf_no) + return(-1); + if (buf1->buf_no > buf2->buf_no) + return(1); + return(0); +} + +/* + * Note: The lookup function for hammer_ino_rb_tree winds up being named + * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info). The other lookup + * functions are normal, e.g. hammer_clu_rb_tree_RB_LOOKUP(root, clu_no). + */ +RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare); +RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node, + hammer_inode_info_cmp, hammer_inode_info_t); +RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node, + hammer_vol_rb_compare, int32_t, vol_no); +RB_GENERATE2(hammer_scl_rb_tree, hammer_supercl, rb_node, + hammer_scl_rb_compare, int32_t, scl_no); +RB_GENERATE2(hammer_clu_rb_tree, hammer_cluster, rb_node, + hammer_clu_rb_compare, int32_t, clu_no); +RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node, + hammer_buf_rb_compare, int32_t, buf_no); + +/* + * Load a HAMMER volume by name. Returns 0 on success or a positive error + * code on failure. Volumes must be loaded at mount time, get_volume() will + * not load a new volume. + * + * Calls made to hammer_load_volume() or single-threaded + */ +int +hammer_load_volume(struct hammer_mount *hmp, const char *volname) +{ + struct mount *mp; + struct hammer_volume *volume; + struct hammer_volume_ondisk *ondisk; + struct nlookupdata nd; + struct buf *bp = NULL; + int error; + int ronly; + + mp = hmp->mp; + ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0); + + /* + * Allocate a volume structure + */ + volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO); + volume->vol_name = kstrdup(volname, M_HAMMER); + volume->hmp = hmp; + + /* + * Get the device vnode + */ + error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW); + if (error == 0) + error = nlookup(&nd); + if (error == 0) + error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp); + nlookup_done(&nd); + if (error == 0) { + vn_isdisk(volume->devvp, &error); + } + if (error == 0) { + vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY); + error = VOP_OPEN(volume->devvp, (ronly ? FREAD : FREAD|FWRITE), + FSCRED, NULL); + vn_unlock(volume->devvp); + } + if (error) { + hammer_free_volume(volume); + return(error); + } + + /* + * Extract the volume number from the volume header and do various + * sanity checks. + */ + error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp); + if (error) + goto late_failure; + ondisk = (void *)bp->b_data; + if (ondisk->head.buf_type != HAMMER_FSBUF_VOLUME) { + kprintf("hammer_mount: volume %s has an invalid header\n", + volume->vol_name); + error = EFTYPE; + goto late_failure; + } + volume->vol_no = ondisk->vol_no; + volume->cluster_base = ondisk->vol_beg; + volume->vol_clsize = ondisk->vol_clsize; + volume->vol_flags = ondisk->vol_flags; + RB_INIT(&volume->rb_clus_root); + RB_INIT(&volume->rb_scls_root); + + if (RB_EMPTY(&hmp->rb_vols_root)) { + hmp->fsid = ondisk->vol_fsid; + } else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) { + kprintf("hammer_mount: volume %s's fsid does not match " + "other volumes\n", volume->vol_name); + error = EFTYPE; + goto late_failure; + } + + /* + * Insert the volume structure into the red-black tree. + */ + if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) { + kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n", + volume->vol_name, volume->vol_no); + error = EEXIST; + } + + /* + * Set the root volume and load the root cluster. 
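+ *
+ * Only the volume whose vol_no matches vol_rootvol is the root
+ * volume; its vol0_root_clu_no selects the cluster cached in
+ * hmp->rootcl, which later anchors B-Tree lookups such as
+ * hammer_vfs_vget().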
+ */ + if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) { + hmp->rootvol = volume; + hmp->rootcl = hammer_get_cluster(volume, + ondisk->vol0_root_clu_no, + &error, 0); + } +late_failure: + if (bp) + brelse(bp); + if (error) { + /*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/ + VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE); + hammer_free_volume(volume); + } + return (error); +} + +/* + * Unload and free a HAMMER volume. Must return >= 0 to continue scan + * so returns -1 on failure. + */ +int +hammer_unload_volume(struct hammer_volume *volume, void *data __unused) +{ + struct hammer_mount *hmp = volume->hmp; + + /* + * Sync clusters, sync volume + */ + /* flush_volume */ + + /* + * Destroy the structure + */ + RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume); + hammer_free_volume(volume); + return(0); +} + +static +void +hammer_free_volume(struct hammer_volume *volume) +{ + if (volume->vol_name) { + kfree(volume->vol_name, M_HAMMER); + volume->vol_name = NULL; + } + if (volume->devvp) { + vrele(volume->devvp); + volume->devvp = NULL; + } + kfree(volume, M_HAMMER); +} + +/* + * Get a HAMMER volume. The volume must already exist. + */ +struct hammer_volume * +hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp) +{ + struct hammer_volume *volume; + struct hammer_volume_ondisk *ondisk; + + /* + * Locate the volume structure + */ + volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no); + if (volume == NULL) { + *errorp = ENOENT; + return(NULL); + } + + /* + * Load the ondisk buffer if necessary + */ + hammer_lock(&volume->lock); + if (volume->ondisk == NULL) { + *errorp = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, + &volume->bp); + if (*errorp) { + hammer_unlock(&volume->lock); + return(NULL); + } + volume->ondisk = ondisk = (void *)volume->bp->b_data; + + /* + * Configure the volume's A-lists. These are used to + * allocate clusters. + */ + if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) { + volume->alist.config = &Vol_super_alist_config; + volume->alist.meta = ondisk->vol_almeta.super; + volume->alist.info = volume; + } else { + volume->alist.config = &Vol_normal_alist_config; + volume->alist.meta = ondisk->vol_almeta.normal; + volume->alist.info = NULL; + } + hammer_alist_init(&volume->alist); + } + *errorp = 0; + return(volume); +} + +void +hammer_put_volume(struct hammer_volume *volume) +{ + if (hammer_islastref(&volume->lock)) { + if (volume->bp) { + if (volume->modified) { + bdwrite(volume->bp); + } else { + brelse(volume->bp); + } + volume->bp = NULL; + volume->ondisk = NULL; + } + volume->modified = 0; + } + hammer_unlock(&volume->lock); +} + +struct hammer_supercl * +hammer_get_supercl(struct hammer_volume *volume, int32_t scl_no, + int *errorp, int isnew) +{ + struct hammer_supercl_ondisk *ondisk; + struct hammer_supercl *supercl; + + /* + * Locate and lock the super-cluster structure, creating one + * if necessary. + */ +again: + supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no); + if (supercl == NULL) { + supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO); + supercl->scl_no = scl_no; + supercl->volume = volume; + supercl->scl_offset = calculate_supercl_offset(volume, scl_no); + hammer_lock(&supercl->lock); + + /* + * Insert the cluster into the RB tree and handle late + * collisions. 
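+ *
+ * The structure is allocated and locked before it is inserted, so if
+ * another thread raced us and inserted the same scl_no first we simply
+ * free our copy and retry the lookup (goto again).  The same pattern
+ * is used for clusters and buffers below.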
+ */ + if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) { + hammer_unlock(&supercl->lock); + kfree(supercl, M_HAMMER); + goto again; + } + } else { + hammer_lock(&supercl->lock); + } + + /* + * Load the cluster's on-disk info + */ + *errorp = 0; + if (supercl->ondisk == NULL) { + if (isnew) { + supercl->bp = getblk(volume->devvp, supercl->scl_offset, + HAMMER_BUFSIZE, 0, 0); + vfs_bio_clrbuf(supercl->bp); + supercl->modified = 1; + } else { + *errorp = bread(volume->devvp, supercl->scl_offset, + HAMMER_BUFSIZE, &supercl->bp); + } + if (*errorp) { + hammer_unlock(&supercl->lock); + return(NULL); + } + supercl->ondisk = ondisk = (void *)supercl->bp->b_data; + + supercl->alist.config = &Supercl_alist_config; + supercl->alist.meta = ondisk->scl_meta; + supercl->alist.info = NULL; + + /* + * If this is a new super-cluster we have to initialize + * various ondisk structural elements. The caller is + * responsible for the remainder. + */ + if (isnew) { + struct hammer_alist_live dummy; + + dummy.config = &Buf_alist_config; + dummy.meta = ondisk->head.buf_almeta; + dummy.info = NULL; + initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_SUPERCL); + hammer_alist_init(&supercl->alist); + } + } else if (isnew) { + vfs_bio_clrbuf(supercl->bp); + supercl->modified = 1; + } + return (supercl); +} + +void +hammer_put_supercl(struct hammer_supercl *supercl) +{ + if (hammer_islastref(&supercl->lock)) { + if (supercl->bp) { + if (supercl->modified) { + bdwrite(supercl->bp); + } else { + brelse(supercl->bp); + } + supercl->bp = NULL; + supercl->ondisk = NULL; + } + supercl->modified = 0; + } + hammer_unlock(&supercl->lock); +} + +struct hammer_cluster * +hammer_get_cluster(struct hammer_volume *volume, int32_t clu_no, + int *errorp, int isnew) +{ + struct hammer_cluster_ondisk *ondisk; + struct hammer_cluster *cluster; + + /* + * Locate and lock the cluster structure, creating one if necessary. + */ +again: + cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no); + if (cluster == NULL) { + cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO); + cluster->clu_no = clu_no; + cluster->volume = volume; + cluster->clu_offset = calculate_cluster_offset(volume, clu_no); + RB_INIT(&cluster->rb_bufs_root); + hammer_lock(&cluster->lock); + + /* + * Insert the cluster into the RB tree and handle late + * collisions. 
+ */ + if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) { + hammer_unlock(&cluster->lock); + kfree(cluster, M_HAMMER); + goto again; + } + } else { + hammer_lock(&cluster->lock); + } + + /* + * Load the cluster's on-disk info + */ + *errorp = 0; + if (cluster->ondisk == NULL) { + if (isnew) { + cluster->bp = getblk(volume->devvp, cluster->clu_offset, + HAMMER_BUFSIZE, 0, 0); + vfs_bio_clrbuf(cluster->bp); + cluster->modified = 1; + } else { + *errorp = bread(volume->devvp, cluster->clu_offset, + HAMMER_BUFSIZE, &cluster->bp); + } + if (*errorp) { + hammer_unlock(&cluster->lock); + return(NULL); + } + cluster->ondisk = ondisk = (void *)cluster->bp->b_data; + + cluster->alist_master.config = &Clu_master_alist_config; + cluster->alist_master.meta = ondisk->clu_master_meta; + cluster->alist_btree.config = &Clu_slave_alist_config; + cluster->alist_btree.meta = ondisk->clu_btree_meta; + cluster->alist_btree.info = cluster; + cluster->alist_record.config = &Clu_slave_alist_config; + cluster->alist_record.meta = ondisk->clu_record_meta; + cluster->alist_record.info = cluster; + cluster->alist_mdata.config = &Clu_slave_alist_config; + cluster->alist_mdata.meta = ondisk->clu_mdata_meta; + cluster->alist_mdata.info = cluster; + + /* + * If this is a new cluster we have to initialize + * various ondisk structural elements. The caller is + * responsible for the remainder. + */ + if (isnew) { + struct hammer_alist_live dummy; + + dummy.config = &Buf_alist_config; + dummy.meta = ondisk->head.buf_almeta; + dummy.info = NULL; + initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_CLUSTER); + + hammer_alist_init(&cluster->alist_master); + hammer_alist_init(&cluster->alist_btree); + hammer_alist_init(&cluster->alist_record); + hammer_alist_init(&cluster->alist_mdata); + } + } else if (isnew) { + vfs_bio_clrbuf(cluster->bp); + cluster->modified = 1; + } + return(cluster); +} + +void +hammer_put_cluster(struct hammer_cluster *cluster) +{ + if (hammer_islastref(&cluster->lock)) { + if (cluster->bp) { + if (cluster->modified) { + bdwrite(cluster->bp); + } else { + brelse(cluster->bp); + } + cluster->bp = NULL; + cluster->ondisk = NULL; + } + cluster->modified = 0; + } + hammer_unlock(&cluster->lock); +} + +/* + * Get a buffer from a cluster. Note that buffer #0 is the cluster header + * itself and may not be retrieved with this function. + * + * If buf_type is 0 the buffer already exists in-memory or on-disk. + * Otherwise a new buffer is initialized with the specified buffer type. + */ +struct hammer_buffer * +hammer_get_buffer(struct hammer_cluster *cluster, int32_t buf_no, + int64_t buf_type, int *errorp) +{ + hammer_fsbuf_ondisk_t ondisk; + struct hammer_buffer *buffer; + + /* + * Find the buffer. Note that buffer 0 corresponds to the cluster + * header and should never be requested. + */ + KKASSERT(buf_no != 0); + + /* + * Locate and lock the buffer structure, creating one if necessary. + */ +again: + buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no); + if (buffer == NULL) { + buffer = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO); + buffer->buf_no = buf_no; + buffer->cluster = cluster; + buffer->volume = cluster->volume; + buffer->buf_offset = cluster->clu_offset + + (buf_no * HAMMER_BUFSIZE); + hammer_lock(&cluster->lock); + + /* + * Insert the cluster into the RB tree and handle late + * collisions. 
+ */ + if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) { + hammer_unlock(&buffer->lock); + kfree(buffer, M_HAMMER); + goto again; + } + } else { + hammer_lock(&buffer->lock); + } + + *errorp = 0; + if (buffer->ondisk == NULL) { + if (buf_type) { + buffer->bp = getblk(buffer->volume->devvp, + buffer->buf_offset, + HAMMER_BUFSIZE, 0, 0); + vfs_bio_clrbuf(buffer->bp); + buffer->modified = 1; + } else { + *errorp = bread(buffer->volume->devvp, + buffer->buf_offset, + HAMMER_BUFSIZE, &buffer->bp); + } + if (*errorp) { + hammer_unlock(&buffer->lock); + return(NULL); + } + buffer->ondisk = ondisk = (void *)buffer->bp->b_data; + buffer->alist.config = &Buf_alist_config; + buffer->alist.meta = ondisk->head.buf_almeta; + + if (buf_type) { + initbuffer(&buffer->alist, &ondisk->head, buf_type); + } + } else if (buf_type) { + vfs_bio_clrbuf(buffer->bp); + buffer->modified = 1; + } + return (buffer); +} + +void +hammer_put_buffer(struct hammer_buffer *buffer) +{ + if (hammer_islastref(&buffer->lock)) { + if (buffer->bp) { + if (buffer->modified) { + bdwrite(buffer->bp); + } else { + brelse(buffer->bp); + } + buffer->bp = NULL; + buffer->ondisk = NULL; + } + buffer->modified = 0; + } + hammer_unlock(&buffer->lock); +} + +void +hammer_dup_buffer(struct hammer_buffer **bufferp, struct hammer_buffer *buffer) +{ + if (buffer != *bufferp) { + if (buffer) + hammer_lock(&buffer->lock); + if (*bufferp) + hammer_put_buffer(*bufferp); + *bufferp = buffer; + } +} + +void +hammer_dup_cluster(struct hammer_cluster **clusterp, + struct hammer_cluster *cluster) +{ + if (cluster != *clusterp) { + if (cluster) + hammer_lock(&cluster->lock); + if (*clusterp) + hammer_put_cluster(*clusterp); + *clusterp = cluster; + } +} + +/* + * Allocate HAMMER elements - btree nodes, data storage, and record elements + * + * The passed *bufferp should be initialized to NULL. On successive calls + * *bufferp caches the most recent buffer used until put away by the caller. + * Note that previously returned pointers using the cached buffer become + * invalid on successive calls which reuse *bufferp. + * + * All allocations first attempt to use the block found at the specified + * iterator. If that fails the first available block is used. If that + * fails a new buffer is allocated and associated with the buffer type + * A-list and the element is allocated out of the new buffer. 
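+ *
+ * The iterator is the per-cluster idx_* hint kept in the cluster
+ * header (idx_index, idx_data, idx_record).  For example, a burst of
+ * B-Tree node allocations walks forward from idx_index rather than
+ * rescanning the A-list from block 0, and the hint is updated after
+ * each allocation.  Record allocations iterate in reverse.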
+ */ +void * +hammer_alloc_btree(struct hammer_cluster *cluster, + int *errorp, struct hammer_buffer **bufferp) +{ + struct hammer_buffer *buffer; + hammer_alist_t live; + int32_t elm_no; + int32_t buf_no; + void *item; + + /* + * Allocate a B-Tree element + */ + live = &cluster->alist_btree; + elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index); + if (elm_no == HAMMER_ALIST_BLOCK_NONE) + elm_no = hammer_alist_alloc_fwd(live, 1, 0); + if (elm_no == HAMMER_ALIST_BLOCK_NONE) { + alloc_new_buffer(cluster, live, + HAMMER_FSBUF_BTREE, HAMMER_BTREE_NODES, + cluster->ondisk->idx_index, errorp, bufferp); + elm_no = hammer_alist_alloc(live, 1); + KKASSERT(elm_no != HAMMER_ALIST_BLOCK_NONE); + } + cluster->ondisk->idx_index = elm_no; + + /* + * Load and return the B-Tree element + */ + buf_no = elm_no / HAMMER_FSBUF_MAXBLKS; + buffer = *bufferp; + if (buffer == NULL || buffer->cluster != cluster || + buffer->buf_no != buf_no) { + if (buffer) + hammer_put_buffer(buffer); + buffer = hammer_get_buffer(cluster, buf_no, 0, errorp); + *bufferp = buffer; + } + KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_BTREE); + item = &buffer->ondisk->btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK]; + bzero(item, sizeof(union hammer_btree_node)); + return(item); +} + +void * +hammer_alloc_data(struct hammer_cluster *cluster, int32_t bytes, + int *errorp, struct hammer_buffer **bufferp) +{ + struct hammer_buffer *buffer; + hammer_alist_t live; + int32_t elm_no; + int32_t buf_no; + int32_t nblks; + void *item; + + /* + * Allocate a data element + */ + nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK; + live = &cluster->alist_mdata; + elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data); + if (elm_no == HAMMER_ALIST_BLOCK_NONE) + elm_no = hammer_alist_alloc_fwd(live, 1, 0); + if (elm_no == HAMMER_ALIST_BLOCK_NONE) { + alloc_new_buffer(cluster, live, + HAMMER_FSBUF_DATA, HAMMER_DATA_NODES, + cluster->ondisk->idx_data, errorp, bufferp); + elm_no = hammer_alist_alloc(live, nblks); + KKASSERT(elm_no != HAMMER_ALIST_BLOCK_NONE); + } + cluster->ondisk->idx_index = elm_no; + + /* + * Load and return the B-Tree element + */ + buf_no = elm_no / HAMMER_FSBUF_MAXBLKS; + buffer = *bufferp; + if (buffer == NULL || buffer->cluster != cluster || + buffer->buf_no != buf_no) { + if (buffer) + hammer_put_buffer(buffer); + buffer = hammer_get_buffer(cluster, buf_no, 0, errorp); + *bufferp = buffer; + } + KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_BTREE); + item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK]; + bzero(item, nblks * HAMMER_DATA_BLKSIZE); + return(item); +} + +void * +hammer_alloc_record(struct hammer_cluster *cluster, + int *errorp, struct hammer_buffer **bufferp) +{ + struct hammer_buffer *buffer; + hammer_alist_t live; + int32_t elm_no; + int32_t buf_no; + void *item; + + /* + * Allocate a record element + */ + live = &cluster->alist_record; + elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record); + if (elm_no == HAMMER_ALIST_BLOCK_NONE) + elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX); + if (elm_no == HAMMER_ALIST_BLOCK_NONE) { + alloc_new_buffer(cluster, live, + HAMMER_FSBUF_RECORDS, HAMMER_RECORD_NODES, + cluster->ondisk->idx_record, errorp, bufferp); + elm_no = hammer_alist_alloc(live, 1); + KKASSERT(elm_no != HAMMER_ALIST_BLOCK_NONE); + } + cluster->ondisk->idx_record = elm_no; + + /* + * Load and return the B-Tree element + */ + buf_no = elm_no / HAMMER_FSBUF_MAXBLKS; + buffer = *bufferp; + if (buffer == NULL 
|| buffer->cluster != cluster || + buffer->buf_no != buf_no) { + if (buffer) + hammer_put_buffer(buffer); + buffer = hammer_get_buffer(cluster, buf_no, 0, errorp); + *bufferp = buffer; + } + KKASSERT(buffer->ondisk->head.buf_type != 0); + item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK]; + bzero(item, sizeof(union hammer_record_ondisk)); + return(item); +} + +/* + * Free HAMMER elements based on either a hammer_buffer and element pointer + * or a cluster-relative byte offset. + */ +void +hammer_free_btree_ptr(struct hammer_buffer *buffer, hammer_btree_node_t node) +{ + int32_t elm_no; + hammer_alist_t live; + + elm_no = node - buffer->ondisk->btree.nodes; + KKASSERT(elm_no >= 0 && elm_no < HAMMER_BTREE_NODES); + elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS; + live = &buffer->cluster->alist_btree; + hammer_alist_free(live, elm_no, 1); +} + +void +hammer_free_data_ptr(struct hammer_buffer *buffer, void *data, int bytes) +{ + int32_t elm_no; + int32_t nblks; + hammer_alist_t live; + + elm_no = ((char *)data - (char *)buffer->ondisk->data.data) / + HAMMER_DATA_BLKSIZE; + KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES); + elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS; + nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK; + live = &buffer->cluster->alist_mdata; + hammer_alist_free(live, elm_no, nblks); +} + +void +hammer_free_record_ptr(struct hammer_buffer *buffer, + union hammer_record_ondisk *rec) +{ + int32_t elm_no; + hammer_alist_t live; + + elm_no = rec - &buffer->ondisk->record.recs[0]; + KKASSERT(elm_no >= 0 && elm_no < HAMMER_BTREE_NODES); + elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS; + live = &buffer->cluster->alist_record; + hammer_alist_free(live, elm_no, 1); +} + +void +hammer_free_btree(struct hammer_cluster *cluster, int32_t bclu_offset) +{ + const int32_t blksize = sizeof(union hammer_btree_node); + int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK; + hammer_alist_t live; + int32_t elm_no; + + elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS; + fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, btree.nodes[0]); + live = &cluster->alist_btree; + KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0); + elm_no += fsbuf_offset / blksize; + hammer_alist_free(live, elm_no, 1); +} + +void +hammer_free_data(struct hammer_cluster *cluster, int32_t bclu_offset, + int32_t bytes) +{ + const int32_t blksize = HAMMER_DATA_BLKSIZE; + int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK; + hammer_alist_t live; + int32_t elm_no; + int32_t nblks; + + elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS; + fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]); + live = &cluster->alist_mdata; + nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK; + KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0); + elm_no += fsbuf_offset / blksize; + hammer_alist_free(live, elm_no, nblks); +} + +void +hammer_free_record(struct hammer_cluster *cluster, int32_t bclu_offset) +{ + const int32_t blksize = sizeof(union hammer_record_ondisk); + int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK; + hammer_alist_t live; + int32_t elm_no; + + elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS; + fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, record.recs[0]); + live = &cluster->alist_record; + KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0); + elm_no += fsbuf_offset / blksize; + hammer_alist_free(live, elm_no, 1); +} + + +/* + * Allocate a new filesystem buffer and assign it to the specified + * 
filesystem buffer type. The new buffer will be added to the + * type-specific A-list and initialized. + */ +static void +alloc_new_buffer(struct hammer_cluster *cluster, hammer_alist_t live, + u_int64_t type, int32_t nelements, + int start, int *errorp, struct hammer_buffer **bufferp) +{ + struct hammer_buffer *buffer; + int32_t buf_no; + + start = start / HAMMER_FSBUF_MAXBLKS; /* convert to buf_no */ + + if (type == HAMMER_FSBUF_RECORDS) { + buf_no = hammer_alist_alloc_rev(&cluster->alist_master, + 1, start); + if (buf_no == HAMMER_ALIST_BLOCK_NONE) { + buf_no = hammer_alist_alloc_rev(&cluster->alist_master, + 1, HAMMER_ALIST_BLOCK_MAX); + } + } else { + buf_no = hammer_alist_alloc_fwd(&cluster->alist_master, + 1, start); + if (buf_no == HAMMER_ALIST_BLOCK_NONE) { + buf_no = hammer_alist_alloc_fwd(&cluster->alist_master, + 1, 0); + } + } + KKASSERT(buf_no != HAMMER_ALIST_BLOCK_NONE); /* XXX */ + + /* + * The new buffer must be initialized (type != 0) regardless of + * whether we already have it cached or not, so don't try to + * optimize the cached buffer check. Just call hammer_get_buffer(). + */ + buffer = hammer_get_buffer(cluster, buf_no, type, errorp); + if (*bufferp) + hammer_put_buffer(*bufferp); + *bufferp = buffer; +} + +#if 0 + +/* + * Flush various tracking structures to disk + */ + +/* + * Flush various tracking structures to disk + */ +void +flush_all_volumes(void) +{ + struct hammer_volume *vol; + + for (vol = VolBase; vol; vol = vol->next) + flush_volume(vol); +} + +void +flush_volume(struct hammer_volume *vol) +{ + struct hammer_supercl *supercl; + struct hammer_cluster *cl; + + for (supercl = vol->supercl_base; supercl; supercl = supercl->next) + flush_supercl(supercl); + for (cl = vol->cluster_base; cl; cl = cl->next) + flush_cluster(cl); + writehammerbuf(vol, vol->ondisk, 0); +} + +void +flush_supercl(struct hammer_supercl *supercl) +{ + int64_t supercl_offset; + + supercl_offset = supercl->scl_offset; + writehammerbuf(supercl->volume, supercl->ondisk, supercl_offset); +} + +void +flush_cluster(struct hammer_cluster *cl) +{ + struct hammer_buffer *buf; + int64_t cluster_offset; + + for (buf = cl->buffer_base; buf; buf = buf->next) + flush_buffer(buf); + cluster_offset = cl->clu_offset; + writehammerbuf(cl->volume, cl->ondisk, cluster_offset); +} + +void +flush_buffer(struct hammer_buffer *buf) +{ + int64_t buffer_offset; + + buffer_offset = buf->buf_offset + buf->cluster->clu_offset; + writehammerbuf(buf->volume, buf->ondisk, buffer_offset); +} + +#endif + +/* + * Generic buffer initialization + */ +static void +initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type) +{ + head->buf_type = type; + hammer_alist_init(live); +} + +#if 0 +/* + * Core I/O operations + */ +static void +readhammerbuf(struct hammer_volume *vol, void *data, int64_t offset) +{ + ssize_t n; + + n = pread(vol->fd, data, HAMMER_BUFSIZE, offset); + if (n != HAMMER_BUFSIZE) + err(1, "Read volume %d (%s)", vol->vol_no, vol->name); +} + +static void +writehammerbuf(struct hammer_volume *vol, const void *data, int64_t offset) +{ + ssize_t n; + + n = pwrite(vol->fd, data, HAMMER_BUFSIZE, offset); + if (n != HAMMER_BUFSIZE) + err(1, "Write volume %d (%s)", vol->vol_no, vol->name); +} + +#endif + +/* + * Calculate the cluster's offset in the volume. This calculation is + * slightly more complex when using superclusters because superclusters + * are grouped in blocks of 16, followed by 16 x N clusters where N + * is the number of clusters a supercluster can manage. 
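+ *
+ * In the simple (no supercluster) case the offset is just
+ * cluster_base + clu_no * vol_clsize; e.g. with a purely illustrative
+ * 64MB vol_clsize, cluster 3 would begin 192MB past cluster_base.
+ * With superclusters each group is preceded by a block of 16
+ * super-cluster headers and the group size must be added in for every
+ * full group preceding the cluster.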
+ */ +static int64_t +calculate_cluster_offset(struct hammer_volume *volume, int32_t clu_no) +{ + int32_t scl_group; + int64_t scl_group_size; + int64_t off; + + if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) { + scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP / + HAMMER_SCL_MAXCLUSTERS; + scl_group_size = + ((int64_t)HAMMER_BUFSIZE * + HAMMER_VOL_SUPERCLUSTER_GROUP) + + ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP * + volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS); + scl_group_size += + HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE; + + off = volume->cluster_base + + scl_group * scl_group_size + + (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) + + ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS * + HAMMER_VOL_SUPERCLUSTER_GROUP)) + * HAMMER_BUFSIZE; + } else { + off = volume->cluster_base + + (int64_t)clu_no * volume->vol_clsize; + } + return(off); +} + +/* + * Calculate a super-cluster's offset in the volume. + */ +static int64_t +calculate_supercl_offset(struct hammer_volume *volume, int32_t scl_no) +{ + int64_t off; + int32_t scl_group; + int64_t scl_group_size; + + KKASSERT (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL); + scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP; + if (scl_group) { + scl_group_size = + ((int64_t)HAMMER_BUFSIZE * + HAMMER_VOL_SUPERCLUSTER_GROUP) + + ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP * + volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS); + scl_group_size += + HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE; + off = volume->cluster_base + (scl_group * scl_group_size) + + (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE; + } else { + off = volume->cluster_base + (scl_no * HAMMER_BUFSIZE); + } + return(off); +} + +/* + * A-LIST SUPPORT + * + * Setup the parameters for the various A-lists we use in hammer. The + * supercluster A-list must be chained to the cluster A-list and cluster + * slave A-lists are chained to buffer A-lists. + * + * See hammer_init_alist_config() below. + */ + +/* + * A-LIST - cluster recursion into a filesystem buffer + */ +static int +buffer_alist_init(void *info, int32_t blk, int32_t radix) +{ + struct hammer_cluster *cluster = info; + struct hammer_buffer *buffer; + int32_t buf_no; + int error = 0; + + /* + * Calculate the buffer number, initialize based on the buffer type. + * The buffer has already been allocated so assert that it has been + * initialized. 
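+ *
+ * blk is expressed in the cluster-level (slave) A-list's block space.
+ * Dividing by HAMMER_FSBUF_MAXBLKS recovers the filesystem buffer
+ * backing that range, and the buffer's own A-list then manages the
+ * blocks within it, which is why the callbacks below bias atblk and
+ * the returned block by blk.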
+ */ + buf_no = blk / HAMMER_FSBUF_MAXBLKS; + buffer = hammer_get_buffer(cluster, buf_no, 0, &error); + if (buffer) + hammer_put_buffer(buffer); + return (error); +} + +static int +buffer_alist_destroy(void *info, int32_t blk, int32_t radix) +{ + return (0); +} + +static int +buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix, + int32_t count, int32_t atblk, int32_t *fullp) +{ + struct hammer_cluster *cluster = info; + struct hammer_buffer *buffer; + int32_t buf_no; + int32_t r; + int error = 0; + + buf_no = blk / HAMMER_FSBUF_MAXBLKS; + buffer = hammer_get_buffer(cluster, buf_no, 0, &error); + if (buffer) { + KKASSERT(buffer->ondisk->head.buf_type != 0); + + r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk); + if (r != HAMMER_ALIST_BLOCK_NONE) + r += blk; + *fullp = hammer_alist_isfull(&buffer->alist); + hammer_put_buffer(buffer); + } else { + r = HAMMER_ALIST_BLOCK_NONE; + } + return(r); +} + +static int +buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix, + int32_t count, int32_t atblk, int32_t *fullp) +{ + struct hammer_cluster *cluster = info; + struct hammer_buffer *buffer; + int32_t buf_no; + int32_t r; + int error = 0; + + buf_no = blk / HAMMER_FSBUF_MAXBLKS; + buffer = hammer_get_buffer(cluster, buf_no, 0, &error); + if (buffer) { + KKASSERT(buffer->ondisk->head.buf_type != 0); + + r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk); + if (r != HAMMER_ALIST_BLOCK_NONE) + r += blk; + *fullp = hammer_alist_isfull(&buffer->alist); + hammer_put_buffer(buffer); + } else { + r = HAMMER_ALIST_BLOCK_NONE; + *fullp = 0; + } + return(r); +} + +static void +buffer_alist_free(void *info, int32_t blk, int32_t radix, + int32_t base_blk, int32_t count, int32_t *emptyp) +{ + struct hammer_cluster *cluster = info; + struct hammer_buffer *buffer; + int32_t buf_no; + int error = 0; + + buf_no = blk / HAMMER_FSBUF_MAXBLKS; + buffer = hammer_get_buffer(cluster, buf_no, 0, &error); + if (buffer) { + KKASSERT(buffer->ondisk->head.buf_type != 0); + hammer_alist_free(&buffer->alist, base_blk, count); + *emptyp = hammer_alist_isempty(&buffer->alist); + hammer_put_buffer(buffer); + } else { + *emptyp = 0; + } +} + +static void +buffer_alist_print(void *info, int32_t blk, int32_t radix, int tab) +{ +} + +/* + * A-LIST - super-cluster recursion into a cluster and cluster recursion + * into a filesystem buffer. A-List's are mostly self-contained entities, + * but callbacks must be installed to recurse from one A-List to another. + * + * Implementing these callbacks allows us to operate a multi-layered A-List + * as a single entity. + */ +static int +super_alist_init(void *info, int32_t blk, int32_t radix) +{ + struct hammer_volume *volume = info; + struct hammer_supercl *supercl; + int32_t scl_no; + int error = 0; + + /* + * Calculate the super-cluster number containing the cluster (blk) + * and obtain the super-cluster buffer. 
+ */ + scl_no = blk / HAMMER_SCL_MAXCLUSTERS; + supercl = hammer_get_supercl(volume, scl_no, &error, 1); + if (supercl) + hammer_put_supercl(supercl); + return (error); +} + +static int +super_alist_destroy(void *info, int32_t blk, int32_t radix) +{ + return(0); +} + +static int +super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix, + int32_t count, int32_t atblk, int32_t *fullp) +{ + struct hammer_volume *volume = info; + struct hammer_supercl *supercl; + int32_t scl_no; + int32_t r; + int error = 0; + + scl_no = blk / HAMMER_SCL_MAXCLUSTERS; + supercl = hammer_get_supercl(volume, scl_no, &error, 1); + if (supercl) { + r = hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk); + if (r != HAMMER_ALIST_BLOCK_NONE) + r += blk; + *fullp = hammer_alist_isfull(&supercl->alist); + hammer_put_supercl(supercl); + } else { + r = HAMMER_ALIST_BLOCK_NONE; + *fullp = 0; + } + return(r); +} + +static int +super_alist_alloc_rev(void *info, int32_t blk, int32_t radix, + int32_t count, int32_t atblk, int32_t *fullp) +{ + struct hammer_volume *volume = info; + struct hammer_supercl *supercl; + int32_t scl_no; + int32_t r; + int error = 0; + + scl_no = blk / HAMMER_SCL_MAXCLUSTERS; + supercl = hammer_get_supercl(volume, scl_no, &error, 1); + if (supercl) { + r = hammer_alist_alloc_rev(&supercl->alist, count, atblk - blk); + if (r != HAMMER_ALIST_BLOCK_NONE) + r += blk; + *fullp = hammer_alist_isfull(&supercl->alist); + hammer_put_supercl(supercl); + } else { + r = HAMMER_ALIST_BLOCK_NONE; + *fullp = 0; + } + return(r); +} + +static void +super_alist_free(void *info, int32_t blk, int32_t radix, + int32_t base_blk, int32_t count, int32_t *emptyp) +{ + struct hammer_volume *volume = info; + struct hammer_supercl *supercl; + int32_t scl_no; + int error = 0; + + scl_no = blk / HAMMER_SCL_MAXCLUSTERS; + supercl = hammer_get_supercl(volume, scl_no, &error, 1); + if (supercl) { + hammer_alist_free(&supercl->alist, base_blk, count); + *emptyp = hammer_alist_isempty(&supercl->alist); + hammer_put_supercl(supercl); + } else { + *emptyp = 0; + } +} + +static void +super_alist_print(void *info, int32_t blk, int32_t radix, int tab) +{ +} + +void +hammer_init_alist_config(void) +{ + hammer_alist_config_t config; + + hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS, + 1, HAMMER_FSBUF_METAELMS); + hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS, + 1, HAMMER_VOL_METAELMS_1LYR); + hammer_alist_template(&Vol_super_alist_config, + HAMMER_VOL_MAXSUPERCLUSTERS, + HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR); + hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS, + 1, HAMMER_SUPERCL_METAELMS); + hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS, + 1, HAMMER_CLU_MASTER_METAELMS); + hammer_alist_template(&Clu_slave_alist_config, HAMMER_CLU_MAXBUFFERS, + HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS); + + config = &Vol_super_alist_config; + config->bl_radix_init = super_alist_init; + config->bl_radix_destroy = super_alist_destroy; + config->bl_radix_alloc_fwd = super_alist_alloc_fwd; + config->bl_radix_alloc_rev = super_alist_alloc_rev; + config->bl_radix_free = super_alist_free; + config->bl_radix_print = super_alist_print; + + config = &Clu_slave_alist_config; + config->bl_radix_init = buffer_alist_init; + config->bl_radix_destroy = buffer_alist_destroy; + config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd; + config->bl_radix_alloc_rev = buffer_alist_alloc_rev; + config->bl_radix_free = buffer_alist_free; + config->bl_radix_print = 
buffer_alist_print; +} + + diff --git a/sys/vfs/hammer/hammer_subs.c b/sys/vfs/hammer/hammer_subs.c new file mode 100644 index 0000000000..f6ede839c9 --- /dev/null +++ b/sys/vfs/hammer/hammer_subs.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2007 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.1 2007/11/01 20:53:05 dillon Exp $ + */ +/* + * HAMMER structural locking + */ + +#include "hammer.h" + +void +hammer_lock(struct hammer_lock *lock) +{ + thread_t td = curthread; + + if (lock->locktd != td) { + while (lock->locktd != NULL) { + lock->wanted = 1; + tsleep(lock, 0, "hmrlck", 0); + } + lock->locktd = td; + } + ++lock->refs; +} + +void +hammer_unlock(struct hammer_lock *lock) +{ + KKASSERT(lock->locktd == curthread); + KKASSERT(lock->refs > 0); + if (--lock->refs == 0) { + lock->locktd = NULL; + if (lock->wanted) { + lock->wanted = 0; + wakeup(lock); + } + } +} + diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c new file mode 100644 index 0000000000..487d204215 --- /dev/null +++ b/sys/vfs/hammer/hammer_vfsops.c @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2007 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. 
Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.1 2007/11/01 20:53:05 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "hammer.h" + +/* + * VFS ABI + */ +static void hammer_free_hmp(struct mount *mp); + +static int hammer_vfs_mount(struct mount *mp, char *path, caddr_t data, + struct ucred *cred); +static int hammer_vfs_unmount(struct mount *mp, int mntflags); +static int hammer_vfs_root(struct mount *mp, struct vnode **vpp); +static int hammer_vfs_statfs(struct mount *mp, struct statfs *sbp, + struct ucred *cred); +static int hammer_vfs_sync(struct mount *mp, int waitfor); +static int hammer_vfs_init(struct vfsconf *conf); + +static struct vfsops hammer_vfsops = { + .vfs_mount = hammer_vfs_mount, + .vfs_unmount = hammer_vfs_unmount, + .vfs_root = hammer_vfs_root, + .vfs_statfs = hammer_vfs_statfs, + .vfs_sync = hammer_vfs_sync, + .vfs_vget = hammer_vfs_vget, + .vfs_init = hammer_vfs_init +}; + +MALLOC_DEFINE(M_HAMMER, "hammer-mount", "hammer mount"); + +VFS_SET(hammer_vfsops, hammer, 0); +MODULE_VERSION(hammer, 1); + +static int +hammer_vfs_init(struct vfsconf *conf) +{ + hammer_init_alist_config(); + return(0); +} + +static int +hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, + struct ucred *cred) +{ + struct hammer_mount_info info; + struct hammer_mount *hmp; + const char *upath; /* volume name in userspace */ + char *path; /* volume name in system space */ + int error; + int i; + + if ((error = copyin(data, &info, sizeof(info))) != 0) + return (error); + if (info.nvolumes <= 0 || info.nvolumes >= 32768) + return (EINVAL); + + /* + * Interal mount data structure + */ + hmp = kmalloc(sizeof(*hmp), M_HAMMER, M_WAITOK | M_ZERO); + mp->mnt_data = (qaddr_t)hmp; + hmp->mp = mp; + RB_INIT(&hmp->rb_vols_root); + RB_INIT(&hmp->rb_inos_root); + + /* + * Load volumes + */ + path = objcache_get(namei_oc, M_WAITOK); + for (i = 0; i < info.nvolumes; ++i) { + error = copyin(&info.volumes[i], &upath, sizeof(char *)); + if (error == 0) + error = copyinstr(upath, path, MAXPATHLEN, NULL); + if (error == 0) + error = hammer_load_volume(hmp, path); + if (error) + break; + } + objcache_put(namei_oc, path); + + /* + * Make sure we found a root volume + */ + if (error == 0 && hmp->rootvol == NULL) { + kprintf("hammer_mount: No root volume found!\n"); + error = EINVAL; + } + if (error == 0 && hmp->rootcl == NULL) { + kprintf("hammer_mount: No root cluster found!\n"); + error = EINVAL; + } + if (error) { + 
hammer_free_hmp(mp); + return (error); + } + + /* + * No errors at this point. Locate the root directory using the + * root cluster's B-Tree as a starting point. The root directory + * uses an obj_id of 1. Leave the root directory cached referenced + * but unlocked in hmp->rootvp. + */ + error = hammer_vfs_vget(mp, 1, &hmp->rootvp); + if (error == 0) + vn_unlock(hmp->rootvp); + + /* + * Setup the rest of the mount or clean up after an error. + */ + if (error) { + hammer_free_hmp(mp); + } else { + mp->mnt_iosize_max = MAXPHYS; + mp->mnt_kern_flag |= MNTK_FSMID; + mp->mnt_stat.f_fsid.val[0] = 0; /* XXX */ + mp->mnt_stat.f_fsid.val[1] = 0; /* XXX */ + vfs_getnewfsid(mp); /* XXX */ + mp->mnt_maxsymlinklen = 255; + mp->mnt_flag |= MNT_LOCAL; + + vfs_add_vnodeops(mp, &hammer_vnode_vops, &mp->mnt_vn_norm_ops); + } + return (error); +} + +static int +hammer_vfs_unmount(struct mount *mp, int mntflags) +{ +#if 0 + struct hammer_mount *hmp = (void *)mp->mnt_data; +#endif + int flags; + + /* + * Clean out the vnodes + */ + flags = WRITECLOSE | ((mntflags & MNT_FORCE) ? FORCECLOSE : 0); + vflush(mp, 0, flags); + + /* + * Clean up the internal mount structure and related entities. This + * may issue I/O. + */ + hammer_free_hmp(mp); + return(0); +} + +/* + * Clean up the internal mount structure and disassociate it from the mount. + * This may issue I/O. + */ +static void +hammer_free_hmp(struct mount *mp) +{ + struct hammer_mount *hmp = (void *)mp->mnt_data; + + /* + * Clean up the root vnode + */ + if (hmp->rootvp) { + vrele(hmp->rootvp); + hmp->rootvp = NULL; + } + + /* + * Unload & flush inodes + */ + RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL, + hammer_unload_inode, NULL); + + /* + * Unload & flush volumes + */ + RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL, + hammer_unload_volume, NULL); + + mp->mnt_data = NULL; + hmp->mp = NULL; + kfree(hmp, M_HAMMER); +} + +/* + * Return the root vnode for the filesystem. + * + * HAMMER stores the root vnode in the hammer_mount structure so + * getting it is easy. + */ +static int +hammer_vfs_root(struct mount *mp, struct vnode **vpp) +{ + struct hammer_mount *hmp = (void *)mp->mnt_data; + struct vnode *vp; + + if ((vp = hmp->rootvp) != NULL) { + vref(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + *vpp = vp; + return (0); + } else { + *vpp = NULL; + return (EIO); + } +} + +static int +hammer_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) +{ + /*struct hammer_mount *hmp = (void *)mp->mnt_data;*/ + int error; + + error = EINVAL; + return(error); +} + +static int +hammer_vfs_sync(struct mount *mp, int waitfor) +{ + return(0); +} + diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c new file mode 100644 index 0000000000..223ee55422 --- /dev/null +++ b/sys/vfs/hammer/hammer_vnops.c @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2007 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. 
Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.1 2007/11/01 20:53:05 dillon Exp $ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "hammer.h" + +/* + * USERFS VNOPS + */ +/*static int hammer_vop_vnoperate(struct vop_generic_args *);*/ +static int hammer_vop_fsync (struct vop_fsync_args *); +static int hammer_vop_read (struct vop_read_args *); +static int hammer_vop_write (struct vop_write_args *); +static int hammer_vop_access (struct vop_access_args *); +static int hammer_vop_advlock (struct vop_advlock_args *); +static int hammer_vop_close (struct vop_close_args *); +static int hammer_vop_ncreate (struct vop_ncreate_args *); +static int hammer_vop_getattr (struct vop_getattr_args *); +static int hammer_vop_nresolve (struct vop_nresolve_args *); +static int hammer_vop_nlookupdotdot (struct vop_nlookupdotdot_args *); +static int hammer_vop_nlink (struct vop_nlink_args *); +static int hammer_vop_nmkdir (struct vop_nmkdir_args *); +static int hammer_vop_nmknod (struct vop_nmknod_args *); +static int hammer_vop_open (struct vop_open_args *); +static int hammer_vop_pathconf (struct vop_pathconf_args *); +static int hammer_vop_print (struct vop_print_args *); +static int hammer_vop_readdir (struct vop_readdir_args *); +static int hammer_vop_readlink (struct vop_readlink_args *); +static int hammer_vop_nremove (struct vop_nremove_args *); +static int hammer_vop_nrename (struct vop_nrename_args *); +static int hammer_vop_nrmdir (struct vop_nrmdir_args *); +static int hammer_vop_setattr (struct vop_setattr_args *); +static int hammer_vop_strategy (struct vop_strategy_args *); +static int hammer_vop_nsymlink (struct vop_nsymlink_args *); +static int hammer_vop_nwhiteout (struct vop_nwhiteout_args *); + +struct vop_ops hammer_vnode_vops = { + .vop_default = vop_defaultop, + .vop_fsync = hammer_vop_fsync, + .vop_read = hammer_vop_read, + .vop_write = hammer_vop_write, + .vop_access = hammer_vop_access, + .vop_advlock = hammer_vop_advlock, + .vop_close = hammer_vop_close, + .vop_ncreate = hammer_vop_ncreate, + .vop_getattr = hammer_vop_getattr, + .vop_inactive = hammer_vop_inactive, + .vop_reclaim = hammer_vop_reclaim, + .vop_nresolve = hammer_vop_nresolve, + .vop_nlookupdotdot = hammer_vop_nlookupdotdot, + .vop_nlink = hammer_vop_nlink, + .vop_nmkdir = hammer_vop_nmkdir, + .vop_nmknod = hammer_vop_nmknod, + .vop_open = hammer_vop_open, + .vop_pathconf = hammer_vop_pathconf, + .vop_print = hammer_vop_print, + .vop_readdir = hammer_vop_readdir, + 
.vop_readlink = hammer_vop_readlink, + .vop_nremove = hammer_vop_nremove, + .vop_nrename = hammer_vop_nrename, + .vop_nrmdir = hammer_vop_nrmdir, + .vop_setattr = hammer_vop_setattr, + .vop_strategy = hammer_vop_strategy, + .vop_nsymlink = hammer_vop_nsymlink, + .vop_nwhiteout = hammer_vop_nwhiteout +}; + +#if 0 +static +int +hammer_vop_vnoperate(struct vop_generic_args *) +{ + return (VOCALL(&hammer_vnode_vops, ap)); +} +#endif + +static +int +hammer_vop_fsync (struct vop_fsync_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_read (struct vop_read_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_write (struct vop_write_args *ap) +{ + /* XXX UIO_NOCOPY */ + return EOPNOTSUPP; +} + +static +int +hammer_vop_access (struct vop_access_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_advlock (struct vop_advlock_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_close (struct vop_close_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_ncreate (struct vop_ncreate_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_getattr (struct vop_getattr_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_nresolve (struct vop_nresolve_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_nlookupdotdot (struct vop_nlookupdotdot_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_nlink (struct vop_nlink_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_nmkdir (struct vop_nmkdir_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_nmknod (struct vop_nmknod_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_open (struct vop_open_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_pathconf (struct vop_pathconf_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_print (struct vop_print_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_readdir (struct vop_readdir_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_readlink (struct vop_readlink_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_nremove (struct vop_nremove_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_nrename (struct vop_nrename_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_nrmdir (struct vop_nrmdir_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_setattr (struct vop_setattr_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_strategy (struct vop_strategy_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_nsymlink (struct vop_nsymlink_args *ap) +{ + return EOPNOTSUPP; +} + +static +int +hammer_vop_nwhiteout (struct vop_nwhiteout_args *ap) +{ + return EOPNOTSUPP; +} + -- 2.41.0
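A closing note on the locking primitive introduced in hammer_subs.c above: hammer_lock() records the owning thread and lets that thread re-enter freely, while any other thread sets the wanted flag and tsleep()s on the lock; hammer_unlock() clears the owner and issues a wakeup() only when the reference count drops to zero, so acquisitions and releases must pair up. For readers more familiar with userspace threading, the sketch below mirrors the same wait/wakeup shape with pthreads. It is an analogy only; the toy_lock type and function names are invented and this is not code from the commit.

/*
 * Userspace analogue of the hammer_lock idea: an owner-recursive sleep
 * lock.  Initialize mtx and cv with PTHREAD_MUTEX_INITIALIZER and
 * PTHREAD_COND_INITIALIZER (or the *_init() calls) before use.
 */
#include <pthread.h>

struct toy_lock {
	pthread_mutex_t	mtx;
	pthread_cond_t	cv;
	pthread_t	owner;
	int		owned;	/* is 'owner' valid? */
	int		refs;
};

void
toy_lock_acquire(struct toy_lock *lk)
{
	pthread_mutex_lock(&lk->mtx);
	if (!(lk->owned && pthread_equal(lk->owner, pthread_self()))) {
		while (lk->owned)		/* like tsleep(lock, 0, "hmrlck", 0) */
			pthread_cond_wait(&lk->cv, &lk->mtx);
		lk->owner = pthread_self();
		lk->owned = 1;
	}
	++lk->refs;				/* the owning thread may re-enter */
	pthread_mutex_unlock(&lk->mtx);
}

void
toy_lock_release(struct toy_lock *lk)
{
	pthread_mutex_lock(&lk->mtx);
	if (--lk->refs == 0) {
		lk->owned = 0;
		pthread_cond_broadcast(&lk->cv);	/* like wakeup(lock) */
	}
	pthread_mutex_unlock(&lk->mtx);
}

As with hammer_lock()/hammer_unlock(), a thread that already holds the lock simply bumps refs and must release once per acquisition before waiters can run.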