hammer2 - Implement more of the hammer2_chain infrastructure
authorMatthew Dillon <dillon@apollo.backplane.com>
Sun, 12 Feb 2012 19:23:17 +0000 (11:23 -0800)
committerMatthew Dillon <dillon@apollo.backplane.com>
Sun, 12 Feb 2012 19:23:17 +0000 (11:23 -0800)
* Allocate system structures through their chain type.

* Implement core lookup and iteration code

* Non-terminal media objects which are smaller than HAMMER2_PBUFSIZE (64K)
  cannot hold onto their buffer cache buffer without deadlocking against
  or interfering with the chain.

  This is just inodes for now.  An embedded copy of the media data is
  retained (I had removed it before thinking I could just map the bp but
  it doesn't work, so it goes back in).

* Data references for other media objects can be temporary and allocated.
  The chain locking and unlocking code will instantiate and destroy the
  allocated copy as needed.

  This also enforces the chain locking requirement for media data access.

* hammer2_chain_create() skeleton added (cannot create indirect blocks yet).

* hammer2_chain_delete() does nothing atm.

* tested with mount/umount.

sys/vfs/hammer2/Makefile
sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_chain.c
sys/vfs/hammer2/hammer2_disk.h
sys/vfs/hammer2/hammer2_freemap.c [new file with mode: 0644]
sys/vfs/hammer2/hammer2_inode.c
sys/vfs/hammer2/hammer2_subr.c
sys/vfs/hammer2/hammer2_vfsops.c

index 0a712ac..97d10b8 100644 (file)
@@ -5,6 +5,6 @@
 
 KMOD=  hammer2
 SRCS=  hammer2_vfsops.c hammer2_vnops.c hammer2_inode.c
-SRCS+= hammer2_chain.c hammer2_subr.c hammer2_icrc.c
+SRCS+= hammer2_chain.c hammer2_freemap.c hammer2_subr.c hammer2_icrc.c
 
 .include <bsd.kmod.mk>
index 4a7cfb9..581a02d 100644 (file)
@@ -66,6 +66,7 @@
 #include "hammer2_disk.h"
 #include "hammer2_mount.h"
 
+struct hammer2_chain;
 struct hammer2_inode;
 struct hammer2_mount;
 
@@ -81,34 +82,80 @@ struct hammer2_mount;
  * It is always possible to track a chain element all the way back to the
  * root by following the (parent) links.  (index) is a type-dependent index
  * in the parent indicating where in the parent the chain element resides.
+ *
+ * When a blockref is added or deleted the related chain element is marked
+ * modified and all of its parents are marked SUBMODIFIED (the parent
+ * recursion can stop once we hit a node that is already marked SUBMODIFIED).
+ * A deleted chain element must remain intact until synchronized against
+ * its parent.
+ *
+ * The blockref at (parent, index) is not adjusted until the modified chain
+ * element is flushed and unmarked.  Until then the child's blockref may
+ * not match the blockref at (parent, index).
  */
+SPLAY_HEAD(hammer2_chain_splay, hammer2_chain);
+
 struct hammer2_chain {
        struct hammer2_blockref bref;
        struct hammer2_chain *parent;   /* return chain to root */
-       struct hammer2_chain *subs;     /* children (base) */
-       struct hammer2_chain *next;     /* linked list */
+       struct hammer2_chain_splay shead;
+       SPLAY_ENTRY(hammer2_chain) snode;
        union {
                struct hammer2_inode *ip;
-               struct hammer2_indblock *ind;
+               struct hammer2_indblock *np;
+               struct hammer2_data *dp;
+               void *mem;
        } u;
-       int     index;                  /* index in parent */
-       u_int   refs;
-       u_int   busy;
+
+       struct buf      *bp;            /* buffer cache (ro) */
+       hammer2_media_data_t *data;     /* modified copy of data (rw) */
+
+       struct lock     lk;             /* lockmgr lock */
+       int             index;          /* index in parent */
+       u_int           refs;
+       u_int           busy;           /* soft-busy */
+       u_int           flags;
 };
 
 typedef struct hammer2_chain hammer2_chain_t;
 
+int hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2);
+SPLAY_PROTOTYPE(hammer2_chain_splay, hammer2_chain, snode, hammer2_chain_cmp);
+
+#define HAMMER2_CHAIN_MODIFIED         0x00000001      /* this elm modified */
+#define HAMMER2_CHAIN_SUBMODIFIED      0x00000002      /* 1+ subs modified */
+#define HAMMER2_CHAIN_DELETED          0x00000004
+#define HAMMER2_CHAIN_INITIAL          0x00000008      /* initial write */
+
 /*
- * A hammer2 inode.
+ * HAMMER2 IN-MEMORY CACHE OF MEDIA STRUCTURES
  *
- * NOTE: An inode's ref count shares chain.refs.
+ * There is an in-memory representation of all on-media data structure.
+ *
+ * When accessed read-only the data will be mapped to the related buffer
+ * cache buffer.
+ *
+ * When accessed read-write (marked modified) a kmalloc()'d copy of the
+ * is created which can then be modified.  The copy is destroyed when a
+ * filesystem block is allocated to replace it.
+ *
+ * Active inodes (those with vnodes attached) will maintain the kmalloc()'d
+ * copy for both the read-only and the read-write case.  The combination of
+ * (bp) and (data) determines whether (data) was allocated or not.
+ *
+ * The in-memory representation may remain cached (for example in order to
+ * placemark clustering locks) even after the related data has been
+ * detached.
+ */
+
+/*
+ * A hammer2 inode.
  */
 struct hammer2_inode {
        struct hammer2_mount    *hmp;
-       struct lock             lk;
        struct vnode            *vp;
        hammer2_chain_t         chain;
-       hammer2_inode_data_t    data;
+       struct hammer2_inode_data ip_data;
 };
 
 typedef struct hammer2_inode hammer2_inode_t;
@@ -117,28 +164,38 @@ typedef struct hammer2_inode hammer2_inode_t;
  * A hammer2 indirect block
  */
 struct hammer2_indblock {
-       struct buf              *bp;
        hammer2_chain_t         chain;
-       hammer2_indblock_data_t *data;
 };
 
 typedef struct hammer2_indblock hammer2_indblock_t;
 
+#define np_data                chain.data->npdata
+
+/*
+ * A hammer2 data block
+ */
+struct hammer2_data {
+       hammer2_chain_t         chain;
+};
+
+#define dp_data                chain.data->buf
+
+typedef struct hammer2_data hammer2_data_t;
+
 /*
  * Governing mount structure for filesystem (aka vp->v_mount)
  */
 struct hammer2_mount {
        struct mount    *mp;            /* kernel mount */
        struct vnode    *devvp;         /* device vnode */
-       struct lock     lk;
        struct netexport export;        /* nfs export */
        int             ronly;          /* read-only mount */
 
-       struct malloc_type *inodes;
+       struct malloc_type *minode;
        int             ninodes;
        int             maxinodes;
 
-       struct malloc_type *ipstacks;
+       struct malloc_type *mchain;
        int             nipstacks;
        int             maxipstacks;
        hammer2_chain_t vchain;         /* anchor chain */
@@ -205,35 +262,36 @@ void hammer2_inode_drop(hammer2_inode_t *ip);
 /*
  * hammer2_chain.c
  */
+hammer2_chain_t *hammer2_chain_alloc(hammer2_mount_t *hmp,
+                               hammer2_blockref_t *bref);
+void hammer2_chain_free(hammer2_mount_t *hmp, hammer2_chain_t *chain);
 void hammer2_chain_ref(hammer2_mount_t *hmp, hammer2_chain_t *chain);
 void hammer2_chain_drop(hammer2_mount_t *hmp, hammer2_chain_t *chain);
-void hammer2_chain_link(hammer2_mount_t *hmp __unused, hammer2_chain_t *parent,
-                                   int index, hammer2_chain_t *chain);
-void hammer2_chain_unlink(hammer2_mount_t *hmp, hammer2_chain_t *chain);
-
+int hammer2_chain_lock(hammer2_mount_t *hmp, hammer2_chain_t *chain);
+void hammer2_chain_modify(hammer2_mount_t *hmp, hammer2_chain_t *chain);
+void hammer2_chain_unlock(hammer2_mount_t *hmp, hammer2_chain_t *chain);
 hammer2_chain_t *hammer2_chain_get(hammer2_mount_t *hmp,
-                                   hammer2_chain_t *parent, int index,
-                                   hammer2_blockref_t *bref);
+                               hammer2_chain_t *parent, int index);
 void hammer2_chain_put(hammer2_mount_t *hmp, hammer2_chain_t *chain);
-
-hammer2_chain_t *hammer2_chain_push(hammer2_mount_t *hmp,
-                                   hammer2_chain_t *parent,
-                                   hammer2_key_t key);
-
-hammer2_chain_t *hammer2_chain_first(hammer2_mount_t *hmp,
-                                   hammer2_chain_t *parent,
-                                   hammer2_key_t key,
-                                   hammer2_key_t mask);
-
+hammer2_chain_t *hammer2_chain_lookup(hammer2_mount_t *hmp,
+                               hammer2_chain_t **parentp, hammer2_key_t key,
+                               hammer2_key_t mask);
 hammer2_chain_t *hammer2_chain_next(hammer2_mount_t *hmp,
-                                   hammer2_chain_t *current,
-                                   hammer2_key_t key,
-                                   hammer2_key_t mask);
+                               hammer2_chain_t **parentp,
+                               hammer2_chain_t *chain,
+                               hammer2_key_t key, hammer2_key_t mask);
+hammer2_chain_t *hammer2_chain_create(hammer2_mount_t *hmp,
+                               hammer2_chain_t *parent,
+                               hammer2_key_t key, int keybits,
+                               int type, size_t bytes);
+void hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
+                               hammer2_chain_t *chain);
+
 
 /*
  * hammer2_freemap.c
  */
-hammer2_off_t hammer_freemap_alloc(hammer2_mount_t *hmp, size_t bytes);
+hammer2_off_t hammer2_freemap_alloc(hammer2_mount_t *hmp, size_t bytes);
 
 #endif /* !_KERNEL */
 #endif /* !_VFS_HAMMER2_HAMMER2_H_ */
index 17b5997..0e5ff50 100644 (file)
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
+/*
+ * This subsystem handles direct and indirect block searches, recursions,
+ * creation, and deletion.  Chains of blockrefs are tracked and modifications
+ * are flag for propagation... eventually all the way back to the volume
+ * header.
+ */
+
 #include <sys/cdefs.h>
 #include <sys/cdefs.h>
 #include <sys/param.h>
 
 #include "hammer2.h"
 
+SPLAY_GENERATE(hammer2_chain_splay, hammer2_chain, snode, hammer2_chain_cmp);
+
+/*
+ * Compare function for chain splay tree
+ */
+int
+hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2)
+{
+       return(chain2->index - chain1->index);
+}
+
+/*
+ * Allocate a new disconnected chain element representing the specified
+ * bref.
+ *
+ * This essentially allocates a system memory structure representing one
+ * of the media structure types, including inodes.
+ */
+hammer2_chain_t *
+hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_blockref_t *bref)
+{
+       hammer2_chain_t *chain;
+       hammer2_inode_t *ip;
+       hammer2_indblock_t *np;
+       hammer2_data_t *dp;
+
+       /*
+        * Construct the appropriate system structure.
+        */
+       switch(bref->type) {
+       case HAMMER2_BREF_TYPE_INODE:
+               ip = kmalloc(sizeof(*ip), hmp->minode, M_WAITOK | M_ZERO);
+               chain = &ip->chain;
+               chain->u.ip = ip;
+               lockinit(&chain->lk, "inode", 0, LK_CANRECURSE);
+               ip->hmp = hmp;
+               break;
+       case HAMMER2_BREF_TYPE_INDIRECT:
+               np = kmalloc(sizeof(*np), hmp->mchain, M_WAITOK | M_ZERO);
+               chain = &np->chain;
+               chain->u.np = np;
+               lockinit(&chain->lk, "iblk", 0, LK_CANRECURSE);
+               break;
+       case HAMMER2_BREF_TYPE_DATA:
+               dp = kmalloc(sizeof(*dp), hmp->mchain, M_WAITOK | M_ZERO);
+               chain = &dp->chain;
+               chain->u.dp = dp;
+               lockinit(&chain->lk, "dblk", 0, LK_CANRECURSE);
+               break;
+       case HAMMER2_BREF_TYPE_VOLUME:
+               chain = NULL;
+               panic("hammer2_chain_get: volume type illegal for op");
+       default:
+               chain = NULL;
+               panic("hammer2_chain_get: unrecognized blockref type: %d",
+                     bref->type);
+       }
+       chain->bref = *bref;
+       chain->refs = 1;
+
+       return (chain);
+}
+
+/*
+ * Free a disconnected chain element
+ */
+void
+hammer2_chain_free(hammer2_mount_t *hmp, hammer2_chain_t *chain)
+{
+       void *mem;
+
+       KKASSERT(chain->bp == NULL);
+       KKASSERT(chain->data == NULL);
+       KKASSERT(chain->bref.type != HAMMER2_BREF_TYPE_INODE ||
+                chain->u.ip->vp == NULL);
+
+       if ((mem = chain->u.mem) != NULL) {
+               chain->u.mem = NULL;
+               if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
+                       kfree(mem, hmp->minode);
+               else
+                       kfree(mem, hmp->mchain);
+       }
+}
+
 /*
  * Add a reference to a chain element (for shared access).  The chain
- * element must already have at least 1 ref.
+ * element must already have at least 1 ref controlled by the caller.
  */
 void
 hammer2_chain_ref(hammer2_mount_t *hmp, hammer2_chain_t *chain)
@@ -55,342 +147,699 @@ hammer2_chain_ref(hammer2_mount_t *hmp, hammer2_chain_t *chain)
 
 /*
  * Drop the callers reference to the chain element.  If the ref count
- * reaches zero the chain element and any related structure (typically an
- * inode or indirect block) will be freed.
+ * reaches zero the chain element and its related structure (typically an
+ * inode or indirect block) will be freed and the parent will be
+ * recursively dropped.
+ *
+ * Modified elements hold an additional reference so it should not be
+ * possible for the count on a modified element to drop to 0.
+ *
+ * The chain element must NOT be locked by the caller.
  *
- * Keep in mind that hammer2_chain structures are typically directly embedded
- * in major hammer2 memory structures.
+ * The parent might or might not be locked by the caller but if so it
+ * will also be referenced so we shouldn't recurse upward.
  */
 void
 hammer2_chain_drop(hammer2_mount_t *hmp, hammer2_chain_t *chain)
 {
-       if (atomic_fetchadd_int(&chain->refs, -1) == 1) {
-               if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
-                       KKASSERT(chain == &chain->u.ip->chain);
-                       hammer2_inode_free(chain->u.ip);
+       hammer2_chain_t *parent;
+       u_int refs;
+
+       while (chain) {
+               refs = chain->refs;
+               cpu_ccfence();
+               if (refs == 1) {
+                       KKASSERT(chain != &hmp->vchain);
+                       parent = chain->parent;
+                       lockmgr(&parent->lk, LK_EXCLUSIVE);
+                       if (atomic_cmpset_int(&chain->refs, 1, 0)) {
+                               /*
+                                * Succeeded, recurse and drop parent
+                                */
+                               SPLAY_REMOVE(hammer2_chain_splay,
+                                            &parent->shead, chain);
+                               chain->parent = NULL;
+                               lockmgr(&parent->lk, LK_RELEASE);
+                               hammer2_chain_free(hmp, chain);
+                               chain = parent;
+                       }
+               } else {
+                       if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) {
+                               /*
+                                * Succeeded, count did not reach zero so
+                                * cut out of the loop.
+                                */
+                               break;
+                       }
                }
        }
 }
 
-void
-hammer2_chain_link(hammer2_mount_t *hmp __unused, hammer2_chain_t *parent,
-                  int index, hammer2_chain_t *chain)
+/*
+ * Lock a chain element, acquiring its data with I/O if necessary.
+ *
+ * Returns 0 on success or an error code if the data could not be acquired.
+ * The chain element is locked either way.
+ */
+int
+hammer2_chain_lock(hammer2_mount_t *hmp, hammer2_chain_t *chain)
 {
-       chain->parent = parent;
-       chain->index = index;
-       chain->refs = 1;
-       chain->next = parent->subs;
-       parent->subs = chain;
+       hammer2_blockref_t *bref;
+       hammer2_off_t off_hi;
+       size_t off_lo;
+       size_t bytes;
+       int error;
+       void *data;
+
+       /*
+        * Lock the element.  Under certain conditions this might end up
+        * being a recursive lock.
+        */
+       KKASSERT(chain->refs > 0);
+       lockmgr(&chain->lk, LK_EXCLUSIVE);
+
+       /*
+        * The volume header is a special case
+        */
+       if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME)
+               return(0);
+
+       /*
+        * If data is present the element may have been modified.  Assert
+        * the case and return the chain.
+        *
+        * data will point to the inode-embedded copy of hammer2_inode_data
+        * for inodes, whether modified or not.
+        */
+       if (chain->data) {
+               KKASSERT(chain->bp == NULL);
+               return (0);
+       }
+
+       /*
+        * Issue I/O.  If an error occurs we return the error but still
+        * leave the chain locked.
+        */
+       KKASSERT(chain->bp == NULL);
+       bref = &chain->bref;
+
+       off_hi = bref->data_off & HAMMER2_OFF_MASK_HI;
+       off_lo = (size_t)bref->data_off & HAMMER2_OFF_MASK_LO;
+       bytes = HAMMER2_PBUFSIZE;
+       KKASSERT(off_hi != 0);
+       KKASSERT(chain->bp == NULL);
+       error = bread(hmp->devvp, off_hi, bytes, &chain->bp);
+
+       if (error) {
+               kprintf("hammer2_chain_get: I/O error %016jx: %d\n",
+                       (intmax_t)off_hi, error);
+               brelse(chain->bp);
+               chain->bp = NULL;
+               return (error);
+       }
+
+       /*
+        * Extract the data.  Inodes have local embedded copies.  We can
+        * retain the bp for all other types.
+        *
+        * WARNING: Generally speaking we can only retain bp's that represent
+        *          full blocks to avoid potential deadlocks or attempts to
+        *          recursively acquire the same bp.
+        */
+       switch (bref->type) {
+       case HAMMER2_BREF_TYPE_INODE:
+               /*
+                * Copy data from bp to embedded buffer.
+                */
+               bcopy((char *)chain->bp->b_data + off_lo,
+                     &chain->u.ip->ip_data,
+                     HAMMER2_INODE_BYTES);
+               chain->data = (void *)&chain->u.ip->ip_data;
+               brelse(chain->bp);
+               chain->bp = NULL;
+               break;
+       default:
+               /*
+                * Leave bp intact
+                */
+               data = (char *)chain->bp->b_data + off_lo;
+               chain->data = data;
+               break;
+       }
+       return (0);
 }
 
 /*
- * Remove the chain linkage to its parent.  The chain must not have any
- * children.
+ * Convert a locked chain that was retrieved read-only to read-write.
  */
 void
-hammer2_chain_unlink(hammer2_mount_t *hmp __unused, hammer2_chain_t *chain)
+hammer2_chain_modify(hammer2_mount_t *hmp, hammer2_chain_t *chain)
 {
-       hammer2_chain_t **scanp;
-       hammer2_chain_t *scan;
-
-       KKASSERT(chain->subs == NULL);
-       if (chain->parent) {
-               scanp = &chain->parent->subs;
-               while ((scan = *scanp) != NULL) {
-                       if (scan == chain)
-                               break;
+       hammer2_chain_t *parent;
+       size_t off_lo;
+       size_t bytes;
+       void *data;
+
+       /*
+        * Nothing to do if it is already RW
+        */
+       if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
+               KKASSERT(chain->bp == NULL);
+               KKASSERT(chain->data != NULL);
+               return;
+       }
+
+       /*
+        * Create an allocated copy of the data if it is not embedded.
+        */
+       if (chain->data == NULL) {
+               /*
+                * Allocate in-memory data and copy from the read-only bp
+                */
+               KKASSERT(chain->bp != NULL);
+               off_lo = (size_t)chain->bref.data_off & HAMMER2_OFF_MASK_LO;
+               data = (char *)chain->bp->b_data + off_lo;
+
+               bytes = 1 << (int)(chain->bref.data_off &
+                                  HAMMER2_OFF_MASK_RADIX);
+               chain->data = kmalloc(bytes, hmp->mchain, M_WAITOK | M_ZERO);
+               bcopy(data, chain->data, bytes);
+               brelse(chain->bp);
+               chain->bp = NULL;
+
+               /*
+                * Assert correctness to prevent system memory corruption.
+                */
+               switch(chain->bref.type) {
+               case HAMMER2_BREF_TYPE_INODE:
+                       KKASSERT(bytes == sizeof(hammer2_inode_data_t));
+                       break;
+               case HAMMER2_BREF_TYPE_INDIRECT:
+                       KKASSERT(bytes == sizeof(hammer2_indblock_data_t));
+                       break;
+               default:
+                       /* do nothing */
+                       break;
                }
-               KKASSERT(scan == chain);
-               *scanp = scan->next;
-               chain->parent = NULL;
+       }
+
+       /*
+        * Mark modified
+        */
+       atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
+       parent = chain->parent;
+       while (parent && (parent->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) {
+               atomic_set_int(&parent->flags, HAMMER2_CHAIN_SUBMODIFIED);
+               parent = parent->parent;
        }
 }
 
 /*
- * Find or allocate a chain element representing a blockref.
+ * Unlock a chain element without dropping its reference count.
+ * (see hammer2_chain_put() to do both).
  *
- * The blockref at (parent, index) representing (bref) is located or
- * created and returned.  I/O is issued to read the contents if necessary.
+ * Any read-only data reference (bp present) is cleaned out.  If the
+ * chain was modified the writable copy of the data sticks around.
+ */
+void
+hammer2_chain_unlock(hammer2_mount_t *hmp, hammer2_chain_t *chain)
+{
+       if (chain->bp) {
+               chain->data = NULL;
+               brelse(chain->bp);
+               chain->bp = NULL;
+       }
+       lockmgr(&chain->lk, LK_RELEASE);
+}
+
+/*
+ * Return a locked chain structure with all associated data acquired.
+ *
+ * If this is a read-only request the data may wind up pointing into a
+ * buffer cache buffer.  If read-write a kmalloc()'d copy of the data
+ * will be made and the buffer released, and the chain will be marked
+ * modified.
  *
- * XXX use RB-Tree instead of linked list
+ * Caller must lock the parent on call, the returned child will be locked.
  */
 hammer2_chain_t *
-hammer2_chain_get(hammer2_mount_t *hmp, hammer2_chain_t *parent,
-                 int index, hammer2_blockref_t *bref)
+hammer2_chain_get(hammer2_mount_t *hmp, hammer2_chain_t *parent, int index)
 {
-       struct buf *bp;
-       void *data;
+       hammer2_blockref_t *bref;
        hammer2_chain_t *chain;
-       hammer2_inode_t *ip;
-       hammer2_off_t dblock;
-       int off;
-       int error;
-
-       /*
-        * Look for a cached chain element
-        */
-       for (chain = parent->subs; chain; chain = chain->next) {
-               if (chain->index == index) {
-                       atomic_add_int(&chain->refs, 1);
-                       return (chain);
-               }
-       }
+       hammer2_chain_t dummy;
 
        /*
-        * Issue I/O to read the underlying object
+        * First see if we have a (possibly modified) chain element cached
+        * for this (parent, index).  Acquire the data if necessary.
+        *
+        * If chain->data is non-NULL the chain should already be marked
+        * modified.
         */
-       dblock = bref->data_off & HAMMER2_OFF_MASK_HI;
-       off = (int)bref->data_off & HAMMER2_OFF_MASK_LO;
-       KKASSERT(dblock != 0);
-
-       error = bread(hmp->devvp, dblock, HAMMER2_PBUFSIZE, &bp);
-       if (error) {
-               kprintf("hammer2_chain_get: I/O error %016jx: %d\n",
-                       (intmax_t)dblock, error);
-               brelse(bp);
-               return(NULL);
+       dummy.index = index;
+       chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead, &dummy);
+       if (chain) {
+               hammer2_chain_ref(hmp, chain);
+               hammer2_chain_lock(hmp, chain);
+               return(chain);
        }
-       data = (char *)bp->b_data + off;
 
        /*
-        * Construct the appropriate system structure.
+        * Otherwise lookup the bref and issue I/O
         */
-       switch(bref->type) {
+       switch(parent->bref.type) {
        case HAMMER2_BREF_TYPE_INODE:
-               ip = hammer2_inode_alloc(hmp, data);
-               chain = &ip->chain;
-               chain->bref = *bref;
-               chain->u.ip = ip;
-               hammer2_chain_link(hmp, parent, index, chain);
-               kprintf("found inode %jd\n", (intmax_t)ip->data.inum);
-               /* hammer2_chain_unbusy */
+               KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
+               bref = &parent->data->ipdata.u.blockset.blockref[index];
                break;
        case HAMMER2_BREF_TYPE_INDIRECT:
-               kprintf("hammer2_chain_get: indirect (not impl yet)\n");
-               break;
-       case HAMMER2_BREF_TYPE_DATA:
-               panic("hammer2_chain_get: data type illegal for op");
+               KKASSERT(index >= 0 && index < HAMMER2_IND_COUNT);
+               bref = &parent->data->npdata.blockref[index];
                break;
        case HAMMER2_BREF_TYPE_VOLUME:
-               panic("hammer2_chain_get: volume type illegal for op");
+               KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT);
+               bref = &hmp->voldata.sroot_blockset.blockref[index];
                break;
        default:
+               bref = NULL;
                panic("hammer2_chain_get: unrecognized blockref type: %d",
-                     bref->type);
+                     parent->bref.type);
        }
-       brelse(bp);
+       chain = hammer2_chain_alloc(hmp, bref);
+       lockmgr(&chain->lk, LK_EXCLUSIVE);
+
+       /*
+        * Link the chain into its parent.  Caller is expected to hold an
+        * exclusive lock on the parent.
+        */
+       chain->parent = parent;
+       chain->index = index;
+       if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain))
+               panic("hammer2_chain_link: collision");
+       KKASSERT(parent->refs > 1);
+       atomic_add_int(&parent->refs, 1);
+
+       /*
+        * Our new chain structure has already been referenced and locked
+        * but the lock code handles the I/O so call it to resolve the data.
+        * Then release one of our two exclusive locks.
+        */
+       hammer2_chain_lock(hmp, chain);
+       lockmgr(&chain->lk, LK_RELEASE);
+       /* still locked */
 
        return (chain);
 }
 
+/*
+ * Unlock and dereference a chain after use.  It is possible for this to
+ * recurse up the chain.
+ */
 void
 hammer2_chain_put(hammer2_mount_t *hmp, hammer2_chain_t *chain)
 {
+       hammer2_chain_unlock(hmp, chain);
        hammer2_chain_drop(hmp, chain);
 }
 
 /*
- * Locate the exact key relative to the parent chain.
+ * Locate the first masked key relative to the parent chain in (*parentp).
+ * The mask indicates which bits must match and should be set to -1 if an
+ * exact match is desired.
+ *
+ * (*parentp) must be exclusively locked and referenced and can be an inode
+ * or an existing indirect block within the inode.
+ *
+ * On return (*parentp) will be modified to point at the deepest parent chain
+ * element encountered during the search, as a helper for an insertion or
+ * deletion.   The new (*parentp) will be locked and referenced and the old
+ * will be unlocked and dereferenced (no change if they are both the same).
+ *
+ * The matching chain will be returned exclusively locked and referenced.
+ *
+ * NULL is returned if no match was found, but (*parentp) will still
+ * potentially be adjusted.
+ *
+ * This function will also recurse up the chain if the key is not within the
+ * current parent's range.  (*parentp) can never be set to NULL.  An iteration
+ * can simply allow (*parentp) to float inside the loop.
  */
 hammer2_chain_t *
-hammer2_chain_push(hammer2_mount_t *hmp, hammer2_chain_t *parent,
-                  hammer2_key_t key)
+hammer2_chain_lookup(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
+                    hammer2_key_t key, hammer2_key_t mask)
 {
+       hammer2_chain_t *parent;
        hammer2_chain_t *chain;
+       hammer2_blockref_t *base;
        hammer2_blockref_t *bref;
-       hammer2_blockset_t *bset;
+       hammer2_key_t pmask;
        int count = 0;
-       int n;
        int i;
 
-       chain = NULL;
-       bset = NULL;
+       key &= mask;
 
        /*
-        * Locate the blockset(s) to search.  Currently we do a fully
-        * associative search across all blocksets instead of seeking to
-        * a blockset and doing a fully associative search inside one set.
+        * Recurse (*parentp) upward if necessary if the key is outside the
+        * range of the current indirect block.
+        */
+       parent = *parentp;
+       while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
+               pmask = ~(((hammer2_key_t)1 << parent->bref.keybits) - 1);
+               if (((parent->bref.key ^ key) & pmask & mask) == 0)
+                       break;
+               hammer2_chain_unlock(hmp, parent);
+               parent = parent->parent;
+               hammer2_chain_ref(hmp, parent);         /* ref new parent */
+               hammer2_chain_lock(hmp, parent);        /* lock new parent */
+               hammer2_chain_drop(hmp, *parentp);      /* drop old parent */
+               *parentp = parent;                      /* new parent */
+       }
+
+again:
+       /*
+        * Locate the blockref array.  Currently we do a fully associative
+        * search through the array.
         */
        switch(parent->bref.type) {
        case HAMMER2_BREF_TYPE_INODE:
-               bset = &parent->u.ip->data.u.blockset;
-               count = 1;
+               base = &parent->data->ipdata.u.blockset.blockref[0];
+               count = HAMMER2_SET_COUNT;
                break;
        case HAMMER2_BREF_TYPE_INDIRECT:
-               bset = &parent->u.ind->data->blocksets[0];
-               count = HAMMER2_IND_COUNT / HAMMER2_SET_COUNT;
-               break;
-       case HAMMER2_BREF_TYPE_DATA:
+               base = &parent->data->npdata.blockref[0];
+               count = HAMMER2_IND_COUNT;
                break;
        case HAMMER2_BREF_TYPE_VOLUME:
-               bset = &hmp->voldata.sroot_blockset;
-               count = 1;
+               base = &hmp->voldata.sroot_blockset.blockref[0];
+               count = HAMMER2_SET_COUNT;
                break;
        default:
                panic("hammer2_chain_push: unrecognized blockref type: %d",
                      parent->bref.type);
+               base = NULL;    /* safety */
+               count = 0;      /* safety */
        }
 
-       for (n = 0; n < count; ++n) {
-               bref = NULL;
-               for (i = 0; i < HAMMER2_SET_COUNT; ++i) {
-                       bref = &bset->blockref[i];
-                       if (bref->key == key &&
-                           bref->keybits == 0 &&
-                           bref->type != 0) {
-                               break;
-                       }
-               }
-               if (i != HAMMER2_SET_COUNT) {
-                       chain = hammer2_chain_get(hmp, parent,
-                                                 n * HAMMER2_SET_COUNT + i,
-                                                 bref);
+       /*
+        * Look for the key.  If we are unable to find a match and an exact
+        * match was requested we return NULL.  If a range was requested we
+        * run hammer2_chain_next() to iterate.
+        */
+       bref = NULL;
+       for (i = 0; i < count; ++i) {
+               bref = &base[i];
+               pmask = ~(((hammer2_key_t)1 << bref->keybits) - 1);
+               if (((bref->key ^ key) & pmask & mask) == 0 && bref->type != 0)
                        break;
-               }
-               ++bset;
        }
+       if (i == count) {
+               if (mask == (hammer2_key_t)-1)
+                       return (NULL);
+               return (hammer2_chain_next(hmp, parentp, NULL, key, mask));
+       }
+
+       /*
+        * Acquire the new chain element.  If the chain element is an
+        * indirect block we must search recursively.
+        */
+       chain = hammer2_chain_get(hmp, parent, i);
+       if (chain == NULL)
+               return (NULL);
+
+       /*
+        * If the chain element is an indirect block it becomes the new
+        * parent and we loop on it.
+        */
+       if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
+               hammer2_chain_put(hmp, parent);
+               *parentp = parent = chain;
+               goto again;
+       }
+
+       /*
+        * All done, return chain
+        */
        return (chain);
 }
 
 /*
- * Initiate a ranged search, locating the first element matching (key & ~mask).
- * That is, the passed mask represents the bits we wish to ignore.
+ * After having issued a lookup we can iterate all matching keys.
+ *
+ * If chain is non-NULL we continue the iteration from just after it's index.
+ *
+ * If chain is NULL we assume the parent was exhausted and continue the
+ * iteration at the next parent.
  */
 hammer2_chain_t *
-hammer2_chain_first(hammer2_mount_t *hmp, hammer2_chain_t *parent,
-                   hammer2_key_t key, hammer2_key_t mask)
+hammer2_chain_next(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
+                  hammer2_chain_t *chain,
+                  hammer2_key_t key, hammer2_key_t mask)
 {
-       hammer2_chain_t *chain;
+       hammer2_chain_t *parent;
+       hammer2_blockref_t *base;
        hammer2_blockref_t *bref;
-       hammer2_blockset_t *bset;
-       int count = 0;
-       int n;
+       hammer2_key_t pmask;
        int i;
+       int count;
+
+       parent = *parentp;
 
-       chain = NULL;
-       bset = NULL;
+again:
+       /*
+        * Calculate the next index and recalculate the parent if necessary.
+        */
+       if (chain) {
+               /*
+                * Continue iteration within current parent
+                */
+               i = chain->index + 1;
+               hammer2_chain_put(hmp, chain);
+               chain = NULL;
+       } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT) {
+               /*
+                * We reached the end of the iteration.
+                */
+               return (NULL);
+       } else {
+               /*
+                * Continue iteration with next parent
+                */
+               if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT)
+                       return (NULL);
+               i = parent->index + 1;
+               hammer2_chain_unlock(hmp, parent);
+               parent = parent->parent;
+               hammer2_chain_ref(hmp, parent);         /* ref new parent */
+               hammer2_chain_lock(hmp, parent);        /* lock new parent */
+               hammer2_chain_drop(hmp, *parentp);      /* drop old parent */
+               *parentp = parent;                      /* new parent */
+       }
 
+again2:
        /*
-        * Locate the blockset(s) to search.  Currently we do a fully
-        * associative search across all blocksets instead of seeking to
-        * a blockset and doing a fully associative search inside one set.
+        * Locate the blockref array.  Currently we do a fully associative
+        * search through the array.
         */
        switch(parent->bref.type) {
        case HAMMER2_BREF_TYPE_INODE:
-               bset = &parent->u.ip->data.u.blockset;
-               count = 1;
+               base = &parent->data->ipdata.u.blockset.blockref[0];
+               count = HAMMER2_SET_COUNT;
                break;
        case HAMMER2_BREF_TYPE_INDIRECT:
-               bset = &parent->u.ind->data->blocksets[0];
-               count = HAMMER2_IND_COUNT / HAMMER2_SET_COUNT;
-               break;
-       case HAMMER2_BREF_TYPE_DATA:
+               base = &parent->data->npdata.blockref[0];
+               count = HAMMER2_IND_COUNT;
                break;
        case HAMMER2_BREF_TYPE_VOLUME:
-               bset = &hmp->voldata.sroot_blockset;
-               count = 1;
+               base = &hmp->voldata.sroot_blockset.blockref[0];
+               count = HAMMER2_SET_COUNT;
                break;
        default:
-               panic("hammer2_chain_first: unrecognized blockref type: %d",
+               panic("hammer2_chain_push: unrecognized blockref type: %d",
                      parent->bref.type);
+               base = NULL;    /* safety */
+               count = 0;      /* safety */
+               break;
        }
-       kprintf("first key %016jx %016jx\n", (intmax_t)key, (intmax_t)mask);
+       KKASSERT(i <= count);
 
-       for (n = 0; n < count; ++n) {
-               bref = NULL;
-               for (i = 0; i < HAMMER2_SET_COUNT; ++i) {
-                       bref = &bset->blockref[i];
-                       kprintf("test %016jx\n", bref->key);
-                       if ((bref->key & ~((1ULL << bref->keybits) - 1) & ~mask)
-                           == (key & ~mask) &&
-                           bref->type != 0) {
-                               break;
-                       }
-               }
-               if (i != HAMMER2_SET_COUNT) {
-                       chain = hammer2_chain_get(hmp, parent,
-                                                 n * HAMMER2_SET_COUNT + i,
-                                                 bref);
+       /*
+        * Look for the key.  If we are unable to find a match and an exact
+        * match was requested we return NULL.  If a range was requested we
+        * run hammer2_chain_next() to iterate.
+        */
+       bref = NULL;
+       while (i < count) {
+               bref = &base[i];
+               pmask = ~(((hammer2_key_t)1 << bref->keybits) - 1);
+               if (((bref->key ^ key) & pmask & mask) == 0 && bref->type != 0)
                        break;
-               }
-               ++bset;
+               ++i;
        }
+
+       /*
+        * If we couldn't find a match recurse up a parent to continue the
+        * search.
+        */
+       if (i == count)
+               goto again;
+
+       /*
+        * Acquire the new chain element.  If the chain element is an
+        * indirect block we must search recursively.
+        */
+       chain = hammer2_chain_get(hmp, parent, i);
+       if (chain == NULL)
+               return (NULL);
+
+       /*
+        * If the chain element is an indirect block it becomes the new
+        * parent and we loop on it.
+        */
+       if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
+               hammer2_chain_put(hmp, parent);
+               *parentp = parent = chain;
+               i = 0;
+               goto again2;
+       }
+
+       /*
+        * All done, return chain
+        */
        return (chain);
 }
 
 /*
- * Locate the next element matching (key & ~mask) occuring after the current
- * element.
+ * Create and return a new hammer2 system memory structure of the specified
+ * key, type and size and insert it under (parent).  (parent) is typically
+ * acquired as a side effect of issuing a prior lookup.  parent must be locked
+ * and held.
+ *
+ * Non-indirect types will automatically allocate indirect blocks as required
+ * if the new item does not fit in the current (parent).
+ *
+ * Indirect types will move a portion of the existing blockref array in
+ * (parent) into the new indirect type and then use one of the free slots
+ * to emplace the new indirect type.
+ *
+ * A new locked, referenced chain element is returned of the specified type.
+ * This element will also be marked as modified and contain a kmalloc()'d
+ * pre-zero'd data area that is ready for initialization.
  */
 hammer2_chain_t *
-hammer2_chain_next(hammer2_mount_t *hmp, hammer2_chain_t *current,
-                  hammer2_key_t key, hammer2_key_t mask)
+hammer2_chain_create(hammer2_mount_t *hmp, hammer2_chain_t *parent,
+                    hammer2_key_t key, int keybits, int type, size_t bytes)
 {
-       hammer2_chain_t *parent;
-       hammer2_chain_t *chain;
+       hammer2_blockref_t dummy;
+       hammer2_blockref_t *base;
        hammer2_blockref_t *bref;
-       hammer2_blockset_t *bset;
-       int count = 0;
-       int n;
+       hammer2_chain_t *chain;
+       int count;
        int i;
 
-       chain = NULL;
-       bset = NULL;
+       /*
+        * First allocate media space and construct the dummy bref, then
+        * allocate the in-memory chain structure.
+        */
+       bzero(&dummy, sizeof(dummy));
+       dummy.type = type;
+       dummy.key = key;
+       dummy.keybits = keybits;
+       dummy.data_off = hammer2_freemap_alloc(hmp, bytes);
+       chain = hammer2_chain_alloc(hmp, &dummy);
 
        /*
-        * Locate the blockset(s) to search.  Currently we do a fully
-        * associative search across all blocksets instead of seeking to
-        * a blockset and doing a fully associative search inside one set.
+        * Recalculate bytes to reflect the actual media block allocation,
+        * then allocate the local memory copy.  This is a new structure
+        * so no I/O is performed.
         */
-       parent = current->parent;
+       bytes = (hammer2_off_t)1 <<
+               (int)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX);
+       switch(type) {
+       case HAMMER2_BREF_TYPE_INODE:
+               KKASSERT(bytes == HAMMER2_INODE_BYTES);
+               chain->data = (void *)&chain->u.ip->ip_data;
+               break;
+       default:
+               chain->data = kmalloc(bytes, hmp->mchain, M_WAITOK | M_ZERO);
+               break;
+       }
+       chain->flags |= HAMMER2_CHAIN_INITIAL;
 
+       /*
+        * Indicate that the parent is being modified before we dive
+        * the parent's media data (since this will relocate the media
+        * data pointer).
+        */
+       hammer2_chain_modify(hmp, parent);
+
+       /*
+        * Locate a free blockref in the parent's array
+        */
        switch(parent->bref.type) {
        case HAMMER2_BREF_TYPE_INODE:
-               bset = &parent->u.ip->data.u.blockset;
-               count = 1;
+               base = &parent->data->ipdata.u.blockset.blockref[0];
+               count = HAMMER2_SET_COUNT;
                break;
        case HAMMER2_BREF_TYPE_INDIRECT:
-               bset = &parent->u.ind->data->blocksets[0];
-               count = HAMMER2_IND_COUNT / HAMMER2_SET_COUNT;
-               break;
-       case HAMMER2_BREF_TYPE_DATA:
+               base = &parent->data->npdata.blockref[0];
+               count = HAMMER2_IND_COUNT;
                break;
        case HAMMER2_BREF_TYPE_VOLUME:
-               bset = &hmp->voldata.sroot_blockset;
-               count = 1;
+               base = &hmp->voldata.sroot_blockset.blockref[0];
+               count = HAMMER2_SET_COUNT;
                break;
        default:
-               panic("hammer2_chain_next: unrecognized blockref type: %d",
+               panic("hammer2_chain_push: unrecognized blockref type: %d",
                      parent->bref.type);
+               count = 0;
+               break;
        }
 
-       kprintf("next key %016jx %016jx\n", (intmax_t)key, (intmax_t)mask);
-
-       n = current->index / HAMMER2_SET_COUNT;
-       i = current->index % HAMMER2_SET_COUNT;
-       bset += n;
-
-       while (n < count) {
-               bref = NULL;
-               while (i < HAMMER2_SET_COUNT) {
-                       bref = &bset->blockref[i];
-                       kprintf("test %016jx\n", bref->key);
-                       if ((bref->key & ~((1ULL << bref->keybits) - 1) & ~mask)
-                           == (key & ~mask) &&
-                           bref->type != 0) {
-                               break;
-                       }
-                       ++i;
-               }
-               if (i != HAMMER2_SET_COUNT) {
-                       chain = hammer2_chain_get(hmp, parent,
-                                                 n * HAMMER2_SET_COUNT + i,
-                                                 bref);
+       bref = NULL;
+       for (i = 0; i < count; ++i) {
+               bref = &base[i];
+               if (bref->type == 0)
                        break;
-               }
-               ++bset;
-               ++n;
-               i = 0;
        }
+
+       /*
+        * If no free blockref count be found, fail us for now
+        */
+       if (i == count) {
+               hammer2_chain_free(hmp, chain);
+               return (NULL);
+       }
+
+       /*
+        * Link the chain into its parent and load the blockref slot in
+        * the parent with a pointer to the data allocation.
+        */
+       chain->parent = parent;
+       chain->index = i;
+       if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain))
+               panic("hammer2_chain_link: collision");
+       KKASSERT(parent->refs > 1);
+       atomic_add_int(&parent->refs, 1);
+       base[i] = chain->bref;
+
        return (chain);
 }
+
+/*
+ * Physically delete the specified chain element.  Note that inodes with
+ * open descriptors should not be deleted (as with other filesystems) until
+ * the last open descriptor is closed.
+ *
+ * This routine will remove the chain element from its parent and potentially
+ * also recurse upward and delete indirect blocks which become empty as a
+ * side effect.
+ *
+ * The caller must pass a pointer to the chain's parent, also locked and
+ * referenced.  (*parentp) will be modified in a manner similar to a lookup
+ * or iteration when indirect blocks are also deleted as a side effect.
+ */
+void
+hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t **parentp,
+                    hammer2_chain_t *chain)
+{
+}
index a6ca634..57317e3 100644 (file)
@@ -193,7 +193,8 @@ typedef uint32_t hammer2_crc32_t;
  * HAMMER2 directory support and pre-defined keys
  */
 #define HAMMER2_DIRHASH_VISIBLE        0x8000000000000000ULL
-#define HAMMER2_DIRHASH_LOMASK 0x00000000FFFFFFFFULL
+#define HAMMER2_DIRHASH_LOMASK 0x000000000000FFFFULL
+#define HAMMER2_DIRHASH_HIMASK 0xFFFFFFFFFFFF0000ULL
 
 #define HAMMER2_SROOT_KEY      0x0000000000000000ULL   /* volume to sroot */
 
@@ -320,7 +321,7 @@ typedef struct hammer2_blockset hammer2_blockset_t;
  * The media indirect block structure.
  */
 struct hammer2_indblock_data {
-       hammer2_blockset_t blocksets[HAMMER2_IND_COUNT / HAMMER2_SET_COUNT];
+       hammer2_blockref_t blockref[HAMMER2_IND_COUNT];
 };
 
 typedef struct hammer2_indblock_data hammer2_indblock_data_t;
@@ -707,6 +708,14 @@ typedef struct hammer2_volume_data hammer2_volume_data_t;
 
 #define HAMMER2_NUM_VOLHDRS            4
 
+union hammer2_media_data {
+        hammer2_inode_data_t    ipdata;
+       hammer2_indblock_data_t npdata;
+       char                    buf[HAMMER2_PBUFSIZE];
+};
+
+typedef union hammer2_media_data hammer2_media_data_t;
+
 /*
  * Prototypes for user & kernel functions.  Kernel-only prototypes are
  * elsewhere.
diff --git a/sys/vfs/hammer2/hammer2_freemap.c b/sys/vfs/hammer2/hammer2_freemap.c
new file mode 100644 (file)
index 0000000..d6757f3
--- /dev/null
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/fcntl.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/namei.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/mountctl.h>
+
+#include "hammer2.h"
+
+/*
+ * Allocate media space, returning a combined data offset and radix.
+ */
+hammer2_off_t
+hammer2_freemap_alloc(hammer2_mount_t *hmp, size_t bytes)
+{
+       hammer2_off_t data_off;
+       hammer2_off_t data_next;
+       int radix;
+
+       /*
+        * Figure out the base 2 radix of the allocation (rounded up)
+        */
+       if (bytes < HAMMER2_MIN_ALLOC)
+               bytes = HAMMER2_MIN_ALLOC;
+       if (bytes == HAMMER2_PBUFSIZE)
+               radix = HAMMER2_PBUFRADIX;
+       else if (bytes >= 1024)
+               radix = 10;
+       else
+               radix = HAMMER2_MIN_RADIX;
+
+       while (((size_t)1 << radix) < bytes)
+               ++radix;
+       bytes = 1 << radix;
+
+       if (radix < HAMMER2_MAX_RADIX && hmp->freecache[radix]) {
+               /*
+                * Allocate from our packing cache
+                */
+               data_off = hmp->freecache[radix];
+               hmp->freecache[radix] += bytes;
+               if ((hmp->freecache[radix] & HAMMER2_PBUFMASK) == 0)
+                       hmp->freecache[radix] = 0;
+       } else {
+               /*
+                * Allocate from the allocation iterator using a PBUFSIZE
+                * aligned block and reload the packing cache if possible.
+                */
+               data_off = hmp->voldata.allocator_beg;
+               data_off = (data_off + HAMMER2_PBUFMASK64) &
+                          ~HAMMER2_PBUFMASK64;
+               data_next = data_off + bytes;
+
+               if ((data_next & HAMMER2_PBUFMASK) == 0) {
+                       hmp->voldata.allocator_beg = data_next;
+               } else {
+                       KKASSERT(radix < HAMMER2_MAX_RADIX);
+                       hmp->voldata.allocator_beg =
+                                       (data_next + HAMMER2_PBUFMASK64) &
+                                       ~HAMMER2_PBUFMASK64;
+                       hmp->freecache[radix] = data_next;
+               }
+       }
+       kprintf("hammer2: allocate %016jx: %zd\n", (intmax_t)data_off, bytes);
+       return (data_off | radix);
+}
+
+#if 0
+/*
+ * Allocate media space, returning a combined data offset and radix.
+ * Also return the related (device) buffer cache buffer.
+ */
+hammer2_off_t
+hammer2_freemap_alloc_bp(hammer2_mount_t *hmp, size_t bytes, struct buf **bpp)
+{
+}
+
+#endif
index 9466663..eaa55b0 100644 (file)
@@ -49,8 +49,7 @@
 void
 hammer2_inode_ref(hammer2_inode_t *ip)
 {
-       KKASSERT(ip->chain.refs > 0);
-       atomic_add_int(&ip->chain.refs, 1);
+       hammer2_chain_ref(ip->hmp, &ip->chain);
 }
 
 /*
@@ -60,8 +59,7 @@ hammer2_inode_ref(hammer2_inode_t *ip)
 void
 hammer2_inode_drop(hammer2_inode_t *ip)
 {
-       if (atomic_fetchadd_int(&ip->chain.refs, -1) == 1)
-               hammer2_inode_free(ip);
+       hammer2_chain_drop(ip->hmp, &ip->chain);
 }
 
 /*
@@ -145,18 +143,19 @@ hammer2_igetv(hammer2_inode_t *ip, int *errorp)
                }
 
                kprintf("igetv new\n");
-               switch (ip->data.type) {
+               switch (ip->ip_data.type) {
                case HAMMER2_OBJTYPE_DIRECTORY:
                        vp->v_type = VDIR;
                        break;
                case HAMMER2_OBJTYPE_REGFILE:
                        vp->v_type = VREG;
                        vinitvmio(vp, 0, HAMMER2_LBUFSIZE,
-                                 (int)ip->data.size & HAMMER2_LBUFMASK);
+                                 (int)ip->ip_data.size & HAMMER2_LBUFMASK);
                        break;
                /* XXX FIFO */
                default:
-                       panic("hammer2: unhandled objtype %d", ip->data.type);
+                       panic("hammer2: unhandled objtype %d",
+                             ip->ip_data.type);
                        break;
                }
 
@@ -177,6 +176,7 @@ hammer2_igetv(hammer2_inode_t *ip, int *errorp)
        return (vp);
 }
 
+#if 0
 /*
  * Allocate a HAMMER2 inode memory structure.
  *
@@ -220,3 +220,5 @@ hammer2_inode_free(hammer2_inode_t *ip)
 
        kfree(ip, hmp->inodes);
 }
+
+#endif
index efa7011..d7c7574 100644 (file)
 void
 hammer2_inode_lock_sh(hammer2_inode_t *ip)
 {
-       lockmgr(&ip->lk, LK_SHARED);
+       lockmgr(&ip->chain.lk, LK_SHARED);
 }
 
 void
 hammer2_inode_lock_up(hammer2_inode_t *ip)
 {
-       lockmgr(&ip->lk, LK_EXCLUSIVE);
+       lockmgr(&ip->chain.lk, LK_EXCLUSIVE);
        ++ip->chain.busy;
-       lockmgr(&ip->lk, LK_DOWNGRADE);
+       lockmgr(&ip->chain.lk, LK_DOWNGRADE);
 }
 
 void
 hammer2_inode_lock_ex(hammer2_inode_t *ip)
 {
-       lockmgr(&ip->lk, LK_EXCLUSIVE);
+       lockmgr(&ip->chain.lk, LK_EXCLUSIVE);
 }
 
 void
 hammer2_inode_unlock_ex(hammer2_inode_t *ip)
 {
-       lockmgr(&ip->lk, LK_RELEASE);
+       lockmgr(&ip->chain.lk, LK_RELEASE);
 }
 
 void
 hammer2_inode_unlock_up(hammer2_inode_t *ip)
 {
-       lockmgr(&ip->lk, LK_UPGRADE);
+       lockmgr(&ip->chain.lk, LK_UPGRADE);
        --ip->chain.busy;
-       lockmgr(&ip->lk, LK_RELEASE);
+       lockmgr(&ip->chain.lk, LK_RELEASE);
 }
 
 void
 hammer2_inode_unlock_sh(hammer2_inode_t *ip)
 {
-       lockmgr(&ip->lk, LK_RELEASE);
+       lockmgr(&ip->chain.lk, LK_RELEASE);
 }
 
 /*
@@ -107,19 +107,19 @@ hammer2_inode_unlock_sh(hammer2_inode_t *ip)
 void
 hammer2_mount_exlock(hammer2_mount_t *hmp)
 {
-       lockmgr(&hmp->lk, LK_EXCLUSIVE);
+       lockmgr(&hmp->vchain.lk, LK_EXCLUSIVE);
 }
 
 void
 hammer2_mount_shlock(hammer2_mount_t *hmp)
 {
-       lockmgr(&hmp->lk, LK_SHARED);
+       lockmgr(&hmp->vchain.lk, LK_SHARED);
 }
 
 void
 hammer2_mount_unlock(hammer2_mount_t *hmp)
 {
-       lockmgr(&hmp->lk, LK_RELEASE);
+       lockmgr(&hmp->vchain.lk, LK_RELEASE);
 }
 
 /*
index f7f8deb..be9468f 100644 (file)
@@ -143,6 +143,7 @@ hammer2_mount(struct mount *mp, char *path, caddr_t data,
        hammer2_key_t lhc;
        struct vnode *devvp;
        struct nlookupdata nd;
+       hammer2_chain_t *parent;
        hammer2_chain_t *schain;
        hammer2_chain_t *rchain;
        char devstr[MNAMELEN];
@@ -248,14 +249,20 @@ hammer2_mount(struct mount *mp, char *path, caddr_t data,
        hmp->mp = mp;
        hmp->ronly = ronly;
        hmp->devvp = devvp;
-       lockinit(&hmp->lk, "h2mp", 0, 0);
-       kmalloc_create(&hmp->inodes, "HAMMER2-inodes");
-       kmalloc_create(&hmp->ipstacks, "HAMMER2-ipstacks");
+       kmalloc_create(&hmp->minode, "HAMMER2-inodes");
+       kmalloc_create(&hmp->mchain, "HAMMER2-chains");
        
        mp->mnt_flag = MNT_LOCAL;
        mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;   /* all entry pts are SMP */
 
+       /*
+        * vchain setup. vchain.data is special cased to NULL.  vchain.refs
+        * is initialized and will never drop to 0.
+        */
        hmp->vchain.bref.type = HAMMER2_BREF_TYPE_VOLUME;
+       hmp->vchain.refs = 1;
+       /* hmp->vchain.data is special cased to NULL */
+       lockinit(&hmp->vchain.lk, "volume", 0, LK_CANRECURSE);
 
        /*
         * Install the volume header
@@ -283,33 +290,43 @@ hammer2_mount(struct mount *mp, char *path, caddr_t data,
         * represented by the label.
         */
        lhc = hammer2_dirhash(label, strlen(label));
-       schain = hammer2_chain_push(hmp, &hmp->vchain, HAMMER2_SROOT_KEY);
+       parent = &hmp->vchain;
+       hammer2_chain_ref(hmp, parent);
+       hammer2_chain_lock(hmp, parent);
+       schain = hammer2_chain_lookup(hmp, &parent,
+                                     HAMMER2_SROOT_KEY, (hammer2_key_t)-1);
+       hammer2_chain_put(hmp, parent);
        if (schain == NULL) {
                kprintf("hammer2_mount: invalid super-root\n");
                hammer2_unmount(mp, MNT_FORCE);
                return EINVAL;
        }
-       rchain = hammer2_chain_first(hmp, schain, lhc, HAMMER2_DIRHASH_LOMASK);
+
+       parent = schain;
+       hammer2_chain_ref(hmp, parent);
+       rchain = hammer2_chain_lookup(hmp, &parent,
+                                     lhc, HAMMER2_DIRHASH_HIMASK);
        while (rchain) {
                if (rchain->bref.type == HAMMER2_BREF_TYPE_INODE &&
                    rchain->u.ip &&
-                   strcmp(label, rchain->u.ip->data.filename) == 0) {
+                   strcmp(label, rchain->data->ipdata.filename) == 0) {
                        break;
                }
-               rchain = hammer2_chain_next(hmp, rchain,
-                                           lhc, HAMMER2_DIRHASH_LOMASK);
+               rchain = hammer2_chain_next(hmp, &parent, rchain,
+                                           lhc, HAMMER2_DIRHASH_HIMASK);
        }
+       hammer2_chain_put(hmp, parent);
        if (rchain == NULL) {
                kprintf("hammer2_mount: root label not found\n");
                hammer2_chain_drop(hmp, schain);
                hammer2_unmount(mp, MNT_FORCE);
                return EINVAL;
        }
-       hmp->schain = schain;
-       hmp->rchain = rchain;
-       hmp->iroot = rchain->u.ip;
+
+       hmp->schain = schain;           /* left held & unlocked */
+       hmp->rchain = rchain;           /* left held & unlocked */
+       hmp->iroot = rchain->u.ip;      /* implied hold from rchain */
        kprintf("iroot %p\n", rchain->u.ip);
-       hammer2_inode_ref(hmp->iroot);  /* additional ref */
 
        vfs_getnewfsid(mp);
        vfs_add_vnodeops(mp, &hammer2_vnode_vops, &mp->mnt_vn_norm_ops);
@@ -374,18 +391,17 @@ hammer2_unmount(struct mount *mp, int mntflags)
         *      3) Destroy the kmalloc inode zone
         *      4) Free the mount point
         */
+       hmp->iroot = NULL;
        if (hmp->rchain) {
+               KKASSERT(hmp->rchain->refs == 1);
                hammer2_chain_drop(hmp, hmp->rchain);
                hmp->rchain = NULL;
        }
        if (hmp->schain) {
+               KKASSERT(hmp->schain->refs == 1);
                hammer2_chain_drop(hmp, hmp->schain);
                hmp->schain = NULL;
        }
-       if (hmp->iroot) {
-               hammer2_inode_drop(hmp->iroot);
-               hmp->iroot = NULL;
-       }
        if ((devvp = hmp->devvp) != NULL) {
                vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0);
                hmp->devvp = NULL;
@@ -394,8 +410,8 @@ hammer2_unmount(struct mount *mp, int mntflags)
                devvp = NULL;
        }
 
-       kmalloc_destroy(&hmp->inodes);
-       kmalloc_destroy(&hmp->ipstacks);
+       kmalloc_destroy(&hmp->minode);
+       kmalloc_destroy(&hmp->mchain);
 
        hammer2_mount_unlock(hmp);