HAMMER 46/Many: Performance pass, media changes, bug fixes.
author Matthew Dillon <dillon@dragonflybsd.org>
Sun, 18 May 2008 01:48:50 +0000 (01:48 +0000)
committer Matthew Dillon <dillon@dragonflybsd.org>
Sun, 18 May 2008 01:48:50 +0000 (01:48 +0000)
* Add a localization field to the B-Tree element which has sorting priority
  over the object id.

  Use the localization field to separate inode entries from file data.  This
  allows the reblocker to cluster inode information together and greatly
  improves directory/stat performance.

* Enhance the reblocker to reblock internal B-Tree nodes as well as leaves.

* Enhance the reblocker by adding 'reblock-inodes' in addition to
  'reblock-data' and 'reblock-btree', allowing individual types of
  meta-data to be independently reblocked.

* Fix a bug in hammer_bread().  The buffer's zoneX_offset field was
  sometimes not being properly masked, resulting in unnecessary blockmap
  lookups.  Also add hammer_clrxlate_buffer() to clear the translation
  cache for a hammer_buffer.

* Fix numerous issues with hmp->sync_lock.

* Fix a buffer exhaustion issue in the pruner and reblocker caused by
  I/Os in progress not being counted as dirty.

* Enhance the symlink implementation.  Take advantage of the extra 24 bytes
  of space in the inode data to directly store symlinks <= 24 bytes.

* Use cluster_read() to gang read I/O's into 64KB chunks.  Rely on
  localization and the reblocker and pruner to make doing the larger
  I/O's worthwhile.

These changes reduce ls -lR overhead on 43383 files (half created with cpdup,
half created completely at random with blogbench).  Measured after reblocking
in both cases, overhead went from 35 seconds before the changes to 5 seconds
after the changes.

20 files changed:
sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_blockmap.c
sys/vfs/hammer/hammer_btree.c
sys/vfs/hammer/hammer_btree.h
sys/vfs/hammer/hammer_cursor.h
sys/vfs/hammer/hammer_disk.h
sys/vfs/hammer/hammer_flusher.c
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_io.c
sys/vfs/hammer/hammer_ioctl.c
sys/vfs/hammer/hammer_ioctl.h
sys/vfs/hammer/hammer_object.c
sys/vfs/hammer/hammer_ondisk.c
sys/vfs/hammer/hammer_prune.c
sys/vfs/hammer/hammer_reblock.c
sys/vfs/hammer/hammer_recover.c
sys/vfs/hammer/hammer_subs.c
sys/vfs/hammer/hammer_transaction.c
sys/vfs/hammer/hammer_vfsops.c
sys/vfs/hammer/hammer_vnops.c

index 68b9cdb..dcdb28c 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.68 2008/05/15 03:36:40 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.69 2008/05/18 01:48:50 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -90,6 +90,7 @@ struct hammer_transaction {
        struct hammer_mount *hmp;
        hammer_tid_t    tid;
        hammer_tid_t    time;
+       int             sync_lock_refs;
        struct hammer_volume *rootvol;
 };
 
@@ -402,7 +403,8 @@ struct hammer_volume {
        int32_t vol_no;
        int64_t nblocks;        /* note: special calculation for statfs */
        int64_t buffer_base;    /* base offset of buffer 0 */
-       hammer_off_t maxbuf_off; /* Maximum buffer offset */
+       hammer_off_t maxbuf_off; /* Maximum buffer offset (zone-2) */
+       hammer_off_t maxraw_off; /* Maximum raw offset for device */
        char    *vol_name;
        struct vnode *devvp;
        int     vol_flags;
@@ -593,6 +595,7 @@ extern struct vop_ops hammer_spec_vops;
 extern struct vop_ops hammer_fifo_vops;
 extern struct bio_ops hammer_bioops;
 
+extern int hammer_debug_io;
 extern int hammer_debug_general;
 extern int hammer_debug_debug;
 extern int hammer_debug_inode;
@@ -660,6 +663,10 @@ void       hammer_unlock(struct hammer_lock *lock);
 void   hammer_ref(struct hammer_lock *lock);
 void   hammer_unref(struct hammer_lock *lock);
 
+void   hammer_sync_lock_ex(hammer_transaction_t trans);
+void   hammer_sync_lock_sh(hammer_transaction_t trans);
+void   hammer_sync_unlock(hammer_transaction_t trans);
+
 u_int32_t hammer_to_unix_xid(uuid_t *uuid);
 void hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid);
 void   hammer_to_timespec(hammer_tid_t tid, struct timespec *ts);
@@ -699,9 +706,12 @@ int        hammer_btree_chkts(hammer_tid_t ts, hammer_base_elm_t key);
 int    hammer_btree_correct_rhb(hammer_cursor_t cursor, hammer_tid_t tid);
 int    hammer_btree_correct_lhb(hammer_cursor_t cursor, hammer_tid_t tid);
 
-
+int    btree_set_parent(hammer_transaction_t trans, hammer_node_t node,
+                        hammer_btree_elm_t elm);
 int    hammer_btree_lock_children(hammer_cursor_t cursor,
                         struct hammer_node_locklist **locklistp);
+void   hammer_btree_unlock_children(struct hammer_node_locklist **locklistp);
+
 
 void   hammer_print_btree_node(hammer_node_ondisk_t ondisk);
 void   hammer_print_btree_elm(hammer_btree_elm_t elm, u_int8_t type, int i);
@@ -719,6 +729,8 @@ hammer_volume_t     hammer_get_volume(hammer_mount_t hmp,
                        int32_t vol_no, int *errorp);
 hammer_buffer_t        hammer_get_buffer(hammer_mount_t hmp,
                        hammer_off_t buf_offset, int isnew, int *errorp);
+void           hammer_clrxlate_buffer(hammer_mount_t hmp,
+                       hammer_off_t buf_offset);
 void   hammer_uncache_buffer(struct hammer_mount *hmp, hammer_off_t off);
 
 int            hammer_ref_volume(hammer_volume_t volume);
@@ -826,7 +838,8 @@ int hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag,
 void hammer_io_init(hammer_io_t io, hammer_mount_t hmp,
                        enum hammer_io_type type);
 void hammer_io_reinit(hammer_io_t io, enum hammer_io_type type);
-int hammer_io_read(struct vnode *devvp, struct hammer_io *io);
+int hammer_io_read(struct vnode *devvp, struct hammer_io *io,
+                       hammer_off_t limit);
 int hammer_io_new(struct vnode *devvp, struct hammer_io *io);
 void hammer_io_release(struct hammer_io *io, int flush);
 void hammer_io_flush(struct hammer_io *io);
index 06c293c..c374002 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.12 2008/05/06 00:21:07 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.13 2008/05/18 01:48:50 dillon Exp $
  */
 
 /*
@@ -379,6 +379,8 @@ hammer_blockmap_free(hammer_transaction_t trans,
                    ~HAMMER_LARGEBLOCK_MASK64) {
                        hammer_freemap_free(trans, layer2->u.phys_offset,
                                            bmap_off, &error);
+                       hammer_clrxlate_buffer(trans->hmp,
+                                              layer2->u.phys_offset);
                        layer2->u.phys_offset = HAMMER_BLOCKMAP_FREE;
 
                        hammer_modify_buffer(trans, buffer1,
index 42ea29e..85575f4 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.48 2008/05/13 20:46:54 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.49 2008/05/18 01:48:50 dillon Exp $
  */
 
 /*
@@ -86,13 +86,9 @@ static int btree_search(hammer_cursor_t cursor, int flags);
 static int btree_split_internal(hammer_cursor_t cursor);
 static int btree_split_leaf(hammer_cursor_t cursor);
 static int btree_remove(hammer_cursor_t cursor);
-static int btree_set_parent(hammer_transaction_t trans, hammer_node_t node,
-                       hammer_btree_elm_t elm);
 static int btree_node_is_full(hammer_node_ondisk_t node);
 static void hammer_make_separator(hammer_base_elm_t key1,
                        hammer_base_elm_t key2, hammer_base_elm_t dest);
-static void hammer_btree_unlock_children(
-                       struct hammer_node_locklist **locklistp);
 
 /*
  * Iterate records after a search.  The cursor is iterated forwards past
@@ -166,6 +162,15 @@ hammer_btree_iterate(hammer_cursor_t cursor)
                        /* reload stale pointer */
                        node = cursor->node->ondisk;
                        KKASSERT(cursor->index != node->count);
+
+                       /*
+                        * If we are reblocking we want to return internal
+                        * nodes.
+                        */
+                       if (cursor->flags & HAMMER_CURSOR_REBLOCKING) {
+                               cursor->flags |= HAMMER_CURSOR_ATEDISK;
+                               return(0);
+                       }
                        ++cursor->index;
                        continue;
                }
@@ -181,21 +186,23 @@ hammer_btree_iterate(hammer_cursor_t cursor)
                        r = hammer_btree_cmp(&cursor->key_end, &elm[0].base);
                        s = hammer_btree_cmp(&cursor->key_beg, &elm[1].base);
                        if (hammer_debug_btree) {
-                               kprintf("BRACKETL %016llx[%d] %016llx %02x %016llx %d (td=%p)\n",
+                               kprintf("BRACKETL %016llx[%d] %016llx %02x %016llx lo=%02x %d (td=%p)\n",
                                        cursor->node->node_offset,
                                        cursor->index,
                                        elm[0].internal.base.obj_id,
                                        elm[0].internal.base.rec_type,
                                        elm[0].internal.base.key,
+                                       elm[0].internal.base.localization,
                                        r,
                                        curthread
                                );
-                               kprintf("BRACKETR %016llx[%d] %016llx %02x %016llx %d\n",
+                               kprintf("BRACKETR %016llx[%d] %016llx %02x %016llx lo=%02x %d\n",
                                        cursor->node->node_offset,
                                        cursor->index + 1,
                                        elm[1].internal.base.obj_id,
                                        elm[1].internal.base.rec_type,
                                        elm[1].internal.base.key,
+                                       elm[1].internal.base.localization,
                                        s
                                );
                        }
@@ -227,7 +234,7 @@ hammer_btree_iterate(hammer_cursor_t cursor)
                        elm = &node->elms[cursor->index];
                        r = hammer_btree_cmp(&cursor->key_end, &elm->base);
                        if (hammer_debug_btree) {
-                               kprintf("ELEMENT  %016llx:%d %c %016llx %02x %016llx %d\n",
+                               kprintf("ELEMENT  %016llx:%d %c %016llx %02x %016llx lo=%02x %d\n",
                                        cursor->node->node_offset,
                                        cursor->index,
                                        (elm[0].leaf.base.btype ?
@@ -235,6 +242,7 @@ hammer_btree_iterate(hammer_cursor_t cursor)
                                        elm[0].leaf.base.obj_id,
                                        elm[0].leaf.base.rec_type,
                                        elm[0].leaf.base.key,
+                                       elm[0].leaf.base.localization,
                                        r
                                );
                        }
@@ -278,11 +286,12 @@ hammer_btree_iterate(hammer_cursor_t cursor)
                if (hammer_debug_btree) {
                        int i = cursor->index;
                        hammer_btree_elm_t elm = &cursor->node->ondisk->elms[i];
-                       kprintf("ITERATE  %p:%d %016llx %02x %016llx\n",
+                       kprintf("ITERATE  %p:%d %016llx %02x %016llx lo=%02x\n",
                                cursor->node, i,
                                elm->internal.base.obj_id,
                                elm->internal.base.rec_type,
-                               elm->internal.base.key
+                               elm->internal.base.key,
+                               elm->internal.base.localization
                        );
                }
                return(0);
@@ -351,20 +360,22 @@ hammer_btree_iterate_reverse(hammer_cursor_t cursor)
                        r = hammer_btree_cmp(&cursor->key_end, &elm[0].base);
                        s = hammer_btree_cmp(&cursor->key_beg, &elm[1].base);
                        if (hammer_debug_btree) {
-                               kprintf("BRACKETL %016llx[%d] %016llx %02x %016llx %d\n",
+                               kprintf("BRACKETL %016llx[%d] %016llx %02x %016llx lo=%02x %d\n",
                                        cursor->node->node_offset,
                                        cursor->index,
                                        elm[0].internal.base.obj_id,
                                        elm[0].internal.base.rec_type,
                                        elm[0].internal.base.key,
+                                       elm[0].internal.base.localization,
                                        r
                                );
-                               kprintf("BRACKETR %016llx[%d] %016llx %02x %016llx %d\n",
+                               kprintf("BRACKETR %016llx[%d] %016llx %02x %016llx lo=%02x %d\n",
                                        cursor->node->node_offset,
                                        cursor->index + 1,
                                        elm[1].internal.base.obj_id,
                                        elm[1].internal.base.rec_type,
                                        elm[1].internal.base.key,
+                                       elm[1].internal.base.localization,
                                        s
                                );
                        }
@@ -394,7 +405,7 @@ hammer_btree_iterate_reverse(hammer_cursor_t cursor)
                        elm = &node->elms[cursor->index];
                        s = hammer_btree_cmp(&cursor->key_beg, &elm->base);
                        if (hammer_debug_btree) {
-                               kprintf("ELEMENT  %016llx:%d %c %016llx %02x %016llx %d\n",
+                               kprintf("ELEMENT  %016llx:%d %c %016llx %02x %016llx lo=%02x %d\n",
                                        cursor->node->node_offset,
                                        cursor->index,
                                        (elm[0].leaf.base.btype ?
@@ -402,6 +413,7 @@ hammer_btree_iterate_reverse(hammer_cursor_t cursor)
                                        elm[0].leaf.base.obj_id,
                                        elm[0].leaf.base.rec_type,
                                        elm[0].leaf.base.key,
+                                       elm[0].leaf.base.localization,
                                        s
                                );
                        }
@@ -435,11 +447,12 @@ hammer_btree_iterate_reverse(hammer_cursor_t cursor)
                if (hammer_debug_btree) {
                        int i = cursor->index;
                        hammer_btree_elm_t elm = &cursor->node->ondisk->elms[i];
-                       kprintf("ITERATE  %p:%d %016llx %02x %016llx\n",
+                       kprintf("ITERATE  %p:%d %016llx %02x %016llx lo=%02x\n",
                                cursor->node, i,
                                elm->internal.base.obj_id,
                                elm->internal.base.rec_type,
-                               elm->internal.base.key
+                               elm->internal.base.key,
+                               elm->internal.base.localization
                        );
                }
                return(0);
@@ -818,13 +831,14 @@ btree_search(hammer_cursor_t cursor, int flags)
        flags |= cursor->flags;
 
        if (hammer_debug_btree) {
-               kprintf("SEARCH   %016llx[%d] %016llx %02x key=%016llx cre=%016llx (td = %p)\n",
+               kprintf("SEARCH   %016llx[%d] %016llx %02x key=%016llx cre=%016llx lo=%02x (td = %p)\n",
                        cursor->node->node_offset, 
                        cursor->index,
                        cursor->key_beg.obj_id,
                        cursor->key_beg.rec_type,
                        cursor->key_beg.key,
                        cursor->key_beg.create_tid, 
+                       cursor->key_beg.localization, 
                        curthread
                );
                if (cursor->parent)
@@ -1042,13 +1056,14 @@ btree_search(hammer_cursor_t cursor, int flags)
 
                if (hammer_debug_btree) {
                        kprintf("RESULT-I %016llx[%d] %016llx %02x "
-                               "key=%016llx cre=%016llx\n",
+                               "key=%016llx cre=%016llx lo=%02x\n",
                                cursor->node->node_offset,
                                i,
                                elm->internal.base.obj_id,
                                elm->internal.base.rec_type,
                                elm->internal.base.key,
-                               elm->internal.base.create_tid
+                               elm->internal.base.create_tid,
+                               elm->internal.base.localization
                        );
                }
 
@@ -2001,7 +2016,6 @@ btree_remove(hammer_cursor_t cursor)
  *
  * XXX deadlock potential here with our exclusive locks
  */
-static
 int
 btree_set_parent(hammer_transaction_t trans, hammer_node_t node,
                 hammer_btree_elm_t elm)
@@ -2096,7 +2110,7 @@ hammer_btree_lock_children(hammer_cursor_t cursor,
 /*
  * Release previously obtained node locks.
  */
-static void
+void
 hammer_btree_unlock_children(struct hammer_node_locklist **locklistp)
 {
        hammer_node_locklist_t item;
@@ -2125,6 +2139,11 @@ hammer_btree_unlock_children(struct hammer_node_locklist **locklistp)
 int
 hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2)
 {
+       if (key1->localization < key2->localization)
+               return(-5);
+       if (key1->localization > key2->localization)
+               return(5);
+
        if (key1->obj_id < key2->obj_id)
                return(-4);
        if (key1->obj_id > key2->obj_id)
@@ -2201,22 +2220,27 @@ hammer_make_separator(hammer_base_elm_t key1, hammer_base_elm_t key2,
 
        dest->rec_type = key2->rec_type;
        dest->key = key2->key;
+       dest->obj_id = key2->obj_id;
        dest->create_tid = key2->create_tid;
 
-       MAKE_SEPARATOR(key1, key2, dest, obj_id);
-       if (key1->obj_id == key2->obj_id) {
-               MAKE_SEPARATOR(key1, key2, dest, rec_type);
-               if (key1->rec_type == key2->rec_type) {
-                       MAKE_SEPARATOR(key1, key2, dest, key);
-                       /*
-                        * Don't bother creating a separator for create_tid,
-                        * which also conveniently avoids having to handle
-                        * the create_tid == 0 (infinity) case.  Just leave
-                        * create_tid set to key2.
-                        *
-                        * Worst case, dest matches key2 exactly, which is
-                        * acceptable.
-                        */
+       MAKE_SEPARATOR(key1, key2, dest, localization);
+       if (key1->localization == key2->localization) {
+               MAKE_SEPARATOR(key1, key2, dest, obj_id);
+               if (key1->obj_id == key2->obj_id) {
+                       MAKE_SEPARATOR(key1, key2, dest, rec_type);
+                       if (key1->rec_type == key2->rec_type) {
+                               MAKE_SEPARATOR(key1, key2, dest, key);
+                               /*
+                                * Don't bother creating a separator for
+                                * create_tid, which also conveniently avoids
+                                * having to handle the create_tid == 0
+                                * (infinity) case.  Just leave create_tid
+                                * set to key2.
+                                *
+                                * Worst case, dest matches key2 exactly,
+                                * which is acceptable.
+                                */
+                       }
                }
        }
 }
@@ -2294,6 +2318,7 @@ hammer_print_btree_elm(hammer_btree_elm_t elm, u_int8_t type, int i)
        kprintf("\tbtype        = %02x (%c)\n",
                elm->base.btype,
                (elm->base.btype ? elm->base.btype : '?'));
+       kprintf("\tlocalization = %02x\n", elm->base.localization);
 
        switch(type) {
        case HAMMER_BTREE_TYPE_INTERNAL:
index 25d7f2d..2065a0e 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.15 2008/05/13 20:46:55 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.16 2008/05/18 01:48:50 dillon Exp $
  */
 
 /*
@@ -105,12 +105,24 @@ struct hammer_base_elm {
        u_int16_t rec_type;     /* 20 _RECTYPE_ */
        u_int8_t obj_type;      /* 22 _OBJTYPE_ (restricted) */
        u_int8_t btype;         /* 23 B-Tree element type */
-       int32_t reserved07;     /* 24 (future) */
+       u_int32_t localization; /* 24 B-Tree localization parameter */
                                /* 28 */
 };
 
 typedef struct hammer_base_elm *hammer_base_elm_t;
 
+/*
+ * Localization has sorting priority over the obj_id and is used to
+ * localize inodes for very fast directory scans.
+ */
+#define HAMMER_LOCALIZE_RESERVED00     0x00000000
+#define HAMMER_LOCALIZE_INODE          0x00000001
+#define HAMMER_LOCALIZE_MISC           0x00000002
+#define HAMMER_LOCALIZE_RESERVED03     0x00000003
+
+#define HAMMER_MIN_LOCALIZATION                0x00000000U
+#define HAMMER_MAX_LOCALIZATION                0xFFFFFFFFU
+
 /*
  * Internal element (40 + 24 = 64 bytes).
  *
index b2c632a..dfc8d7e 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.18 2008/05/12 21:17:18 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.19 2008/05/18 01:48:50 dillon Exp $
  */
 
 /*
@@ -131,6 +131,7 @@ typedef struct hammer_cursor *hammer_cursor_t;
 #define HAMMER_CURSOR_CREATE_CHECK     0x8000  /* as-of lookup */
 
 #define HAMMER_CURSOR_PRUNING          0x00010000
+#define HAMMER_CURSOR_REBLOCKING       0x00020000
 
 /*
  * Flags we can clear when reusing a cursor (we can clear all of them)
index 7c21dc2..2b2bd3c 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.32 2008/05/12 21:17:18 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.33 2008/05/18 01:48:50 dillon Exp $
  */
 
 #ifndef VFS_HAMMER_DISK_H_
@@ -579,11 +579,16 @@ struct hammer_inode_data {
        u_int64_t mtime;
        u_int64_t size;         /* filesystem object size */
        u_int64_t nlinks;       /* hard links */
-       char    reserved04[32];
+       u_int64_t reserved04;
+       union {
+               char    reserved06[24];
+               char    symlink[24];    /* HAMMER_INODE_BASESYMLEN */
+       } ext;
 };
 
 #define HAMMER_INODE_DATA_VERSION      1
 #define HAMMER_OBJID_ROOT              1
+#define HAMMER_INODE_BASESYMLEN                24
 
 /*
  * A directory entry specifies the HAMMER filesystem object id, a copy of
index 066a2f0..df048cb 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.16 2008/05/15 03:36:40 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.17 2008/05/18 01:48:50 dillon Exp $
  */
 /*
  * HAMMER dependancy flusher thread
@@ -179,6 +179,9 @@ hammer_flusher_flush(hammer_mount_t hmp)
        hammer_start_transaction_fls(&trans, hmp);
        rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
 
+       /*
+        * Flush all pending inodes
+        */
        while ((ip = TAILQ_FIRST(&hmp->flush_list)) != NULL) {
                /*
                 * Stop when we hit a different flush group
@@ -250,7 +253,7 @@ hammer_flusher_finalize(hammer_transaction_t trans)
        int count;
        int i;
 
-       hammer_lock_ex(&hmp->sync_lock);
+       hammer_sync_lock_ex(trans);
        rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
 
        /*
@@ -377,7 +380,7 @@ hammer_flusher_finalize(hammer_transaction_t trans)
                hammer_rel_buffer((hammer_buffer_t)io, 0);
                ++count;
        }
-       hammer_unlock(&hmp->sync_lock);
+       hammer_sync_unlock(trans);
        if (count)
                hkprintf("Z%d", count);
 }
index 03a1f70..fc81cda 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.59 2008/05/13 20:46:55 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.60 2008/05/18 01:48:50 dillon Exp $
  */
 
 #include "hammer.h"
@@ -236,6 +236,7 @@ loop:
         */
 retry:
        hammer_init_cursor(trans, &cursor, cache, NULL);
+       cursor.key_beg.localization = HAMMER_LOCALIZE_INODE;
        cursor.key_beg.obj_id = ip->obj_id;
        cursor.key_beg.key = 0;
        cursor.key_beg.create_tid = 0;
@@ -342,6 +343,7 @@ hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
        ip->ino_data.nlinks = 0;
        /* XXX */
        ip->ino_leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
+       ip->ino_leaf.base.localization = HAMMER_LOCALIZE_INODE;
        ip->ino_leaf.base.obj_id = ip->obj_id;
        ip->ino_leaf.base.key = 0;
        ip->ino_leaf.base.create_tid = 0;
@@ -416,6 +418,7 @@ retry:
        if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
            HAMMER_INODE_ONDISK) {
                hammer_normalize_cursor(cursor);
+               cursor->key_beg.localization = HAMMER_LOCALIZE_INODE;
                cursor->key_beg.obj_id = ip->obj_id;
                cursor->key_beg.key = 0;
                cursor->key_beg.create_tid = 0;
@@ -562,6 +565,7 @@ retry:
        if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
            HAMMER_INODE_ONDISK) {
                hammer_normalize_cursor(cursor);
+               cursor->key_beg.localization = HAMMER_LOCALIZE_INODE;
                cursor->key_beg.obj_id = ip->obj_id;
                cursor->key_beg.key = 0;
                cursor->key_beg.create_tid = 0;
index 5bcab68..24a201e 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.31 2008/05/15 03:36:40 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.32 2008/05/18 01:48:50 dillon Exp $
  */
 /*
  * IO Primitives and buffer cache management
@@ -167,18 +167,30 @@ hammer_io_wait(hammer_io_t io)
        }
 }
 
+#define HAMMER_MAXRA   4
+
 /*
  * Load bp for a HAMMER structure.  The io must be exclusively locked by
  * the caller.
+ *
+ * Generally speaking HAMMER assumes either an optimized layout or that
+ * typical access patterns will be close to the original layout when the
+ * information was written.  For this reason we try to cluster all reads.
  */
 int
-hammer_io_read(struct vnode *devvp, struct hammer_io *io)
+hammer_io_read(struct vnode *devvp, struct hammer_io *io, hammer_off_t limit)
 {
        struct buf *bp;
-       int error;
+       int   error;
 
        if ((bp = io->bp) == NULL) {
+#if 1
+               error = cluster_read(devvp, limit, io->offset,
+                                    HAMMER_BUFSIZE, MAXBSIZE, 16, &io->bp);
+#else
                error = bread(devvp, io->offset, HAMMER_BUFSIZE, &io->bp);
+#endif
+
                if (error == 0) {
                        bp = io->bp;
                        bp->b_ops = &hammer_bioops;
@@ -468,12 +480,19 @@ hammer_io_modify_done(hammer_io_t io)
        --io->modify_refs;
 }
 
+/*
+ * Caller intends to modify a volume's ondisk structure.
+ *
+ * This is only allowed if we are the flusher or we have a ref on the
+ * sync_lock.
+ */
 void
 hammer_modify_volume(hammer_transaction_t trans, hammer_volume_t volume,
                     void *base, int len)
 {
-       hammer_io_modify(&volume->io, 1);
+       KKASSERT (trans == NULL || trans->sync_lock_refs > 0);
 
+       hammer_io_modify(&volume->io, 1);
        if (len) {
                intptr_t rel_offset = (intptr_t)base - (intptr_t)volume->ondisk;
                KKASSERT((rel_offset & ~(intptr_t)HAMMER_BUFMASK) == 0);
@@ -484,14 +503,17 @@ hammer_modify_volume(hammer_transaction_t trans, hammer_volume_t volume,
 }
 
 /*
- * Caller intends to modify a buffer's ondisk structure.  The related
- * cluster must be marked open prior to being able to flush the modified
- * buffer so get that I/O going now.
+ * Caller intends to modify a buffer's ondisk structure.
+ *
+ * This is only allowed if we are the flusher or we have a ref on the
+ * sync_lock.
  */
 void
 hammer_modify_buffer(hammer_transaction_t trans, hammer_buffer_t buffer,
                     void *base, int len)
 {
+       KKASSERT (trans == NULL || trans->sync_lock_refs > 0);
+
        hammer_io_modify(&buffer->io, 1);
        if (len) {
                intptr_t rel_offset = (intptr_t)base - (intptr_t)buffer->ondisk;
index 8761efb..8efccbd 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.18 2008/05/13 20:46:55 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.19 2008/05/18 01:48:50 dillon Exp $
  */
 
 #include "hammer.h"
@@ -149,6 +149,8 @@ hammer_ioc_gethistory(hammer_transaction_t trans, hammer_inode_t ip,
                 */
                cursor.key_beg.key = hist->key;
                cursor.key_end.key = HAMMER_MAX_KEY;
+               cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
+               cursor.key_end.localization = HAMMER_LOCALIZE_MISC;
 
                switch(ip->ino_data.obj_type) {
                case HAMMER_OBJTYPE_REGFILE:
@@ -174,6 +176,8 @@ hammer_ioc_gethistory(hammer_transaction_t trans, hammer_inode_t ip,
                cursor.key_end.key = 0;
                cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
                cursor.key_end.rec_type = HAMMER_RECTYPE_INODE;
+               cursor.key_beg.localization = HAMMER_LOCALIZE_INODE;
+               cursor.key_end.localization = HAMMER_LOCALIZE_INODE;
        }
 
        error = hammer_btree_first(&cursor);
index ee34d14..529f748 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.h,v 1.8 2008/05/13 20:46:55 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.h,v 1.9 2008/05/18 01:48:50 dillon Exp $
  */
 /*
  * HAMMER ioctl's.  This file can be #included from userland
@@ -47,7 +47,7 @@
 /*
  * Common HAMMER ioctl header
  *
- * Global flags are stored in the upper 16 bits
+ * Global flags are stored in the upper 16 bits.
  */
 struct hammer_ioc_head {
        int32_t         flags;
@@ -57,10 +57,11 @@ struct hammer_ioc_head {
 
 #define HAMMER_IOC_HEAD_INTR   0x00010000
 #define HAMMER_IOC_DO_BTREE    0x00020000      /* reblocker */
-#define HAMMER_IOC_DO_RECS     0x00040000      /* reblocker */
+#define HAMMER_IOC_DO_INODES   0x00040000      /* reblocker */
 #define HAMMER_IOC_DO_DATA     0x00080000      /* reblocker */
 
-#define HAMMER_IOC_DO_FLAGS    (HAMMER_IOC_DO_BTREE | HAMMER_IOC_DO_RECS | \
+#define HAMMER_IOC_DO_FLAGS    (HAMMER_IOC_DO_BTREE |  \
+                                HAMMER_IOC_DO_INODES | \
                                 HAMMER_IOC_DO_DATA)
 
 /*
@@ -81,6 +82,10 @@ struct hammer_ioc_prune {
        struct hammer_ioc_head head;
        int             nelms;
        int             reserved01;
+       u_int32_t       beg_localization;
+       u_int32_t       cur_localization;
+       u_int32_t       end_localization;
+       u_int32_t       reserved03;
        int64_t         beg_obj_id;
        int64_t         cur_obj_id;
        int64_t         cur_key;
@@ -148,11 +153,17 @@ struct hammer_ioc_history {
  */
 struct hammer_ioc_reblock {
        struct hammer_ioc_head head;
+       int32_t         free_level;             /* 0 for maximum compaction */
+       u_int32_t       reserved01;
+
+       u_int32_t       beg_localization;
+       u_int32_t       cur_localization;
+       u_int32_t       end_localization;
+       u_int32_t       reserved03;
+
        int64_t         beg_obj_id;
        int64_t         cur_obj_id;             /* Stopped at (interrupt) */
        int64_t         end_obj_id;
-       int32_t         free_level;             /* 0 for maximum compaction */
-       int32_t         unused01;
 
        int64_t         btree_count;            /* B-Tree nodes checked */
        int64_t         record_count;           /* Records checked */
index a24ae74..3870a9c 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.58 2008/05/12 23:15:46 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.59 2008/05/18 01:48:50 dillon Exp $
  */
 
 #include "hammer.h"
@@ -476,6 +476,7 @@ hammer_ip_add_directory(struct hammer_transaction *trans,
                ++trans->hmp->namekey_iterator;
 
        record->type = HAMMER_MEM_RECORD_ADD;
+       record->leaf.base.localization = HAMMER_LOCALIZE_MISC;
        record->leaf.base.obj_id = dip->obj_id;
        record->leaf.base.key = hammer_directory_namekey(ncp->nc_name, bytes);
        record->leaf.base.key += trans->hmp->namekey_iterator;
@@ -623,6 +624,7 @@ hammer_ip_add_record(struct hammer_transaction *trans, hammer_record_t record)
        hammer_inode_t ip = record->ip;
        int error;
 
+       KKASSERT(record->leaf.base.localization != 0);
        record->leaf.base.obj_id = ip->obj_id;
        record->leaf.base.obj_type = ip->ino_leaf.base.obj_type;
        error = hammer_mem_add(trans, record);
@@ -655,6 +657,7 @@ hammer_ip_sync_data(hammer_cursor_t cursor, hammer_inode_t ip,
        KKASSERT(bytes != 0);
 retry:
        hammer_normalize_cursor(cursor);
+       cursor->key_beg.localization = HAMMER_LOCALIZE_MISC;
        cursor->key_beg.obj_id = ip->obj_id;
        cursor->key_beg.key = offset + bytes;
        cursor->key_beg.create_tid = trans->tid;
@@ -699,6 +702,7 @@ retry:
         * undo elements.
         */
        elm.base.btype = HAMMER_BTREE_TYPE_RECORD;
+       elm.base.localization = HAMMER_LOCALIZE_MISC;
        elm.base.obj_id = ip->obj_id;
        elm.base.key = offset + bytes;
        elm.base.create_tid = trans->tid;
@@ -780,6 +784,7 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record)
 
        KKASSERT(record->flush_state == HAMMER_FST_FLUSH);
        KKASSERT(record->flags & HAMMER_RECF_INTERLOCK_BE);
+       KKASSERT(record->leaf.base.localization != 0);
 
        hammer_normalize_cursor(cursor);
        cursor->key_beg = record->leaf.base;
@@ -1283,6 +1288,7 @@ hammer_ip_delete_range(hammer_cursor_t cursor, hammer_inode_t ip,
        KKASSERT(trans->type == HAMMER_TRANS_FLS);
 retry:
        hammer_normalize_cursor(cursor);
+       cursor->key_beg.localization = HAMMER_LOCALIZE_MISC;
        cursor->key_beg.obj_id = ip->obj_id;
        cursor->key_beg.create_tid = 0;
        cursor->key_beg.delete_tid = 0;
@@ -1412,6 +1418,7 @@ hammer_ip_delete_range_all(hammer_cursor_t cursor, hammer_inode_t ip,
        KKASSERT(trans->type == HAMMER_TRANS_FLS);
 retry:
        hammer_normalize_cursor(cursor);
+       cursor->key_beg.localization = HAMMER_LOCALIZE_MISC;
        cursor->key_beg.obj_id = ip->obj_id;
        cursor->key_beg.create_tid = 0;
        cursor->key_beg.delete_tid = 0;
@@ -1610,6 +1617,7 @@ hammer_ip_check_directory_empty(hammer_transaction_t trans, hammer_inode_t ip)
         */
        hammer_init_cursor(trans, &cursor, &ip->cache[0], ip);
 
+       cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
        cursor.key_beg.obj_id = ip->obj_id;
        cursor.key_beg.create_tid = 0;
        cursor.key_beg.delete_tid = 0;
index b613fb4..5865213 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.45 2008/05/15 03:36:40 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.46 2008/05/18 01:48:50 dillon Exp $
  */
 /*
  * Manage HAMMER's on-disk structures.  These routines are primarily
@@ -215,6 +215,7 @@ hammer_install_volume(struct hammer_mount *hmp, const char *volname)
        volume->nblocks = ondisk->vol_nblocks; 
        volume->maxbuf_off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no,
                                    ondisk->vol_buf_end - ondisk->vol_buf_beg);
+       volume->maxraw_off = ondisk->vol_buf_end;
        RB_INIT(&volume->rb_bufs_root);
 
        if (RB_EMPTY(&hmp->rb_vols_root)) {
@@ -429,7 +430,8 @@ hammer_load_volume(hammer_volume_t volume)
        hammer_lock_ex(&volume->io.lock);
 
        if (volume->ondisk == NULL) {
-               error = hammer_io_read(volume->devvp, &volume->io);
+               error = hammer_io_read(volume->devvp, &volume->io,
+                                      volume->maxraw_off);
                if (error == 0)
                        volume->ondisk = (void *)volume->io.bp->b_data;
        } else {
@@ -518,15 +520,14 @@ hammer_get_buffer(hammer_mount_t hmp, hammer_off_t buf_offset,
         * Locate the buffer given its zone-2 offset.
         */
        buf_offset &= ~HAMMER_BUFMASK64;
-       KKASSERT((buf_offset & HAMMER_ZONE_RAW_BUFFER) ==
-                HAMMER_ZONE_RAW_BUFFER);
+       KKASSERT((buf_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RAW_BUFFER);
        vol_no = HAMMER_VOL_DECODE(buf_offset);
        volume = hammer_get_volume(hmp, vol_no, errorp);
        if (volume == NULL)
                return(NULL);
 
        /*
-        * NOTE: buf_offset and maxbuf_off are both full offset
+        * NOTE: buf_offset and maxbuf_off are both full zone-2 offset
         * specifications.
         */
        KKASSERT(buf_offset < volume->maxbuf_off);
@@ -580,7 +581,7 @@ again:
        /*
         * Cache the blockmap translation
         */
-       if ((zoneX_offset & HAMMER_ZONE_RAW_BUFFER) != HAMMER_ZONE_RAW_BUFFER)
+       if ((zoneX_offset & HAMMER_OFF_ZONE_MASK) != HAMMER_ZONE_RAW_BUFFER)
                buffer->zoneX_offset = zoneX_offset;
 
        /*
@@ -599,6 +600,30 @@ again:
        return(buffer);
 }
 
+/*
+ * Clear the cached zone-X translation for a buffer.
+ */
+void
+hammer_clrxlate_buffer(hammer_mount_t hmp, hammer_off_t buf_offset)
+{
+       hammer_buffer_t buffer;
+       hammer_volume_t volume;
+       int vol_no;
+       int error;
+
+       buf_offset &= ~HAMMER_BUFMASK64;
+       KKASSERT((buf_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RAW_BUFFER);
+       vol_no = HAMMER_VOL_DECODE(buf_offset);
+       volume = hammer_get_volume(hmp, vol_no, &error);
+       if (volume == NULL)
+               return;
+       buffer = RB_LOOKUP(hammer_buf_rb_tree, &volume->rb_bufs_root,
+                          buf_offset);
+       if (buffer)
+               buffer->zoneX_offset = 0;
+       hammer_rel_volume(volume, 0);
+}
+
 static int
 hammer_load_buffer(hammer_buffer_t buffer, int isnew)
 {
@@ -612,11 +637,17 @@ hammer_load_buffer(hammer_buffer_t buffer, int isnew)
        ++buffer->io.loading;
        hammer_lock_ex(&buffer->io.lock);
 
+       if (hammer_debug_io & 0x0001) {
+               kprintf("load_buffer %016llx %016llx\n",
+                       buffer->zoneX_offset, buffer->zone2_offset);
+       }
+
        if (buffer->ondisk == NULL) {
                if (isnew) {
                        error = hammer_io_new(volume->devvp, &buffer->io);
                } else {
-                       error = hammer_io_read(volume->devvp, &buffer->io);
+                       error = hammer_io_read(volume->devvp, &buffer->io,
+                                              volume->maxraw_off);
                }
                if (error == 0)
                        buffer->ondisk = (void *)buffer->io.bp->b_data;
@@ -738,8 +769,7 @@ hammer_uncache_buffer(hammer_mount_t hmp, hammer_off_t buf_offset)
        int error;
 
        buf_offset &= ~HAMMER_BUFMASK64;
-       KKASSERT((buf_offset & HAMMER_ZONE_RAW_BUFFER) ==
-                HAMMER_ZONE_RAW_BUFFER);
+       KKASSERT((buf_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RAW_BUFFER);
        vol_no = HAMMER_VOL_DECODE(buf_offset);
        volume = hammer_get_volume(hmp, vol_no, &error);
        KKASSERT(volume != 0);
@@ -908,6 +938,7 @@ static int
 hammer_load_node(hammer_node_t node, int isnew)
 {
        hammer_buffer_t buffer;
+       hammer_off_t buf_offset;
        int error;
 
        error = 0;
@@ -932,9 +963,9 @@ hammer_load_node(hammer_node_t node, int isnew)
                                node->buffer = buffer;
                        }
                } else {
-                       buffer = hammer_get_buffer(node->hmp,
-                                                  node->node_offset, 0,
-                                                  &error);
+                       buf_offset = node->node_offset & ~HAMMER_BUFMASK64;
+                       buffer = hammer_get_buffer(node->hmp, buf_offset,
+                                                  0, &error);
                        if (buffer) {
                                KKASSERT(error == 0);
                                TAILQ_INSERT_TAIL(&buffer->clist,
index 2be96ff..b025da5 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_prune.c,v 1.1 2008/05/12 21:17:18 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_prune.c,v 1.2 2008/05/18 01:48:50 dillon Exp $
  */
 
 #include "hammer.h"
@@ -67,6 +67,7 @@ hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip,
        if ((prune->head.flags & HAMMER_IOC_PRUNE_ALL) && prune->nelms)
                return(EINVAL);
 
+       prune->cur_localization = prune->end_localization;
        prune->cur_obj_id = prune->end_obj_id;
        prune->cur_key = HAMMER_MAX_KEY;
 
@@ -76,6 +77,7 @@ retry:
                hammer_done_cursor(&cursor);
                return(error);
        }
+       cursor.key_beg.localization = prune->beg_localization;
        cursor.key_beg.obj_id = prune->beg_obj_id;
        cursor.key_beg.key = HAMMER_MIN_KEY;
        cursor.key_beg.create_tid = 1;
@@ -83,6 +85,7 @@ retry:
        cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
        cursor.key_beg.obj_type = 0;
 
+       cursor.key_end.localization = prune->cur_localization;
        cursor.key_end.obj_id = prune->cur_obj_id;
        cursor.key_end.key = prune->cur_key;
        cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
@@ -94,14 +97,27 @@ retry:
        cursor.flags |= HAMMER_CURSOR_BACKEND;
 
        /*
-        * This flag allows the B-Tree code to clean up loose ends while
-        * it is scanning.
+        * This flag allows the B-Tree code to clean up loose ends.
         */
        cursor.flags |= HAMMER_CURSOR_PRUNING;
 
+       hammer_sync_lock_sh(trans);
        error = hammer_btree_last(&cursor);
        while (error == 0) {
+               /*
+                * Yield to more important tasks
+                */
+               if (trans->hmp->sync_lock.wanted) {
+                       hammer_sync_unlock(trans);
+                       tsleep(trans, 0, "hmrslo", hz / 10);
+                       hammer_sync_lock_sh(trans);
+               }
+
+               /*
+                * Check for work
+                */
                elm = &cursor.node->ondisk->elms[cursor.index];
+               prune->cur_localization = elm->base.localization;
                prune->cur_obj_id = elm->base.obj_id;
                prune->cur_key = elm->base.key;
 
@@ -123,10 +139,8 @@ retry:
                         */
                        isdir = (elm->base.rec_type == HAMMER_RECTYPE_DIRENTRY);
 
-                       hammer_lock_ex(&trans->hmp->sync_lock);
                        error = hammer_delete_at_cursor(&cursor,
                                                        &prune->stat_bytes);
-                       hammer_unlock(&trans->hmp->sync_lock);
                        if (error)
                                break;
 
@@ -142,10 +156,8 @@ retry:
                         */
                        cursor.flags |= HAMMER_CURSOR_ATEDISK;
                } else if (realign_cre >= 0 || realign_del >= 0) {
-                       hammer_lock_ex(&trans->hmp->sync_lock);
                        error = realign_prune(prune, &cursor,
                                              realign_cre, realign_del);
-                       hammer_unlock(&trans->hmp->sync_lock);
                        if (error == 0) {
                                cursor.flags |= HAMMER_CURSOR_ATEDISK;
                                if (hammer_debug_general & 0x0200) {
@@ -169,7 +181,8 @@ retry:
                 * cache.  NOTE: We still hold locks on the cursor, we
                 * cannot call the flusher synchronously.
                 */
-               if (trans->hmp->locked_dirty_count > hammer_limit_dirtybufs) {
+               if (trans->hmp->locked_dirty_count +
+                   trans->hmp->io_running_count > hammer_limit_dirtybufs) {
                        hammer_flusher_async(trans->hmp);
                        tsleep(trans, 0, "hmrslo", hz / 10);
                }
@@ -177,6 +190,7 @@ retry:
                if (error == 0)
                        error = hammer_btree_iterate_reverse(&cursor);
        }
+       hammer_sync_unlock(trans);
        if (error == ENOENT)
                error = 0;
        hammer_done_cursor(&cursor);
index 14d1136..3cf22d7 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.15 2008/05/15 03:36:40 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.16 2008/05/18 01:48:50 dillon Exp $
  */
 /*
  * HAMMER reblocker - This code frees up fragmented physical space
@@ -50,7 +50,9 @@ static int hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
                                 hammer_btree_elm_t elm);
 static int hammer_reblock_data(struct hammer_ioc_reblock *reblock,
                                hammer_cursor_t cursor, hammer_btree_elm_t elm);
-static int hammer_reblock_node(struct hammer_ioc_reblock *reblock,
+static int hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
+                               hammer_cursor_t cursor, hammer_btree_elm_t elm);
+static int hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
                                hammer_cursor_t cursor, hammer_btree_elm_t elm);
 
 int
@@ -67,6 +69,7 @@ hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip,
                return(EINVAL);
 
        reblock->cur_obj_id = reblock->beg_obj_id;
+       reblock->cur_localization = reblock->beg_localization;
 
 retry:
        error = hammer_init_cursor(trans, &cursor, NULL, NULL);
@@ -74,6 +77,7 @@ retry:
                hammer_done_cursor(&cursor);
                return(error);
        }
+       cursor.key_beg.localization = reblock->cur_localization;
        cursor.key_beg.obj_id = reblock->cur_obj_id;
        cursor.key_beg.key = HAMMER_MIN_KEY;
        cursor.key_beg.create_tid = 1;
@@ -81,6 +85,7 @@ retry:
        cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
        cursor.key_beg.obj_type = 0;
 
+       cursor.key_end.localization = reblock->end_localization;
        cursor.key_end.obj_id = reblock->end_obj_id;
        cursor.key_end.key = HAMMER_MAX_KEY;
        cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
@@ -91,10 +96,22 @@ retry:
        cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
        cursor.flags |= HAMMER_CURSOR_BACKEND;
 
+       /*
+        * This flag allows the btree scan code to return internal nodes,
+        * so we can reblock them in addition to the leafs.  Only specify it
+        * if we intend to reblock B-Tree nodes.
+        */
+       if (reblock->head.flags & HAMMER_IOC_DO_BTREE)
+               cursor.flags |= HAMMER_CURSOR_REBLOCKING;
+
        error = hammer_btree_first(&cursor);
        while (error == 0) {
+               /*
+                * Internal or Leaf node
+                */
                elm = &cursor.node->ondisk->elms[cursor.index];
                reblock->cur_obj_id = elm->base.obj_id;
+               reblock->cur_localization = elm->base.localization;
 
                /*
                 * Acquiring the sync_lock prevents the operation from
@@ -102,9 +119,9 @@ retry:
                 *
                 * NOTE: cursor.node may have changed on return.
                 */
-               hammer_lock_ex(&trans->hmp->sync_lock);
+               hammer_sync_lock_sh(trans);
                error = hammer_reblock_helper(reblock, &cursor, elm);
-               hammer_unlock(&trans->hmp->sync_lock);
+               hammer_sync_unlock(trans);
                if (error == 0) {
                        cursor.flags |= HAMMER_CURSOR_ATEDISK;
                        error = hammer_btree_iterate(&cursor);
@@ -115,7 +132,8 @@ retry:
                 * cache.  NOTE: We still hold locks on the cursor, we
                 * cannot call the flusher synchronously.
                 */
-               if (trans->hmp->locked_dirty_count > hammer_limit_dirtybufs) {
+               if (trans->hmp->locked_dirty_count +
+                   trans->hmp->io_running_count > hammer_limit_dirtybufs) {
                        hammer_flusher_async(trans->hmp);
                        tsleep(trans, 0, "hmrslo", hz / 10);
                }
@@ -150,26 +168,29 @@ hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
        int bytes;
        int cur;
 
-       if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD)
-               return(0);
        error = 0;
 
        /*
         * Reblock data.  Note that data embedded in a record is reblocked
-        * by the record reblock code.
+        * by the record reblock code.  Data processing only occurs at leaf
+        * nodes and for RECORD element types.
         */
+       if (cursor->node->ondisk->type != HAMMER_BTREE_TYPE_LEAF)
+               goto skip;
+       if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD)
+               return(0);
        tmp_offset = elm->leaf.data_offset;
        zone = HAMMER_ZONE_DECODE(tmp_offset);          /* can be 0 */
        if ((zone == HAMMER_ZONE_SMALL_DATA_INDEX ||
             zone == HAMMER_ZONE_LARGE_DATA_INDEX) &&
-           error == 0 && (reblock->head.flags & HAMMER_IOC_DO_DATA)) {
+           error == 0 && (reblock->head.flags & (HAMMER_IOC_DO_DATA | HAMMER_IOC_DO_INODES))) {
                ++reblock->data_count;
                reblock->data_byte_count += elm->leaf.data_len;
                bytes = hammer_blockmap_getfree(cursor->trans->hmp, tmp_offset,
                                                &cur, &error);
+               if (hammer_debug_general & 0x4000)
+                       kprintf("D %6d/%d\n", bytes, reblock->free_level);
                if (error == 0 && cur == 0 && bytes >= reblock->free_level) {
-                       if (hammer_debug_general & 0x4000)
-                               kprintf("%6d ", bytes);
                        error = hammer_cursor_upgrade(cursor);
                        if (error == 0) {
                                error = hammer_reblock_data(reblock,
@@ -182,9 +203,9 @@ hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
                }
        }
 
+skip:
        /*
-        * Reblock a B-Tree node.  Adjust elm to point at the parent's
-        * leaf entry.
+        * Reblock a B-Tree internal or leaf node.
         */
        tmp_offset = cursor->node->node_offset;
        zone = HAMMER_ZONE_DECODE(tmp_offset);
@@ -193,17 +214,27 @@ hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
                ++reblock->btree_count;
                bytes = hammer_blockmap_getfree(cursor->trans->hmp, tmp_offset,
                                                &cur, &error);
+               if (hammer_debug_general & 0x4000)
+                       kprintf("B %6d/%d\n", bytes, reblock->free_level);
                if (error == 0 && cur == 0 && bytes >= reblock->free_level) {
-                       if (hammer_debug_general & 0x4000)
-                               kprintf("%6d ", bytes);
                        error = hammer_cursor_upgrade(cursor);
                        if (error == 0) {
                                if (cursor->parent)
                                        elm = &cursor->parent->ondisk->elms[cursor->parent_index];
                                else
                                        elm = NULL;
-                               error = hammer_reblock_node(reblock,
-                                                           cursor, elm);
+                               switch(cursor->node->ondisk->type) {
+                               case HAMMER_BTREE_TYPE_LEAF:
+                                       error = hammer_reblock_leaf_node(
+                                                       reblock, cursor, elm);
+                                       break;
+                               case HAMMER_BTREE_TYPE_INTERNAL:
+                                       error = hammer_reblock_int_node(
+                                                       reblock, cursor, elm);
+                                       break;
+                               default:
+                                       panic("Illegal B-Tree node type");
+                               }
                        }
                        if (error == 0) {
                                ++reblock->btree_moves;
@@ -259,15 +290,14 @@ done:
 }
 
 /*
- * Reblock a B-Tree (leaf) node.  The parent must be adjusted to point to
- * the new copy of the leaf node.  elm is a pointer to the parent element
- * pointing at cursor.node.
+ * Reblock a B-Tree leaf node.  The parent must be adjusted to point to
+ * the new copy of the leaf node.
  *
- * XXX reblock internal nodes too.
+ * elm is a pointer to the parent element pointing at cursor.node.
  */
 static int
-hammer_reblock_node(struct hammer_ioc_reblock *reblock,
-                   hammer_cursor_t cursor, hammer_btree_elm_t elm)
+hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
+                        hammer_cursor_t cursor, hammer_btree_elm_t elm)
 {
        hammer_node_t onode;
        hammer_node_t nnode;
@@ -314,7 +344,97 @@ hammer_reblock_node(struct hammer_ioc_reblock *reblock,
        hammer_delete_node(cursor->trans, onode);
 
        if (hammer_debug_general & 0x4000) {
-               kprintf("REBLOCK NODE %016llx -> %016llx\n",
+               kprintf("REBLOCK LNODE %016llx -> %016llx\n",
+                       onode->node_offset, nnode->node_offset);
+       }
+       hammer_modify_node_done(nnode);
+       cursor->node = nnode;
+
+       hammer_unlock(&onode->lock);
+       hammer_rel_node(onode);
+
+       return (error);
+}
+
+/*
+ * Reblock a B-Tree internal node.  The parent must be adjusted to point to
+ * the new copy of the internal node, and the node's children's parent
+ * pointers must also be adjusted to point to the new copy.
+ *
+ * elm is a pointer to the parent element pointing at cursor.node.
+ */
+static int
+hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
+                        hammer_cursor_t cursor, hammer_btree_elm_t elm)
+{
+       hammer_node_locklist_t locklist = NULL;
+       hammer_node_t onode;
+       hammer_node_t nnode;
+       int error;
+       int i;
+
+       error = hammer_btree_lock_children(cursor, &locklist);
+       if (error)
+               goto done;
+
+       onode = cursor->node;
+       nnode = hammer_alloc_btree(cursor->trans, &error);
+
+       if (nnode == NULL)
+               goto done;
+
+       /*
+        * Move the node.  Adjust the parent's pointer to us first.
+        */
+       hammer_lock_ex(&nnode->lock);
+       hammer_modify_node_noundo(cursor->trans, nnode);
+       bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));
+
+       if (elm) {
+               /*
+                * We are not the root of the B-Tree 
+                */
+               hammer_modify_node(cursor->trans, cursor->parent,
+                                  &elm->internal.subtree_offset,
+                                  sizeof(elm->internal.subtree_offset));
+               elm->internal.subtree_offset = nnode->node_offset;
+               hammer_modify_node_done(cursor->parent);
+       } else {
+               /*
+                * We are the root of the B-Tree
+                */
+                hammer_volume_t volume;
+                        
+                volume = hammer_get_root_volume(cursor->trans->hmp, &error);
+                KKASSERT(error == 0);
+
+                hammer_modify_volume_field(cursor->trans, volume,
+                                          vol0_btree_root);
+                volume->ondisk->vol0_btree_root = nnode->node_offset;
+                hammer_modify_volume_done(volume);
+                hammer_rel_volume(volume, 0);
+        }
+
+       /*
+        * Now adjust our children's pointers to us.
+        */
+       for (i = 0; i < nnode->ondisk->count; ++i) {
+               elm = &nnode->ondisk->elms[i];
+               error = btree_set_parent(cursor->trans, nnode, elm);
+               if (error)
+                       panic("reblock internal node: fixup problem");
+       }
+
+       /*
+        * Clean up.
+        *
+        * The new node replaces the current node in the cursor.  The cursor
+        * expects it to be locked so leave it locked.  Discard onode.
+        */
+       hammer_delete_node(cursor->trans, onode);
+
+       if (hammer_debug_general & 0x4000) {
+               kprintf("REBLOCK INODE %016llx -> %016llx\n",
                        onode->node_offset, nnode->node_offset);
        }
        hammer_modify_node_done(nnode);
@@ -323,6 +443,8 @@ hammer_reblock_node(struct hammer_ioc_reblock *reblock,
        hammer_unlock(&onode->lock);
        hammer_rel_node(onode);
 
+done:
+       hammer_btree_unlock_children(&locklist);
        return (error);
 }
 
index 6b70dd3..872f302 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.17 2008/05/15 03:36:40 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.18 2008/05/18 01:48:50 dillon Exp $
  */
 
 #include "hammer.h"
@@ -209,6 +209,7 @@ hammer_recover_undo(hammer_mount_t hmp, hammer_fifo_undo_t undo, int bytes)
        hammer_fifo_tail_t tail;
        hammer_volume_t volume;
        hammer_buffer_t buffer;
+       hammer_off_t buf_offset;
        int zone;
        int error;
        int vol_no;
@@ -316,7 +317,8 @@ hammer_recover_undo(hammer_mount_t hmp, hammer_fifo_undo_t undo, int bytes)
                hammer_rel_volume(volume, 0);
                break;
        case HAMMER_ZONE_RAW_BUFFER_INDEX:
-               buffer = hammer_get_buffer(hmp, undo->undo_offset, 0, &error);
+               buf_offset = undo->undo_offset & ~HAMMER_BUFMASK64;
+               buffer = hammer_get_buffer(hmp, buf_offset, 0, &error);
                if (buffer == NULL) {
                        kprintf("HAMMER: UNDO record, "
                                "cannot access buffer %016llx\n",
index 4618508..9980ad9 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.19 2008/05/12 21:17:18 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.20 2008/05/18 01:48:50 dillon Exp $
  */
 /*
  * HAMMER structural locking
@@ -202,6 +202,38 @@ hammer_unref(struct hammer_lock *lock)
        crit_exit();
 }
 
+/*
+ * The sync_lock must be held when doing any modifying operations on
+ * meta-data.  The flusher holds the lock exclusively while the reblocker
+ * and pruner use a shared lock.
+ *
+ * Modifying operations can run in parallel until the flusher needs to
+ * sync the disk media.
+ */
+void
+hammer_sync_lock_ex(hammer_transaction_t trans)
+{
+       ++trans->sync_lock_refs;
+       hammer_lock_ex(&trans->hmp->sync_lock);
+}
+
+void
+hammer_sync_lock_sh(hammer_transaction_t trans)
+{
+       ++trans->sync_lock_refs;
+       hammer_lock_sh(&trans->hmp->sync_lock);
+}
+
+void
+hammer_sync_unlock(hammer_transaction_t trans)
+{
+       --trans->sync_lock_refs;
+       hammer_unlock(&trans->hmp->sync_lock);
+}
+
+/*
+ * Misc
+ */
 u_int32_t
 hammer_to_unix_xid(uuid_t *uuid)
 {
index 4aebba6..f54e56f 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.14 2008/04/29 01:10:37 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.15 2008/05/18 01:48:50 dillon Exp $
  */
 
 #include "hammer.h"
@@ -54,6 +54,7 @@ hammer_start_transaction(struct hammer_transaction *trans,
        KKASSERT(error == 0);
        trans->tid = 0;
        trans->time = hammer_alloc_tid(trans, 1);
+       trans->sync_lock_refs = 0;
 }
 
 /*
@@ -71,11 +72,16 @@ hammer_simple_transaction(struct hammer_transaction *trans,
        KKASSERT(error == 0);
        trans->tid = 0;
        trans->time = hammer_alloc_tid(trans, 1);
+       trans->sync_lock_refs = 0;
 }
 
 /*
  * Start a transaction using a particular TID.  Used by the sync code.
  * This does not stall.
+ *
+ * This routine may only be called from the flusher thread.  We predispose
+ * sync_lock_refs, implying serialization against the synchronization stage
+ * (which the flusher is responsible for).
  */
 void
 hammer_start_transaction_fls(struct hammer_transaction *trans,
@@ -83,19 +89,27 @@ hammer_start_transaction_fls(struct hammer_transaction *trans,
 {
        int error;
 
+       KKASSERT(curthread == hmp->flusher_td);
+
        trans->type = HAMMER_TRANS_FLS;
        trans->hmp = hmp;
        trans->rootvol = hammer_get_root_volume(hmp, &error);
        KKASSERT(error == 0);
        trans->tid = hammer_alloc_tid(trans, 1);
        trans->time = trans->tid;
+       trans->sync_lock_refs = 1;
 }
 
 void
 hammer_done_transaction(struct hammer_transaction *trans)
 {
+       int expected_lock_refs;
+
        hammer_rel_volume(trans->rootvol, 0);
        trans->rootvol = NULL;
+       expected_lock_refs = (trans->type == HAMMER_TRANS_FLS) ? 1 : 0;
+       KKASSERT(trans->sync_lock_refs == expected_lock_refs);
+       trans->sync_lock_refs = 0;
 }
 
 /*
index e9b9773..8ad7610 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.34 2008/05/06 00:21:08 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.35 2008/05/18 01:48:50 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -47,6 +47,7 @@
 #include <sys/buf2.h>
 #include "hammer.h"
 
+int hammer_debug_io;
 int hammer_debug_general;
 int hammer_debug_debug;
 int hammer_debug_inode;
@@ -70,6 +71,8 @@ int64_t hammer_zone_limit;
 SYSCTL_NODE(_vfs, OID_AUTO, hammer, CTLFLAG_RW, 0, "HAMMER filesystem");
 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_general, CTLFLAG_RW,
           &hammer_debug_general, 0, "");
+SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_io, CTLFLAG_RW,
+          &hammer_debug_io, 0, "");
 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_debug, CTLFLAG_RW,
           &hammer_debug_debug, 0, "");
 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_inode, CTLFLAG_RW,
@@ -186,6 +189,7 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
                hmp->namekey_iterator = mycpu->gd_time_seconds;
                /*TAILQ_INIT(&hmp->recycle_list);*/
 
+               hmp->root_btree_beg.localization = HAMMER_MIN_LOCALIZATION;
                hmp->root_btree_beg.obj_id = -0x8000000000000000LL;
                hmp->root_btree_beg.key = -0x8000000000000000LL;
                hmp->root_btree_beg.create_tid = 1;
@@ -193,6 +197,7 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
                hmp->root_btree_beg.rec_type = 0;
                hmp->root_btree_beg.obj_type = 0;
 
+               hmp->root_btree_end.localization = HAMMER_MAX_LOCALIZATION;
                hmp->root_btree_end.obj_id = 0x7FFFFFFFFFFFFFFFLL;
                hmp->root_btree_end.key = 0x7FFFFFFFFFFFFFFFLL;
                hmp->root_btree_end.create_tid = 0xFFFFFFFFFFFFFFFFULL;
index ddbb419..e2740ab 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.53 2008/05/12 23:15:46 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.54 2008/05/18 01:48:50 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -666,6 +666,7 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap)
        namekey = hammer_directory_namekey(ncp->nc_name, nlen);
 
        error = hammer_init_cursor(&trans, &cursor, &dip->cache[0], dip);
+       cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
         cursor.key_beg.obj_id = dip->obj_id;
        cursor.key_beg.key = namekey;
         cursor.key_beg.create_tid = 0;
@@ -1063,6 +1064,7 @@ hammer_vop_readdir(struct vop_readdir_args *ap)
         * directly translate to a 64 bit 'seek' position.
         */
        hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);
+       cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
        cursor.key_beg.obj_id = ip->obj_id;
        cursor.key_beg.create_tid = 0;
        cursor.key_beg.delete_tid = 0;
@@ -1144,14 +1146,26 @@ hammer_vop_readlink(struct vop_readlink_args *ap)
 
        ip = VTOI(ap->a_vp);
 
-       hammer_simple_transaction(&trans, ip->hmp);
+       /*
+        * Shortcut if the symlink data was stuffed into ino_data.
+        */
+       if (ip->ino_data.size <= HAMMER_INODE_BASESYMLEN) {
+               error = uiomove(ip->ino_data.ext.symlink,
+                               ip->ino_data.size, ap->a_uio);
+               return(error);
+       }
 
+       /*
+        * Long version: look up the record holding the symlink target.
+        */
+       hammer_simple_transaction(&trans, ip->hmp);
        hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);
 
        /*
         * Key range (begin and end inclusive) to scan.  Directory keys
         * directly translate to a 64 bit 'seek' position.
         */
+       cursor.key_beg.localization = HAMMER_LOCALIZE_MISC; /* XXX */
        cursor.key_beg.obj_id = ip->obj_id;
        cursor.key_beg.create_tid = 0;
        cursor.key_beg.delete_tid = 0;
@@ -1258,6 +1272,7 @@ hammer_vop_nrename(struct vop_nrename_args *ap)
        namekey = hammer_directory_namekey(fncp->nc_name, fncp->nc_nlen);
 retry:
        hammer_init_cursor(&trans, &cursor, &fdip->cache[0], fdip);
+       cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
         cursor.key_beg.obj_id = fdip->obj_id;
        cursor.key_beg.key = namekey;
         cursor.key_beg.create_tid = 0;
@@ -1546,15 +1561,21 @@ hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
         */
        if (error == 0) {
                bytes = strlen(ap->a_target);
-               record = hammer_alloc_mem_record(nip, bytes);
-               record->type = HAMMER_MEM_RECORD_GENERAL;
 
-               record->leaf.base.key = HAMMER_FIXKEY_SYMLINK;
-               record->leaf.base.rec_type = HAMMER_RECTYPE_FIX;
-               record->leaf.data_len = bytes;
-               KKASSERT(HAMMER_SYMLINK_NAME_OFF == 0);
-               bcopy(ap->a_target, record->data->symlink.name, bytes);
-               error = hammer_ip_add_record(&trans, record);
+               if (bytes <= HAMMER_INODE_BASESYMLEN) {
+                       bcopy(ap->a_target, nip->ino_data.ext.symlink, bytes);
+               } else {
+                       record = hammer_alloc_mem_record(nip, bytes);
+                       record->type = HAMMER_MEM_RECORD_GENERAL;
+
+                       record->leaf.base.localization = HAMMER_LOCALIZE_MISC;
+                       record->leaf.base.key = HAMMER_FIXKEY_SYMLINK;
+                       record->leaf.base.rec_type = HAMMER_RECTYPE_FIX;
+                       record->leaf.data_len = bytes;
+                       KKASSERT(HAMMER_SYMLINK_NAME_OFF == 0);
+                       bcopy(ap->a_target, record->data->symlink.name, bytes);
+                       error = hammer_ip_add_record(&trans, record);
+               }
 
                /*
                 * Set the file size to the length of the link.
@@ -1713,6 +1734,7 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap)
         * stored in the actual records represent BASE+LEN, not BASE.  The
         * first record containing bio_offset will have a key > bio_offset.
         */
+       cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
        cursor.key_beg.obj_id = ip->obj_id;
        cursor.key_beg.create_tid = 0;
        cursor.key_beg.delete_tid = 0;
@@ -1992,6 +2014,7 @@ hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
        namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen);
 retry:
        hammer_init_cursor(trans, &cursor, &dip->cache[0], dip);
+       cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
         cursor.key_beg.obj_id = dip->obj_id;
        cursor.key_beg.key = namekey;
         cursor.key_beg.create_tid = 0;