HAMMER VFS - Add hinting capability to block allocator, hint B-Tree
authorMatthew Dillon <dillon@apollo.backplane.com>
Sat, 20 Jun 2009 21:49:12 +0000 (14:49 -0700)
committerMatthew Dillon <dillon@apollo.backplane.com>
Sat, 20 Jun 2009 21:49:12 +0000 (14:49 -0700)
* A hammer_off_t can now be supplied to the blockmap allocator as a hint.

* Use the hinting mechanism to better-localize B-Tree node allocations
  and meta-data updates.

sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_blockmap.c
sys/vfs/hammer/hammer_btree.c
sys/vfs/hammer/hammer_disk.h
sys/vfs/hammer/hammer_mirror.c
sys/vfs/hammer/hammer_object.c
sys/vfs/hammer/hammer_ondisk.c
sys/vfs/hammer/hammer_reblock.c

index 40df11f..3daea5b 100644 (file)
@@ -1050,10 +1050,12 @@ void            hammer_flush_node(hammer_node_t node);
 
 void hammer_dup_buffer(struct hammer_buffer **bufferp,
                        struct hammer_buffer *buffer);
-hammer_node_t hammer_alloc_btree(hammer_transaction_t trans, int *errorp);
+hammer_node_t hammer_alloc_btree(hammer_transaction_t trans,
+                       hammer_off_t hint, int *errorp);
 void *hammer_alloc_data(hammer_transaction_t trans, int32_t data_len,
                        u_int16_t rec_type, hammer_off_t *data_offsetp,
-                       struct hammer_buffer **data_bufferp, int *errorp);
+                       struct hammer_buffer **data_bufferp,
+                       hammer_off_t hint, int *errorp);
 
 int hammer_generate_undo(hammer_transaction_t trans, hammer_io_t io,
                        hammer_off_t zone1_offset, void *base, int len);
@@ -1067,7 +1069,7 @@ void hammer_freemap_free(hammer_transaction_t trans, hammer_off_t phys_offset,
                        hammer_off_t owner, int *errorp);
 int hammer_checkspace(hammer_mount_t hmp, int slop);
 hammer_off_t hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
-                       int bytes, int *errorp);
+                       int bytes, hammer_off_t hint, int *errorp);
 hammer_reserve_t hammer_blockmap_reserve(hammer_mount_t hmp, int zone,
                        int bytes, hammer_off_t *zone_offp, int *errorp);
 void hammer_blockmap_reserve_complete(hammer_mount_t hmp,
index 069fff6..afac580 100644 (file)
@@ -65,8 +65,8 @@ hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
  * Allocate bytes from a zone
  */
 hammer_off_t
-hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
-                     int bytes, int *errorp)
+hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
+                     hammer_off_t hint, int *errorp)
 {
        hammer_mount_t hmp;
        hammer_volume_t root_volume;
@@ -86,6 +86,7 @@ hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
        hammer_off_t base_off;
        int loops = 0;
        int offset;             /* offset within big-block */
+       int use_hint;
 
        hmp = trans->hmp;
 
@@ -108,8 +109,26 @@ hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
        freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
        KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
 
-       next_offset = blockmap->next_offset;
+       /*
+        * Use the hint if we have one.
+        */
+       if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
+               next_offset = (hint + 15) & ~(hammer_off_t)15;
+               use_hint = 1;
+       } else {
+               next_offset = blockmap->next_offset;
+               use_hint = 0;
+       }
 again:
+
+       /*
+        * use_hint is turned off if we leave the hinted big-block.
+        */
+       if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
+               next_offset = blockmap->next_offset;
+               use_hint = 0;
+       }
+
        /*
         * Check for wrap
         */
@@ -207,6 +226,26 @@ again:
                goto again;
        }
 
+       /*
+        * If operating in the current non-hint blockmap block, do not
+        * allow it to get over-full.  Also drop any active hinting so
+        * blockmap->next_offset is updated at the end.
+        *
+        * We do this for B-Tree and meta-data allocations to provide
+        * localization for updates.
+        */
+       if ((zone == HAMMER_ZONE_BTREE_INDEX ||
+            zone == HAMMER_ZONE_META_INDEX) &&
+           offset >= HAMMER_LARGEBLOCK_OVERFILL &&
+           !((next_offset ^ blockmap->next_offset) & ~HAMMER_LARGEBLOCK_MASK64)
+       ) {
+               if (offset >= HAMMER_LARGEBLOCK_OVERFILL) {
+                       next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
+                       use_hint = 0;
+                       goto again;
+               }
+       }
+
        /*
         * We need the lock from this point on.  We have to re-check zone
         * ownership after acquiring the lock and also check for reservations.
@@ -310,11 +349,15 @@ again:
        result_offset = next_offset;
 
        /*
-        * Process allocated result_offset
+        * If we weren't supplied with a hint or could not use the hint
+        * then we wound up using blockmap->next_offset as the hint and
+        * need to save it.
         */
-       hammer_modify_volume(NULL, root_volume, NULL, 0);
-       blockmap->next_offset = next_offset + bytes;
-       hammer_modify_volume_done(root_volume);
+       if (use_hint == 0) {
+               hammer_modify_volume(NULL, root_volume, NULL, 0);
+               blockmap->next_offset = next_offset + bytes;
+               hammer_modify_volume_done(root_volume);
+       }
        hammer_unlock(&hmp->blkmap_lock);
 failed:
 
index d208df7..f3fe3ad 100644 (file)
@@ -1425,6 +1425,7 @@ btree_split_internal(hammer_cursor_t cursor)
        hammer_btree_elm_t parent_elm;
        struct hammer_node_lock lockroot;
        hammer_mount_t hmp = cursor->trans->hmp;
+       hammer_off_t hint;
        int parent_index;
        int made_root;
        int split;
@@ -1458,7 +1459,8 @@ btree_split_internal(hammer_cursor_t cursor)
         * modifications until we know the whole operation will work.
         */
        if (ondisk->parent == 0) {
-               parent = hammer_alloc_btree(cursor->trans, &error);
+               parent = hammer_alloc_btree(cursor->trans, node->node_offset,
+                                           &error);
                if (parent == NULL)
                        goto done;
                hammer_lock_ex(&parent->lock);
@@ -1482,10 +1484,24 @@ btree_split_internal(hammer_cursor_t cursor)
                parent_index = cursor->parent_index;
        }
 
+       /*
+        * Calculate a hint for the allocation of the new B-Tree node.
+        * The most likely expansion is coming from the insertion point
+        * at cursor->index, so try to localize the allocation of our
+        * new node to accomodate that sub-tree.
+        *
+        * Use the right-most sub-tree when expandinging on the right edge.
+        * This is a very common case when copying a directory tree.
+        */
+       if (cursor->index == ondisk->count)
+               hint = ondisk->elms[cursor->index - 1].internal.subtree_offset;
+       else
+               hint = ondisk->elms[cursor->index].internal.subtree_offset;
+
        /*
         * Split node into new_node at the split point.
         *
-        *  B O O O P N N B     <-- P = node->elms[split]
+        *  B O O O P N N B     <-- P = node->elms[split] (index 4)
         *   0 1 2 3 4 5 6      <-- subtree indices
         *
         *       x x P x x
@@ -1493,9 +1509,8 @@ btree_split_internal(hammer_cursor_t cursor)
         *         /   \
         *  B O O O B    B N N B        <--- inner boundary points are 'P'
         *   0 1 2 3      4 5 6  
-        *
         */
-       new_node = hammer_alloc_btree(cursor->trans, &error);
+       new_node = hammer_alloc_btree(cursor->trans, hint, &error);
        if (new_node == NULL) {
                if (made_root) {
                        hammer_unlock(&parent->lock);
@@ -1650,6 +1665,7 @@ btree_split_leaf(hammer_cursor_t cursor)
        hammer_btree_elm_t elm;
        hammer_btree_elm_t parent_elm;
        hammer_base_elm_t mid_boundary;
+       hammer_off_t hint;
        int parent_index;
        int made_root;
        int split;
@@ -1694,7 +1710,8 @@ btree_split_leaf(hammer_cursor_t cursor)
         * until we know the whole operation will work.
         */
        if (ondisk->parent == 0) {
-               parent = hammer_alloc_btree(cursor->trans, &error);
+               parent = hammer_alloc_btree(cursor->trans, leaf->node_offset,
+                                           &error);
                if (parent == NULL)
                        goto done;
                hammer_lock_ex(&parent->lock);
@@ -1718,6 +1735,25 @@ btree_split_leaf(hammer_cursor_t cursor)
                parent_index = cursor->parent_index;
        }
 
+       /*
+        * Calculate a hint for the allocation of the new B-Tree leaf node.
+        * For now just try to localize it within the same bigblock as
+        * the current leaf.
+        *
+        * If the insertion point is at the end of the leaf we recognize a
+        * likely append sequence of some sort (data, meta-data, inodes,
+        * whatever).  Set the hint to zero to allocate out of linear space
+        * instead of trying to completely fill previously hinted space.
+        *
+        * This also sets the stage for recursive splits to localize using
+        * the new space.
+        */
+       ondisk = leaf->ondisk;
+       if (cursor->index == ondisk->count)
+               hint = 0;
+       else
+               hint = leaf->node_offset;
+
        /*
         * Split leaf into new_leaf at the split point.  Select a separator
         * value in-between the two leafs but with a bent towards the right
@@ -1730,7 +1766,7 @@ btree_split_leaf(hammer_cursor_t cursor)
         *         /   \
         *  L L L L     L L L L
         */
-       new_leaf = hammer_alloc_btree(cursor->trans, &error);
+       new_leaf = hammer_alloc_btree(cursor->trans, hint, &error);
        if (new_leaf == NULL) {
                if (made_root) {
                        hammer_unlock(&parent->lock);
index 46758e8..8da1e1d 100644 (file)
@@ -197,8 +197,15 @@ typedef u_int32_t hammer_crc_t;
  * offset into a raw zone 2 offset.  Each layer handles 18 bits.  The 8M
  * large-block size is 23 bits so two layers gives us 23+18+18 = 59 bits
  * of address space.
+ *
+ * When using hinting for a blockmap lookup, the hint is lost when the
+ * scan leaves the HINTBLOCK, which is typically several LARGEBLOCK's.
+ * HINTBLOCK is a heuristic.
  */
+#define HAMMER_HINTBLOCK_SIZE          (HAMMER_LARGEBLOCK_SIZE * 4)
+#define HAMMER_HINTBLOCK_MASK64                ((u_int64_t)HAMMER_HINTBLOCK_SIZE - 1)
 #define HAMMER_LARGEBLOCK_SIZE         (8192 * 1024)
+#define HAMMER_LARGEBLOCK_OVERFILL     (6144 * 1024)
 #define HAMMER_LARGEBLOCK_SIZE64       ((u_int64_t)HAMMER_LARGEBLOCK_SIZE)
 #define HAMMER_LARGEBLOCK_MASK         (HAMMER_LARGEBLOCK_SIZE - 1)
 #define HAMMER_LARGEBLOCK_MASK64       ((u_int64_t)HAMMER_LARGEBLOCK_SIZE - 1)
index 2267902..3c4b966 100644 (file)
@@ -805,7 +805,8 @@ hammer_mirror_write(hammer_cursor_t cursor,
        if (mrec->leaf.data_len && mrec->leaf.data_offset) {
                ndata = hammer_alloc_data(trans, mrec->leaf.data_len,
                                          mrec->leaf.base.rec_type,
-                                         &ndata_offset, &data_buffer, &error);
+                                         &ndata_offset, &data_buffer,
+                                         0, &error);
                if (ndata == NULL)
                        return(error);
                mrec->leaf.data_offset = ndata_offset;
index 1ec890a..5068cda 100644 (file)
@@ -1168,10 +1168,10 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record)
        /*
         * We are inserting.
         *
-        * Issue a lookup to position the cursor and locate the cluster.  The
-        * target key should not exist.  If we are creating a directory entry
-        * we may have to iterate the low 32 bits of the key to find an unused
-        * key.
+        * Issue a lookup to position the cursor and locate the insertion
+        * point.  The target key should not exist.  If we are creating a
+        * directory entry we may have to iterate the low 32 bits of the
+        * key to find an unused key.
         */
        hammer_sync_lock_sh(trans);
        cursor->flags |= HAMMER_CURSOR_INSERT;
@@ -1219,7 +1219,8 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record)
                bdata = hammer_alloc_data(trans, record->leaf.data_len,
                                          record->leaf.base.rec_type,
                                          &record->leaf.data_offset,
-                                         &cursor->data_buffer, &error);
+                                         &cursor->data_buffer,
+                                         0, &error);
                if (bdata == NULL)
                        goto done_unlock;
                hammer_crc_set_leaf(record->data, &record->leaf);
index cc1bf08..ff24acf 100644 (file)
@@ -1412,7 +1412,7 @@ hammer_flush_buffer_nodes(hammer_buffer_t buffer)
  * Allocate a B-Tree node.
  */
 hammer_node_t
-hammer_alloc_btree(hammer_transaction_t trans, int *errorp)
+hammer_alloc_btree(hammer_transaction_t trans, hammer_off_t hint, int *errorp)
 {
        hammer_buffer_t buffer = NULL;
        hammer_node_t node = NULL;
@@ -1420,7 +1420,7 @@ hammer_alloc_btree(hammer_transaction_t trans, int *errorp)
 
        node_offset = hammer_blockmap_alloc(trans, HAMMER_ZONE_BTREE_INDEX,
                                            sizeof(struct hammer_node_ondisk),
-                                           errorp);
+                                           hint, errorp);
        if (*errorp == 0) {
                node = hammer_get_node(trans, node_offset, 1, errorp);
                hammer_modify_node_noundo(trans, node);
@@ -1445,7 +1445,8 @@ hammer_alloc_btree(hammer_transaction_t trans, int *errorp)
 void *
 hammer_alloc_data(hammer_transaction_t trans, int32_t data_len, 
                  u_int16_t rec_type, hammer_off_t *data_offsetp,
-                 struct hammer_buffer **data_bufferp, int *errorp)
+                 struct hammer_buffer **data_bufferp,
+                 hammer_off_t hint, int *errorp)
 {
        void *data;
        int zone;
@@ -1478,8 +1479,8 @@ hammer_alloc_data(hammer_transaction_t trans, int32_t data_len,
                        zone = 0;       /* NOT REACHED */
                        break;
                }
-               *data_offsetp = hammer_blockmap_alloc(trans, zone,
-                                                     data_len, errorp);
+               *data_offsetp = hammer_blockmap_alloc(trans, zone, data_len,
+                                                     hint, errorp);
        } else {
                *data_offsetp = 0;
        }
index 73774f6..f2a0c3e 100644 (file)
@@ -375,7 +375,8 @@ hammer_reblock_data(struct hammer_ioc_reblock *reblock,
                return (error);
        ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len,
                                  elm->leaf.base.rec_type,
-                                 &ndata_offset, &data_buffer, &error);
+                                 &ndata_offset, &data_buffer,
+                                 0, &error);
        if (error)
                goto done;
 
@@ -414,8 +415,12 @@ hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
        hammer_node_t nnode;
        int error;
 
+       /*
+        * Don't supply a hint when allocating the leaf.  Fills are done
+        * from the leaf upwards.
+        */
        onode = cursor->node;
-       nnode = hammer_alloc_btree(cursor->trans, &error);
+       nnode = hammer_alloc_btree(cursor->trans, 0, &error);
 
        if (nnode == NULL)
                return (error);
@@ -482,6 +487,7 @@ hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
        struct hammer_node_lock lockroot;
        hammer_node_t onode;
        hammer_node_t nnode;
+       hammer_off_t hint;
        int error;
        int i;
 
@@ -490,8 +496,17 @@ hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
        if (error)
                goto done;
 
+       /*
+        * The internal node is visited after recursing through its
+        * first element.  Use the subtree offset allocated for that
+        * element as a hint for allocating the internal node.
+        */
        onode = cursor->node;
-       nnode = hammer_alloc_btree(cursor->trans, &error);
+       if (onode->ondisk->count)
+               hint = onode->ondisk->elms[0].internal.subtree_offset;
+       else
+               hint = 0;
+       nnode = hammer_alloc_btree(cursor->trans, hint, &error);
 
        if (nnode == NULL)
                goto done;