From: Matthew Dillon Date: Sat, 20 Jun 2009 21:49:12 +0000 (-0700) Subject: HAMMER VFS - Add hinting capability to block allocator, hint B-Tree X-Git-Tag: v2.3.2~128 X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/df2ccbac56a7e11715c25f2c1617dafb6db040b1 HAMMER VFS - Add hinting capability to block allocator, hint B-Tree * A hammer_off_t can now be supplied to the blockmap allocator as a hint. * Use the hinting mechanism to better-localize B-Tree node allocations and meta-data updates. --- diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index 40df11f131..3daea5b2e4 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -1050,10 +1050,12 @@ void hammer_flush_node(hammer_node_t node); void hammer_dup_buffer(struct hammer_buffer **bufferp, struct hammer_buffer *buffer); -hammer_node_t hammer_alloc_btree(hammer_transaction_t trans, int *errorp); +hammer_node_t hammer_alloc_btree(hammer_transaction_t trans, + hammer_off_t hint, int *errorp); void *hammer_alloc_data(hammer_transaction_t trans, int32_t data_len, u_int16_t rec_type, hammer_off_t *data_offsetp, - struct hammer_buffer **data_bufferp, int *errorp); + struct hammer_buffer **data_bufferp, + hammer_off_t hint, int *errorp); int hammer_generate_undo(hammer_transaction_t trans, hammer_io_t io, hammer_off_t zone1_offset, void *base, int len); @@ -1067,7 +1069,7 @@ void hammer_freemap_free(hammer_transaction_t trans, hammer_off_t phys_offset, hammer_off_t owner, int *errorp); int hammer_checkspace(hammer_mount_t hmp, int slop); hammer_off_t hammer_blockmap_alloc(hammer_transaction_t trans, int zone, - int bytes, int *errorp); + int bytes, hammer_off_t hint, int *errorp); hammer_reserve_t hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes, hammer_off_t *zone_offp, int *errorp); void hammer_blockmap_reserve_complete(hammer_mount_t hmp, diff --git a/sys/vfs/hammer/hammer_blockmap.c b/sys/vfs/hammer/hammer_blockmap.c index 069fff6673..afac580d64 100644 
--- a/sys/vfs/hammer/hammer_blockmap.c +++ b/sys/vfs/hammer/hammer_blockmap.c @@ -65,8 +65,8 @@ hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2) * Allocate bytes from a zone */ hammer_off_t -hammer_blockmap_alloc(hammer_transaction_t trans, int zone, - int bytes, int *errorp) +hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes, + hammer_off_t hint, int *errorp) { hammer_mount_t hmp; hammer_volume_t root_volume; @@ -86,6 +86,7 @@ hammer_blockmap_alloc(hammer_transaction_t trans, int zone, hammer_off_t base_off; int loops = 0; int offset; /* offset within big-block */ + int use_hint; hmp = trans->hmp; @@ -108,8 +109,26 @@ hammer_blockmap_alloc(hammer_transaction_t trans, int zone, freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX]; KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone); - next_offset = blockmap->next_offset; + /* + * Use the hint if we have one. + */ + if (hint && HAMMER_ZONE_DECODE(hint) == zone) { + next_offset = (hint + 15) & ~(hammer_off_t)15; + use_hint = 1; + } else { + next_offset = blockmap->next_offset; + use_hint = 0; + } again: + + /* + * use_hint is turned off if we leave the hinted big-block. + */ + if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) { + next_offset = blockmap->next_offset; + use_hint = 0; + } + /* * Check for wrap */ @@ -207,6 +226,26 @@ again: goto again; } + /* + * If operating in the current non-hint blockmap block, do not + * allow it to get over-full. Also drop any active hinting so + * blockmap->next_offset is updated at the end. + * + * We do this for B-Tree and meta-data allocations to provide + * localization for updates. 
+ */ + if ((zone == HAMMER_ZONE_BTREE_INDEX || + zone == HAMMER_ZONE_META_INDEX) && + offset >= HAMMER_LARGEBLOCK_OVERFILL && + !((next_offset ^ blockmap->next_offset) & ~HAMMER_LARGEBLOCK_MASK64) + ) { + if (offset >= HAMMER_LARGEBLOCK_OVERFILL) { + next_offset += (HAMMER_LARGEBLOCK_SIZE - offset); + use_hint = 0; + goto again; + } + } + /* * We need the lock from this point on. We have to re-check zone * ownership after acquiring the lock and also check for reservations. @@ -310,11 +349,15 @@ again: result_offset = next_offset; /* - * Process allocated result_offset + * If we weren't supplied with a hint or could not use the hint + * then we wound up using blockmap->next_offset as the hint and + * need to save it. */ - hammer_modify_volume(NULL, root_volume, NULL, 0); - blockmap->next_offset = next_offset + bytes; - hammer_modify_volume_done(root_volume); + if (use_hint == 0) { + hammer_modify_volume(NULL, root_volume, NULL, 0); + blockmap->next_offset = next_offset + bytes; + hammer_modify_volume_done(root_volume); + } hammer_unlock(&hmp->blkmap_lock); failed: diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c index d208df74b6..f3fe3ad45c 100644 --- a/sys/vfs/hammer/hammer_btree.c +++ b/sys/vfs/hammer/hammer_btree.c @@ -1425,6 +1425,7 @@ btree_split_internal(hammer_cursor_t cursor) hammer_btree_elm_t parent_elm; struct hammer_node_lock lockroot; hammer_mount_t hmp = cursor->trans->hmp; + hammer_off_t hint; int parent_index; int made_root; int split; @@ -1458,7 +1459,8 @@ btree_split_internal(hammer_cursor_t cursor) * modifications until we know the whole operation will work. 
 */ if (ondisk->parent == 0) { - parent = hammer_alloc_btree(cursor->trans, &error); + parent = hammer_alloc_btree(cursor->trans, node->node_offset, + &error); if (parent == NULL) goto done; hammer_lock_ex(&parent->lock); @@ -1482,10 +1484,24 @@ btree_split_internal(hammer_cursor_t cursor) parent_index = cursor->parent_index; } + /* + * Calculate a hint for the allocation of the new B-Tree node. + * The most likely expansion is coming from the insertion point + * at cursor->index, so try to localize the allocation of our + * new node to accommodate that sub-tree. + * + * Use the right-most sub-tree when expanding on the right edge. + * This is a very common case when copying a directory tree. + */ + if (cursor->index == ondisk->count) + hint = ondisk->elms[cursor->index - 1].internal.subtree_offset; + else + hint = ondisk->elms[cursor->index].internal.subtree_offset; + /* * Split node into new_node at the split point. * - * B O O O P N N B <-- P = node->elms[split] + * B O O O P N N B <-- P = node->elms[split] (index 4) * 0 1 2 3 4 5 6 <-- subtree indices * * x x P x x @@ -1493,9 +1509,8 @@ btree_split_internal(hammer_cursor_t cursor) * / \ * B O O O B B N N B <--- inner boundary points are 'P' * 0 1 2 3 4 5 6 - * */ - new_node = hammer_alloc_btree(cursor->trans, &error); + new_node = hammer_alloc_btree(cursor->trans, hint, &error); if (new_node == NULL) { if (made_root) { hammer_unlock(&parent->lock); @@ -1650,6 +1665,7 @@ btree_split_leaf(hammer_cursor_t cursor) hammer_btree_elm_t elm; hammer_btree_elm_t parent_elm; hammer_base_elm_t mid_boundary; + hammer_off_t hint; int parent_index; int made_root; int split; @@ -1694,7 +1710,8 @@ btree_split_leaf(hammer_cursor_t cursor) * until we know the whole operation will work. 
*/ if (ondisk->parent == 0) { - parent = hammer_alloc_btree(cursor->trans, &error); + parent = hammer_alloc_btree(cursor->trans, leaf->node_offset, + &error); if (parent == NULL) goto done; hammer_lock_ex(&parent->lock); @@ -1718,6 +1735,25 @@ btree_split_leaf(hammer_cursor_t cursor) parent_index = cursor->parent_index; } + /* + * Calculate a hint for the allocation of the new B-Tree leaf node. + * For now just try to localize it within the same bigblock as + * the current leaf. + * + * If the insertion point is at the end of the leaf we recognize a + * likely append sequence of some sort (data, meta-data, inodes, + * whatever). Set the hint to zero to allocate out of linear space + * instead of trying to completely fill previously hinted space. + * + * This also sets the stage for recursive splits to localize using + * the new space. + */ + ondisk = leaf->ondisk; + if (cursor->index == ondisk->count) + hint = 0; + else + hint = leaf->node_offset; + /* * Split leaf into new_leaf at the split point. Select a separator * value in-between the two leafs but with a bent towards the right @@ -1730,7 +1766,7 @@ btree_split_leaf(hammer_cursor_t cursor) * / \ * L L L L L L L L */ - new_leaf = hammer_alloc_btree(cursor->trans, &error); + new_leaf = hammer_alloc_btree(cursor->trans, hint, &error); if (new_leaf == NULL) { if (made_root) { hammer_unlock(&parent->lock); diff --git a/sys/vfs/hammer/hammer_disk.h b/sys/vfs/hammer/hammer_disk.h index 46758e82a9..8da1e1d9d5 100644 --- a/sys/vfs/hammer/hammer_disk.h +++ b/sys/vfs/hammer/hammer_disk.h @@ -197,8 +197,15 @@ typedef u_int32_t hammer_crc_t; * offset into a raw zone 2 offset. Each layer handles 18 bits. The 8M * large-block size is 23 bits so two layers gives us 23+18+18 = 59 bits * of address space. + * + * When using hinting for a blockmap lookup, the hint is lost when the + * scan leaves the HINTBLOCK, which is typically several LARGEBLOCK's. + * HINTBLOCK is a heuristic. 
*/ +#define HAMMER_HINTBLOCK_SIZE (HAMMER_LARGEBLOCK_SIZE * 4) +#define HAMMER_HINTBLOCK_MASK64 ((u_int64_t)HAMMER_HINTBLOCK_SIZE - 1) #define HAMMER_LARGEBLOCK_SIZE (8192 * 1024) +#define HAMMER_LARGEBLOCK_OVERFILL (6144 * 1024) #define HAMMER_LARGEBLOCK_SIZE64 ((u_int64_t)HAMMER_LARGEBLOCK_SIZE) #define HAMMER_LARGEBLOCK_MASK (HAMMER_LARGEBLOCK_SIZE - 1) #define HAMMER_LARGEBLOCK_MASK64 ((u_int64_t)HAMMER_LARGEBLOCK_SIZE - 1) diff --git a/sys/vfs/hammer/hammer_mirror.c b/sys/vfs/hammer/hammer_mirror.c index 2267902fde..3c4b966690 100644 --- a/sys/vfs/hammer/hammer_mirror.c +++ b/sys/vfs/hammer/hammer_mirror.c @@ -805,7 +805,8 @@ hammer_mirror_write(hammer_cursor_t cursor, if (mrec->leaf.data_len && mrec->leaf.data_offset) { ndata = hammer_alloc_data(trans, mrec->leaf.data_len, mrec->leaf.base.rec_type, - &ndata_offset, &data_buffer, &error); + &ndata_offset, &data_buffer, + 0, &error); if (ndata == NULL) return(error); mrec->leaf.data_offset = ndata_offset; diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c index 1ec890a33d..5068cda545 100644 --- a/sys/vfs/hammer/hammer_object.c +++ b/sys/vfs/hammer/hammer_object.c @@ -1168,10 +1168,10 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record) /* * We are inserting. * - * Issue a lookup to position the cursor and locate the cluster. The - * target key should not exist. If we are creating a directory entry - * we may have to iterate the low 32 bits of the key to find an unused - * key. + * Issue a lookup to position the cursor and locate the insertion + * point. The target key should not exist. If we are creating a + * directory entry we may have to iterate the low 32 bits of the + * key to find an unused key. 
*/ hammer_sync_lock_sh(trans); cursor->flags |= HAMMER_CURSOR_INSERT; @@ -1219,7 +1219,8 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record) bdata = hammer_alloc_data(trans, record->leaf.data_len, record->leaf.base.rec_type, &record->leaf.data_offset, - &cursor->data_buffer, &error); + &cursor->data_buffer, + 0, &error); if (bdata == NULL) goto done_unlock; hammer_crc_set_leaf(record->data, &record->leaf); diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c index cc1bf08874..ff24acfc65 100644 --- a/sys/vfs/hammer/hammer_ondisk.c +++ b/sys/vfs/hammer/hammer_ondisk.c @@ -1412,7 +1412,7 @@ hammer_flush_buffer_nodes(hammer_buffer_t buffer) * Allocate a B-Tree node. */ hammer_node_t -hammer_alloc_btree(hammer_transaction_t trans, int *errorp) +hammer_alloc_btree(hammer_transaction_t trans, hammer_off_t hint, int *errorp) { hammer_buffer_t buffer = NULL; hammer_node_t node = NULL; @@ -1420,7 +1420,7 @@ hammer_alloc_btree(hammer_transaction_t trans, int *errorp) node_offset = hammer_blockmap_alloc(trans, HAMMER_ZONE_BTREE_INDEX, sizeof(struct hammer_node_ondisk), - errorp); + hint, errorp); if (*errorp == 0) { node = hammer_get_node(trans, node_offset, 1, errorp); hammer_modify_node_noundo(trans, node); @@ -1445,7 +1445,8 @@ hammer_alloc_btree(hammer_transaction_t trans, int *errorp) void * hammer_alloc_data(hammer_transaction_t trans, int32_t data_len, u_int16_t rec_type, hammer_off_t *data_offsetp, - struct hammer_buffer **data_bufferp, int *errorp) + struct hammer_buffer **data_bufferp, + hammer_off_t hint, int *errorp) { void *data; int zone; @@ -1478,8 +1479,8 @@ hammer_alloc_data(hammer_transaction_t trans, int32_t data_len, zone = 0; /* NOT REACHED */ break; } - *data_offsetp = hammer_blockmap_alloc(trans, zone, - data_len, errorp); + *data_offsetp = hammer_blockmap_alloc(trans, zone, data_len, + hint, errorp); } else { *data_offsetp = 0; } diff --git a/sys/vfs/hammer/hammer_reblock.c 
b/sys/vfs/hammer/hammer_reblock.c index 73774f6c16..f2a0c3e77a 100644 --- a/sys/vfs/hammer/hammer_reblock.c +++ b/sys/vfs/hammer/hammer_reblock.c @@ -375,7 +375,8 @@ hammer_reblock_data(struct hammer_ioc_reblock *reblock, return (error); ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len, elm->leaf.base.rec_type, - &ndata_offset, &data_buffer, &error); + &ndata_offset, &data_buffer, + 0, &error); if (error) goto done; @@ -414,8 +415,12 @@ hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock, hammer_node_t nnode; int error; + /* + * Don't supply a hint when allocating the leaf. Fills are done + * from the leaf upwards. + */ onode = cursor->node; - nnode = hammer_alloc_btree(cursor->trans, &error); + nnode = hammer_alloc_btree(cursor->trans, 0, &error); if (nnode == NULL) return (error); @@ -482,6 +487,7 @@ hammer_reblock_int_node(struct hammer_ioc_reblock *reblock, struct hammer_node_lock lockroot; hammer_node_t onode; hammer_node_t nnode; + hammer_off_t hint; int error; int i; @@ -490,8 +496,17 @@ hammer_reblock_int_node(struct hammer_ioc_reblock *reblock, if (error) goto done; + /* + * The internal node is visited after recursing through its + * first element. Use the subtree offset allocated for that + * element as a hint for allocating the internal node. + */ onode = cursor->node; - nnode = hammer_alloc_btree(cursor->trans, &error); + if (onode->ondisk->count) + hint = onode->ondisk->elms[0].internal.subtree_offset; + else + hint = 0; + nnode = hammer_alloc_btree(cursor->trans, hint, &error); if (nnode == NULL) goto done;