From 40043e7f2eabeb460c6601a568bd78156264ad8d Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sun, 10 Feb 2008 09:51:01 +0000 Subject: [PATCH] HAMMER 28/many: Implement zoned blockmap * Implement a zoned blockmap. Separate B-Tree nodes, records, small blocks of data, and large blocks of data into their own zones. Use 8MB large blocks, 32-byte blockmap entry structures, and two layers to support 59 bits (512 petabytes). * Create a temporary freeblock allocator so the blockmap can be tested. It just allocates sequentially and asserts when it hits the end of the volume. This will be replaced with a real freeblock allocator soon. * Clean up some of the mess I created from the temporary fifo mechanism that had been put in-place to test the major rewiring in 27. * Adjust newfs_hammer. The 'hammer' utility has not yet been adjusted (it can't decode blockmaps yet but will soon). --- sbin/hammer/hammer_util.h | 6 +- sbin/hammer/ondisk.c | 211 +++++++++++++---- sbin/newfs_hammer/newfs_hammer.c | 32 ++- sys/conf/files | 5 +- sys/vfs/hammer/Makefile | 6 +- sys/vfs/hammer/hammer.h | 33 +-- sys/vfs/hammer/hammer_blockmap.c | 226 ++++++++++++++++++ sys/vfs/hammer/hammer_btree.c | 55 +---- sys/vfs/hammer/hammer_btree.h | 12 +- sys/vfs/hammer/hammer_cursor.c | 15 +- sys/vfs/hammer/hammer_cursor.h | 8 +- sys/vfs/hammer/hammer_disk.h | 194 +++++++++++---- sys/vfs/hammer/hammer_freemap.c | 62 +++++ sys/vfs/hammer/hammer_inode.c | 6 +- sys/vfs/hammer/hammer_object.c | 154 +++++++----- sys/vfs/hammer/hammer_ondisk.c | 265 ++++++++++++++------- sys/vfs/hammer/hammer_spike.c | 392 ------------------------------- sys/vfs/hammer/hammer_vfsops.c | 13 +- sys/vfs/hammer/hammer_vnops.c | 38 +-- 19 files changed, 982 insertions(+), 751 deletions(-) create mode 100644 sys/vfs/hammer/hammer_blockmap.c create mode 100644 sys/vfs/hammer/hammer_freemap.c delete mode 100644 sys/vfs/hammer/hammer_spike.c diff --git a/sbin/hammer/hammer_util.h b/sbin/hammer/hammer_util.h index e343f1d8db..81b1c7184c 
100644 --- a/sbin/hammer/hammer_util.h +++ b/sbin/hammer/hammer_util.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sbin/hammer/hammer_util.h,v 1.8 2008/02/08 08:30:56 dillon Exp $ + * $DragonFly: src/sbin/hammer/hammer_util.h,v 1.9 2008/02/10 09:50:55 dillon Exp $ */ #include @@ -105,15 +105,17 @@ struct volume_info *setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags); struct volume_info *get_volume(int32_t vol_no); struct buffer_info *get_buffer(hammer_off_t buf_offset, int isnew); +void *get_buffer_data(hammer_off_t buf_offset, struct buffer_info **bufferp, + int isnew); hammer_node_ondisk_t get_node(hammer_off_t node_offset, struct buffer_info **bufp); void rel_volume(struct volume_info *volume); void rel_buffer(struct buffer_info *buffer); +void format_blockmap(hammer_blockmap_entry_t blockmap, hammer_off_t zone_off); void *alloc_btree_element(hammer_off_t *offp); hammer_record_ondisk_t alloc_record_element(hammer_off_t *offp, - u_int8_t rec_type, int32_t rec_len, int32_t data_len, void **datap); int hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2); diff --git a/sbin/hammer/ondisk.c b/sbin/hammer/ondisk.c index a009b41fc2..1a87884ad3 100644 --- a/sbin/hammer/ondisk.c +++ b/sbin/hammer/ondisk.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sbin/hammer/ondisk.c,v 1.10 2008/02/08 08:30:56 dillon Exp $ + * $DragonFly: src/sbin/hammer/ondisk.c,v 1.11 2008/02/10 09:50:55 dillon Exp $ */ #include @@ -45,10 +45,13 @@ #include #include "hammer_util.h" +static void *alloc_blockmap(int zone, int bytes, hammer_off_t *result_offp, + struct buffer_info **bufferp); +static hammer_off_t alloc_bigblock(void); +#if 0 static void init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type); static hammer_off_t hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes, struct buffer_info **bufp, u_int16_t hdr_type); -#if 0 static void readhammerbuf(struct volume_info *vol, void *data, int64_t offset); #endif @@ -126,7 +129,7 @@ setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags) vol->vol_no = vol_no; if (isnew) { - init_fifo_head(&ondisk->head, HAMMER_HEAD_TYPE_VOL); + /*init_fifo_head(&ondisk->head, HAMMER_HEAD_TYPE_VOL);*/ vol->cache.modified = 1; } @@ -242,6 +245,19 @@ rel_buffer(struct buffer_info *buffer) } } +void * +get_buffer_data(hammer_off_t buf_offset, struct buffer_info **bufferp, + int isnew) +{ + struct buffer_info *buffer; + + if (*bufferp) { + rel_buffer(*bufferp); + } + buffer = *bufferp = get_buffer(buf_offset, isnew); + return((char *)buffer->ondisk + ((int32_t)buf_offset & HAMMER_BUFMASK)); +} + /* * Retrieve a pointer to a B-Tree node given a cluster offset. The underlying * bufp is freed if non-NULL and a referenced buffer is loaded into it. 
@@ -267,35 +283,38 @@ get_node(hammer_off_t node_offset, struct buffer_info **bufp) void * alloc_btree_element(hammer_off_t *offp) { - struct buffer_info *buf; - void *item; - - *offp = hammer_alloc_fifo(sizeof(struct hammer_node_ondisk), 0, - &buf, HAMMER_HEAD_TYPE_BTREE); - item = (char *)buf->ondisk + ((int32_t)*offp & HAMMER_BUFMASK); - /* XXX buf not released, ptr remains valid */ - return(item); + struct buffer_info *buffer = NULL; + hammer_node_ondisk_t node; + + node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node), + offp, &buffer); + bzero(node, sizeof(*node)); + /* XXX buffer not released, pointer remains valid */ + return(node); } hammer_record_ondisk_t -alloc_record_element(hammer_off_t *offp, u_int8_t rec_type, - int32_t rec_len, int32_t data_len, void **datap) +alloc_record_element(hammer_off_t *offp, int32_t data_len, void **datap) { - struct buffer_info *buf; + struct buffer_info *record_buffer = NULL; + struct buffer_info *data_buffer = NULL; hammer_record_ondisk_t rec; - int32_t aligned_rec_len; - aligned_rec_len = (rec_len + HAMMER_HEAD_ALIGN_MASK) & - ~HAMMER_HEAD_ALIGN_MASK; + rec = alloc_blockmap(HAMMER_ZONE_RECORD_INDEX, sizeof(*rec), + offp, &record_buffer); + bzero(rec, sizeof(*rec)); - *offp = hammer_alloc_fifo(aligned_rec_len, data_len, &buf, - HAMMER_HEAD_TYPE_RECORD); - rec = (void *)((char *)buf->ondisk + ((int32_t)*offp & HAMMER_BUFMASK)); - rec->base.base.rec_type = rec_type; - if (data_len) { - rec->base.data_off = *offp + aligned_rec_len; + if (data_len >= HAMMER_BUFSIZE) { + assert(data_len <= HAMMER_BUFSIZE); /* just one buffer */ + *datap = alloc_blockmap(HAMMER_ZONE_LARGE_DATA_INDEX, data_len, + &rec->base.data_off, &data_buffer); rec->base.data_len = data_len; - *datap = (char *)rec + aligned_rec_len; + bzero(*datap, data_len); + } else if (data_len) { + *datap = alloc_blockmap(HAMMER_ZONE_SMALL_DATA_INDEX, data_len, + &rec->base.data_off, &data_buffer); + rec->base.data_len = data_len; + bzero(*datap, data_len); } 
else { *datap = NULL; } @@ -303,14 +322,117 @@ alloc_record_element(hammer_off_t *offp, u_int8_t rec_type, return(rec); } +/* + * Format a new blockmap + */ +void +format_blockmap(hammer_blockmap_entry_t blockmap, hammer_off_t zone_off) +{ + blockmap->phys_offset = alloc_bigblock(); + blockmap->alloc_offset = zone_off; +} + +static +void * +alloc_blockmap(int zone, int bytes, hammer_off_t *result_offp, + struct buffer_info **bufferp) +{ + struct buffer_info *buffer; + struct volume_info *volume; + hammer_blockmap_entry_t rootmap; + hammer_blockmap_entry_t blockmap; + void *ptr; + int i; + + volume = get_volume(RootVolNo); + + rootmap = &volume->ondisk->vol0_blockmap[zone]; + + /* + * Alignment and buffer-boundary issues + */ + bytes = (bytes + 7) & ~7; + if ((rootmap->phys_offset ^ (rootmap->phys_offset + bytes - 1)) & + ~HAMMER_BUFMASK64) { + volume->cache.modified = 1; + rootmap->phys_offset = (rootmap->phys_offset + bytes) & + ~HAMMER_BUFMASK64; + } + + /* + * Dive layer 2 + */ + i = (rootmap->alloc_offset >> (HAMMER_LARGEBLOCK_BITS + + HAMMER_BLOCKMAP_BITS)) & HAMMER_BLOCKMAP_RADIX_MASK; + + blockmap = get_buffer_data(rootmap->phys_offset + i * sizeof(*blockmap), + bufferp, 0); + buffer = *bufferp; + if ((rootmap->alloc_offset & HAMMER_LARGEBLOCK_LAYER1_MASK) == 0) { + buffer->cache.modified = 1; + bzero(blockmap, sizeof(*blockmap)); + blockmap->phys_offset = alloc_bigblock(); + } + + /* + * Dive layer 1 + */ + i = (rootmap->alloc_offset >> HAMMER_LARGEBLOCK_BITS) & + HAMMER_BLOCKMAP_RADIX_MASK; + + blockmap = get_buffer_data( + blockmap->phys_offset + i * sizeof(*blockmap), bufferp, 0); + buffer = *bufferp; + + if ((rootmap->alloc_offset & HAMMER_LARGEBLOCK_MASK64) == 0) { + buffer->cache.modified = 1; + bzero(blockmap, sizeof(*blockmap)); + blockmap->phys_offset = alloc_bigblock(); + blockmap->bytes_free = HAMMER_LARGEBLOCK_SIZE; + } + + buffer->cache.modified = 1; + volume->cache.modified = 1; + blockmap->bytes_free -= bytes; + *result_offp = 
rootmap->alloc_offset; + rootmap->alloc_offset += bytes; + + i = (rootmap->phys_offset >> HAMMER_BUFFER_BITS) & + HAMMER_BUFFERS_PER_LARGEBLOCK_MASK; + ptr = get_buffer_data( + blockmap->phys_offset + i * HAMMER_BUFSIZE + + ((int32_t)*result_offp & HAMMER_BUFMASK), bufferp, 0); + buffer->cache.modified = 1; + + rel_volume(volume); + return(ptr); +} + +static +hammer_off_t +alloc_bigblock(void) +{ + struct volume_info *volume; + hammer_off_t result_offset; + + volume = get_volume(RootVolNo); + result_offset = volume->ondisk->vol0_free_off; + volume->ondisk->vol0_free_off += HAMMER_LARGEBLOCK_SIZE; + if ((volume->ondisk->vol0_free_off & HAMMER_OFF_SHORT_MASK) > + (hammer_off_t)(volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg)) { + panic("alloc_bigblock: Ran out of room, filesystem too small"); + } + rel_volume(volume); + return(result_offset); +} + +#if 0 /* * Reserve space from the FIFO. Make sure that bytes does not cross a * record boundary. * - * Initialize the fifo header, keep track of the previous entry's size - * so the reverse poitner can be initialized (using lastBlk), and also - * store a terminator (used by the recovery code) which will be overwritten - * by the next allocation. + * Zero out base_bytes and initialize the fifo head and tail. The + * data area is not zerod. */ static hammer_off_t @@ -320,12 +442,12 @@ hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes, struct buffer_info *buf; struct volume_info *volume; hammer_fifo_head_t head; + hammer_fifo_tail_t tail; hammer_off_t off; int32_t aligned_bytes; - static u_int32_t lastBlk; - aligned_bytes = (base_bytes + ext_bytes + HAMMER_HEAD_ALIGN_MASK) & - ~HAMMER_HEAD_ALIGN_MASK; + aligned_bytes = (base_bytes + ext_bytes + HAMMER_TAIL_ONDISK_SIZE + + HAMMER_HEAD_ALIGN_MASK) & ~HAMMER_HEAD_ALIGN_MASK; volume = get_volume(RootVolNo); off = volume->ondisk->vol0_fifo_end; @@ -335,7 +457,7 @@ hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes, * only newfs_hammer uses this function. 
*/ assert((off & ~HAMMER_BUFMASK64) == - ((off + aligned_bytes + sizeof(*head)) & ~HAMMER_BUFMASK)); + ((off + aligned_bytes) & ~HAMMER_BUFMASK)); *bufp = buf = get_buffer(off, 0); @@ -345,27 +467,26 @@ hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes, head = (void *)((char *)buf->ondisk + ((int32_t)off & HAMMER_BUFMASK)); bzero(head, base_bytes); + head->hdr_signature = HAMMER_HEAD_SIGNATURE; head->hdr_type = hdr_type; - head->hdr_rev_link = lastBlk; - head->hdr_fwd_link = aligned_bytes; + head->hdr_size = aligned_bytes; head->hdr_seq = volume->ondisk->vol0_next_seq++; - lastBlk = head->hdr_fwd_link; + + tail = (void*)((char *)head + aligned_bytes - HAMMER_TAIL_ONDISK_SIZE); + tail->tail_signature = HAMMER_TAIL_SIGNATURE; + tail->tail_type = hdr_type; + tail->tail_size = aligned_bytes; volume->ondisk->vol0_fifo_end += aligned_bytes; volume->cache.modified = 1; - head = (void *)((char *)head + aligned_bytes); - head->hdr_signature = HAMMER_HEAD_SIGNATURE; - head->hdr_type = HAMMER_HEAD_TYPE_TERM; - head->hdr_rev_link = lastBlk; - head->hdr_fwd_link = 0; - head->hdr_crc = 0; - head->hdr_seq = volume->ondisk->vol0_next_seq; rel_volume(volume); return(off); } +#endif + /* * Flush various tracking structures to disk */ @@ -400,6 +521,7 @@ flush_buffer(struct buffer_info *buffer) buffer->cache.modified = 0; } +#if 0 /* * Generic buffer initialization */ @@ -408,12 +530,13 @@ init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type) { head->hdr_signature = HAMMER_HEAD_SIGNATURE; head->hdr_type = hdr_type; - head->hdr_rev_link = 0; - head->hdr_fwd_link = 0; + head->hdr_size = 0; head->hdr_crc = 0; head->hdr_seq = 0; } +#endif + #if 0 /* * Core I/O operations diff --git a/sbin/newfs_hammer/newfs_hammer.c b/sbin/newfs_hammer/newfs_hammer.c index d85d5271ec..72a36b465f 100644 --- a/sbin/newfs_hammer/newfs_hammer.c +++ b/sbin/newfs_hammer/newfs_hammer.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sbin/newfs_hammer/newfs_hammer.c,v 1.17 2008/02/08 08:30:58 dillon Exp $ + * $DragonFly: src/sbin/newfs_hammer/newfs_hammer.c,v 1.18 2008/02/10 09:50:56 dillon Exp $ */ #include "newfs_hammer.h" @@ -57,6 +57,8 @@ main(int ac, char **av) * if it gets broken! */ assert(sizeof(struct hammer_volume_ondisk) <= HAMMER_BUFSIZE); + assert(sizeof(union hammer_record_ondisk) == HAMMER_RECORD_SIZE); + assert(sizeof(struct hammer_blockmap_entry) == 32); /* * Generate a filesysem id and lookup the filesystem type @@ -346,6 +348,11 @@ format_volume(struct volume_info *vol, int nvols, const char *label) vol->vol_alloc += BootAreaSize; ondisk->vol_mem_beg = vol->vol_alloc; vol->vol_alloc += MemAreaSize; + + /* + * The remaining area is the zone 2 buffer allocation area. These + * buffers are handed out sequentially as large blocks by alloc_bigblock(). + */ ondisk->vol_buf_beg = vol->vol_alloc; ondisk->vol_buf_end = vol->size & ~(int64_t)HAMMER_BUFMASK; @@ -369,10 +376,21 @@ format_volume(struct volume_info *vol, int nvols, const char *label) * in volume 0. 
hammer_off_t must be properly formatted * (see vfs/hammer/hammer_disk.h) */ - ondisk->vol0_fifo_beg = HAMMER_ENCODE_RAW_BUFFER(0, 0); - ondisk->vol0_fifo_end = ondisk->vol0_fifo_beg; + ondisk->vol0_free_off = HAMMER_ENCODE_RAW_BUFFER(0, 0); ondisk->vol0_next_tid = createtid(); ondisk->vol0_next_seq = 1; + format_blockmap( + &ondisk->vol0_blockmap[HAMMER_ZONE_BTREE_INDEX], + HAMMER_ZONE_BTREE); + format_blockmap( + &ondisk->vol0_blockmap[HAMMER_ZONE_RECORD_INDEX], + HAMMER_ZONE_RECORD); + format_blockmap( + &ondisk->vol0_blockmap[HAMMER_ZONE_LARGE_DATA_INDEX], + HAMMER_ZONE_LARGE_DATA); + format_blockmap( + &ondisk->vol0_blockmap[HAMMER_ZONE_SMALL_DATA_INDEX], + HAMMER_ZONE_SMALL_DATA); ondisk->vol0_btree_root = format_root(); ++ondisk->vol0_stat_inodes; /* root inode */ @@ -395,9 +413,7 @@ format_root(void) hammer_btree_elm_t elm; bnode = alloc_btree_element(&btree_off); - rec = alloc_record_element(&rec_off, HAMMER_RECTYPE_INODE, - sizeof(rec->inode), sizeof(*idata), - (void **)&idata); + rec = alloc_record_element(&rec_off, sizeof(*idata), (void **)&idata); /* * Populate the inode data and inode record for the root directory. 
@@ -414,7 +430,7 @@ format_root(void) rec->base.base.obj_type = HAMMER_OBJTYPE_DIRECTORY; /* rec->base.data_offset - initialized by alloc_record_element */ /* rec->base.data_len - initialized by alloc_record_element */ - rec->base.head.hdr_crc = crc32(idata, sizeof(*idata)); + rec->base.data_crc = crc32(idata, sizeof(*idata)); rec->inode.ino_atime = rec->base.base.create_tid; rec->inode.ino_mtime = rec->base.base.create_tid; rec->inode.ino_size = 0; @@ -432,7 +448,7 @@ format_root(void) elm->leaf.rec_offset = rec_off; elm->leaf.data_offset = rec->base.data_off; elm->leaf.data_len = rec->base.data_len; - elm->leaf.data_crc = rec->base.head.hdr_crc; + elm->leaf.data_crc = rec->base.data_crc; return(btree_off); } diff --git a/sys/conf/files b/sys/conf/files index 67871b704c..e18a2de34f 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,5 +1,5 @@ # $FreeBSD: src/sys/conf/files,v 1.340.2.137 2003/06/04 17:10:30 sam Exp $ -# $DragonFly: src/sys/conf/files,v 1.203 2008/02/08 08:30:55 dillon Exp $ +# $DragonFly: src/sys/conf/files,v 1.204 2008/02/10 09:50:59 dillon Exp $ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and @@ -1139,9 +1139,10 @@ vfs/hammer/hammer_btree.c optional hammer vfs/hammer/hammer_io.c optional hammer vfs/hammer/hammer_transaction.c optional hammer vfs/hammer/hammer_object.c optional hammer -vfs/hammer/hammer_spike.c optional hammer vfs/hammer/hammer_recover.c optional hammer vfs/hammer/hammer_ioctl.c optional hammer +vfs/hammer/hammer_blockmap.c optional hammer +vfs/hammer/hammer_freemap.c optional hammer vm/default_pager.c standard vm/device_pager.c standard vm/phys_pager.c standard diff --git a/sys/vfs/hammer/Makefile b/sys/vfs/hammer/Makefile index af5b7188af..afcffb08ae 100644 --- a/sys/vfs/hammer/Makefile +++ b/sys/vfs/hammer/Makefile @@ -1,12 +1,12 @@ # -# $DragonFly: src/sys/vfs/hammer/Makefile,v 1.7 2008/02/08 08:30:59 dillon Exp $ +# $DragonFly: 
src/sys/vfs/hammer/Makefile,v 1.8 2008/02/10 09:51:01 dillon Exp $ KMOD= hammer SRCS= hammer_vfsops.c hammer_vnops.c hammer_inode.c \ hammer_subs.c hammer_ondisk.c hammer_io.c \ hammer_cursor.c hammer_btree.c hammer_transaction.c \ - hammer_object.c hammer_spike.c \ - hammer_recover.c hammer_ioctl.c + hammer_object.c hammer_recover.c hammer_ioctl.c \ + hammer_blockmap.c hammer_freemap.c NOMAN= diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index 2ca209ec9b..811c210d6f 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.35 2008/02/08 08:30:59 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.36 2008/02/10 09:51:01 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS @@ -217,7 +217,6 @@ struct hammer_record { union hammer_record_ondisk rec; union hammer_data_ondisk *data; int flags; - int rec_len; int blocked; }; @@ -226,7 +225,7 @@ typedef struct hammer_record *hammer_record_t; #define HAMMER_RECF_ALLOCDATA 0x0001 #define HAMMER_RECF_ONRBTREE 0x0002 #define HAMMER_RECF_DELETED 0x0004 -#define HAMMER_RECF_UNUSED0008 0x0008 +#define HAMMER_RECF_INBAND 0x0008 #define HAMMER_RECF_SYNCING 0x0010 #define HAMMER_RECF_WANTED 0x0020 @@ -287,7 +286,6 @@ typedef struct hammer_io *hammer_io_t; struct hammer_volume { struct hammer_io io; RB_ENTRY(hammer_volume) rb_node; - struct hammer_nod_rb_tree rb_nods_root; struct hammer_buf_rb_tree rb_bufs_root; struct hammer_volume_ondisk *ondisk; int32_t vol_no; @@ -335,8 +333,8 @@ struct hammer_node { TAILQ_ENTRY(hammer_node) entry; /* per-buffer linkage */ RB_ENTRY(hammer_node) rb_node; /* per-cluster linkage */ hammer_off_t node_offset; /* full offset spec */ + struct hammer_mount *hmp; struct hammer_buffer *buffer; /* backing buffer */ - struct hammer_volume *volume; /* backing volume */ hammer_node_ondisk_t 
ondisk; /* ptr to on-disk structure */ struct hammer_node **cache1; /* passive cache(s) */ struct hammer_node **cache2; @@ -383,6 +381,7 @@ struct hammer_mount { /*struct vnode *rootvp;*/ struct hammer_ino_rb_tree rb_inos_root; struct hammer_vol_rb_tree rb_vols_root; + struct hammer_nod_rb_tree rb_nods_root; struct hammer_volume *rootvol; struct hammer_base_elm root_btree_beg; struct hammer_base_elm root_btree_end; @@ -396,6 +395,7 @@ struct hammer_mount { hammer_tid_t asof; u_int32_t namekey_iterator; struct netexport export; + struct lock blockmap_lock; }; typedef struct hammer_mount *hammer_mount_t; @@ -445,6 +445,7 @@ int hammer_install_volume(hammer_mount_t hmp, const char *volname); int hammer_ip_lookup(hammer_cursor_t cursor, hammer_inode_t ip); int hammer_ip_first(hammer_cursor_t cursor, hammer_inode_t ip); int hammer_ip_next(hammer_cursor_t cursor); +int hammer_ip_resolve_record_and_data(hammer_cursor_t cursor); int hammer_ip_resolve_data(hammer_cursor_t cursor); int hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid); int hammer_delete_at_cursor(hammer_cursor_t cursor, int64_t *stat_bytes); @@ -455,7 +456,7 @@ int hammer_sync_volume(hammer_volume_t volume, void *data); int hammer_sync_buffer(hammer_buffer_t buffer, void *data); hammer_record_t - hammer_alloc_mem_record(hammer_inode_t ip, int32_t rec_len); + hammer_alloc_mem_record(hammer_inode_t ip); void hammer_rel_mem_record(hammer_record_t record); int hammer_cursor_up(hammer_cursor_t cursor); @@ -550,20 +551,22 @@ void hammer_dup_buffer(struct hammer_buffer **bufferp, hammer_node_t hammer_alloc_btree(hammer_mount_t hmp, int *errorp); void *hammer_alloc_record(hammer_mount_t hmp, hammer_off_t *rec_offp, u_int8_t rec_type, - int32_t rec_len, struct hammer_buffer **rec_bufferp, - hammer_off_t *data_offp, int32_t data_len, - void **data1p, void **data2p, int32_t *data2_index, - struct hammer_buffer **data2_bufferp, - int *errorp); -void hammer_free_fifo(hammer_mount_t hmp, hammer_off_t 
fifo_offset); -void hammer_unwind_fifo(hammer_mount_t hmp, hammer_off_t rec_offset); -void hammer_init_fifo(hammer_fifo_head_t head, u_int16_t type); + struct hammer_buffer **rec_bufferp, + int32_t data_len, void **datap, + struct hammer_buffer **data_bufferp, int *errorp); int hammer_generate_undo(hammer_mount_t hmp, hammer_off_t undo_offset, void *base, int len); void hammer_put_volume(struct hammer_volume *volume, int flush); void hammer_put_buffer(struct hammer_buffer *buffer, int flush); +hammer_off_t hammer_freemap_alloc(hammer_mount_t hmp, int *errorp); +hammer_off_t hammer_blockmap_alloc(hammer_mount_t hmp, int zone, + int bytes, int *errorp); +int hammer_blockmap_free(hammer_mount_t hmp, hammer_off_t bmap_off, int bytes); +hammer_off_t hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t bmap_off, + int *errorp); + void hammer_start_transaction(struct hammer_transaction *trans, struct hammer_mount *hmp); void hammer_start_transaction_tid(struct hammer_transaction *trans, @@ -595,8 +598,6 @@ int hammer_ip_sync_data(struct hammer_transaction *trans, hammer_inode_t ip, int64_t offset, void *data, int bytes); int hammer_ip_sync_record(hammer_record_t rec); -int hammer_write_record(hammer_cursor_t cursor, hammer_record_ondisk_t rec, - int32_t rec_len, void *data, int cursor_flags); int hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag, struct ucred *cred); diff --git a/sys/vfs/hammer/hammer_blockmap.c b/sys/vfs/hammer/hammer_blockmap.c new file mode 100644 index 0000000000..517cb388db --- /dev/null +++ b/sys/vfs/hammer/hammer_blockmap.c @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.1 2008/02/10 09:51:01 dillon Exp $ + */ + +/* + * HAMMER blockmap + */ +#include "hammer.h" + +/* + * Allocate bytes from a zone + */ +hammer_off_t +hammer_blockmap_alloc(hammer_mount_t hmp, int zone, int bytes, int *errorp) +{ + hammer_volume_t root_volume; + hammer_blockmap_entry_t rootmap; + hammer_blockmap_entry_t blockmap; + hammer_buffer_t buffer = NULL; + hammer_off_t alloc_offset; + hammer_off_t result_offset; + int32_t i; + + KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES); + root_volume = hammer_get_root_volume(hmp, errorp); + if (*errorp) + return(0); + rootmap = &root_volume->ondisk->vol0_blockmap[zone]; + KKASSERT(rootmap->phys_offset != 0); + KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) == + HAMMER_ZONE_RAW_BUFFER_INDEX); + KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone); + + /* + * Deal with alignment and buffer-boundary issues. + * + * Be careful, certain primary alignments are used below to allocate + * new blockmap blocks. + */ + bytes = (bytes + 7) & ~7; + KKASSERT(bytes <= HAMMER_BUFSIZE); + + lockmgr(&hmp->blockmap_lock, LK_EXCLUSIVE|LK_RETRY); + alloc_offset = rootmap->alloc_offset; + result_offset = alloc_offset + bytes; + if ((alloc_offset ^ (result_offset - 1)) & ~HAMMER_BUFMASK64) { + alloc_offset = (result_offset - 1) & ~HAMMER_BUFMASK64; + } + + /* + * Dive layer 2, each entry is a layer-1 entry. 
If we are at the + * start of a new entry, allocate a layer 1 large-block + */ + i = (alloc_offset >> (HAMMER_LARGEBLOCK_BITS + + HAMMER_BLOCKMAP_BITS)) & HAMMER_BLOCKMAP_RADIX_MASK; + + blockmap = hammer_bread(hmp, rootmap->phys_offset + i * sizeof(*blockmap), errorp, &buffer); + KKASSERT(*errorp == 0); + + if ((alloc_offset & HAMMER_LARGEBLOCK_LAYER1_MASK) == 0) { + hammer_modify_buffer(buffer, blockmap, sizeof(*blockmap)); + bzero(blockmap, sizeof(*blockmap)); + blockmap->phys_offset = hammer_freemap_alloc(hmp, errorp); + KKASSERT(*errorp == 0); + kprintf("ALLOC LAYER2 %016llx\n", blockmap->phys_offset); + } +#if 0 + kprintf("blkmap_alloc %016llx [%2d@%016llx]", alloc_offset, i, blockmap->phys_offset); +#endif + KKASSERT(blockmap->phys_offset); + + /* + * Dive layer 1, each entry is a large-block. If we are at the + * start of a new entry, allocate a large-block. + */ + i = (alloc_offset >> HAMMER_LARGEBLOCK_BITS) & + HAMMER_BLOCKMAP_RADIX_MASK; + + blockmap = hammer_bread(hmp, blockmap->phys_offset + i * sizeof(*blockmap), errorp, &buffer); + KKASSERT(*errorp == 0); + + if ((alloc_offset & HAMMER_LARGEBLOCK_MASK64) == 0) { + hammer_modify_buffer(buffer, blockmap, sizeof(*blockmap)); + /* XXX rootmap changed */ + bzero(blockmap, sizeof(*blockmap)); + blockmap->phys_offset = hammer_freemap_alloc(hmp, errorp); + blockmap->bytes_free = HAMMER_LARGEBLOCK_SIZE; + KKASSERT(*errorp == 0); + kprintf("ALLOC LAYER1 %016llx\n", blockmap->phys_offset); + } + + hammer_modify_buffer(buffer, blockmap, sizeof(*blockmap)); + blockmap->bytes_free -= bytes; +#if 0 + kprintf("[%2d@%016llx] free=%d phys %016llx\n", i, blockmap->phys_offset, blockmap->bytes_free, blockmap->phys_offset + (result_offset & HAMMER_LARGEBLOCK_MASK64)); +#endif + + hammer_modify_volume(root_volume, &rootmap->alloc_offset, + sizeof(rootmap->alloc_offset)); + result_offset = alloc_offset; + rootmap->alloc_offset = alloc_offset + bytes; + + /* + * Calling bnew on the buffer backing the allocation gets it into 
+ * the system without a disk read. + * + * XXX This can only be done when appending into a new buffer. + */ + if (((int32_t)result_offset & HAMMER_BUFMASK) == 0) { + hammer_bnew(hmp, blockmap->phys_offset + (result_offset & HAMMER_LARGEBLOCK_MASK64), errorp, &buffer); + } + + if (buffer) + hammer_rel_buffer(buffer, 0); + hammer_rel_volume(root_volume, 0); + lockmgr(&hmp->blockmap_lock, LK_RELEASE); + return(result_offset); +} + +/* + * Free (offset,bytes) in a zone + */ +int +hammer_blockmap_free(hammer_mount_t hmp, hammer_off_t bmap_off, int bytes) +{ + kprintf("hammer_blockmap_free %016llx %d\n", bmap_off, bytes); + return(0); +} + +/* + * Lookup a blockmap offset. + */ +hammer_off_t +hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t bmap_off, int *errorp) +{ + hammer_volume_t root_volume; + hammer_blockmap_entry_t rootmap; + hammer_blockmap_entry_t blockmap; + hammer_buffer_t buffer = NULL; + hammer_off_t result_offset; + int zone; + int i; + + zone = HAMMER_ZONE_DECODE(bmap_off); + KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES); + root_volume = hammer_get_root_volume(hmp, errorp); + if (*errorp) + return(0); + rootmap = &root_volume->ondisk->vol0_blockmap[zone]; + KKASSERT(rootmap->phys_offset != 0); + KKASSERT(HAMMER_ZONE_DECODE(rootmap->phys_offset) == + HAMMER_ZONE_RAW_BUFFER_INDEX); + KKASSERT(HAMMER_ZONE_DECODE(rootmap->alloc_offset) == zone); + + if (bmap_off >= rootmap->alloc_offset) { + panic("hammer_blockmap_lookup: %016llx beyond EOF %016llx", + bmap_off, rootmap->alloc_offset); + result_offset = 0; + goto done; + } + + /* + * Dive layer 2, each entry is a layer-1 entry. 
Lookups never + * allocate; the entry must already be populated. + */ + i = (bmap_off >> (HAMMER_LARGEBLOCK_BITS + + HAMMER_BLOCKMAP_BITS)) & HAMMER_BLOCKMAP_RADIX_MASK; + + blockmap = hammer_bread(hmp, rootmap->phys_offset + i * sizeof(*blockmap), errorp, &buffer); + KKASSERT(*errorp == 0); + KKASSERT(blockmap->phys_offset); + + /* + * Dive layer 1, each entry is a large-block. The entry must + * already be populated; lookups never allocate. + */ + i = (bmap_off >> HAMMER_LARGEBLOCK_BITS) & HAMMER_BLOCKMAP_RADIX_MASK; + + blockmap = hammer_bread(hmp, blockmap->phys_offset + i * sizeof(*blockmap), errorp, &buffer); + KKASSERT(*errorp == 0); + KKASSERT(blockmap->phys_offset); + result_offset = blockmap->phys_offset + + (bmap_off & HAMMER_LARGEBLOCK_MASK64); +done: + if (buffer) + hammer_rel_buffer(buffer, 0); + hammer_rel_volume(root_volume, 0); + if (hammer_debug_general & 0x0800) { + kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n", + bmap_off, result_offset); + } + return(result_offset); +} + diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c index 812c04418f..20318b8a2c 100644 --- a/sys/vfs/hammer/hammer_btree.c +++ b/sys/vfs/hammer/hammer_btree.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.29 2008/02/08 08:30:59 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.30 2008/02/10 09:51:01 dillon Exp $ */ /* @@ -569,7 +569,6 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags) hammer_btree_elm_t elm; hammer_off_t rec_off; hammer_off_t data_off; - hammer_off_t data_end; int error; /* @@ -578,10 +577,8 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags) */ node = cursor->node->ondisk; elm = &node->elms[cursor->index]; - cursor->data1 = NULL; - cursor->data2 = NULL; - cursor->data_split = 0; - hmp = cursor->node->volume->hmp; + cursor->data = NULL; + hmp = cursor->node->hmp; flags |= cursor->flags & HAMMER_CURSOR_DATAEXTOK; /* @@ -597,7 +594,6 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags) if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD) flags &= ~HAMMER_CURSOR_GET_DATA; data_off = elm->leaf.data_offset; - data_end = data_off + elm->leaf.data_len - 1; if (data_off == 0) flags &= ~HAMMER_CURSOR_GET_DATA; rec_off = elm->leaf.rec_offset; @@ -618,44 +614,17 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags) if ((flags & HAMMER_CURSOR_GET_DATA) && error == 0) { if ((rec_off ^ data_off) & ~HAMMER_BUFMASK64) { /* - * The data is not in the same buffer as the last - * record we cached, but it could still be embedded - * in a record. Note that we may not have loaded the - * record's buffer above, depending on flags. - * - * Assert that the data does not cross into additional - * buffers. + * Data and record are in different buffers. */ - cursor->data_split = 0; - cursor->data2 = hammer_bread(hmp, data_off, - &error, &cursor->data_buffer); - KKASSERT(((data_off ^ data_end) & - ~HAMMER_BUFMASK64) == 0); + cursor->data = hammer_bread(hmp, data_off, &error, + &cursor->data_buffer); } else { /* - * The data starts in same buffer as record. Check - * to determine if the data extends into another - * buffer. + * Data resides in same buffer as record. 
*/ - cursor->data1 = (void *) + cursor->data = (void *) ((char *)cursor->record_buffer->ondisk + ((int32_t)data_off & HAMMER_BUFMASK)); - if ((data_off ^ data_end) & ~HAMMER_BUFMASK64) { - cursor->data_split = HAMMER_BUFSIZE - - ((int32_t)data_off & HAMMER_BUFMASK); - if (flags & HAMMER_CURSOR_DATAEXTOK) { - /* - * NOTE: Assumes data buffer does not - * cross a volume boundary. - */ - cursor->data2 = hammer_bread(hmp, data_off + cursor->data_split, - &error, &cursor->data_buffer); - } else { - panic("Illegal data extension"); - } - } else { - cursor->data_split = elm->leaf.data_len; - } } } return(error); @@ -1292,7 +1261,7 @@ btree_split_internal(hammer_cursor_t cursor) split = (ondisk->count + 1) / 2; if (cursor->index <= split) --split; - hmp = node->volume->hmp; + hmp = node->hmp; /* * If we are at the root of the filesystem, create a new root node @@ -1506,7 +1475,7 @@ btree_split_leaf(hammer_cursor_t cursor) if (cursor->index <= split) --split; error = 0; - hmp = leaf->volume->hmp; + hmp = leaf->hmp; elm = &ondisk->elms[split]; @@ -2044,7 +2013,7 @@ btree_set_parent(hammer_node_t node, hammer_btree_elm_t elm) switch(elm->base.btype) { case HAMMER_BTREE_TYPE_INTERNAL: case HAMMER_BTREE_TYPE_LEAF: - child = hammer_get_node(node->volume->hmp, + child = hammer_get_node(node->hmp, elm->internal.subtree_offset, &error); if (error == 0) { hammer_modify_node(child); @@ -2090,7 +2059,7 @@ hammer_btree_lock_children(hammer_cursor_t cursor, switch(elm->base.btype) { case HAMMER_BTREE_TYPE_INTERNAL: case HAMMER_BTREE_TYPE_LEAF: - child = hammer_get_node(node->volume->hmp, + child = hammer_get_node(node->hmp, elm->internal.subtree_offset, &error); break; diff --git a/sys/vfs/hammer/hammer_btree.h b/sys/vfs/hammer/hammer_btree.h index 08b09de916..5dc0e91578 100644 --- a/sys/vfs/hammer/hammer_btree.h +++ b/sys/vfs/hammer/hammer_btree.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.11 2008/02/08 08:30:59 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.12 2008/02/10 09:51:01 dillon Exp $ */ /* @@ -150,9 +150,9 @@ union hammer_btree_elm { typedef union hammer_btree_elm *hammer_btree_elm_t; /* - * B-Tree node (normal or meta) + * B-Tree node (normal or meta) (16x64 = 1K structure) * - * Each node contains 14 elements. The last element for an internal node + * Each node contains 15 elements. The last element for an internal node * is the right-boundary so internal nodes have one fewer logical elements * then leaf nodes. * @@ -169,7 +169,7 @@ typedef union hammer_btree_elm *hammer_btree_elm_t; * reserved for left/right leaf linkage fields, flags, and other future * features. */ -#define HAMMER_BTREE_LEAF_ELMS 14 +#define HAMMER_BTREE_LEAF_ELMS 15 #define HAMMER_BTREE_INT_ELMS (HAMMER_BTREE_LEAF_ELMS - 1) /* @@ -187,7 +187,8 @@ struct hammer_node_ondisk { /* * B-Tree node header (64 bytes) */ - struct hammer_fifo_head head; + u_int32_t signature; + u_int32_t crc; hammer_off_t parent; /* 0 if at root of cluster */ int32_t count; u_int8_t type; @@ -197,6 +198,7 @@ struct hammer_node_ondisk { hammer_off_t reserved04; /* future link_right */ hammer_off_t reserved05; hammer_off_t reserved06; + hammer_off_t reserved07; /* * Element array. Internal nodes have one less logical element diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c index db3c6031c5..eddfde2284 100644 --- a/sys/vfs/hammer/hammer_cursor.c +++ b/sys/vfs/hammer/hammer_cursor.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.17 2008/02/08 08:30:59 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.18 2008/02/10 09:51:01 dillon Exp $ */ /* @@ -142,9 +142,7 @@ hammer_done_cursor(hammer_cursor_t cursor) cursor->deadlk_node = NULL; } - cursor->data1 = NULL; - cursor->data2 = NULL; - cursor->data_split = 0; + cursor->data = NULL; cursor->record = NULL; cursor->left_bound = NULL; cursor->right_bound = NULL; @@ -241,11 +239,11 @@ hammer_load_cursor_parent(hammer_cursor_t cursor) int error; int i; - hmp = cursor->node->volume->hmp; + hmp = cursor->node->hmp; if (cursor->node->ondisk->parent) { node = cursor->node; - parent = hammer_get_node(node->volume->hmp, + parent = hammer_get_node(node->hmp, node->ondisk->parent, &error); if (error) return(error); @@ -354,11 +352,10 @@ hammer_cursor_down(hammer_cursor_t cursor) KKASSERT(elm->internal.subtree_offset != 0); cursor->left_bound = &elm[0].internal.base; cursor->right_bound = &elm[1].internal.base; - node = hammer_get_node(node->volume->hmp, - elm->internal.subtree_offset, + node = hammer_get_node(node->hmp, elm->internal.subtree_offset, &error); if (error == 0) { - KKASSERT(elm->base.btype == node->ondisk->type); + KASSERT(elm->base.btype == node->ondisk->type, ("BTYPE MISMATCH %c %c NODE %p\n", elm->base.btype, node->ondisk->type, node)); if (node->ondisk->parent != cursor->parent->node_offset) panic("node %p %016llx vs %016llx\n", node, node->ondisk->parent, cursor->parent->node_offset); KKASSERT(node->ondisk->parent == cursor->parent->node_offset); diff --git a/sys/vfs/hammer/hammer_cursor.h b/sys/vfs/hammer/hammer_cursor.h index 795d5d3a23..adedcb6ebb 100644 --- a/sys/vfs/hammer/hammer_cursor.h +++ b/sys/vfs/hammer/hammer_cursor.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.12 2008/02/08 08:30:59 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.13 2008/02/10 09:51:01 dillon Exp $ */ /* @@ -92,12 +92,10 @@ struct hammer_cursor { * can be NULL when data and/or record is not, typically indicating * information referenced via an in-memory record. */ - struct hammer_buffer *record_buffer; /* record+data */ + struct hammer_buffer *record_buffer; /* record (+ built-in data) */ struct hammer_buffer *data_buffer; /* extended data */ union hammer_record_ondisk *record; - union hammer_data_ondisk *data1; - union hammer_data_ondisk *data2; - int data_split; /* data split point if any */ + union hammer_data_ondisk *data; /* * Iteration and extraction control variables diff --git a/sys/vfs/hammer/hammer_disk.h b/sys/vfs/hammer/hammer_disk.h index 3692ad93ef..d88a5566a6 100644 --- a/sys/vfs/hammer/hammer_disk.h +++ b/sys/vfs/hammer/hammer_disk.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.22 2008/02/08 08:30:59 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.23 2008/02/10 09:51:01 dillon Exp $ */ #ifndef VFS_HAMMER_DISK_H_ @@ -59,10 +59,19 @@ * I/O is done in multiples of 16K. Most buffer-sized headers such as those * used by volumes, super-clusters, clusters, and basic filesystem buffers * use fixed-sized A-lists which are heavily dependant on HAMMER_BUFSIZE. 
+ * + * Per-volume storage limit: 52 bits 4096 TB + * Per-Zone storage limit: 59 bits 512 KTB (due to blockmap) + * Per-filesystem storage limit: 60 bits 1 MTB */ -#define HAMMER_BUFSIZE 16384 -#define HAMMER_BUFMASK (HAMMER_BUFSIZE - 1) -#define HAMMER_MAXDATA (256*1024) +#define HAMMER_BUFSIZE 16384 +#define HAMMER_BUFMASK (HAMMER_BUFSIZE - 1) +#define HAMMER_MAXDATA (256*1024) +#define HAMMER_BUFFER_BITS 14 + +#if (1 << HAMMER_BUFFER_BITS) != HAMMER_BUFSIZE +#error "HAMMER_BUFFER_BITS BROKEN" +#endif #define HAMMER_BUFSIZE64 ((u_int64_t)HAMMER_BUFSIZE) #define HAMMER_BUFMASK64 ((u_int64_t)HAMMER_BUFMASK) @@ -104,16 +113,45 @@ typedef u_int64_t hammer_off_t; * zone 0 (z,v,o): reserved (for sanity) * zone 1 (z,v,o): raw volume relative (offset 0 is the volume header) * zone 2 (z,v,o): raw buffer relative (offset 0 is the first buffer) - * zone 3-15 : reserved + * zone 3 (z,o): undo fifo - blockmap backed + * + * zone 8 (z,o): B-Tree - blkmap-backed + * zone 9 (z,o): Record - blkmap-backed + * zone 10 (z,o): Large-data - blkmap-backed */ #define HAMMER_ZONE_RAW_VOLUME 0x1000000000000000ULL #define HAMMER_ZONE_RAW_BUFFER 0x2000000000000000ULL +#define HAMMER_ZONE_UNDO 0x3000000000000000ULL +#define HAMMER_ZONE_RESERVED04 0x4000000000000000ULL +#define HAMMER_ZONE_RESERVED05 0x5000000000000000ULL +#define HAMMER_ZONE_RESERVED06 0x6000000000000000ULL +#define HAMMER_ZONE_RESERVED07 0x7000000000000000ULL +#define HAMMER_ZONE_BTREE 0x8000000000000000ULL +#define HAMMER_ZONE_RECORD 0x9000000000000000ULL +#define HAMMER_ZONE_LARGE_DATA 0xA000000000000000ULL +#define HAMMER_ZONE_SMALL_DATA 0xB000000000000000ULL +#define HAMMER_ZONE_RESERVED0C 0xC000000000000000ULL +#define HAMMER_ZONE_RESERVED0D 0xD000000000000000ULL +#define HAMMER_ZONE_RESERVED0E 0xE000000000000000ULL +#define HAMMER_ZONE_RESERVED0F 0xF000000000000000ULL + +#define HAMMER_ZONE_RAW_VOLUME_INDEX 1 +#define HAMMER_ZONE_RAW_BUFFER_INDEX 2 +#define HAMMER_ZONE_UNDO_INDEX 3 +#define HAMMER_ZONE_BTREE_INDEX 8 
+#define HAMMER_ZONE_RECORD_INDEX 9 +#define HAMMER_ZONE_LARGE_DATA_INDEX 10 +#define HAMMER_ZONE_SMALL_DATA_INDEX 11 + +#define HAMMER_MAX_ZONES 16 #define HAMMER_VOL_ENCODE(vol_no) \ ((hammer_off_t)((vol_no) & 255) << 52) #define HAMMER_VOL_DECODE(ham_off) \ (int32_t)(((hammer_off_t)(ham_off) >> 52) & 255) +#define HAMMER_ZONE_DECODE(ham_off) \ + (int32_t)(((hammer_off_t)(ham_off) >> 60)) #define HAMMER_SHORT_OFF_ENCODE(offset) \ ((hammer_off_t)(offset) & HAMMER_OFF_SHORT_MASK) #define HAMMER_LONG_OFF_ENCODE(offset) \ @@ -129,12 +167,64 @@ typedef u_int64_t hammer_off_t; HAMMER_VOL_ENCODE(vol_no) | \ HAMMER_SHORT_OFF_ENCODE(offset)) +/* + * Large-Block backing store + * + * A blockmap is a two-level map which translates a blockmap-backed zone + * offset into a raw zone 2 offset. Each layer handles 18 bits. The 8M + * large-block size is 23 bits so two layers gives us 23+18+18 = 59 bits + * of address space. + */ +#define HAMMER_LARGEBLOCK_SIZE (8192 * 1024) +#define HAMMER_LARGEBLOCK_MASK (HAMMER_LARGEBLOCK_SIZE - 1) +#define HAMMER_LARGEBLOCK_MASK64 ((u_int64_t)HAMMER_LARGEBLOCK_SIZE - 1) +#define HAMMER_LARGEBLOCK_BITS 23 +#if (1 << HAMMER_LARGEBLOCK_BITS) != HAMMER_LARGEBLOCK_SIZE +#error "HAMMER_LARGEBLOCK_BITS BROKEN" +#endif + +#define HAMMER_BUFFERS_PER_LARGEBLOCK \ + (HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE) +#define HAMMER_BUFFERS_PER_LARGEBLOCK_MASK \ + (HAMMER_BUFFERS_PER_LARGEBLOCK - 1) +#define HAMMER_BUFFERS_PER_LARGEBLOCK_MASK64 \ + ((hammer_off_t)HAMMER_BUFFERS_PER_LARGEBLOCK_MASK) + +#define HAMMER_BLOCKMAP_RADIX \ + (HAMMER_LARGEBLOCK_SIZE / sizeof(struct hammer_blockmap_entry)) +#define HAMMER_BLOCKMAP_RADIX_MASK \ + (HAMMER_BLOCKMAP_RADIX - 1) +#define HAMMER_BLOCKMAP_BITS 18 +#if (1 << HAMMER_BLOCKMAP_BITS) != (HAMMER_LARGEBLOCK_SIZE / 32) +#error "HAMMER_BLOCKMAP_BITS BROKEN" +#endif + +#define HAMMER_LARGEBLOCK_LAYER1 \ + ((hammer_off_t)HAMMER_LARGEBLOCK_SIZE * HAMMER_BLOCKMAP_RADIX) +#define HAMMER_LARGEBLOCK_LAYER2 \ + 
(HAMMER_LARGEBLOCK_LAYER1 * HAMMER_BLOCKMAP_RADIX) + +#define HAMMER_LARGEBLOCK_LAYER1_MASK (HAMMER_LARGEBLOCK_LAYER1 - 1) +#define HAMMER_LARGEBLOCK_LAYER2_MASK (HAMMER_LARGEBLOCK_LAYER2 - 1) + +struct hammer_blockmap_entry { + hammer_off_t phys_offset; /* zone-2 physical offset */ + int32_t bytes_free; /* bytes free within the big-block */ + u_int32_t entry_crc; + u_int32_t reserved01; + u_int32_t reserved02; + hammer_off_t alloc_offset; /* zone-X logical offset */ +}; + +typedef struct hammer_blockmap_entry *hammer_blockmap_entry_t; /* * All on-disk HAMMER structures which make up elements of the FIFO contain - * a hammer_fifo_head structure. This structure contains all the information - * required to validate the fifo element and to scan the fifo in either - * direction. + * a hammer_fifo_head and hammer_fifo_tail structure. This structure + * contains all the information required to validate the fifo element + * and to scan the fifo in either direction. The head is typically embedded + * in higher level hammer on-disk structures while the tail is typically + * out-of-band. hdr_size is the size of the whole mess, including the tail. * * Nearly all such structures are guaranteed to not cross a 16K filesystem * buffer boundary. The one exception is a record, whos related data may @@ -144,38 +234,49 @@ typedef u_int64_t hammer_off_t; * (i.e. the base of the buffer will not be in the middle of a data record). * This is used to allow the recovery code to re-sync after hitting corrupted * data. + * + * PAD elements are allowed to take up only 8 bytes of space as a special + * case, containing only hdr_signature, hdr_type, and hdr_size fields, + * and with the tail overloaded onto the head structure for 8 bytes total. 
*/ -#define HAMMER_HEAD_ONDISK_SIZE 32 +#define HAMMER_HEAD_ONDISK_SIZE 24 #define HAMMER_HEAD_RECOVERY_ALIGNMENT (16 * 1024 * 1024) -#define HAMMER_HEAD_ALIGN 32 +#define HAMMER_HEAD_ALIGN 8 #define HAMMER_HEAD_ALIGN_MASK (HAMMER_HEAD_ALIGN - 1) +#define HAMMER_TAIL_ONDISK_SIZE 8 struct hammer_fifo_head { u_int16_t hdr_signature; u_int16_t hdr_type; - u_int32_t hdr_fwd_link; - u_int32_t hdr_rev_link; + u_int32_t hdr_size; /* aligned size of the whole mess */ u_int32_t hdr_crc; - hammer_tid_t hdr_seq; - hammer_tid_t hdr_tid; + u_int32_t hdr_reserved02; + hammer_tid_t hdr_seq; /* related sequence number */ +}; + +struct hammer_fifo_tail { + u_int16_t tail_signature; + u_int16_t tail_type; + u_int32_t tail_size; /* aligned size of the whole mess */ }; typedef struct hammer_fifo_head *hammer_fifo_head_t; +typedef struct hammer_fifo_tail *hammer_fifo_tail_t; /* * Fifo header types. */ -#define HAMMER_HEAD_TYPE_PAD 0xF000U /* FIFO pad (also FREED) */ -#define HAMMER_HEAD_TYPE_VOL 0x7001U /* Volume (dummy header) */ -#define HAMMER_HEAD_TYPE_BTREE 0x7002U /* B-Tree node */ -#define HAMMER_HEAD_TYPE_UNDO 0x7003U /* random UNDO information */ -#define HAMMER_HEAD_TYPE_DELETE 0x7004U /* record deletion */ -#define HAMMER_HEAD_TYPE_RECORD 0x7005U /* Filesystem record */ -#define HAMMER_HEAD_TYPE_TERM 0x7009U /* Dummy Terminator */ +#define HAMMER_HEAD_TYPE_PAD (0x0040U|HAMMER_HEAD_FLAG_FREE) +#define HAMMER_HEAD_TYPE_VOL 0x0041U /* Volume (dummy header) */ +#define HAMMER_HEAD_TYPE_BTREE 0x0042U /* B-Tree node */ +#define HAMMER_HEAD_TYPE_UNDO 0x0043U /* random UNDO information */ +#define HAMMER_HEAD_TYPE_DELETE 0x0044U /* record deletion */ +#define HAMMER_HEAD_TYPE_RECORD 0x0045U /* Filesystem record */ -#define HAMMER_HEAD_TYPEF_FREED 0x8000U /* Indicates object freed */ +#define HAMMER_HEAD_FLAG_FREE 0x8000U /* Indicates object freed */ #define HAMMER_HEAD_SIGNATURE 0xC84EU +#define HAMMER_TAIL_SIGNATURE 0xC74FU /* * Misc FIFO structures (except for the B-Tree node and 
hammer record) @@ -224,11 +325,6 @@ typedef struct hammer_fifo_undo *hammer_fifo_undo_t; * any records remaining in memory can be flushed to the memory log * area. This allows the kernel to immediately return success. */ -#define HAMMER_VOL_MAXCLUSTERS 32768 /* 1-layer */ -#define HAMMER_VOL_MAXSUPERCLUSTERS 4096 /* 2-layer */ -#define HAMMER_VOL_SUPERCLUSTER_GROUP 16 -#define HAMMER_VOL_METAELMS_1LYR HAMMER_ALIST_METAELMS_32K_1LYR -#define HAMMER_VOL_METAELMS_2LYR HAMMER_ALIST_METAELMS_16K_2LYR #define HAMMER_BOOT_MINBYTES (32*1024) #define HAMMER_BOOT_NOMBYTES (64LL*1024*1024) @@ -239,7 +335,8 @@ typedef struct hammer_fifo_undo *hammer_fifo_undo_t; #define HAMMER_MEM_MAXBYTES (64LL*1024*1024*1024) struct hammer_volume_ondisk { - struct hammer_fifo_head head; + u_int64_t vol_signature;/* Signature */ + int64_t vol_bot_beg; /* byte offset of boot area or 0 */ int64_t vol_mem_beg; /* byte offset of memory log or 0 */ int64_t vol_buf_beg; /* byte offset of first buffer in volume */ @@ -250,7 +347,6 @@ struct hammer_volume_ondisk { uuid_t vol_fstype; /* identify filesystem type */ char vol_name[64]; /* Name of volume */ - u_int64_t vol_signature;/* Signature #2 */ int32_t vol_no; /* volume number within filesystem */ int32_t vol_count; /* number of volumes making up FS */ @@ -259,7 +355,7 @@ struct hammer_volume_ondisk { u_int32_t vol_flags; /* volume flags */ u_int32_t vol_rootvol; /* which volume is the root volume? */ - int32_t vol_reserved04; /* cluster size (same for all volumes) */ + int32_t vol_reserved04; int32_t vol_reserved05; u_int32_t vol_reserved06; u_int32_t vol_reserved07; @@ -268,6 +364,14 @@ struct hammer_volume_ondisk { int32_t vol_reserved08; int64_t vol_nblocks; /* total allocatable hammer bufs */ + /* + * bigblock freemap. + * + * XXX not implemented yet, just use a sequential index at + * the moment. 
+ */ + hammer_off_t vol0_free_off; + /* * These fields are initialized and space is reserved in every * volume making up a HAMMER filesytem, but only the master volume @@ -276,11 +380,15 @@ struct hammer_volume_ondisk { int64_t vol0_stat_bytes; /* for statfs only */ int64_t vol0_stat_inodes; /* for statfs only */ int64_t vol0_stat_records; /* total records in filesystem */ - hammer_off_t vol0_fifo_beg; /* CIRCULAR FIFO START */ - hammer_off_t vol0_fifo_end; /* CIRCULAR FIFO END */ hammer_off_t vol0_btree_root; /* B-Tree root */ hammer_tid_t vol0_next_tid; /* highest synchronized TID */ - hammer_tid_t vol0_next_seq; /* next SEQ no */ + hammer_tid_t vol0_next_seq; /* next SEQ no for undo */ + + /* + * Blockmaps for zones. Not all zones use a blockmap. + */ + struct hammer_blockmap_entry vol0_blockmap[HAMMER_MAX_ZONES]; + }; typedef struct hammer_volume_ondisk *hammer_volume_ondisk_t; @@ -289,20 +397,20 @@ typedef struct hammer_volume_ondisk *hammer_volume_ondisk_t; #define HAMMER_VOLF_OPEN 0x0002 /* volume is open */ /* - * All HAMMER records have a common 72-byte base and a variable-length - * extension, plus a possible data reference. The data portion of the - * HAMMER record can cross a filesystem buffer boundary (but not the primary - * record portion). - * - * Current only relative in-band data offsets are supported, but the field - * is large enough for future out-of-band references. + * All HAMMER records have a common 64-byte base and a 32 byte extension, + * plus a possible data reference. The data reference can be in-band or + * out-of-band. 
*/ + +#define HAMMER_RECORD_SIZE (64+32) + struct hammer_base_record { - struct hammer_fifo_head head; /* 16 byte fifo header */ + u_int32_t signature; /* record signature */ + u_int32_t data_crc; /* data crc */ struct hammer_base_elm base; /* 40 byte base element */ hammer_off_t data_off; /* in-band or out-of-band */ int32_t data_len; /* size of data in bytes */ - u_int32_t reserved03; + u_int32_t reserved02; }; /* @@ -407,6 +515,7 @@ struct hammer_inode_record { */ struct hammer_data_record { struct hammer_base_record base; + char data[32]; }; /* @@ -431,6 +540,7 @@ struct hammer_entry_record { struct hammer_base_record base; u_int64_t obj_id; /* object being referenced */ u_int64_t reserved01; + char name[16]; }; /* diff --git a/sys/vfs/hammer/hammer_freemap.c b/sys/vfs/hammer/hammer_freemap.c new file mode 100644 index 0000000000..7560d0fd58 --- /dev/null +++ b/sys/vfs/hammer/hammer_freemap.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2008 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.1 2008/02/10 09:51:01 dillon Exp $ + */ + +/* + * HAMMER freemap - bigblock allocator + */ +#include "hammer.h" + +hammer_off_t +hammer_freemap_alloc(hammer_mount_t hmp, int *errorp) +{ + hammer_volume_t root_volume; + hammer_volume_ondisk_t ondisk; + hammer_off_t raw_offset; + + root_volume = hammer_get_root_volume(hmp, errorp); + if (*errorp) + return(0); + ondisk = root_volume->ondisk; + + hammer_modify_volume(root_volume, &ondisk->vol0_free_off, + sizeof(ondisk->vol0_free_off)); + raw_offset = ondisk->vol0_free_off; + ondisk->vol0_free_off += HAMMER_LARGEBLOCK_SIZE; + KKASSERT(ondisk->vol0_free_off <= root_volume->maxbuf_off); + hammer_rel_volume(root_volume, 0); + return(raw_offset); +} + diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index 458904d564..8989564fd3 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.29 2008/02/08 08:30:59 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.30 2008/02/10 09:51:01 dillon Exp $ */ #include "hammer.h" @@ -237,7 +237,7 @@ retry: */ if (*errorp == 0) { ip->ino_rec = cursor.record->inode; - ip->ino_data = cursor.data1->inode; + ip->ino_data = cursor.data->inode; hammer_cache_node(cursor.node, &ip->cache[0]); if (cache) hammer_cache_node(cursor.node, cache); @@ -417,7 +417,7 @@ retry: * will remain set and prevent further updates. */ if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) { - record = hammer_alloc_mem_record(ip, sizeof(struct hammer_inode_record)); + record = hammer_alloc_mem_record(ip); record->rec.inode = ip->ino_rec; record->rec.inode.base.base.create_tid = last_tid; record->rec.inode.base.data_len = sizeof(ip->ino_data); diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c index 34e14e59b0..1bdc0b3830 100644 --- a/sys/vfs/hammer/hammer_object.c +++ b/sys/vfs/hammer/hammer_object.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.29 2008/02/08 08:30:59 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.30 2008/02/10 09:51:01 dillon Exp $ */ #include "hammer.h" @@ -133,7 +133,7 @@ RB_GENERATE_XLOOKUP(hammer_rec_rb_tree, INFO, hammer_record, rb_node, * returned referenced. 
*/ hammer_record_t -hammer_alloc_mem_record(hammer_inode_t ip, int32_t rec_len) +hammer_alloc_mem_record(hammer_inode_t ip) { hammer_record_t record; @@ -141,7 +141,6 @@ hammer_alloc_mem_record(hammer_inode_t ip, int32_t rec_len) record = kmalloc(sizeof(*record), M_HAMMER, M_WAITOK|M_ZERO); record->ip = ip; record->rec.base.base.btype = HAMMER_BTREE_TYPE_RECORD; - record->rec_len = rec_len; hammer_ref(&record->lock); return (record); } @@ -352,7 +351,7 @@ hammer_ip_add_directory(struct hammer_transaction *trans, int error; int bytes; - record = hammer_alloc_mem_record(dip, sizeof(struct hammer_entry_record)); + record = hammer_alloc_mem_record(dip); bytes = ncp->nc_nlen; /* NOTE: terminating \0 is NOT included */ if (++trans->hmp->namekey_iterator == 0) @@ -456,9 +455,7 @@ hammer_ip_sync_data(hammer_transaction_t trans, hammer_inode_t ip, hammer_record_ondisk_t rec; union hammer_btree_elm elm; hammer_off_t rec_offset; - hammer_off_t data_offset; - void *bdata1, *bdata2; - int32_t data2_index; + void *bdata; int error; KKASSERT((offset & HAMMER_BUFMASK) == 0); @@ -494,12 +491,13 @@ retry: * can cross buffer boundaries so we may have to split our bcopy. */ rec = hammer_alloc_record(ip->hmp, &rec_offset, HAMMER_RECTYPE_DATA, - sizeof(rec->data), &cursor.record_buffer, - &data_offset, bytes, - &bdata1, &bdata2, &data2_index, + &cursor.record_buffer, + bytes, &bdata, &cursor.data_buffer, &error); if (rec == NULL) goto done; + if (hammer_debug_general & 0x1000) + kprintf("OOB RECOR2 DATA REC %016llx DATA %016llx LEN=%d\n", rec_offset, rec->base.data_off, rec->base.data_len); /* * Fill everything in and insert our B-Tree node. 
@@ -514,22 +512,16 @@ retry: rec->base.base.create_tid = trans->tid; rec->base.base.delete_tid = 0; rec->base.base.rec_type = HAMMER_RECTYPE_DATA; - rec->base.head.hdr_crc = crc32(data, bytes); - KKASSERT(rec->base.data_off == data_offset); + rec->base.data_crc = crc32(data, bytes); KKASSERT(rec->base.data_len == bytes); - if (data2_index < bytes) { - bcopy(data, bdata1, data2_index); - bcopy((char *)data + data2_index, bdata2, bytes - data2_index); - } else { - bcopy(data, bdata1, bytes); - } + bcopy(data, bdata, bytes); elm.leaf.base = rec->base.base; elm.leaf.rec_offset = rec_offset; elm.leaf.data_offset = rec->base.data_off; elm.leaf.data_len = bytes; - elm.leaf.data_crc = rec->base.head.hdr_crc; + elm.leaf.data_crc = rec->base.data_crc; /* * Data records can wind up on-disk before the inode itself is @@ -542,11 +534,7 @@ retry: if (error == 0) goto done; - /* - * If we fail we may be able to unwind the allocation. - */ - rec->base.head.hdr_type |= HAMMER_HEAD_TYPEF_FREED; - hammer_unwind_fifo(ip->hmp, rec_offset); + hammer_blockmap_free(ip->hmp, rec_offset, HAMMER_RECORD_SIZE); done: hammer_done_cursor(&cursor); if (error == EDEADLK) @@ -555,7 +543,7 @@ done: } /* - * Sync an in-memory record to the disk. this is typically called via fsync + * Sync an in-memory record to the disk. This is typically called via fsync * from a cached record source. This code is responsible for actually * writing a record out to the disk. */ @@ -567,9 +555,7 @@ hammer_ip_sync_record(hammer_record_t record) hammer_mount_t hmp; union hammer_btree_elm elm; hammer_off_t rec_offset; - hammer_off_t data_offset; - void *bdata1; - int32_t alloc_data_len; + void *bdata; int error; hmp = record->ip->hmp; @@ -653,19 +639,33 @@ retry: * marked as being modified and further calls to * hammer_modify_buffer() will result in unneeded UNDO records. * - * Support zero-fill records. 
+ * Support zero-fill records (data == NULL and data_len != 0) */ - if (record->data == NULL) - alloc_data_len = 0; - else - alloc_data_len = record->rec.base.data_len; - - rec = hammer_alloc_record(hmp, &rec_offset, - record->rec.base.base.rec_type, - record->rec_len, &cursor.record_buffer, - &data_offset, alloc_data_len, - &bdata1, NULL, NULL, - NULL, &error); + if (record->data == NULL) { + rec = hammer_alloc_record(hmp, &rec_offset, + record->rec.base.base.rec_type, + &cursor.record_buffer, + 0, &bdata, + NULL, &error); + if (hammer_debug_general & 0x1000) + kprintf("NULL RECORD DATA\n"); + } else if (record->flags & HAMMER_RECF_INBAND) { + rec = hammer_alloc_record(hmp, &rec_offset, + record->rec.base.base.rec_type, + &cursor.record_buffer, + record->rec.base.data_len, &bdata, + NULL, &error); + if (hammer_debug_general & 0x1000) + kprintf("INBAND RECORD DATA %016llx DATA %016llx LEN=%d\n", rec_offset, rec->base.data_off, record->rec.base.data_len); + } else { + rec = hammer_alloc_record(hmp, &rec_offset, + record->rec.base.base.rec_type, + &cursor.record_buffer, + record->rec.base.data_len, &bdata, + &cursor.data_buffer, &error); + if (hammer_debug_general & 0x1000) + kprintf("OOB RECORD DATA REC %016llx DATA %016llx LEN=%d\n", rec_offset, rec->base.data_off, record->rec.base.data_len); + } if (rec == NULL) goto done; @@ -674,27 +674,24 @@ retry: * Fill in the remaining fields and insert our B-Tree node. */ rec->base.base = record->rec.base.base; - if (record->rec_len > sizeof(rec->base)) { - bcopy(&record->rec.base + 1, &rec->base + 1, - record->rec_len - sizeof(rec->base)); - } + bcopy(&record->rec.base + 1, &rec->base + 1, + HAMMER_RECORD_SIZE - sizeof(record->rec.base)); /* * Copy the data and deal with zero-fill support. 
*/ if (record->data) { - rec->base.head.hdr_crc = crc32(record->data, alloc_data_len); - KKASSERT(alloc_data_len == rec->base.data_len); - bcopy(record->data, bdata1, alloc_data_len); + rec->base.data_crc = crc32(record->data, rec->base.data_len); + bcopy(record->data, bdata, rec->base.data_len); } else { rec->base.data_len = record->rec.base.data_len; } elm.leaf.base = record->rec.base.base; elm.leaf.rec_offset = rec_offset; - elm.leaf.data_offset = data_offset; + elm.leaf.data_offset = rec->base.data_off; elm.leaf.data_len = rec->base.data_len; - elm.leaf.data_crc = rec->base.head.hdr_crc; + elm.leaf.data_crc = rec->base.data_crc; error = hammer_btree_insert(&cursor, &elm); @@ -709,8 +706,7 @@ retry: /* * Try to unwind the fifo allocation */ - rec->base.head.hdr_type |= HAMMER_HEAD_TYPEF_FREED; - hammer_unwind_fifo(hmp, rec_offset); + hammer_blockmap_free(hmp, rec_offset, HAMMER_RECORD_SIZE); done: record->flags &= ~HAMMER_RECF_SYNCING; hammer_done_cursor(&cursor); @@ -735,8 +731,9 @@ static int hammer_mem_add(struct hammer_transaction *trans, hammer_record_t record) { - int bytes; void *data; + int bytes; + int reclen; /* * Make a private copy of record->data @@ -747,11 +744,22 @@ hammer_mem_add(struct hammer_transaction *trans, hammer_record_t record) * union, otherwise allocate a copy. 
*/ bytes = record->rec.base.data_len; - if (bytes <= (int)sizeof(record->rec) - record->rec_len) { - bcopy(record->data, - (char *)&record->rec + record->rec_len, bytes); - record->data = (void *)((char *)&record->rec + - record->rec_len); + switch(record->rec.base.base.rec_type) { + case HAMMER_RECTYPE_DIRENTRY: + reclen = offsetof(struct hammer_entry_record, name[0]); + break; + case HAMMER_RECTYPE_DATA: + reclen = offsetof(struct hammer_data_record, data[0]); + break; + default: + reclen = sizeof(record->rec); + break; + } + if (reclen + bytes <= HAMMER_RECORD_SIZE) { + bcopy(record->data, (char *)&record->rec + reclen, + bytes); + record->data = (void *)((char *)&record->rec + reclen); + record->flags |= HAMMER_RECF_INBAND; } else { ++hammer_count_record_datas; data = kmalloc(bytes, M_HAMMER, M_WAITOK); @@ -1019,7 +1027,7 @@ hammer_ip_next(hammer_cursor_t cursor) } /* - * Resolve the cursor->data1/2 pointer for the current cursor position in + * Resolve the cursor->data pointer for the current cursor position in * a merged iteration. */ int @@ -1028,9 +1036,7 @@ hammer_ip_resolve_data(hammer_cursor_t cursor) int error; if (cursor->iprec && cursor->record == &cursor->iprec->rec) { - cursor->data1 = cursor->iprec->data; - cursor->data2 = NULL; - cursor->data_split = cursor->iprec->rec.base.data_len; + cursor->data = cursor->iprec->data; error = 0; } else { error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA); @@ -1038,6 +1044,21 @@ hammer_ip_resolve_data(hammer_cursor_t cursor) return(error); } +int +hammer_ip_resolve_record_and_data(hammer_cursor_t cursor) +{ + int error; + + if (cursor->iprec && cursor->record == &cursor->iprec->rec) { + cursor->data = cursor->iprec->data; + error = 0; + } else { + error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA | + HAMMER_CURSOR_GET_RECORD); + } + return(error); +} + /* * Delete all records within the specified range for inode ip. 
* @@ -1260,7 +1281,7 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid) */ error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD); elm = NULL; - hmp = cursor->node->volume->hmp; + hmp = cursor->node->hmp; dodelete = 0; if (error == 0) { @@ -1320,7 +1341,14 @@ hammer_delete_at_cursor(hammer_cursor_t cursor, int64_t *stat_bytes) cursor->flags |= HAMMER_CURSOR_DELBTREE; cursor->flags &= ~HAMMER_CURSOR_ATEDISK; } - hammer_free_fifo(cursor->node->volume->hmp, rec_offset); + } + if (error == 0) { + hammer_blockmap_free(cursor->node->hmp, rec_offset, + sizeof(union hammer_record_ondisk)); + } + if (error == 0 && + (data_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_LARGE_DATA) { + hammer_blockmap_free(cursor->node->hmp, data_offset, data_len); } #if 0 kprintf("hammer_delete_at_cursor: %d:%d:%08x %08x/%d " diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c index 336739ddd4..1022e34950 100644 --- a/sys/vfs/hammer/hammer_ondisk.c +++ b/sys/vfs/hammer/hammer_ondisk.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.28 2008/02/08 08:30:59 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.29 2008/02/10 09:51:01 dillon Exp $ */ /* * Manage HAMMER's on-disk structures. 
These routines are primarily @@ -49,6 +49,7 @@ static void hammer_free_volume(hammer_volume_t volume); static int hammer_load_volume(hammer_volume_t volume); static int hammer_load_buffer(hammer_buffer_t buffer, int isnew); static int hammer_load_node(hammer_node_t node); +#if 0 static hammer_off_t hammer_advance_fifo(hammer_volume_t volume, hammer_off_t off, int32_t bytes); @@ -56,6 +57,7 @@ static hammer_off_t hammer_alloc_fifo(hammer_mount_t hmp, int32_t rec_len, int32_t data_len, struct hammer_buffer **rec_bufferp, u_int16_t hdr_type, int can_cross, struct hammer_buffer **data2_bufferp, int *errorp); +#endif /* * Red-Black tree support for various structures @@ -487,8 +489,8 @@ hammer_rel_volume(hammer_volume_t volume, int flush) * BUFFERS * ************************************************************************ * - * Manage buffers. Note that a buffer holds a reference to its associated - * cluster, and its cluster will hold a reference to the cluster's volume. + * Manage buffers. Currently all blockmap-backed zones are translated + * to zone-2 buffer offsets. */ hammer_buffer_t hammer_get_buffer(hammer_mount_t hmp, hammer_off_t buf_offset, @@ -497,13 +499,21 @@ hammer_get_buffer(hammer_mount_t hmp, hammer_off_t buf_offset, hammer_buffer_t buffer; hammer_volume_t volume; int vol_no; + int zone; + zone = HAMMER_ZONE_DECODE(buf_offset); + if (zone > HAMMER_ZONE_RAW_BUFFER_INDEX) { + buf_offset = hammer_blockmap_lookup(hmp, buf_offset, errorp); + KKASSERT(*errorp == 0); + } buf_offset &= ~HAMMER_BUFMASK64; - KKASSERT((buf_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RAW_BUFFER); + KKASSERT((buf_offset & HAMMER_ZONE_RAW_BUFFER) == + HAMMER_ZONE_RAW_BUFFER); vol_no = HAMMER_VOL_DECODE(buf_offset); volume = hammer_get_volume(hmp, vol_no, errorp); if (volume == NULL) return(NULL); + /* * NOTE: buf_offset and maxbuf_off are both full offset * specifications. 
@@ -777,30 +787,21 @@ hammer_bnew(hammer_mount_t hmp, hammer_off_t buf_offset, int *errorp, hammer_node_t hammer_get_node(hammer_mount_t hmp, hammer_off_t node_offset, int *errorp) { - hammer_volume_t volume; hammer_node_t node; - int32_t vol_no; - KKASSERT((node_offset & HAMMER_OFF_ZONE_MASK) == - HAMMER_ZONE_RAW_BUFFER); - vol_no = HAMMER_VOL_DECODE(node_offset); - volume = hammer_get_volume(hmp, vol_no, errorp); - if (volume == NULL) - return(NULL); + KKASSERT((node_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_BTREE); /* * Locate the structure, allocating one if necessary. */ again: - node = RB_LOOKUP(hammer_nod_rb_tree, &volume->rb_nods_root, - node_offset); + node = RB_LOOKUP(hammer_nod_rb_tree, &hmp->rb_nods_root, node_offset); if (node == NULL) { ++hammer_count_nodes; node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO); node->node_offset = node_offset; - node->volume = volume; /* not directly referenced */ - if (RB_INSERT(hammer_nod_rb_tree, &volume->rb_nods_root, - node)) { + node->hmp = hmp; + if (RB_INSERT(hammer_nod_rb_tree, &hmp->rb_nods_root, node)) { --hammer_count_nodes; kfree(node, M_HAMMER); goto again; @@ -812,7 +813,6 @@ again: hammer_rel_node(node); node = NULL; } - hammer_rel_volume(volume, 0); return(node); } @@ -854,7 +854,7 @@ hammer_load_node(hammer_node_t node) if ((buffer = node->buffer) != NULL) { error = hammer_ref_buffer(buffer); } else { - buffer = hammer_get_buffer(node->volume->hmp, + buffer = hammer_get_buffer(node->hmp, node->node_offset, 0, &error); if (buffer) { @@ -943,8 +943,10 @@ hammer_rel_node(hammer_node_t node) * it as being free. Note that the disk space is physically * freed when the fifo cycles back through the node. */ - if (node->flags & HAMMER_NODE_DELETED) - hammer_free_fifo(node->volume->hmp, node->node_offset); + if (node->flags & HAMMER_NODE_DELETED) { + hammer_blockmap_free(node->hmp, node->node_offset, + sizeof(*node->ondisk)); + } /* * Destroy the node. 
Record pertainant data because the node @@ -1031,8 +1033,7 @@ hammer_flush_node(hammer_node_t node) if (node->cache2) *node->cache2 = NULL; if (node->lock.refs == 0 && node->ondisk == NULL) { - RB_REMOVE(hammer_nod_rb_tree, &node->volume->rb_nods_root, - node); + RB_REMOVE(hammer_nod_rb_tree, &node->hmp->rb_nods_root, node); if ((buffer = node->buffer) != NULL) { node->buffer = NULL; TAILQ_REMOVE(&buffer->clist, node, entry); @@ -1076,12 +1077,14 @@ hammer_alloc_btree(hammer_mount_t hmp, int *errorp) hammer_node_t node = NULL; hammer_off_t node_offset; - node_offset = hammer_alloc_fifo(hmp, sizeof(struct hammer_node_ondisk), - 0, &buffer, HAMMER_HEAD_TYPE_BTREE, - 0, NULL, - errorp); - if (*errorp == 0) + node_offset = hammer_blockmap_alloc(hmp, HAMMER_ZONE_BTREE_INDEX, + sizeof(struct hammer_node_ondisk), + errorp); + if (*errorp == 0) { node = hammer_get_node(hmp, node_offset, errorp); + hammer_modify_node(node); + bzero(node->ondisk, sizeof(*node->ondisk)); + } if (buffer) hammer_rel_buffer(buffer, 0); return(node); @@ -1091,60 +1094,102 @@ hammer_alloc_btree(hammer_mount_t hmp, int *errorp) * The returned buffers are already appropriately marked as being modified. * If the caller marks them again unnecessary undo records may be generated. * - * The core record (rec_len) cannot cross a buffer boundary. The record + data - * is only allowed to cross a buffer boundary for HAMMER_RECTYPE_DATA + * In-band data is indicated by data_bufferp == NULL. Pass a data_len of 0 + * for zero-fill (caller modifies data_len afterwords). 
*/ void * hammer_alloc_record(hammer_mount_t hmp, - hammer_off_t *rec_offp, u_int8_t rec_type, - int32_t rec_len, struct hammer_buffer **rec_bufferp, - hammer_off_t *data_offp, int32_t data_len, - void **data1p, void **data2p, int32_t *data2_index, - struct hammer_buffer **data2_bufferp, - int *errorp) + hammer_off_t *rec_offp, u_int8_t rec_type, + struct hammer_buffer **rec_bufferp, + int32_t data_len, void **datap, + struct hammer_buffer **data_bufferp, int *errorp) { - int32_t aligned_rec_len, n; - hammer_off_t rec_offset; hammer_record_ondisk_t rec; - int can_cross; + hammer_off_t rec_offset; + hammer_off_t data_offset; + int32_t reclen; - aligned_rec_len = (rec_len + HAMMER_HEAD_ALIGN_MASK) & - ~HAMMER_HEAD_ALIGN_MASK; - can_cross = (rec_type == HAMMER_RECTYPE_DATA); + if (datap) + *datap = NULL; - rec_offset = hammer_alloc_fifo(hmp, aligned_rec_len, data_len, - rec_bufferp, HAMMER_HEAD_TYPE_RECORD, - can_cross, data2_bufferp, errorp); + /* + * Allocate the record + */ + rec_offset = hammer_blockmap_alloc(hmp, HAMMER_ZONE_RECORD_INDEX, + HAMMER_RECORD_SIZE, errorp); if (*errorp) return(NULL); + /* + * Allocate data + */ + if (data_len) { + if (data_bufferp == NULL) { + switch(rec_type) { + case HAMMER_RECTYPE_DATA: + reclen = offsetof(struct hammer_data_record, + data[0]); + break; + case HAMMER_RECTYPE_DIRENTRY: + reclen = offsetof(struct hammer_entry_record, + name[0]); + break; + default: + panic("hammer_alloc_record: illegal " + "in-band data"); + /* NOT REACHED */ + reclen = 0; + break; + } + KKASSERT(reclen + data_len <= HAMMER_RECORD_SIZE); + data_offset = rec_offset + reclen; + } else if (data_len < HAMMER_BUFSIZE) { + data_offset = hammer_blockmap_alloc(hmp, + HAMMER_ZONE_SMALL_DATA_INDEX, + data_len, errorp); + } else { + data_offset = hammer_blockmap_alloc(hmp, + HAMMER_ZONE_LARGE_DATA_INDEX, + data_len, errorp); + } + } else { + data_offset = 0; + } + if (*errorp) { + hammer_blockmap_free(hmp, rec_offset, HAMMER_RECORD_SIZE); + return(NULL); + } + 
/* * Basic return values. */ *rec_offp = rec_offset; - if (data_offp) - *data_offp = rec_offset + aligned_rec_len; - rec = (void *)((char *)(*rec_bufferp)->ondisk + - ((int32_t)rec_offset & HAMMER_BUFMASK)); - if (data_len) - rec->base.data_off = rec_offset + aligned_rec_len; + rec = hammer_bread(hmp, rec_offset, errorp, rec_bufferp); + KKASSERT(*errorp == 0); + rec->base.data_off = data_offset; rec->base.data_len = data_len; - if (data1p) - *data1p = (void *)((char *)rec + aligned_rec_len); - if (data2_index) { - n = ((int32_t)rec_offset & HAMMER_BUFMASK) + - aligned_rec_len + data_len; - if (n > HAMMER_BUFSIZE) { - *data2_index = data_len - (n - HAMMER_BUFSIZE); - KKASSERT(can_cross != 0); - *data2p = (*data2_bufferp)->ondisk; + hammer_modify_buffer(*rec_bufferp, NULL, 0); + + if (data_bufferp) { + if (data_len) { + *datap = hammer_bread(hmp, data_offset, errorp, + data_bufferp); + KKASSERT(*errorp == 0); + hammer_modify_buffer(*data_bufferp, NULL, 0); } else { - *data2_index = data_len; - *data2p = NULL; + *datap = NULL; + } + } else if (data_len) { + KKASSERT(data_offset + data_len - rec_offset <= + HAMMER_RECORD_SIZE); + if (datap) { + *datap = (void *)((char *)rec + + (int32_t)(data_offset - rec_offset)); } } else { - KKASSERT(data2p == NULL); + KKASSERT(datap == NULL); } + KKASSERT(*errorp == 0); return(rec); } @@ -1156,6 +1201,8 @@ hammer_alloc_record(hammer_mount_t hmp, int hammer_generate_undo(hammer_mount_t hmp, hammer_off_t off, void *base, int len) { + return(0); +#if 0 hammer_off_t rec_offset; hammer_fifo_undo_t undo; hammer_buffer_t buffer = NULL; @@ -1173,8 +1220,11 @@ hammer_generate_undo(hammer_mount_t hmp, hammer_off_t off, void *base, int len) if (buffer) hammer_rel_buffer(buffer, 0); return(error); +#endif } +#if 0 + /* * Allocate space from the FIFO. The first rec_len bytes will be zero'd. 
* The entire space is marked modified (the caller should not remark it as @@ -1191,6 +1241,7 @@ hammer_alloc_fifo(hammer_mount_t hmp, int32_t rec_len, int32_t data_len, hammer_volume_t end_volume; hammer_volume_ondisk_t ondisk; hammer_fifo_head_t head; + hammer_fifo_tail_t tail; hammer_off_t end_off = 0; hammer_off_t tmp_off = 0; int32_t end_vol_no; @@ -1199,12 +1250,14 @@ hammer_alloc_fifo(hammer_mount_t hmp, int32_t rec_len, int32_t data_len, int32_t aligned_bytes; int must_pad; - aligned_bytes = (rec_len + data_len + HAMMER_HEAD_ALIGN_MASK) & - ~HAMMER_HEAD_ALIGN_MASK; + aligned_bytes = (rec_len + data_len + HAMMER_TAIL_ONDISK_SIZE + + HAMMER_HEAD_ALIGN_MASK) & ~HAMMER_HEAD_ALIGN_MASK; root_volume = hammer_get_root_volume(hmp, errorp); - while (root_volume) { + if (root_volume) hammer_modify_volume(root_volume, NULL, 0); + + while (root_volume) { ondisk = root_volume->ondisk; end_off = ondisk->vol0_fifo_end; @@ -1278,6 +1331,10 @@ hammer_alloc_fifo(hammer_mount_t hmp, int32_t rec_len, int32_t data_len, * The entire record cannot cross a buffer boundary if * can_cross is 0. * + * The entire record cannot cover more then two whole buffers + * regardless. Even if the data portion is 16K, this case + * can occur due to the addition of the fifo_tail. + * * It is illegal for a record to cross a volume boundary. * * It is illegal for a record to cross a recovery boundary @@ -1302,36 +1359,79 @@ hammer_alloc_fifo(hammer_mount_t hmp, int32_t rec_len, int32_t data_len, HAMMER_OFF_SHORT_REC_MASK) { must_pad = 1; } + if (xoff + aligned_bytes - HAMMER_BUFSIZE > + HAMMER_BUFSIZE) { + KKASSERT(xoff != 0); + must_pad = 1; + } } + + /* + * Pad to end of the buffer if necessary. PADs can be + * squeezed into as little as 8 bytes (hence our alignment + * requirement). The crc, reserved, and sequence number + * fields are not used, but initialize them anyway if there + * is enough room. 
+ */ if (must_pad) { - must_pad = HAMMER_BUFSIZE - xoff; + xoff = HAMMER_BUFSIZE - xoff; head->hdr_signature = HAMMER_HEAD_SIGNATURE; head->hdr_type = HAMMER_HEAD_TYPE_PAD; - head->hdr_fwd_link = must_pad; - head->hdr_seq = 0; /* XXX seq */ - KKASSERT((must_pad & 7) == 0); + head->hdr_size = xoff; + if (xoff >= HAMMER_HEAD_ONDISK_SIZE + + HAMMER_TAIL_ONDISK_SIZE) { + head->hdr_crc = 0; + head->hdr_reserved02 = 0; + head->hdr_seq = 0; + } + + tail = (void *)((char *)head + xoff - + HAMMER_TAIL_ONDISK_SIZE); + if ((void *)head != (void *)tail) { + tail->tail_signature = HAMMER_TAIL_SIGNATURE; + tail->tail_type = HAMMER_HEAD_TYPE_PAD; + tail->tail_size = xoff; + } + KKASSERT((xoff & HAMMER_HEAD_ALIGN_MASK) == 0); ondisk->vol0_fifo_end = hammer_advance_fifo((*rec_bufferp)->volume, - end_off, must_pad); - /* XXX rev_link */ + end_off, xoff); continue; } if (xoff + aligned_bytes > HAMMER_BUFSIZE) { - KKASSERT(xoff + aligned_bytes <= HAMMER_BUFSIZE * 2); - hammer_bnew(hmp, end_off + (HAMMER_BUFSIZE - xoff), - errorp, data2_bufferp); + xoff = xoff + aligned_bytes - HAMMER_BUFSIZE; + + KKASSERT(xoff <= HAMMER_BUFSIZE); + tail = hammer_bnew(hmp, end_off + aligned_bytes - + HAMMER_TAIL_ONDISK_SIZE, + errorp, data2_bufferp); hammer_modify_buffer(*data2_bufferp, NULL, 0); if (*errorp) goto done; + + /* + * Retry if someone else appended to the fifo while + * we were blocked. 
+ */ + if (ondisk->vol0_fifo_end != end_off) + continue; + } else { + tail = (void *)((char *)head + aligned_bytes - + HAMMER_TAIL_ONDISK_SIZE); } + bzero(head, rec_len); head->hdr_signature = HAMMER_HEAD_SIGNATURE; head->hdr_type = hdr_type; - head->hdr_fwd_link = aligned_bytes / 64; - head->hdr_rev_link = -1; /* XXX */ + head->hdr_size = aligned_bytes; head->hdr_crc = 0; - head->hdr_seq = 0; /* XXX */ + head->hdr_seq = root_volume->ondisk->vol0_next_seq++; + + tail->tail_signature = HAMMER_TAIL_SIGNATURE; + tail->tail_type = hdr_type; + tail->tail_size = aligned_bytes; + ondisk->vol0_fifo_end = hammer_advance_fifo((*rec_bufferp)->volume, end_off, aligned_bytes); @@ -1358,7 +1458,7 @@ hammer_free_fifo(hammer_mount_t hmp, hammer_off_t fifo_offset) if (head) { hammer_modify_buffer(buffer, &head->hdr_type, sizeof(head->hdr_type)); - head->hdr_type |= HAMMER_HEAD_TYPEF_FREED; + head->hdr_type |= HAMMER_HEAD_FLAG_FREE; } if (buffer) hammer_rel_buffer(buffer, 0); @@ -1394,6 +1494,7 @@ hammer_advance_fifo(hammer_volume_t volume, hammer_off_t off, int32_t bytes) } return(off); } +#endif /* * Sync dirty buffers to the media @@ -1471,6 +1572,7 @@ hammer_sync_buffer(hammer_buffer_t buffer, void *data __unused) return(0); } +#if 0 /* * Generic buffer initialization. Initialize the A-list into an all-allocated * state with the free block limit properly set. @@ -1483,9 +1585,10 @@ hammer_init_fifo(hammer_fifo_head_t head, u_int16_t type) { head->hdr_signature = HAMMER_HEAD_SIGNATURE; head->hdr_type = type; - head->hdr_rev_link = 0; - head->hdr_fwd_link = 0; + head->hdr_size = 0; head->hdr_crc = 0; head->hdr_seq = 0; } +#endif + diff --git a/sys/vfs/hammer/hammer_spike.c b/sys/vfs/hammer/hammer_spike.c deleted file mode 100644 index 8b9a52d3f0..0000000000 --- a/sys/vfs/hammer/hammer_spike.c +++ /dev/null @@ -1,392 +0,0 @@ -/* - * Copyright (c) 2007 The DragonFly Project. All rights reserved. 
- * - * This code is derived from software contributed to The DragonFly Project - * by Matthew Dillon - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of The DragonFly Project nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific, prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $DragonFly: src/sys/vfs/hammer/Attic/hammer_spike.c,v 1.15 2008/02/08 08:31:00 dillon Exp $ - */ - -#include "hammer.h" - -#if 0 - -/* - * Load spike info given a cursor. The cursor must point to the leaf node - * that needs to be spiked after a failed insertion. 
- */ -void -hammer_load_spike(hammer_cursor_t cursor, struct hammer_cursor **spikep) -{ - hammer_cursor_t spike; - - KKASSERT(cursor->node->ondisk->type == HAMMER_BTREE_TYPE_LEAF); - KKASSERT(*spikep == NULL); - *spikep = spike = kmalloc(sizeof(*spike), M_HAMMER, M_WAITOK|M_ZERO); - ++hammer_count_spikes; - - spike->parent = cursor->parent; - spike->parent_index = cursor->parent_index; - spike->node = cursor->node; - spike->index = cursor->index; - spike->left_bound = cursor->left_bound; - spike->right_bound = cursor->right_bound; - spike->key_beg = cursor->key_beg; - - if (spike->parent) { - hammer_ref_node(spike->parent); - hammer_lock_sh(&spike->parent->lock); - } - hammer_ref_node(spike->node); - hammer_lock_sh(&spike->node->lock); - if (hammer_debug_general & 0x40) - kprintf("LOAD SPIKE %p\n", spike); -} - -/* - * Spike code - make room in a cluster by spiking in a new cluster. - * - * The spike structure contains a locked and reference B-Tree leaf node. - * The spike at a minimum must move the contents of the leaf into a - * new cluster and replace the leaf with two elements representing the - * SPIKE_BEG and SPIKE_END. - * - * Various optimizations are desireable, including merging the spike node - * with an adjacent node that has already been spiked, if its cluster is - * not full, or promoting the spike node to the parent cluster of the current - * cluster when it represents the right hand boundary leaf node in the - * cluster (to avoid append chains). 
- */ -int -hammer_spike(struct hammer_cursor **spikep) -{ - hammer_cursor_t spike; - struct hammer_cursor ncursor; - hammer_cluster_t ocluster; - hammer_cluster_t ncluster; - hammer_node_ondisk_t ondisk; - hammer_btree_elm_t elm; - hammer_node_t onode; - hammer_record_ondisk_t rec; - hammer_node_locklist_t locklist = NULL; - int error; - int b, e; - const int esize = sizeof(*elm); - - if (hammer_debug_general & 0x40) - kprintf("hammer_spike: ENOSPC in cluster, spiking\n"); - /*Debugger("ENOSPC");*/ - - /* - * Validate and lock the spike. If this fails due to a deadlock - * we still return 0 since a spike is only called when the - * caller intends to retry the operation. - */ - spike = *spikep; - KKASSERT(spike != NULL); - KKASSERT(spike->parent && - spike->parent->cluster == spike->node->cluster); - KKASSERT(spike->node->ondisk->type == HAMMER_BTREE_TYPE_LEAF); - - error = hammer_cursor_upgrade(spike); - if (error) { - error = 0; - goto failed4; - } - - /* - * Our leaf may contain spikes. We have to lock the root node - * in each target cluster. - */ - error = hammer_btree_lock_children(spike, &locklist); - if (error) { - error = 0; - goto failed4; - } - - onode = spike->node; - ocluster = onode->cluster; - ondisk = onode->ondisk; - hammer_lock_ex(&ocluster->io.lock); - - /* - * Calculate the range of elements in the leaf that we will push - * down into our spike. For the moment push them all down. - */ - b = 0; - e = ondisk->count; - - /* - * Use left-bound for spike if b == 0, else use the base element - * for the item to the left and adjust it past one unit. 
- */ - if (b == 0) { - spike->key_beg = *spike->left_bound; - } else { - spike->key_beg = ondisk->elms[b-1].leaf.base; - if (spike->key_beg.create_tid != 0) { - ++spike->key_beg.create_tid; - } else if (spike->key_beg.key != HAMMER_MAX_KEY) { - ++spike->key_beg.key; - spike->key_beg.create_tid = 1; - } else if (spike->key_beg.rec_type != HAMMER_MAX_RECTYPE) { - ++spike->key_beg.rec_type; - spike->key_beg.key = HAMMER_MIN_KEY; - spike->key_beg.create_tid = 1; - } else if (spike->key_beg.obj_id != HAMMER_MAX_OBJID) { - ++spike->key_beg.obj_id; - spike->key_beg.key = HAMMER_MIN_KEY; - spike->key_beg.create_tid = 1; - spike->key_beg.rec_type = HAMMER_MIN_RECTYPE; - } else { - panic("hammer_spike: illegal key"); - } - KKASSERT(hammer_btree_cmp(&ondisk->elms[b-1].base, &spike->key_beg) < 0); - } - - /* - * Use the right-bound if e is terminal, otherwise use the element - * at [e]. key_end is exclusive for the call to hammer_init_cluster() - * and is then made inclusive later to construct the SPIKE_END - * element. - */ - if (e == ondisk->count) - spike->key_end = *spike->right_bound; - else - spike->key_end = ondisk->elms[e].leaf.base; - - /* - * Heuristic: Attempt to size the spike range according to - * expected traffic. This is primarily responsible for the - * initial layout of the filesystem. - */ - if (e && b != e) { - int32_t clsize = ocluster->volume->ondisk->vol_clsize; - int64_t delta = 1000000000; - int64_t dkey; - - elm = &ondisk->elms[e-1]; - if (elm->base.obj_id == spike->key_end.obj_id && - elm->base.rec_type == spike->key_end.rec_type) { - /* - * NOTE: dkey can overflow. - */ - dkey = elm->base.key + clsize; - if (dkey > elm->base.key && dkey < spike->key_end.key) - spike->key_end.key = elm->base.key + clsize; - } else if (elm->base.obj_id + delta < spike->key_end.obj_id) { - spike->key_end.obj_id = elm->base.obj_id + delta; - } - } - - /* - * Allocate and lock a new cluster, initialize its bounds. 
- */ - ncluster = hammer_alloc_cluster(ocluster->volume->hmp, ocluster, - &error); - if (ncluster == NULL) - goto failed3; - hammer_init_cluster(ncluster, &spike->key_beg, &spike->key_end); - - /* - * Get a cursor for the new cluster. Operations will be limited to - * this cluster. Set HAMMER_CURSOR_RECOVER to force internal - * boundary elements in a way that allows us to copy spikes. - */ - error = hammer_init_cursor_cluster(&ncursor, ncluster); - if (error) - goto failed1; - ncursor.flags |= HAMMER_CURSOR_INSERT | HAMMER_CURSOR_RECOVER; - - /* - * Copy the elements in the leaf node to the new target cluster. - */ - for (spike->index = b; spike->index < e; ++spike->index) { - elm = &onode->ondisk->elms[spike->index]; - - if (elm->leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_END) - continue; - error = hammer_btree_extract(spike, - HAMMER_CURSOR_GET_RECORD | - HAMMER_CURSOR_GET_DATA); - if (error == 0) { - ncursor.key_beg = elm->leaf.base; - error = hammer_write_record(&ncursor, spike->record, - spike->data, spike->flags); - } - - KKASSERT(error != EDEADLK); - if (error == ENOSPC) { - kprintf("impossible ENOSPC error on spike\n"); - error = EIO; - } - if (error) - goto failed1; - } - - /* - * Delete the records and data associated with the old leaf node, - * replacing them with the spike elements. - * - * XXX I/O ordering issue, we're destroying these records too - * early, but we need one for the spike allocation. What to do? 
- */ - for (spike->index = b; spike->index < e; ++spike->index) { - int32_t roff; - u_int8_t rec_type; - - elm = &onode->ondisk->elms[spike->index]; - if (elm->leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_BEG) - continue; - KKASSERT(elm->leaf.rec_offset > 0); - if (elm->leaf.base.btype == HAMMER_BTREE_TYPE_RECORD) - rec_type = elm->leaf.base.rec_type; - else - rec_type = HAMMER_RECTYPE_CLUSTER; - hammer_free_record(ocluster, elm->leaf.rec_offset, rec_type); - if (elm->leaf.base.btype == HAMMER_BTREE_TYPE_RECORD && - elm->leaf.data_offset) { - roff = elm->leaf.data_offset - elm->leaf.rec_offset; - if (roff < 0 || roff >= HAMMER_RECORD_SIZE) { - hammer_free_data(ocluster, - elm->leaf.data_offset, - elm->leaf.data_len); - } - } - } - - /* - * Add a record representing the spike using space freed up by the - * above deletions. - */ - rec = hammer_alloc_record(ocluster, &error, - HAMMER_RECTYPE_CLUSTER, - &spike->record_buffer); - KKASSERT(error == 0); - rec->spike.base.base.btype = HAMMER_BTREE_TYPE_RECORD; - rec->spike.base.base.rec_type = HAMMER_RECTYPE_CLUSTER; - rec->spike.base.rec_id = hammer_alloc_recid(ocluster); - rec->spike.clu_no = ncluster->clu_no; - rec->spike.vol_no = ncluster->volume->vol_no; - rec->spike.clu_id = 0; - - /* - * Construct the spike elements. Note that the right boundary - * is range-exclusive whereas the SPIKE_END must be range-inclusive. 
- */ - hammer_modify_node(onode); - ondisk = onode->ondisk; - elm = &ondisk->elms[b]; - - if (e - b != 2) - bcopy(&elm[e - b], &elm[2], (ondisk->count - e) * esize); - ondisk->count = ondisk->count - (e - b) + 2; - - elm[0].leaf.base = spike->key_beg; - elm[0].leaf.base.btype = HAMMER_BTREE_TYPE_SPIKE_BEG; - elm[0].leaf.rec_offset = hammer_bclu_offset(spike->record_buffer, rec); - elm[0].leaf.spike_clu_no = ncluster->clu_no; - elm[0].leaf.spike_vol_no = ncluster->volume->vol_no; - elm[0].leaf.spike_unused01 = 0; - - elm[1].leaf.base = spike->key_end; - elm[1].leaf.base.btype = HAMMER_BTREE_TYPE_SPIKE_END; - elm[1].leaf.rec_offset = elm[0].leaf.rec_offset; - elm[1].leaf.spike_clu_no = ncluster->clu_no; - elm[1].leaf.spike_vol_no = ncluster->volume->vol_no; - elm[1].leaf.spike_unused01 = 0; - - /* - * Make the SPIKE_END element inclusive. - */ - if (elm[1].leaf.base.create_tid != 1) { - --elm[1].leaf.base.create_tid; - } else if (elm[0].leaf.base.key != HAMMER_MIN_KEY) { - --elm[0].leaf.base.key; - elm[0].leaf.base.create_tid = 0; /* max value */ - } else if (elm[0].leaf.base.rec_type != HAMMER_MIN_RECTYPE) { - --elm[0].leaf.base.rec_type; - elm[0].leaf.base.key = HAMMER_MAX_KEY; - elm[0].leaf.base.create_tid = 0; /* max value */ - } else if (elm[0].leaf.base.obj_id != HAMMER_MIN_OBJID) { - --elm[0].leaf.base.obj_id; - elm[0].leaf.base.rec_type = HAMMER_MAX_RECTYPE; - elm[0].leaf.base.key = HAMMER_MAX_KEY; - elm[0].leaf.base.create_tid = 0; /* max value */ - } else { - panic("hammer_spike: illegal key"); - } - - /* - * Adjust ncluster - */ - { - hammer_cluster_ondisk_t ondisk; - - hammer_modify_cluster(ncluster); - ondisk = ncluster->ondisk; - ondisk->clu_btree_parent_vol_no = ocluster->volume->vol_no; - ondisk->clu_btree_parent_clu_no = ocluster->clu_no; - ondisk->clu_btree_parent_offset = onode->node_offset; - ondisk->clu_btree_parent_clu_gen = ocluster->ondisk->clu_gen; - } - - /* - * XXX I/O dependancy - new cluster must be flushed before current - * cluster can 
be flushed. - */ - /*Debugger("COPY COMPLETE");*/ - hammer_done_cursor(&ncursor); - goto success; - - /* - * Cleanup - */ -failed1: - hammer_done_cursor(&ncursor); - hammer_free_cluster(ncluster); -success: - hammer_unlock(&ncluster->io.lock); - hammer_rel_cluster(ncluster, 0); -failed3: - if (hammer_debug_general & 0x40) - kprintf("UNLOAD SPIKE %p %d\n", spike, error); - hammer_unlock(&ocluster->io.lock); -failed4: - hammer_btree_unlock_children(&locklist); - hammer_done_cursor(spike); - --hammer_count_spikes; - kfree(spike, M_HAMMER); - *spikep = NULL; - return (error); -} - - -#endif diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c index 9ade65ae93..674a7bfe90 100644 --- a/sys/vfs/hammer/hammer_vfsops.c +++ b/sys/vfs/hammer/hammer_vfsops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.18 2008/02/08 08:31:00 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.19 2008/02/10 09:51:01 dillon Exp $ */ #include @@ -175,6 +175,7 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, hmp->root_btree_end.delete_tid = 0; /* special case */ hmp->root_btree_end.rec_type = 0xFFFFU; hmp->root_btree_end.obj_type = 0; + lockinit(&hmp->blockmap_lock, "blkmap", 0, 0); } hmp->hflags = info.hflags; if (info.asof) { @@ -200,6 +201,7 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, RB_INIT(&hmp->rb_vols_root); RB_INIT(&hmp->rb_inos_root); + RB_INIT(&hmp->rb_nods_root); hmp->ronly = ((mp->mnt_flag & MNT_RDONLY) != 0); /* @@ -349,6 +351,7 @@ hammer_free_hmp(struct mount *mp) mp->mnt_flag &= ~MNT_LOCAL; hmp->mp = NULL; kfree(hmp->zbuf, M_HAMMER); + lockuninit(&hmp->blockmap_lock); kfree(hmp, M_HAMMER); } @@ -404,9 +407,6 @@ hammer_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) hammer_volume_ondisk_t ondisk; int error; int64_t bfree; - int32_t vol_no; - hammer_off_t fifo_beg; - 
hammer_off_t fifo_end; volume = hammer_get_root_volume(hmp, &error); if (error) @@ -417,9 +417,11 @@ hammer_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) * Basic stats */ mp->mnt_stat.f_files = ondisk->vol0_stat_inodes; + bfree = 0; + hammer_rel_volume(volume, 0); +#if 0 fifo_beg = ondisk->vol0_fifo_beg; fifo_end = ondisk->vol0_fifo_end; - hammer_rel_volume(volume, 0); /* * Calculate how many free blocks we have by counting the @@ -442,6 +444,7 @@ hammer_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) fifo_end = HAMMER_ENCODE_RAW_BUFFER(vol_no, 0); hammer_rel_volume(volume, 0); } +#endif mp->mnt_stat.f_bfree = bfree / HAMMER_BUFSIZE; mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree; if (mp->mnt_stat.f_files < 0) diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index 4d76beb100..50365fecb4 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.29 2008/02/08 08:31:00 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.30 2008/02/10 09:51:01 dillon Exp $ */ #include @@ -666,7 +666,7 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap) break; rec = cursor.record; if (nlen == rec->entry.base.data_len && - bcmp(ncp->nc_name, cursor.data1, nlen) == 0) { + bcmp(ncp->nc_name, cursor.data, nlen) == 0) { obj_id = rec->entry.obj_id; break; } @@ -1047,7 +1047,7 @@ hammer_vop_readdir(struct vop_readdir_args *ap) error = hammer_ip_first(&cursor, ip); while (error == 0) { - error = hammer_ip_resolve_data(&cursor); + error = hammer_ip_resolve_record_and_data(&cursor); if (error) break; rec = cursor.record; @@ -1061,7 +1061,7 @@ hammer_vop_readdir(struct vop_readdir_args *ap) &error, uio, rec->entry.obj_id, hammer_get_dtype(rec->entry.base.base.obj_type), rec->entry.base.data_len, - (void *)cursor.data1); + (void *)cursor.data); if (r) break; ++saveoff; @@ -1128,7 +1128,7 @@ hammer_vop_readlink(struct vop_readlink_args *ap) if (error == 0) { error = hammer_ip_resolve_data(&cursor); if (error == 0) { - error = uiomove((char *)cursor.data1, + error = uiomove((char *)cursor.data, cursor.record->base.data_len, ap->a_uio); } @@ -1237,7 +1237,7 @@ retry: break; rec = cursor.record; if (fncp->nc_nlen == rec->entry.base.data_len && - bcmp(fncp->nc_name, cursor.data1, fncp->nc_nlen) == 0) { + bcmp(fncp->nc_name, cursor.data, fncp->nc_nlen) == 0) { break; } error = hammer_ip_next(&cursor); @@ -1475,7 +1475,7 @@ hammer_vop_nsymlink(struct vop_nsymlink_args *ap) * as pure data, not a string, and is no \0 terminated. 
*/ if (error == 0) { - record = hammer_alloc_mem_record(nip, sizeof(struct hammer_base_record)); + record = hammer_alloc_mem_record(nip); bytes = strlen(ap->a_target); record->rec.base.base.key = HAMMER_FIXKEY_SYMLINK; @@ -1621,7 +1621,6 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap) int error; int boff; int roff; - int x; int n; bio = ap->a_bio; @@ -1695,25 +1694,8 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap) KKASSERT(n > 0); if (n > bp->b_bufsize - boff) n = bp->b_bufsize - boff; - if (roff + n > cursor.data_split) { - if (roff < cursor.data_split) { - x = cursor.data_split - roff; - bcopy((char *)cursor.data1 + roff, - (char *)bp->b_data + boff, - x); - bcopy((char *)cursor.data2, - (char *)bp->b_data + boff + x, - n - x); - } else { - bcopy((char *)cursor.data2 + roff - - cursor.data_split, - (char *)bp->b_data + boff, - n); - } - } else { - bcopy((char *)cursor.data1 + roff, - (char *)bp->b_data + boff, n); - } + bcopy((char *)cursor.data + roff, + (char *)bp->b_data + boff, n); boff += n; if (boff == bp->b_bufsize) break; @@ -1868,7 +1850,7 @@ retry: break; rec = cursor.record; if (ncp->nc_nlen == rec->entry.base.data_len && - bcmp(ncp->nc_name, cursor.data1, ncp->nc_nlen) == 0) { + bcmp(ncp->nc_name, cursor.data, ncp->nc_nlen) == 0) { break; } error = hammer_ip_next(&cursor); -- 2.41.0