From a89aec1b5ac590a302c603b53ffc16011b2500e9 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Tue, 20 Nov 2007 07:16:28 +0000 Subject: [PATCH] HAMMER 4/many - more core infrastructure * Add reserved areas for a boot area and a memory log. * Add merged scan operations which are the core procedures used to execute most filesystem operations. These functions will access both the in-memory tree of unsynchronized information and the on-disk topology to generate a 'merged' result. Among other things this allows the filesystem to hold operations in a memory cache without actually having to mess with the HAMMER topology on-disk. The on-disk topology is then updated in a deferred manner. Disk I/O is entirely avoided for self-contained operations which create, write, and delete related files quickly enough. * Add unmount sequencing, make mount and df work again. * Test the reference counting and flushing system on most primary structures. * Test basic buffer cache interactions, reading, writing, and lazy synchronization. * Start tying VNOPS into the infrastructure. open/close/read/write works now via the in-memory cache (none of it is synched to the disk topology yet!). readdir doesn't yet work... the 32 bit cookies are not large enough. 
--- sbin/hammer/hammer_util.h | 6 +- sbin/hammer/ondisk.c | 8 +- sbin/newfs_hammer/newfs_hammer.c | 81 ++++- sbin/newfs_hammer/newfs_hammer.h | 6 +- sbin/newfs_hammer/ondisk.c | 8 +- sys/vfs/hammer/hammer.h | 40 ++- sys/vfs/hammer/hammer_btree.c | 46 ++- sys/vfs/hammer/hammer_cursor.c | 17 +- sys/vfs/hammer/hammer_cursor.h | 18 +- sys/vfs/hammer/hammer_disk.h | 28 +- sys/vfs/hammer/hammer_inode.c | 75 ++-- sys/vfs/hammer/hammer_io.c | 22 +- sys/vfs/hammer/hammer_object.c | 510 +++++++++++++++++++++++++--- sys/vfs/hammer/hammer_ondisk.c | 69 +++- sys/vfs/hammer/hammer_subs.c | 3 +- sys/vfs/hammer/hammer_transaction.c | 46 ++- sys/vfs/hammer/hammer_vfsops.c | 24 +- sys/vfs/hammer/hammer_vnops.c | 141 +++++--- 18 files changed, 892 insertions(+), 256 deletions(-) diff --git a/sbin/hammer/hammer_util.h b/sbin/hammer/hammer_util.h index 1e96ede521..0299fc6672 100644 --- a/sbin/hammer/hammer_util.h +++ b/sbin/hammer/hammer_util.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sbin/hammer/hammer_util.h,v 1.2 2007/11/02 00:38:36 dillon Exp $ + * $DragonFly: src/sbin/hammer/hammer_util.h,v 1.3 2007/11/20 07:16:27 dillon Exp $ */ #include @@ -66,7 +66,7 @@ struct buffer_info; struct volume_info { struct volume_info *next; int vol_no; - int64_t vol_cluster_off; + int64_t vol_alloc; const char *name; int fd; @@ -130,7 +130,7 @@ extern struct hammer_alist_config Clu_master_alist_config; extern struct hammer_alist_config Clu_slave_alist_config; extern uuid_t Hammer_FSType; extern uuid_t Hammer_FSId; -extern int32_t ClusterSize; +extern int64_t ClusterSize; extern int UsingSuperClusters; extern int NumVolumes; extern struct volume_info *VolBase; diff --git a/sbin/hammer/ondisk.c b/sbin/hammer/ondisk.c index 78f79aece5..1384887946 100644 --- a/sbin/hammer/ondisk.c +++ b/sbin/hammer/ondisk.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sbin/hammer/ondisk.c,v 1.2 2007/11/02 00:38:36 dillon Exp $ + * $DragonFly: src/sbin/hammer/ondisk.c,v 1.3 2007/11/20 07:16:27 dillon Exp $ */ #include "newfs_hammer.h" @@ -118,7 +118,7 @@ get_supercl(struct volume_info *vol, int32_t scl_no) HAMMER_VOL_SUPERCLUSTER_GROUP) + ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP * ClusterSize * HAMMER_SCL_MAXCLUSTERS); - scl->scl_offset = vol->vol_cluster_off + + scl->scl_offset = vol->ondisk->vol_clo_beg + scl_group * scl_group_size + (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE; @@ -183,13 +183,13 @@ get_cluster(struct volume_info *vol, int32_t clu_no) scl_group_size += HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE; cl->clu_offset = - vol->vol_cluster_off + + vol->ondisk->vol_clo_beg + scl_group * scl_group_size + (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) + ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS * HAMMER_VOL_SUPERCLUSTER_GROUP)) * HAMMER_BUFSIZE; } else { - cl->clu_offset = vol->vol_cluster_off + + cl->clu_offset = vol->ondisk->vol_clo_beg + (int64_t)clu_no * ClusterSize; } } diff --git a/sbin/newfs_hammer/newfs_hammer.c b/sbin/newfs_hammer/newfs_hammer.c index 56102c82bd..6dbb1faf7c 100644 --- a/sbin/newfs_hammer/newfs_hammer.c +++ b/sbin/newfs_hammer/newfs_hammer.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sbin/newfs_hammer/newfs_hammer.c,v 1.6 2007/11/19 00:53:39 dillon Exp $ + * $DragonFly: src/sbin/newfs_hammer/newfs_hammer.c,v 1.7 2007/11/20 07:16:27 dillon Exp $ */ #include "newfs_hammer.h" @@ -52,7 +52,9 @@ struct hammer_alist_config Clu_master_alist_config; struct hammer_alist_config Clu_slave_alist_config; uuid_t Hammer_FSType; uuid_t Hammer_FSId; -int32_t ClusterSize; +int64_t ClusterSize; +int64_t BootAreaSize; +int64_t MemAreaSize; int UsingSuperClusters; int NumVolumes; struct volume_info *VolBase; @@ -112,16 +114,26 @@ main(int ac, char **av) /* * Parse arguments */ - while ((ch = getopt(ac, av, "L:c:S")) != -1) { + while ((ch = getopt(ac, av, "L:b:c:m:S")) != -1) { switch(ch) { case 'L': label = optarg; break; + case 'b': + BootAreaSize = getsize(optarg, + HAMMER_BUFSIZE, + HAMMER_BOOT_MAXBYTES, 2); + break; case 'c': ClusterSize = getsize(optarg, HAMMER_BUFSIZE * 256LL, HAMMER_CLU_MAXBYTES, 1); break; + case 'm': + MemAreaSize = getsize(optarg, + HAMMER_BUFSIZE, + HAMMER_MEM_MAXBYTES, 2); + break; case 'S': /* * Force the use of super-clusters @@ -179,6 +191,28 @@ main(int ac, char **av) } } + /* + * Calculate defaults for the boot and memory area sizes. + */ + if (BootAreaSize == 0) { + BootAreaSize = HAMMER_BOOT_NOMBYTES; + while (BootAreaSize > total / NumVolumes / 256) + BootAreaSize >>= 1; + if (BootAreaSize < HAMMER_BOOT_MINBYTES) + BootAreaSize = 0; + } else if (BootAreaSize < HAMMER_BOOT_MINBYTES) { + BootAreaSize = HAMMER_BOOT_MINBYTES; + } + if (MemAreaSize == 0) { + MemAreaSize = HAMMER_MEM_NOMBYTES; + while (MemAreaSize > total / NumVolumes / 256) + MemAreaSize >>= 1; + if (MemAreaSize < HAMMER_MEM_MINBYTES) + MemAreaSize = 0; + } else if (MemAreaSize < HAMMER_MEM_MINBYTES) { + MemAreaSize = HAMMER_MEM_MINBYTES; + } + printf("---------------------------------------------\n"); printf("%d volume%s total size %s\n", NumVolumes, (NumVolumes == 1 ? 
"" : "s"), sizetostr(total)); @@ -188,7 +222,7 @@ main(int ac, char **av) max_volume_size = (int64_t)HAMMER_VOL_MAXSUPERCLUSTERS * \ HAMMER_SCL_MAXCLUSTERS * ClusterSize; } else { - max_volume_size = (int64_t)HAMMER_VOL_MAXCLUSTERS * ClusterSize; + max_volume_size = HAMMER_VOL_MAXCLUSTERS * ClusterSize; } printf("max-volume-size: %s\n", sizetostr(max_volume_size)); @@ -196,6 +230,8 @@ main(int ac, char **av) (max_volume_size * 32768LL < max_volume_size) ? "Unlimited" : sizetostr(max_volume_size * 32768LL)); + printf("boot-area-size: %s\n", sizetostr(BootAreaSize)); + printf("memory-log-size: %s\n", sizetostr(MemAreaSize)); printf("\n"); /* @@ -287,10 +323,15 @@ getsize(const char *str, int64_t minval, int64_t maxval, int powerof2) str, sizetostr(maxval)); /* not reached */ } - if (powerof2 && (val ^ (val - 1)) != ((val << 1) - 1)) { + if ((powerof2 & 1) && (val ^ (val - 1)) != ((val << 1) - 1)) { errx(1, "Value not power of 2: %s\n", str); /* not reached */ } + if ((powerof2 & 2) && (val & HAMMER_BUFMASK)) { + errx(1, "Value not an integral multiple of %dK: %s", + HAMMER_BUFSIZE / 1024, str); + /* not reached */ + } return(val); } @@ -371,9 +412,9 @@ check_volume(struct volume_info *vol) } /* - * Reserve space for (future) boot junk + * Reserve space for (future) header junk */ - vol->vol_cluster_off = HAMMER_BUFSIZE * 16; + vol->vol_alloc = HAMMER_BUFSIZE * 16; } /* @@ -395,7 +436,7 @@ format_volume(struct volume_info *vol, int nvols, const char *label) * The last cluster in a volume may wind up truncated. It must be * at least minclsize to really be workable as a cluster. 
*/ - minclsize = ClusterSize / 4; + minclsize = (int32_t)(ClusterSize / 4); if (minclsize < HAMMER_BUFSIZE * 64) minclsize = HAMMER_BUFSIZE * 64; @@ -410,14 +451,18 @@ format_volume(struct volume_info *vol, int nvols, const char *label) ondisk->vol_no = vol->vol_no; ondisk->vol_count = nvols; ondisk->vol_version = 1; - ondisk->vol_clsize = ClusterSize; + ondisk->vol_clsize = (int32_t)ClusterSize; if (UsingSuperClusters) ondisk->vol_flags = HAMMER_VOLF_USINGSUPERCL; - ondisk->vol_beg = vol->vol_cluster_off; - ondisk->vol_end = vol->size; + ondisk->vol_bot_beg = vol->vol_alloc; + vol->vol_alloc += BootAreaSize; + ondisk->vol_mem_beg = vol->vol_alloc; + vol->vol_alloc += MemAreaSize; + ondisk->vol_clo_beg = vol->vol_alloc; + ondisk->vol_clo_end = vol->size; - if (ondisk->vol_end < ondisk->vol_beg) { + if (ondisk->vol_clo_end < ondisk->vol_clo_beg) { errx(1, "volume %d %s is too small to hold the volume header", vol->vol_no, vol->name); } @@ -448,7 +493,7 @@ format_volume(struct volume_info *vol, int nvols, const char *label) scl_group_size = scl_header_size + (int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP * ClusterSize * HAMMER_SCL_MAXCLUSTERS; - nscl_groups = (ondisk->vol_end - ondisk->vol_beg) / + nscl_groups = (ondisk->vol_clo_end - ondisk->vol_clo_beg) / scl_group_size; nclusters = nscl_groups * HAMMER_SCL_MAXCLUSTERS * HAMMER_VOL_SUPERCLUSTER_GROUP; @@ -457,7 +502,7 @@ format_volume(struct volume_info *vol, int nvols, const char *label) * Figure out how much space we have left and calculate the * remaining number of clusters. 
*/ - n64 = (ondisk->vol_end - ondisk->vol_beg) - + n64 = (ondisk->vol_clo_end - ondisk->vol_clo_beg) - (nscl_groups * scl_group_size); if (n64 > scl_header_size) { nclusters += (n64 + minclsize) / ClusterSize; @@ -466,11 +511,11 @@ format_volume(struct volume_info *vol, int nvols, const char *label) nclusters, nscl_groups); hammer_alist_free(&vol->clu_alist, 0, nclusters); } else { - nclusters = (ondisk->vol_end - ondisk->vol_beg + minclsize) / - ClusterSize; + nclusters = (ondisk->vol_clo_end - ondisk->vol_clo_beg + + minclsize) / ClusterSize; if (nclusters > HAMMER_VOL_MAXCLUSTERS) { errx(1, "Volume is too large, max %s\n", - sizetostr((int64_t)nclusters * ClusterSize)); + sizetostr(nclusters * ClusterSize)); } hammer_alist_free(&vol->clu_alist, 0, nclusters); } @@ -524,7 +569,7 @@ format_cluster(struct volume_info *vol, int isroot) ondisk->clu_flags = 0; ondisk->clu_start = HAMMER_BUFSIZE; if (vol->size - cluster->clu_offset > ClusterSize) - ondisk->clu_limit = ClusterSize; + ondisk->clu_limit = (u_int32_t)ClusterSize; else ondisk->clu_limit = (u_int32_t)(vol->size - cluster->clu_offset); diff --git a/sbin/newfs_hammer/newfs_hammer.h b/sbin/newfs_hammer/newfs_hammer.h index b56070c5fe..85c48b78f1 100644 --- a/sbin/newfs_hammer/newfs_hammer.h +++ b/sbin/newfs_hammer/newfs_hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sbin/newfs_hammer/newfs_hammer.h,v 1.2 2007/11/02 00:38:36 dillon Exp $ + * $DragonFly: src/sbin/newfs_hammer/newfs_hammer.h,v 1.3 2007/11/20 07:16:27 dillon Exp $ */ #include @@ -66,7 +66,7 @@ struct buffer_info; struct volume_info { struct volume_info *next; int vol_no; - int64_t vol_cluster_off; + int64_t vol_alloc; const char *name; int fd; @@ -130,7 +130,7 @@ extern struct hammer_alist_config Clu_master_alist_config; extern struct hammer_alist_config Clu_slave_alist_config; extern uuid_t Hammer_FSType; extern uuid_t Hammer_FSId; -extern int32_t ClusterSize; +extern int64_t ClusterSize; extern int UsingSuperClusters; extern int NumVolumes; extern struct volume_info *VolBase; diff --git a/sbin/newfs_hammer/ondisk.c b/sbin/newfs_hammer/ondisk.c index 5375687cca..80fdbc84b5 100644 --- a/sbin/newfs_hammer/ondisk.c +++ b/sbin/newfs_hammer/ondisk.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sbin/newfs_hammer/Attic/ondisk.c,v 1.2 2007/11/02 00:38:36 dillon Exp $ + * $DragonFly: src/sbin/newfs_hammer/Attic/ondisk.c,v 1.3 2007/11/20 07:16:27 dillon Exp $ */ #include "newfs_hammer.h" @@ -118,7 +118,7 @@ get_supercl(struct volume_info *vol, int32_t scl_no) HAMMER_VOL_SUPERCLUSTER_GROUP) + ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP * ClusterSize * HAMMER_SCL_MAXCLUSTERS); - scl->scl_offset = vol->vol_cluster_off + + scl->scl_offset = vol->ondisk->vol_clo_beg + scl_group * scl_group_size + (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE; @@ -183,13 +183,13 @@ get_cluster(struct volume_info *vol, int32_t clu_no) scl_group_size += HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE; cl->clu_offset = - vol->vol_cluster_off + + vol->ondisk->vol_clo_beg + scl_group * scl_group_size + (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) + ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS * HAMMER_VOL_SUPERCLUSTER_GROUP)) * HAMMER_BUFSIZE; } else { - cl->clu_offset = 
vol->vol_cluster_off + + cl->clu_offset = vol->ondisk->vol_clo_beg + (int64_t)clu_no * ClusterSize; } } diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index 2fffec3582..4c9345887a 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.6 2007/11/19 00:53:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.7 2007/11/20 07:16:28 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS @@ -78,6 +78,7 @@ typedef struct hammer_inode_info { struct hammer_transaction { struct hammer_mount *hmp; hammer_tid_t tid; + struct hammer_volume *rootvol; }; typedef struct hammer_transaction *hammer_transaction_t; @@ -158,6 +159,7 @@ typedef struct hammer_inode *hammer_inode_t; #define HAMMER_INODE_RDIRTY 0x0002 /* in-memory ino_rec is dirty */ #define HAMMER_INODE_ITIMES 0x0004 /* in-memory mtime/atime modified */ #define HAMMER_INODE_ONDISK 0x0010 /* inode is on-disk (else not yet) */ +#define HAMMER_INODE_FLUSH 0x0020 /* flush on last ref */ #define HAMMER_MAX_INODE_CURSORS 4 @@ -173,7 +175,7 @@ typedef struct hammer_inode *hammer_inode_t; */ struct hammer_record { RB_ENTRY(hammer_record) rb_node; - hammer_tid_t last_tid; + struct hammer_lock lock; struct hammer_inode *ip; union hammer_record_ondisk rec; union hammer_data_ondisk *data; @@ -185,7 +187,7 @@ typedef struct hammer_record *hammer_record_t; #define HAMMER_RECF_ALLOCDATA 0x0001 #define HAMMER_RECF_ONRBTREE 0x0002 -#define HAMMER_RECF_DELETION 0x0004 /* placemark a deletion */ +#define HAMMER_RECF_DELETED 0x0004 /* * Structures used to internally represent a volume and a cluster @@ -373,8 +375,6 @@ struct hammer_mount { uuid_t fsid; udev_t fsid_udev; u_int32_t namekey_iterator; - hammer_tid_t last_tid; /* tid for transaction id */ - hammer_tid_t last_ino; /* inode creation iterator */ }; typedef 
struct hammer_mount *hammer_mount_t; @@ -405,17 +405,20 @@ void hammer_put_inode_ref(struct hammer_inode *ip); int hammer_unload_inode(hammer_inode_t ip, void *data __unused); int hammer_unload_volume(hammer_volume_t volume, void *data __unused); +int hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused); +int hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused); +int hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused); int hammer_install_volume(hammer_mount_t hmp, const char *volname); -hammer_record_ondisk_t - hammer_ip_first(hammer_cursor_t cursor, hammer_inode_t ip); -hammer_record_ondisk_t - hammer_ip_next(hammer_cursor_t cursor); +int hammer_ip_lookup(hammer_cursor_t cursor, hammer_inode_t ip); +int hammer_ip_first(hammer_cursor_t cursor, hammer_inode_t ip); +int hammer_ip_next(hammer_cursor_t cursor); int hammer_ip_resolve_data(hammer_cursor_t cursor); hammer_record_t - hammer_alloc_ip_record(struct hammer_transaction *trans, + hammer_alloc_mem_record(struct hammer_transaction *trans, hammer_inode_t ip); -void hammer_free_ip_record(hammer_record_t record); +void hammer_rel_mem_record(struct hammer_record **recordp); +void hammer_free_mem_record(hammer_record_t record); int hammer_cursor_up(hammer_cursor_t cursor); int hammer_cursor_toroot(hammer_cursor_t cursor); @@ -433,6 +436,8 @@ u_int32_t hammer_to_unix_xid(uuid_t *uuid); void hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid); void hammer_to_timespec(hammer_tid_t tid, struct timespec *ts); hammer_tid_t hammer_timespec_to_transid(struct timespec *ts); +hammer_tid_t hammer_alloc_tid(hammer_transaction_t trans); +hammer_tid_t hammer_alloc_recid(hammer_transaction_t trans); enum vtype hammer_get_vnode_type(u_int8_t obj_type); u_int8_t hammer_get_obj_type(enum vtype vtype); @@ -484,7 +489,6 @@ void hammer_cache_node(hammer_node_t node, void hammer_uncache_node(struct hammer_node **cache); void hammer_flush_node(hammer_node_t node); -struct hammer_cluster 
*hammer_get_rootcl(struct hammer_mount *hmp); void hammer_dup_buffer(struct hammer_buffer **bufferp, struct hammer_buffer *buffer); void hammer_dup_cluster(struct hammer_cluster **clusterp, @@ -522,17 +526,17 @@ void hammer_modify_inode(struct hammer_transaction *trans, int hammer_create_inode(struct hammer_transaction *trans, struct vattr *vap, struct ucred *cred, struct hammer_inode *dip, struct hammer_inode **ipp); -void hammer_rel_inode(hammer_inode_t ip); +void hammer_rel_inode(hammer_inode_t ip, int flush); -int hammer_add_directory(struct hammer_transaction *trans, +int hammer_ip_add_directory(struct hammer_transaction *trans, hammer_inode_t dip, struct namecache *ncp, hammer_inode_t nip); -int hammer_del_directory(struct hammer_transaction *trans, +int hammer_ip_del_directory(struct hammer_transaction *trans, hammer_cursor_t cursor, hammer_inode_t dip, hammer_inode_t ip); -int hammer_delete_range(struct hammer_transaction *trans, - hammer_inode_t ip, int64_t off_beg, int64_t off_end); -int hammer_add_data(struct hammer_transaction *trans, +int hammer_ip_delete_range(struct hammer_transaction *trans, + hammer_inode_t ip, int64_t ran_beg, int64_t ran_end); +int hammer_ip_add_data(struct hammer_transaction *trans, hammer_inode_t ip, int64_t offset, void *data, int bytes); diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c index b7ad8b7272..d3d2d987ee 100644 --- a/sys/vfs/hammer/hammer_btree.c +++ b/sys/vfs/hammer/hammer_btree.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.4 2007/11/19 00:53:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.5 2007/11/20 07:16:28 dillon Exp $ */ /* @@ -127,10 +127,8 @@ hammer_btree_iterate(hammer_cursor_t cursor) * Skip past the current record */ node = cursor->node->ondisk; - if (node == NULL) { - KKASSERT(cursor->last_error != 0); - return(cursor->last_error); - } + if (node == NULL) + return(ENOENT); if (cursor->index < node->count) ++cursor->index; @@ -178,6 +176,7 @@ hammer_btree_iterate(hammer_cursor_t cursor) * history. */ if (node->type == HAMMER_BTREE_TYPE_INTERNAL) { + KKASSERT(node->count != 0); elm = &node->elms[cursor->index]; if (elm[0].base.obj_id == elm[1].base.obj_id && elm[0].base.rec_type == elm[1].base.rec_type && @@ -213,7 +212,6 @@ hammer_btree_iterate(hammer_cursor_t cursor) if (error) break; KKASSERT(cursor->index == 0); - KKASSERT(cursor->index != node->count); node = cursor->node->ondisk; continue; } @@ -244,7 +242,6 @@ hammer_btree_iterate(hammer_cursor_t cursor) error = (r < 0) ? ENOENT : 0; break; } - cursor->last_error = error; return(error); } @@ -300,7 +297,7 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags) cluster = cursor->node->cluster; error = 0; - if ((flags & HAMMER_BTREE_GET_RECORD) && error == 0) { + if ((flags & HAMMER_CURSOR_GET_RECORD) && error == 0) { cloff = elm->leaf.rec_offset; cursor->record = hammer_bread(cluster, cloff, HAMMER_FSBUF_RECORDS, &error, @@ -308,7 +305,7 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags) } else { cloff = 0; } - if ((flags & HAMMER_BTREE_GET_DATA) && error == 0) { + if ((flags & HAMMER_CURSOR_GET_DATA) && error == 0) { if ((cloff ^ elm->leaf.data_offset) & ~HAMMER_BUFMASK) { /* * Data in different buffer than record @@ -339,7 +336,7 @@ hammer_btree_extract(hammer_cursor_t cursor, int flags) * The cursor is positioned such that the element at and beyond the cursor * are shifted to make room for the new record. 
* - * The caller must call hammer_btree_lookup() with the HAMMER_BTREE_INSERT + * The caller must call hammer_btree_lookup() with the HAMMER_CURSOR_INSERT * flag set and that call must return ENOENT before this function can be * called. * @@ -361,7 +358,7 @@ hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm) * The search also does some setup for our insert, so there is always * room in the leaf. */ - error = btree_search(cursor, HAMMER_BTREE_INSERT); + error = btree_search(cursor, HAMMER_CURSOR_INSERT); if (error != ENOENT) { if (error == 0) error = EEXIST; @@ -404,7 +401,7 @@ hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm) * The cursor is positioned such that the current element is the one * to be deleted. * - * The caller must call hammer_btree_lookup() with the HAMMER_BTREE_DELETE + * The caller must call hammer_btree_lookup() with the HAMMER_CURSOR_DELETE * flag set and that call must return 0 before this function can be * called. * @@ -429,7 +426,7 @@ hammer_btree_delete(hammer_cursor_t cursor) * Locate the leaf element to delete. The search is also responsible * for doing some of the rebalancing work on its way down. */ - error = btree_search(cursor, HAMMER_BTREE_DELETE); + error = btree_search(cursor, HAMMER_CURSOR_DELETE); if (error) return (error); #endif @@ -574,7 +571,7 @@ btree_search(hammer_cursor_t cursor, int flags) * XXX as an optimization it should be possible to unbalance the tree * and stop at the root of the current cluster. */ - while (flags & HAMMER_BTREE_INSERT) { + while (flags & HAMMER_CURSOR_INSERT) { if (btree_node_is_full(cursor->node->ondisk) == 0) break; if (cursor->parent == NULL) @@ -600,8 +597,10 @@ btree_search(hammer_cursor_t cursor, int flags) * elements. * * NOTE: These cursor-up's CAN continue to cross cluster boundaries. + * + * XXX NOTE: Iterations may not set this flag anyway. 
*/ - while (flags & HAMMER_BTREE_DELETE) { + while (flags & HAMMER_CURSOR_DELETE) { if (cursor->node->ondisk->count > 1) break; if (cursor->parent == NULL) @@ -626,8 +625,10 @@ btree_search(hammer_cursor_t cursor, int flags) * If we are a the root node and deleting, try to collapse * all of the root's children into the root. This is the * only point where tree depth is reduced. + * + * XXX NOTE: Iterations may not set this flag anyway. */ - if ((flags & HAMMER_BTREE_DELETE) && cursor->parent == NULL) { + if ((flags & HAMMER_CURSOR_DELETE) && cursor->parent == NULL) { error = btree_collapse(cursor); /* node becomes stale after call */ if (error) @@ -663,7 +664,7 @@ btree_search(hammer_cursor_t cursor, int flags) * adjust cursor->node and cursor->index if the current * index winds up in the new node. */ - if (flags & HAMMER_BTREE_INSERT) { + if (flags & HAMMER_CURSOR_INSERT) { if (node->count == HAMMER_BTREE_INT_ELMS) { error = btree_split_internal(cursor); if (error) @@ -690,8 +691,10 @@ btree_search(hammer_cursor_t cursor, int flags) * * XXX test for subtree_count < maxelms / 2, minus 1 or 2 * for hysteresis? + * + * XXX NOTE: Iterations may not set this flag anyway. */ - if (flags & HAMMER_BTREE_DELETE) { + if (flags & HAMMER_CURSOR_DELETE) { if (node->elms[i].internal.subtree_count <= 1) { error = btree_rebalance(cursor); if (error) @@ -753,7 +756,7 @@ btree_search(hammer_cursor_t cursor, int flags) * cursor->index. */ cursor->index = i; - if ((flags & HAMMER_BTREE_INSERT) && + if ((flags & HAMMER_CURSOR_INSERT) && node->count == HAMMER_BTREE_LEAF_ELMS) { error = btree_split_leaf(cursor); /* NOT USED @@ -764,12 +767,7 @@ btree_search(hammer_cursor_t cursor, int flags) goto done; } error = ENOENT; - - /* - * Set the cursor's last_error. 
- */ done: - cursor->last_error = error; return(error); } diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c index 861be29fe2..4baf855a17 100644 --- a/sys/vfs/hammer/hammer_cursor.c +++ b/sys/vfs/hammer/hammer_cursor.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.1 2007/11/19 00:53:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.2 2007/11/20 07:16:28 dillon Exp $ */ /* @@ -77,18 +77,20 @@ hammer_init_cursor_hmp(hammer_cursor_t cursor, hammer_mount_t hmp) int hammer_init_cursor_ip(hammer_cursor_t cursor, hammer_inode_t ip) { + hammer_node_t node; int error; if (ip->cache) { bzero(cursor, sizeof(*cursor)); - cursor->node = ip->cache; - error = hammer_ref_node(cursor->node); + node = ip->cache; + error = hammer_ref_node(node); if (error == 0) { - hammer_lock_ex(&cursor->node->lock); + hammer_lock_ex(&node->lock); + cursor->node = node; error = hammer_load_cursor_parent(cursor); } else { - hammer_rel_node(cursor->node); - cursor->node = NULL; + node = NULL; + cursor->node = node; } } else { error = hammer_init_cursor_hmp(cursor, ip->hmp); @@ -121,6 +123,9 @@ hammer_done_cursor(hammer_cursor_t cursor) hammer_rel_buffer(cursor->record_buffer, 0); cursor->record_buffer = NULL; } + if (cursor->iprec) + hammer_rel_mem_record(&cursor->iprec); + cursor->data = NULL; cursor->record = NULL; cursor->left_bound = NULL; diff --git a/sys/vfs/hammer/hammer_cursor.h b/sys/vfs/hammer/hammer_cursor.h index db84c61f7a..700725d6ef 100644 --- a/sys/vfs/hammer/hammer_cursor.h +++ b/sys/vfs/hammer/hammer_cursor.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.1 2007/11/19 00:53:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.2 2007/11/20 07:16:28 dillon Exp $ */ /* @@ -87,7 +87,6 @@ struct hammer_cursor { * Iteration and extraction control variables */ int flags; - int last_error; /* * Merged in-memory/on-disk iterations also use these fields. @@ -98,9 +97,14 @@ struct hammer_cursor { typedef struct hammer_cursor *hammer_cursor_t; -#define HAMMER_BTREE_GET_RECORD 0x0001 -#define HAMMER_BTREE_GET_DATA 0x0002 -#define HAMMER_BTREE_CLUSTER_TAG 0x0004 /* stop at the cluster tag */ -#define HAMMER_BTREE_INSERT 0x0008 /* adjust for insert */ -#define HAMMER_BTREE_DELETE 0x0010 /* adjust for delete */ +#define HAMMER_CURSOR_GET_RECORD 0x0001 +#define HAMMER_CURSOR_GET_DATA 0x0002 +#define HAMMER_CURSOR_CLUSTER_TAG 0x0004 /* stop at the cluster tag */ +#define HAMMER_CURSOR_INSERT 0x0008 /* adjust for insert */ +#define HAMMER_CURSOR_DELETE 0x0010 /* adjust for delete */ + +#define HAMMER_CURSOR_ATEDISK 0x0100 +#define HAMMER_CURSOR_ATEMEM 0x0200 +#define HAMMER_CURSOR_DISKEOF 0x0400 +#define HAMMER_CURSOR_MEMEOF 0x0800 diff --git a/sys/vfs/hammer/hammer_disk.h b/sys/vfs/hammer/hammer_disk.h index 58a5c6b07c..54a11c4a06 100644 --- a/sys/vfs/hammer/hammer_disk.h +++ b/sys/vfs/hammer/hammer_disk.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.6 2007/11/19 00:53:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.7 2007/11/20 07:16:28 dillon Exp $ */ #ifndef _SYS_UUID_H_ @@ -142,6 +142,18 @@ typedef struct hammer_fsbuf_head *hammer_fsbuf_head_t; * * NOTE: A 32768-element single-layer and 16384-element duel-layer A-list * is the same size. 
+ * + * Special field notes: + * + * vol_bot_beg - offset of boot area (mem_beg - bot_beg bytes) + * vol_mem_beg - offset of memory log (clu_beg - mem_beg bytes) + * vol_clo_beg - offset of cluster #0 in volume + * + * The memory log area allows a kernel to cache new records and data + * in memory without allocating space in the actual filesystem to hold + * the records and data. In the event that a filesystem becomes full, + * any records remaining in memory can be flushed to the memory log + * area. This allows the kernel to immediately return success. */ #define HAMMER_VOL_MAXCLUSTERS 32768 /* 1-layer */ #define HAMMER_VOL_MAXSUPERCLUSTERS 16384 /* 2-layer */ @@ -149,10 +161,20 @@ typedef struct hammer_fsbuf_head *hammer_fsbuf_head_t; #define HAMMER_VOL_METAELMS_1LYR HAMMER_ALIST_METAELMS_32K_1LYR #define HAMMER_VOL_METAELMS_2LYR HAMMER_ALIST_METAELMS_16K_2LYR +#define HAMMER_BOOT_MINBYTES (32*1024) +#define HAMMER_BOOT_NOMBYTES (64LL*1024*1024) +#define HAMMER_BOOT_MAXBYTES (256LL*1024*1024) + +#define HAMMER_MEM_MINBYTES (256*1024) +#define HAMMER_MEM_NOMBYTES (1LL*1024*1024*1024) +#define HAMMER_MEM_MAXBYTES (64LL*1024*1024*1024) + struct hammer_volume_ondisk { struct hammer_fsbuf_head head; - int64_t vol_beg; /* byte offset of first cl/supercl in volume */ - int64_t vol_end; /* byte offset of volume EOF */ + int64_t vol_bot_beg; /* byte offset of boot area or 0 */ + int64_t vol_mem_beg; /* byte offset of memory log or 0 */ + int64_t vol_clo_beg; /* byte offset of first cl/supercl in volume */ + int64_t vol_clo_end; /* byte offset of volume EOF */ int64_t vol_locked; /* reserved clusters are >= this offset */ uuid_t vol_fsid; /* identify filesystem */ diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index 0ac3bd062e..d8d9c941ce 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.4 2007/11/19 00:53:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.5 2007/11/20 07:16:28 dillon Exp $ */ #include "hammer.h" @@ -55,8 +55,11 @@ hammer_vop_reclaim(struct vop_reclaim_args *ap) struct vnode *vp; vp = ap->a_vp; - if ((ip = vp->v_data) != NULL) - hammer_unload_inode(ip, NULL); + if ((ip = vp->v_data) != NULL) { + vp->v_data = NULL; + ip->vp = NULL; + hammer_rel_inode(ip, 1); + } return(0); } @@ -82,7 +85,7 @@ hammer_vfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) return(error); } error = hammer_get_vnode(ip, LK_EXCLUSIVE, vpp); - hammer_rel_inode(ip); + hammer_rel_inode(ip, 0); return (error); } @@ -118,6 +121,8 @@ hammer_get_vnode(struct hammer_inode *ip, int lktype, struct vnode **vpp) /* vnode locked by getnewvnode() */ /* make related vnode dirty if inode dirty? */ hammer_unlock(&ip->lock); + if (vp->v_type == VREG) + vinitvmio(vp, ip->ino_rec.ino_size); break; } @@ -131,6 +136,7 @@ hammer_get_vnode(struct hammer_inode *ip, int lktype, struct vnode **vpp) vput(vp); } } + *vpp = vp; return(error); } @@ -179,7 +185,7 @@ loop: cursor.key_beg.delete_tid = 0; cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE; cursor.key_beg.obj_type = 0; - cursor.flags = HAMMER_BTREE_GET_RECORD | HAMMER_BTREE_GET_DATA; + cursor.flags = HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_GET_DATA; *errorp = hammer_btree_lookup(&cursor); @@ -223,16 +229,17 @@ loop: * disk. 
*/ int -hammer_create_inode(struct hammer_transaction *trans, struct vattr *vap, - struct ucred *cred, struct hammer_inode *dip, +hammer_create_inode(hammer_transaction_t trans, struct vattr *vap, + struct ucred *cred, hammer_inode_t dip, struct hammer_inode **ipp) { - struct hammer_mount *hmp; - struct hammer_inode *ip; + hammer_mount_t hmp; + hammer_inode_t ip; hmp = trans->hmp; ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO); - ip->obj_id = ++hmp->last_ino; + ip->obj_id = hammer_alloc_tid(trans); + kprintf("object id %llx\n", ip->obj_id); KKASSERT(ip->obj_id != 0); ip->obj_asof = HAMMER_MAX_TID; /* XXX */ ip->hmp = hmp; @@ -247,8 +254,8 @@ hammer_create_inode(struct hammer_transaction *trans, struct vattr *vap, ip->ino_rec.ino_size = 0; ip->ino_rec.ino_nlinks = 0; /* XXX */ - ip->ino_rec.base.rec_id = ++hmp->rootvol->ondisk->vol0_recid; - hammer_modify_volume(hmp->rootvol); + kprintf("rootvol %p ondisk %p\n", hmp->rootvol, hmp->rootvol->ondisk); + ip->ino_rec.base.rec_id = hammer_alloc_recid(trans); KKASSERT(ip->ino_rec.base.rec_id != 0); ip->ino_rec.base.base.obj_id = ip->obj_id; ip->ino_rec.base.base.key = 0; @@ -273,17 +280,23 @@ hammer_create_inode(struct hammer_transaction *trans, struct vattr *vap, hammer_ref(&ip->lock); if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) { hammer_unref(&ip->lock); - panic("hammer_create_inode: duplicate obj_id"); + panic("hammer_create_inode: duplicate obj_id %llx", ip->obj_id); } *ipp = ip; return(0); } +/* + * Release a reference on an inode and unload it if told to flush + */ void -hammer_rel_inode(struct hammer_inode *ip) +hammer_rel_inode(struct hammer_inode *ip, int flush) { - /* XXX check last ref */ hammer_unref(&ip->lock); + if (flush) + ip->flags |= HAMMER_INODE_FLUSH; + if (ip->lock.refs == 0 && (ip->flags & HAMMER_INODE_FLUSH)) + hammer_unload_inode(ip, NULL); } /* @@ -294,7 +307,8 @@ hammer_rel_inode(struct hammer_inode *ip) int hammer_unload_inode(struct hammer_inode *ip, void *data __unused) { - 
KKASSERT(ip->lock.refs == 0); + KASSERT(ip->lock.refs == 0, + ("hammer_unload_inode: %d refs\n", ip->lock.refs)); KKASSERT(ip->vp == NULL); hammer_ref(&ip->lock); RB_REMOVE(hammer_ino_rb_tree, &ip->hmp->rb_inos_root, ip); @@ -318,35 +332,6 @@ hammer_modify_inode(struct hammer_transaction *trans, ip->last_tid = trans->tid; } -/************************************************************************ - * HAMMER INODE MERGED-RECORD FUNCTIONS * - ************************************************************************ - * - * These functions augment the B-Tree scanning functions in hammer_btree.c - * by merging in-memory records with on-disk records. - */ - -hammer_record_ondisk_t -hammer_ip_first(hammer_cursor_t cursor, struct hammer_inode *ip) -{ - KKASSERT(0); - return(NULL); -} - -hammer_record_ondisk_t -hammer_ip_next(hammer_cursor_t cursor) -{ - KKASSERT(0); - return(NULL); -} - -int -hammer_ip_resolve_data(hammer_cursor_t cursor) -{ - KKASSERT(0); - return(NULL); -} - /* * Access the filesystem buffer containing the cluster-relative byte * offset, validate the buffer type, load *bufferp and return a diff --git a/sys/vfs/hammer/hammer_io.c b/sys/vfs/hammer/hammer_io.c index 5addc7f4a6..8f01efb887 100644 --- a/sys/vfs/hammer/hammer_io.c +++ b/sys/vfs/hammer/hammer_io.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.2 2007/11/19 00:53:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.3 2007/11/20 07:16:28 dillon Exp $ */ /* * IO Primitives and buffer cache management @@ -286,10 +286,12 @@ hammer_io_deallocate(struct buf *bp) /* * First, ref the structure to prevent either the buffer or the - * structure from going away. + * structure from going away or being unexpectedly flushed. 
*/ hammer_ref(&io->io.lock); + kprintf("iodeallocate bp %p\n", bp); + /* * Buffers can have active references from cached hammer_node's, * even if those nodes are themselves passively cached. Attempt @@ -310,7 +312,13 @@ hammer_io_deallocate(struct buf *bp) KKASSERT(io->io.released); hammer_io_disassociate(io); bp->b_flags &= ~B_LOCKED; + kprintf("iodeallocate bp %p - unlocked and dissed\n", bp); + /* + * Perform final rights on the structure. This can cause + * a chain reaction - e.g. last buffer -> last cluster -> + * last supercluster -> last volume. + */ switch(io->io.type) { case HAMMER_STRUCTURE_VOLUME: hammer_rel_volume(&io->volume, 1); @@ -325,14 +333,18 @@ hammer_io_deallocate(struct buf *bp) hammer_rel_buffer(&io->buffer, 1); break; } - /* NOTE: io may be invalid (kfree'd) here */ } else { /* - * Otherwise tell the kernel not to destroy the buffer + * Otherwise tell the kernel not to destroy the buffer. + * + * We have to unref the structure without performing any + * final rights to it to avoid a deadlock. */ bp->b_flags |= B_LOCKED; - hammer_unlock(&io->io.lock); + hammer_unref(&io->io.lock); + kprintf("iodeallocate bp %p - locked\n", bp); } + crit_exit(); } diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c index 6f8e303dbf..d9a619d747 100644 --- a/sys/vfs/hammer/hammer_object.c +++ b/sys/vfs/hammer/hammer_object.c @@ -31,19 +31,20 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.2 2007/11/19 00:53:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.3 2007/11/20 07:16:28 dillon Exp $ */ #include "hammer.h" -static int hammer_add_record(hammer_transaction_t trans, +static int hammer_mem_add(hammer_transaction_t trans, hammer_record_t record); +static int hammer_mem_search(hammer_cursor_t cursor, hammer_inode_t ip); /* * Red-black tree support. 
*/ static int -hammer_rec_rb_compare(struct hammer_record *rec1, struct hammer_record *rec2) +hammer_rec_rb_compare(hammer_record_t rec1, hammer_record_t rec2) { if (rec1->rec.base.base.rec_type < rec2->rec.base.base.rec_type) return(-1); @@ -63,7 +64,7 @@ hammer_rec_rb_compare(struct hammer_record *rec1, struct hammer_record *rec2) } static int -hammer_rec_compare(struct hammer_base_elm *info, struct hammer_record *rec) +hammer_rec_compare(hammer_base_elm_t info, hammer_record_t rec) { /* * A key1->rec_type of 0 matches any record type. @@ -110,6 +111,98 @@ RB_GENERATE(hammer_rec_rb_tree, hammer_record, rb_node, hammer_rec_rb_compare); RB_GENERATE_XLOOKUP(hammer_rec_rb_tree, INFO, hammer_record, rb_node, hammer_rec_compare, hammer_base_elm_t); +/* + * Allocate a record for the caller to finish filling in + */ +hammer_record_t +hammer_alloc_mem_record(struct hammer_transaction *trans, hammer_inode_t ip) +{ + hammer_record_t record; + + record = kmalloc(sizeof(*record), M_HAMMER, M_WAITOK|M_ZERO); + record->ip = ip; + return (record); +} + +/* + * Release a memory record. If the record is marked for defered deletion, + * destroy the record when the last reference goes away. + */ +void +hammer_rel_mem_record(struct hammer_record **recordp) +{ + hammer_record_t rec; + + if ((rec = *recordp) != NULL) { + if (hammer_islastref(&rec->lock)) { + hammer_unref(&rec->lock); + if (rec->flags & HAMMER_RECF_DELETED) + hammer_free_mem_record(rec); + } else { + hammer_unref(&rec->lock); + } + *recordp = NULL; + } +} + +/* + * Free a record. Clean the structure up even though we are throwing it + * away as a sanity check. The actual free operation is delayed while + * the record is referenced. 
+ */ +void +hammer_free_mem_record(hammer_record_t record) +{ + if (record->lock.refs) { + record->flags |= HAMMER_RECF_DELETED; + return; + } + + if (record->flags & HAMMER_RECF_ONRBTREE) { + RB_REMOVE(hammer_rec_rb_tree, &record->ip->rec_tree, record); + record->flags &= ~HAMMER_RECF_ONRBTREE; + } + if (record->flags & HAMMER_RECF_ALLOCDATA) { + kfree(record->data, M_HAMMER); + record->flags &= ~HAMMER_RECF_ALLOCDATA; + } + record->data = NULL; + kfree(record, M_HAMMER); +} + +/* + * Lookup an in-memory record given the key specified in the cursor. Works + * just like hammer_btree_lookup() but operates on an inode's in-memory + * record list. + */ +static +int +hammer_mem_search(hammer_cursor_t cursor, hammer_inode_t ip) +{ + int error; + + if (cursor->iprec) + hammer_rel_mem_record(&cursor->iprec); + cursor->ip = ip; + cursor->iprec = hammer_rec_rb_tree_RB_LOOKUP_INFO( + &ip->rec_tree, &cursor->key_beg); + if (cursor->iprec == NULL) { + error = ENOENT; + } else { + hammer_ref(&cursor->iprec->lock); + error = 0; + } + return(error); +} + +/************************************************************************ + * HAMMER IN-MEMORY RECORD FUNCTIONS * + ************************************************************************ + * + * These functions manipulate in-memory records. Such records typically + * exist prior to being committed to disk or indexed via the on-disk B-Tree. + */ + /* * Add a directory entry (dip,ncp) which references inode (ip). * @@ -119,15 +212,15 @@ RB_GENERATE_XLOOKUP(hammer_rec_rb_tree, INFO, hammer_record, rb_node, * all 0's when synching to disk, which is not handled here. 
*/ int -hammer_add_directory(struct hammer_transaction *trans, +hammer_ip_add_directory(struct hammer_transaction *trans, struct hammer_inode *dip, struct namecache *ncp, struct hammer_inode *ip) { - struct hammer_record *record; + hammer_record_t record; int error; int bytes; - record = hammer_alloc_ip_record(trans, dip); + record = hammer_alloc_mem_record(trans, dip); bytes = ncp->nc_nlen + 1; @@ -146,76 +239,403 @@ hammer_add_directory(struct hammer_transaction *trans, bcopy(ncp->nc_name, record->data, bytes); } record->data_len = bytes; - ++dip->ino_rec.ino_nlinks; - hammer_modify_inode(trans, dip, HAMMER_INODE_RDIRTY); - error = hammer_add_record(trans, record); + ++ip->ino_rec.ino_nlinks; + hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY); + error = hammer_mem_add(trans, record); return(error); } /* - * Allocate a record for the caller to finish filling in + * Delete the directory entry and update the inode link count. The + * cursor must be seeked to the directory entry record being deleted. + * + * NOTE: HAMMER_CURSOR_DELETE may not have been set. XXX remove flag. */ -struct hammer_record * -hammer_alloc_ip_record(struct hammer_transaction *trans, hammer_inode_t ip) +int +hammer_ip_del_directory(struct hammer_transaction *trans, + hammer_cursor_t cursor, struct hammer_inode *dip, + struct hammer_inode *ip) { - hammer_record_t record; + int error; - record = kmalloc(sizeof(*record), M_HAMMER, M_WAITOK|M_ZERO); - record->last_tid = trans->tid; - record->ip = ip; - return (record); + if (cursor->record == &cursor->iprec->rec) { + /* + * The directory entry was in-memory, just scrap the + * record. + */ + hammer_free_mem_record(cursor->iprec); + error = 0; + } else { + /* + * The directory entry was on-disk, mark the record and + * B-Tree entry as deleted. The B-Tree entry does not + * have to be reindexed because a 'current' delete transid + * will wind up in the same position as the live record. 
+ */ + KKASSERT(ip->flags & HAMMER_INODE_ONDISK); + error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD); + if (error == 0) { + cursor->node->ondisk->elms[cursor->index].base.delete_tid = trans->tid; + cursor->record->base.base.delete_tid = trans->tid; + hammer_modify_node(cursor->node); + hammer_modify_buffer(cursor->record_buffer); + + } + } + + /* + * One less link + */ + if (error == 0) { + --ip->ino_rec.ino_nlinks; + hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY); + } + return(error); } /* - * Free a record. Clean the structure up even though we are throwing it - * away as a sanity check. + * Add a data record to the filesystem. + * + * This is called via the strategy code, typically when the kernel wants to + * flush a buffer cache buffer, so this operation goes directly to the disk. */ -void -hammer_free_ip_record(struct hammer_record *record) +int +hammer_ip_add_data(hammer_transaction_t trans, hammer_inode_t ip, + int64_t offset, void *data, int bytes) { - if (record->flags & HAMMER_RECF_ONRBTREE) { - RB_REMOVE(hammer_rec_rb_tree, &record->ip->rec_tree, record); - record->flags &= ~HAMMER_RECF_ONRBTREE; - } - if (record->flags & HAMMER_RECF_ALLOCDATA) { - kfree(record->data, M_HAMMER); - record->flags &= ~HAMMER_RECF_ALLOCDATA; - } - record->data = NULL; - kfree(record, M_HAMMER); + panic("hammer_ip_add_data"); } /* - * Add the record to the inode's rec_tree. Directory entries + * Add the record to the inode's rec_tree. The low 32 bits of a directory + * entry's key is used to deal with hash collisions in the upper 32 bits. + * A unique 64 bit key is generated in-memory and may be regenerated a + * second time when the directory record is flushed to the on-disk B-Tree. 
*/ static int -hammer_add_record(struct hammer_transaction *trans, hammer_record_t record) +hammer_mem_add(struct hammer_transaction *trans, hammer_record_t record) { while (RB_INSERT(hammer_rec_rb_tree, &record->ip->rec_tree, record)) { if (record->rec.base.base.rec_type != HAMMER_RECTYPE_DIRENTRY){ - hammer_free_ip_record(record); + hammer_free_mem_record(record); return (EEXIST); } + if (++trans->hmp->namekey_iterator == 0) + ++trans->hmp->namekey_iterator; record->rec.base.base.key &= ~(0xFFFFFFFFLL); - record->rec.base.base.key |= trans->hmp->namekey_iterator++; + record->rec.base.base.key |= trans->hmp->namekey_iterator; } record->flags |= HAMMER_RECF_ONRBTREE; return(0); } -#if 0 +/************************************************************************ + * HAMMER INODE MERGED-RECORD FUNCTIONS * + ************************************************************************ + * + * These functions augment the B-Tree scanning functions in hammer_btree.c + * by merging in-memory records with on-disk records. + */ + +/* + * Locate a particular record either in-memory or on-disk. + * + * NOTE: This is basically a standalone routine, hammer_ip_next() may + * NOT be called to iterate results. + */ +int +hammer_ip_lookup(hammer_cursor_t cursor, struct hammer_inode *ip) +{ + int error; + + /* + * If the element is in-memory return it without searching the + * on-disk B-Tree + */ + error = hammer_mem_search(cursor, ip); + if (error == 0) { + cursor->record = &cursor->iprec->rec; + return(error); + } + if (error != ENOENT) + return(error); + + /* + * If the inode has on-disk components search the on-disk B-Tree. + */ + if ((ip->flags & HAMMER_INODE_ONDISK) == 0) + return(error); + error = hammer_btree_lookup(cursor); + if (error == 0) + error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD); + return(error); +} + +/* + * Locate the first record within the cursor's key_beg/key_end range, + * restricted to a particular inode. 
0 is returned on success, ENOENT + * if no records matched the requested range, or some other error. + * + * When 0 is returned hammer_ip_next() may be used to iterate additional + * records within the requested range. + */ +int +hammer_ip_first(hammer_cursor_t cursor, struct hammer_inode *ip) +{ + int error; + + /* + * Clean up fields and setup for merged scan + */ + cursor->flags &= ~(HAMMER_CURSOR_ATEDISK | HAMMER_CURSOR_ATEMEM); + cursor->flags |= HAMMER_CURSOR_DISKEOF | HAMMER_CURSOR_MEMEOF; + if (cursor->iprec) + hammer_rel_mem_record(&cursor->iprec); + + /* + * Search the on-disk B-Tree + */ + if (ip->flags & HAMMER_INODE_ONDISK) { + error = hammer_btree_lookup(cursor); + if (error && error != ENOENT) + return(error); + if (error == 0) + cursor->flags &= ~HAMMER_CURSOR_DISKEOF; + } + + /* + * Search the in-memory record list (Red-Black tree) + */ + error = hammer_mem_search(cursor, ip); + if (error && error != ENOENT) + return(error); + if (error == 0) + cursor->flags &= ~HAMMER_CURSOR_MEMEOF; + + /* + * This will return the first matching record. + */ + return(hammer_ip_next(cursor)); +} + +/* + * Retrieve the next record in a merged iteration within the bounds of the + * cursor. This call may be made multiple times after the cursor has been + * initially searched with hammer_ip_first(). + * + * 0 is returned on success, ENOENT if no further records match the + * requested range, or some other error code is returned. + */ +int +hammer_ip_next(hammer_cursor_t cursor) +{ + hammer_btree_elm_t elm; + hammer_record_t rec; + int error; + int r; + + /* + * Load the current on-disk and in-memory record. If we ate any + * records we have to get the next one. 
+ * + * Get the next on-disk record + */ + if (cursor->flags & HAMMER_CURSOR_ATEDISK) { + if ((cursor->flags & HAMMER_CURSOR_DISKEOF) == 0) { + error = hammer_btree_iterate(cursor); + if (error == 0) + cursor->flags &= ~HAMMER_CURSOR_ATEDISK; + else + cursor->flags |= HAMMER_CURSOR_DISKEOF; + } + } + + /* + * Get the next in-memory record. Records marked for defered + * deletion must be skipped. + */ + if (cursor->flags & HAMMER_CURSOR_ATEMEM) { + if ((cursor->flags & HAMMER_CURSOR_MEMEOF) == 0) { + rec = cursor->iprec; + do { + rec = hammer_rec_rb_tree_RB_NEXT(rec); + } while(rec && (rec->flags & HAMMER_RECF_DELETED)); + if (rec) { + cursor->flags &= ~HAMMER_CURSOR_ATEMEM; + hammer_ref(&rec->lock); + } else { + cursor->flags |= HAMMER_CURSOR_MEMEOF; + } + hammer_rel_mem_record(&cursor->iprec); + cursor->iprec = rec; + } + } + + /* + * Extract either the disk or memory record depending on their + * relative position. + */ + error = 0; + switch(cursor->flags & (HAMMER_CURSOR_DISKEOF | HAMMER_CURSOR_MEMEOF)) { + case 0: + /* + * Both entries valid + */ + elm = &cursor->node->ondisk->elms[cursor->index]; + r = hammer_btree_cmp(&elm->base, + &cursor->iprec->rec.base.base); + if (r < 0) { + error = hammer_btree_extract(cursor, + HAMMER_CURSOR_GET_RECORD); + cursor->flags |= HAMMER_CURSOR_ATEDISK; + break; + } + /* fall through to the memory entry */ + case HAMMER_CURSOR_DISKEOF: + /* + * Only the memory entry is valid + */ + cursor->record = &cursor->iprec->rec; + cursor->flags |= HAMMER_CURSOR_ATEMEM; + break; + case HAMMER_CURSOR_MEMEOF: + /* + * Only the disk entry is valid + */ + error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD); + cursor->flags |= HAMMER_CURSOR_ATEDISK; + break; + default: + /* + * Neither entry is valid + * + * XXX error not set properly + */ + cursor->record = NULL; + error = ENOENT; + break; + } + return(error); +} + +/* + * Resolve the cursor->data pointer for the current cursor position in + * a merged iteration. 
+ */ +int +hammer_ip_resolve_data(hammer_cursor_t cursor) +{ + int error; + + if (cursor->iprec && cursor->record == &cursor->iprec->rec) { + cursor->data = cursor->iprec->data; + error = 0; + } else { + error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA); + } + return(error); +} + /* - * Delete records belonging to the specified range. Deal with edge and - * overlap cases. This function sets the delete tid and breaks adds - * up to two records to deal with edge cases, leaving the range as a gap. - * The caller will then add records as appropriate. + * Delete all records within the specified range for inode ip. + * + * NOTE: An unaligned range will cause new records to be added to cover + * the edge cases. + * + * NOTE: ran_end is inclusive (e.g. 0,1023 instead of 0,1024). */ int -hammer_delete_records(struct hammer_transaction *trans, - struct hammer_inode *ip, - hammer_base_elm_t ran_beg, hammer_base_elm_t ran_end) +hammer_ip_delete_range(hammer_transaction_t trans, hammer_inode_t ip, + int64_t ran_beg, int64_t ran_end) { + struct hammer_cursor cursor; + hammer_record_ondisk_t rec; + hammer_base_elm_t base; + int error; + int64_t off; + + hammer_init_cursor_ip(&cursor, ip); + + cursor.key_beg.obj_id = ip->obj_id; + cursor.key_beg.create_tid = ip->obj_asof; + cursor.key_beg.delete_tid = 0; + cursor.key_beg.obj_type = 0; + cursor.key_beg.key = ran_beg; + cursor.key_end = cursor.key_beg; + if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) { + cursor.key_beg.rec_type = HAMMER_RECTYPE_DB; + cursor.key_end.rec_type = HAMMER_RECTYPE_DB; + cursor.key_end.key = ran_end; + } else { + cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; + cursor.key_end.rec_type = HAMMER_RECTYPE_DATA; + if (ran_end + MAXPHYS < ran_end) + cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL; + else + cursor.key_end.key = ran_end + MAXPHYS; + } + + error = hammer_ip_first(&cursor, ip); + + /* + * Iterate through matching records and mark them as deleted. 
+ */ + while (error == 0) { + rec = cursor.record; + base = &rec->base.base; + + KKASSERT(base->delete_tid == 0); + + /* + * There may be overlap cases for regular file data. Also + * remember the key for a regular file record is the offset + * of the last byte of the record (base + len - 1), NOT the + * base offset. + */ + if (base->rec_type == HAMMER_RECTYPE_DATA) { + off = base->key - rec->base.data_len + 1; + /* + * Check the left edge case + */ + if (off < ran_beg) { + panic("hammer left edge case\n"); + } + + /* + * Check the right edge case. Note that the + * record can be completely out of bounds, which + * terminates the search. + * + * base->key is (base_offset + bytes - 1), ran_end + * works the same way. + */ + if (base->key > ran_end) { + if (base->key - rec->base.data_len + 1 > ran_end) { + kprintf("right edge OOB\n"); + break; + } + panic("hammer right edge case\n"); + } + } + + /* + * Mark the record and B-Tree entry as deleted + */ + if (cursor.record == &cursor.iprec->rec) { + hammer_free_mem_record(cursor.iprec); + + } else { + cursor.node->ondisk->elms[cursor.index].base.delete_tid = trans->tid; + cursor.record->base.base.delete_tid = trans->tid; + hammer_modify_node(cursor.node); + hammer_modify_buffer(cursor.record_buffer); + } + error = hammer_ip_next(&cursor); + } + hammer_done_cursor(&cursor); + if (error == ENOENT) + error = 0; + return(error); } -#endif diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c index 740cde76b1..66a915a8c0 100644 --- a/sys/vfs/hammer/hammer_ondisk.c +++ b/sys/vfs/hammer/hammer_ondisk.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.4 2007/11/19 00:53:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.5 2007/11/20 07:16:28 dillon Exp $ */ /* * Manage HAMMER's on-disk structures. 
These routines are primarily @@ -245,7 +245,7 @@ hammer_install_volume(struct hammer_mount *hmp, const char *volname) goto late_failure; } volume->vol_no = ondisk->vol_no; - volume->cluster_base = ondisk->vol_beg; + volume->cluster_base = ondisk->vol_clo_beg; volume->vol_clsize = ondisk->vol_clsize; volume->vol_flags = ondisk->vol_flags; RB_INIT(&volume->rb_clus_root); @@ -309,11 +309,11 @@ hammer_unload_volume(hammer_volume_t volume, void *data __unused) * Sync clusters, sync volume */ + /* * Clean up the root cluster, which is held unlocked in the root * volume. */ - hammer_ref(&volume->io.lock); if (hmp->rootvol == volume) { if ((rootcl = hmp->rootcl) != NULL) hmp->rootcl = NULL; @@ -321,10 +321,25 @@ hammer_unload_volume(hammer_volume_t volume, void *data __unused) } /* - * Flush the volume + * Unload clusters and super-clusters. Unloading a super-cluster + * also unloads related clusters, but the filesystem may not be + * using super-clusters so unload clusters anyway. + */ + RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL, + hammer_unload_cluster, NULL); + RB_SCAN(hammer_scl_rb_tree, &volume->rb_scls_root, NULL, + hammer_unload_supercl, NULL); + + /* + * Release our buffer and flush anything left in the buffer cache. */ - KKASSERT(volume->io.lock.refs == 1); hammer_io_release(&volume->io, 1); + + /* + * There should be no references on the volume. + */ + KKASSERT(volume->io.lock.refs == 0); + volume->ondisk = NULL; if (volume->devvp) { if (ronly) { @@ -578,6 +593,19 @@ hammer_load_supercl(hammer_supercl_t supercl, int isnew) return (error); } +/* + * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue. + */ +int +hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused) +{ + KKASSERT(supercl->io.lock.refs == 0); + hammer_ref(&supercl->io.lock); + hammer_io_release(&supercl->io, 1); + hammer_rel_supercl(supercl, 1); + return(0); +} + /* * Release a super-cluster. 
We have to deal with several places where * another thread can ref the super-cluster. @@ -755,6 +783,21 @@ hammer_load_cluster(hammer_cluster_t cluster, int isnew) return (error); } +/* + * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue. + */ +int +hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused) +{ + hammer_ref(&cluster->io.lock); + RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL, + hammer_unload_buffer, NULL); + KKASSERT(cluster->io.lock.refs == 1); + hammer_io_release(&cluster->io, 1); + hammer_rel_cluster(cluster, 1); + return(0); +} + /* * Reference a cluster that is either already referenced or via a specially * handled pointer (aka rootcl). @@ -940,6 +983,20 @@ hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type) return (error); } +/* + * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue. + */ +int +hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused) +{ + hammer_ref(&buffer->io.lock); + hammer_flush_buffer_nodes(buffer); + hammer_io_release(&buffer->io, 1); + KKASSERT(buffer->io.lock.refs == 1); + hammer_rel_buffer(buffer, 1); + return(0); +} + /* * Reference a buffer that is either already referenced or via a specially * handled pointer (aka cursor->buffer). @@ -1117,6 +1174,7 @@ hammer_ref_node(hammer_node_t node) int error; hammer_ref(&node->lock); + error = 0; if (node->ondisk == NULL) { hammer_lock_ex(&node->lock); if (node->ondisk == NULL) { @@ -1235,7 +1293,6 @@ hammer_cache_node(hammer_node_t node, struct hammer_node **cache) *cache = node; } } - hammer_rel_node(node); } void diff --git a/sys/vfs/hammer/hammer_subs.c b/sys/vfs/hammer/hammer_subs.c index b9f8e812a1..5c0e52d87e 100644 --- a/sys/vfs/hammer/hammer_subs.c +++ b/sys/vfs/hammer/hammer_subs.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.3 2007/11/19 00:53:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.4 2007/11/20 07:16:28 dillon Exp $ */ /* * HAMMER structural locking @@ -148,6 +148,7 @@ void hammer_unref(struct hammer_lock *lock) { crit_enter(); + KKASSERT(lock->refs > 0); --lock->refs; crit_exit(); } diff --git a/sys/vfs/hammer/hammer_transaction.c b/sys/vfs/hammer/hammer_transaction.c index 3879b0baef..ce83a588a1 100644 --- a/sys/vfs/hammer/hammer_transaction.c +++ b/sys/vfs/hammer/hammer_transaction.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.2 2007/11/19 00:53:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.3 2007/11/20 07:16:28 dillon Exp $ */ #include "hammer.h" @@ -40,25 +40,55 @@ void hammer_start_transaction(struct hammer_transaction *trans, struct hammer_mount *hmp) { - struct timespec ts; + int error; - getnanotime(&ts); trans->hmp = hmp; - trans->tid = ts.tv_sec * 1000000000LL + ts.tv_nsec; - if (trans->tid < hmp->last_tid) - trans->tid = hmp->last_tid; - hmp->last_tid = trans->tid + 1; + trans->rootvol = hammer_get_root_volume(hmp, &error); + KKASSERT(error == 0); + trans->tid = hammer_alloc_tid(trans); } void hammer_abort_transaction(struct hammer_transaction *trans) { + hammer_rel_volume(trans->rootvol, 0); KKASSERT(0); } void hammer_commit_transaction(struct hammer_transaction *trans) { - KKASSERT(0); + hammer_rel_volume(trans->rootvol, 0); } +hammer_tid_t +hammer_alloc_tid(hammer_transaction_t trans) +{ + hammer_volume_ondisk_t ondisk; + struct timespec ts; + hammer_tid_t tid; + + getnanotime(&ts); + tid = ts.tv_sec * 1000000000LL + ts.tv_nsec; + ondisk = trans->rootvol->ondisk; + if (tid < ondisk->vol0_nexttid) + tid = ondisk->vol0_nexttid; + if (tid == 0xFFFFFFFFFFFFFFFFULL) + panic("hammer_start_transaction: Ran out of TIDs!"); + ondisk->vol0_nexttid = 
tid + 1; + hammer_modify_volume(trans->rootvol); + + return(tid); +} + +hammer_tid_t +hammer_alloc_recid(hammer_transaction_t trans) +{ + hammer_volume_ondisk_t ondisk; + hammer_tid_t recid; + + ondisk = trans->rootvol->ondisk; + recid = ++ondisk->vol0_recid; + hammer_modify_volume(trans->rootvol); + return(recid); +} diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c index 41f2e9fcbb..f39c6279e5 100644 --- a/sys/vfs/hammer/hammer_vfsops.c +++ b/sys/vfs/hammer/hammer_vfsops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.4 2007/11/19 00:53:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.5 2007/11/20 07:16:28 dillon Exp $ */ #include @@ -87,7 +87,8 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, struct ucred *cred) { struct hammer_mount_info info; - struct hammer_mount *hmp; + hammer_mount_t hmp; + hammer_volume_t rootvol; struct vnode *rootvp; const char *upath; /* volume name in userspace */ char *path; /* volume name in system space */ @@ -106,6 +107,7 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, mp->mnt_data = (qaddr_t)hmp; hmp->mp = mp; hmp->zbuf = kmalloc(HAMMER_BUFSIZE, M_HAMMER, M_WAITOK | M_ZERO); + hmp->namekey_iterator = mycpu->gd_time_seconds; RB_INIT(&hmp->rb_vols_root); RB_INIT(&hmp->rb_inos_root); @@ -154,6 +156,18 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, vfs_add_vnodeops(mp, &hammer_vnode_vops, &mp->mnt_vn_norm_ops); + /* + * The root volume's ondisk pointer is only valid if we hold a + * reference to it. + */ + rootvol = hammer_get_root_volume(hmp, &error); + if (error) + goto done; + ksnprintf(mp->mnt_stat.f_mntfromname, + sizeof(mp->mnt_stat.f_mntfromname), "%s", + rootvol->ondisk->vol_name); + hammer_rel_volume(rootvol, 0); + /* * Locate the root directory using the root cluster's B-Tree as a * starting point. 
The root directory uses an obj_id of 1. @@ -162,10 +176,12 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, * in hmp->rootvp (need to flush it on unmount). */ error = hammer_vfs_vget(mp, 1, &rootvp); - if (error == 0) - vput(rootvp); + if (error) + goto done; + vput(rootvp); /*vn_unlock(hmp->rootvp);*/ +done: /* * Cleanup and return. */ diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index 21a048933e..011948c424 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.3 2007/11/19 00:53:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.4 2007/11/20 07:16:28 dillon Exp $ */ #include @@ -313,7 +313,7 @@ static int hammer_vop_close(struct vop_close_args *ap) { - return EOPNOTSUPP; + return (vop_stdclose(ap)); } /* @@ -356,19 +356,23 @@ hammer_vop_ncreate(struct vop_ncreate_args *ap) * Add the new filesystem object to the directory. This will also * bump the inode's link count. */ - error = hammer_add_directory(&trans, dip, nch->ncp, nip); + error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip); /* * Finish up. 
*/ if (error) { - hammer_rel_inode(nip); + hammer_rel_inode(nip, 0); hammer_abort_transaction(&trans); *ap->a_vpp = NULL; } else { hammer_commit_transaction(&trans); error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp); - hammer_rel_inode(nip); + hammer_rel_inode(nip, 0); + if (error == 0) { + cache_setunresolved(ap->a_nch); + cache_setvp(ap->a_nch, *ap->a_vpp); + } } return (error); } @@ -448,6 +452,7 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap) dip = VTOI(ap->a_dvp); ncp = ap->a_nch->ncp; namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen); + kprintf("hammer_vop_nresolve %s dip %p\n", ncp->nc_name, dip); hammer_init_cursor_ip(&cursor, dip); cursor.key_beg.obj_id = dip->obj_id; @@ -462,24 +467,23 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap) /* * Scan all matching records (the chain), locate the one matching - * the requested path component. info->last_error contains the - * error code on search termination and could be 0, ENOENT, or - * something else. + * the requested path component. * * The hammer_ip_*() functions merge in-memory records with on-disk * records for the purposes of the search. 
*/ - rec = hammer_ip_first(&cursor, dip); - while (rec) { - if (hammer_ip_resolve_data(&cursor) != 0) /* sets last_error */ + error = hammer_ip_first(&cursor, dip); + while (error == 0) { + error = hammer_ip_resolve_data(&cursor); + if (error) break; + rec = cursor.record; if (ncp->nc_nlen == rec->entry.base.data_len && - bcmp(ncp->nc_name, (void *)cursor.data, ncp->nc_nlen) == 0) { + bcmp(ncp->nc_name, cursor.data, ncp->nc_nlen) == 0) { break; } - rec = hammer_ip_next(&cursor); + error = hammer_ip_next(&cursor); } - error = cursor.last_error; if (error == 0) { error = hammer_vfs_vget(dip->hmp->mp, rec->entry.obj_id, &vp); if (error == 0) { @@ -491,6 +495,7 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap) cache_setvp(ap->a_nch, NULL); } hammer_done_cursor(&cursor); + kprintf("hammer_vop_nresolve error %d\n", error); return (error); } @@ -546,7 +551,7 @@ hammer_vop_nlink(struct vop_nlink_args *ap) * dip nor ip are referenced or locked, but their vnodes are * referenced. This function will bump the inode's link count. */ - error = hammer_add_directory(&trans, dip, nch->ncp, ip); + error = hammer_ip_add_directory(&trans, dip, nch->ncp, ip); /* * Finish up. @@ -598,19 +603,23 @@ hammer_vop_nmkdir(struct vop_nmkdir_args *ap) * Add the new filesystem object to the directory. This will also * bump the inode's link count. */ - error = hammer_add_directory(&trans, dip, nch->ncp, nip); + error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip); /* * Finish up. */ if (error) { - hammer_rel_inode(nip); + hammer_rel_inode(nip, 0); hammer_abort_transaction(&trans); *ap->a_vpp = NULL; } else { hammer_commit_transaction(&trans); error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp); - hammer_rel_inode(nip); + hammer_rel_inode(nip, 0); + if (error == 0) { + cache_setunresolved(ap->a_nch); + cache_setvp(ap->a_nch, *ap->a_vpp); + } } return (error); } @@ -654,19 +663,23 @@ hammer_vop_nmknod(struct vop_nmknod_args *ap) * Add the new filesystem object to the directory. 
This will also * bump the inode's link count. */ - error = hammer_add_directory(&trans, dip, nch->ncp, nip); + error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip); /* * Finish up. */ if (error) { - hammer_rel_inode(nip); + hammer_rel_inode(nip, 0); hammer_abort_transaction(&trans); *ap->a_vpp = NULL; } else { hammer_commit_transaction(&trans); error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp); - hammer_rel_inode(nip); + hammer_rel_inode(nip, 0); + if (error == 0) { + cache_setunresolved(ap->a_nch); + cache_setvp(ap->a_nch, *ap->a_vpp); + } } return (error); } @@ -678,7 +691,8 @@ static int hammer_vop_open(struct vop_open_args *ap) { - return EOPNOTSUPP; + kprintf("hammer_vop_open\n"); + return(vop_stdopen(ap)); } /* @@ -708,6 +722,7 @@ static int hammer_vop_readdir(struct vop_readdir_args *ap) { + kprintf("hammer_vop_readdir\n"); return EOPNOTSUPP; } @@ -762,7 +777,7 @@ hammer_vop_nrename(struct vop_nrename_args *ap) ip = VTOI(fncp->nc_vp); KKASSERT(ip != NULL); - error = hammer_add_directory(&trans, tdip, tncp, ip); + error = hammer_ip_add_directory(&trans, tdip, tncp, ip); /* * Locate the record in the originating directory and remove it. @@ -788,31 +803,29 @@ hammer_vop_nrename(struct vop_nrename_args *ap) /* * Scan all matching records (the chain), locate the one matching - * the requested path component. info->last_error contains the - * error code on search termination and could be 0, ENOENT, or - * something else. + * the requested path component. * * The hammer_ip_*() functions merge in-memory records with on-disk * records for the purposes of the search. 
*/ - rec = hammer_ip_first(&cursor, fdip); - while (rec) { + error = hammer_ip_first(&cursor, fdip); + while (error == 0) { if (hammer_ip_resolve_data(&cursor) != 0) break; + rec = cursor.record; if (fncp->nc_nlen == rec->entry.base.data_len && bcmp(fncp->nc_name, cursor.data, fncp->nc_nlen) == 0) { break; } - rec = hammer_ip_next(&cursor); + error = hammer_ip_next(&cursor); } - error = cursor.last_error; /* * If all is ok we have to get the inode so we can adjust nlinks. */ if (error) goto done; - error = hammer_del_directory(&trans, &cursor, fdip, ip); + error = hammer_ip_del_directory(&trans, &cursor, fdip, ip); if (error == 0) { cache_rename(ap->a_fnch, ap->a_tnch); cache_setvp(ap->a_tnch, ip->vp); @@ -903,7 +916,7 @@ hammer_vop_setattr(struct vop_setattr_args *ap) switch(ap->a_vp->v_type) { case VREG: case VDATABASE: - error = hammer_delete_range(&trans, ip, + error = hammer_ip_delete_range(&trans, ip, vap->va_size, 0x7FFFFFFFFFFFFFFFLL); break; @@ -1016,6 +1029,7 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap) struct bio *bio; struct buf *bp; int64_t rec_offset; + int64_t ran_end; int error; int boff; int roff; @@ -1033,28 +1047,43 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap) cursor.key_beg.obj_id = ip->obj_id; cursor.key_beg.create_tid = ip->obj_asof; cursor.key_beg.delete_tid = 0; - cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; cursor.key_beg.obj_type = 0; cursor.key_beg.key = bio->bio_offset; cursor.key_end = cursor.key_beg; - cursor.key_end.key = bio->bio_offset + bp->b_bufsize - 1; + if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) { + cursor.key_beg.rec_type = HAMMER_RECTYPE_DB; + cursor.key_end.rec_type = HAMMER_RECTYPE_DB; + cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL; + } else { + ran_end = bio->bio_offset + bp->b_bufsize - 1; + cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; + cursor.key_end.rec_type = HAMMER_RECTYPE_DATA; + if (ran_end + MAXPHYS < ran_end) + cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL; + else + 
cursor.key_end.key = ran_end + MAXPHYS; + } - rec = hammer_ip_first(&cursor, ip); + error = hammer_ip_first(&cursor, ip); boff = 0; - while (rec) { - if (hammer_ip_resolve_data(&cursor) != 0) + while (error == 0) { + error = hammer_ip_resolve_data(&cursor); + if (error) break; + rec = cursor.record; base = &rec->base.base; - rec_offset = base->key - rec->data.base.data_len; + rec_offset = base->key - rec->data.base.data_len + 1; /* - * Zero-fill any gap + * Calculate the gap, if any, and zero-fill it. */ n = (int)(rec_offset - (bio->bio_offset + boff)); if (n > 0) { + if (n > bp->b_bufsize - boff) + n = bp->b_bufsize - boff; kprintf("zfill %d bytes\n", n); bzero((char *)bp->b_data + boff, n); boff += n; @@ -1064,6 +1093,9 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap) /* * Calculate the data offset in the record and the number * of bytes we can copy. + * + * Note there is a degenerate case here where boff may + * already be at bp->b_bufsize. */ roff = -n; n = rec->data.base.data_len - roff; @@ -1074,14 +1106,13 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap) boff += n; if (boff == bp->b_bufsize) break; - rec = hammer_ip_next(&cursor); + error = hammer_ip_next(&cursor); } hammer_done_cursor(&cursor); /* * There may have been a gap after the last record */ - error = cursor.last_error; if (error == ENOENT) error = 0; if (error == 0 && boff != bp->b_bufsize) { @@ -1121,15 +1152,20 @@ hammer_vop_strategy_write(struct vop_strategy_args *ap) * Delete any records overlapping our range. 
This function will * properly */ - error = hammer_delete_range(&trans, ip, bio->bio_offset, - bio->bio_offset + bp->b_bufsize - 1); + if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) { + error = hammer_ip_delete_range(&trans, ip, bio->bio_offset, + bio->bio_offset); + } else { + error = hammer_ip_delete_range(&trans, ip, bio->bio_offset, + bio->bio_offset + bp->b_bufsize - 1); + } /* * Add a single record to cover the write */ if (error == 0) { - error = hammer_add_data(&trans, ip, bio->bio_offset, - bp->b_data, bp->b_bufsize); + error = hammer_ip_add_data(&trans, ip, bio->bio_offset, + bp->b_data, bp->b_bufsize); } /* @@ -1198,17 +1234,18 @@ hammer_dounlink(struct nchandle *nch, struct vnode *dvp, struct ucred *cred, * The hammer_ip_*() functions merge in-memory records with on-disk * records for the purposes of the search. */ - rec = hammer_ip_first(&cursor, dip); - while (rec) { - if (hammer_ip_resolve_data(&cursor) != 0) + error = hammer_ip_first(&cursor, dip); + while (error == 0) { + error = hammer_ip_resolve_data(&cursor); + if (error) break; + rec = cursor.record; if (ncp->nc_nlen == rec->entry.base.data_len && bcmp(ncp->nc_name, cursor.data, ncp->nc_nlen) == 0) { break; } - rec = hammer_ip_next(&cursor); + error = hammer_ip_next(&cursor); } - error = cursor.last_error; /* * If all is ok we have to get the inode so we can adjust nlinks. 
@@ -1216,7 +1253,7 @@ hammer_dounlink(struct nchandle *nch, struct vnode *dvp, struct ucred *cred, if (error == 0) { ip = hammer_get_inode(dip->hmp, rec->entry.obj_id, &error); if (error == 0) - error = hammer_del_directory(&trans, &cursor, dip, ip); + error = hammer_ip_del_directory(&trans, &cursor, dip, ip); if (error == 0) { cache_setunresolved(nch); cache_setvp(nch, NULL); @@ -1224,7 +1261,7 @@ hammer_dounlink(struct nchandle *nch, struct vnode *dvp, struct ucred *cred, if (ip->vp) cache_inval_vp(ip->vp, CINV_DESTROY); } - hammer_rel_inode(ip); + hammer_rel_inode(ip, 0); error = hammer_vfs_vget(dip->hmp->mp, rec->entry.obj_id, &vp); if (error == 0) { -- 2.41.0