From 195c19a1d7454923528d161b86af2537c9535dcd Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Fri, 30 Nov 2007 00:16:56 +0000 Subject: [PATCH] HAMMER 9/many - btree removal cases, mount nohistory Add a 'nohistory' mount option that will cause HAMMER to not retain any history. This option is primarily for testing of btree removal and hinted radix tree bitmap frees and reallocations. Flesh out the btree node removal code. We don't try to rebalance the tree yet but we do attempt to remove empty nodes. Add workarounds for a GCC-4 bug involving overflow tests on integers. --- sbin/mount_hammer/mount_hammer.8 | 11 +- sbin/mount_hammer/mount_hammer.c | 92 +++++++++++++- sys/vfs/hammer/hammer.h | 12 +- sys/vfs/hammer/hammer_btree.c | 212 +++++++++++++++++++------------ sys/vfs/hammer/hammer_cursor.c | 90 ++++++++++++- sys/vfs/hammer/hammer_cursor.h | 3 +- sys/vfs/hammer/hammer_disk.h | 4 +- sys/vfs/hammer/hammer_inode.c | 17 ++- sys/vfs/hammer/hammer_mount.h | 8 +- sys/vfs/hammer/hammer_object.c | 160 ++++++++++++++++------- sys/vfs/hammer/hammer_ondisk.c | 20 +-- sys/vfs/hammer/hammer_vfsops.c | 34 ++++- sys/vfs/hammer/hammer_vnops.c | 6 +- 13 files changed, 488 insertions(+), 181 deletions(-) diff --git a/sbin/mount_hammer/mount_hammer.8 b/sbin/mount_hammer/mount_hammer.8 index f3e245b201..60077960d8 100644 --- a/sbin/mount_hammer/mount_hammer.8 +++ b/sbin/mount_hammer/mount_hammer.8 @@ -30,7 +30,7 @@ .\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.\" $DragonFly: src/sbin/mount_hammer/mount_hammer.8,v 1.1 2007/10/10 19:35:19 dillon Exp $ +.\" $DragonFly: src/sbin/mount_hammer/mount_hammer.8,v 1.2 2007/11/30 00:16:54 dillon Exp $ .Dd October 10, 2007 .Dt MOUNT_HAMMER 8 .Os @@ -53,7 +53,14 @@ The options are as follow: .Bl -tag -width indent .It Fl T Ar time Mount the filesystem as-of a particular time. The mount will automatically -be made read-only. +be made read-only. The time may be specified two ways. A single integer +followed by 's', 'm', 'h', 'D', 'M', or 'Y' issues a historical mount as-of +a number of seconds, minutes, hours, days, months, or years ago. A time +specification of yyyymmdd[:hhmmss][.fractional] specifies an exact as-of +timestamp in local (not GM) time. +Set the TZ environment variable prior to running +.Nm +if you wish to specify the time by some other means. .It Fl o Ar mount-options Specify mount options, see .Xr mount 8 . diff --git a/sbin/mount_hammer/mount_hammer.c b/sbin/mount_hammer/mount_hammer.c index f90980bd0a..571e82f1b1 100644 --- a/sbin/mount_hammer/mount_hammer.c +++ b/sbin/mount_hammer/mount_hammer.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sbin/mount_hammer/mount_hammer.c,v 1.1 2007/10/10 19:35:19 dillon Exp $ + * $DragonFly: src/sbin/mount_hammer/mount_hammer.c,v 1.2 2007/11/30 00:16:54 dillon Exp $ */ #include @@ -52,10 +52,16 @@ #include #include #include +#include #include "mntopts.h" -static struct mntopt mopts[] = { MOPT_STDOPTS, MOPT_NULL }; +static void hammer_parsetime(u_int64_t *tidp, const char *timestr); + +#define MOPT_HAMMEROPTS \ + { "history", 1, HMNT_NOHISTORY, 1 } + +static struct mntopt mopts[] = { MOPT_STDOPTS, MOPT_HAMMEROPTS, MOPT_NULL }; static void usage(void); @@ -69,15 +75,18 @@ main(int ac, char **av) int ch; char *mountpt; + bzero(&info, sizeof(info)); + info.asof = 0; mount_flags = 0; + info.hflags = 0; + while ((ch = getopt(ac, av, "o:T:")) != -1) { switch(ch) { case 'T': - /* parse time */ - errx(1, "-T option not currently supported"); + hammer_parsetime(&info.asof, optarg); break; case 'o': - getmntopts(optarg, mopts, &mount_flags, 0); + getmntopts(optarg, mopts, &mount_flags, &info.hflags); break; default: usage(); @@ -118,10 +127,79 @@ main(int ac, char **av) exit(0); } +/* + * Parse a timestamp for the mount point + * + * yyyymmddhhmmss + * -N[s/h/d/m/y] + */ +static +void +hammer_parsetime(u_int64_t *tidp, const char *timestr) +{ + struct tm tm; + time_t t; + int32_t n; + char c; + double seconds = 0; + + t = time(NULL); + + if (*timestr == 0) + usage(); + + if (isalpha(timestr[strlen(timestr)-1])) { + if (sscanf(timestr, "%d%c", &n, &c) != 2) + usage(); + switch(c) { + case 'Y': + n *= 365; + goto days; + case 'M': + n *= 30; + /* fall through */ + case 'D': + days: + n *= 24; + /* fall through */ + case 'h': + n *= 60; + /* fall through */ + case 'm': + n *= 60; + /* fall through */ + case 's': + t -= n; + break; + default: + usage(); + } + } else { + localtime_r(&t, &tm); + seconds = (double)tm.tm_sec; + tm.tm_year -= 1900; + tm.tm_mon -= 1; + n = sscanf(timestr, "%4d%2d%2d:%2d%2d%lf", + &tm.tm_year, &tm.tm_mon, &tm.tm_mday, + &tm.tm_hour, &tm.tm_min, &seconds); + tm.tm_mon += 1; + tm.tm_year += 19000; + tm.tm_sec = (int)seconds; + t = mktime(&tm); + } + localtime_r(&t, &tm); + printf("mount_hammer as-of %s", asctime(&tm)); + *tidp = (u_int64_t)t * 1000000000 + + (seconds - (int)seconds) * 1000000000; +} + static void usage(void) { - errx(1, "mount_hammer [-T time] [-o options] " - "volume [volume...] mount_pt"); + fprintf(stderr, "mount_hammer [-T time] [-o options] " + "volume [volume...] mount_pt"); + fprintf(stderr, " time: +n[s/m/h/D/M/Y]\n" + " time: yyyymmdd[:hhmmss]\n"); + exit(1); } diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index 0bab2f9ba7..24bd0c3911 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.11 2007/11/27 07:48:52 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.12 2007/11/30 00:16:56 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS @@ -375,6 +375,9 @@ struct hammer_mount { struct hammer_volume *rootvol; struct hammer_cluster *rootcl; char *zbuf; /* HAMMER_BUFSIZE bytes worth of all-zeros */ + int hflags; + int ronly; + int nvolumes; uuid_t fsid; udev_t fsid_udev; hammer_tid_t asof; @@ -418,12 +421,14 @@ int hammer_ip_lookup(hammer_cursor_t cursor, hammer_inode_t ip); int hammer_ip_first(hammer_cursor_t cursor, hammer_inode_t ip); int hammer_ip_next(hammer_cursor_t cursor); int hammer_ip_resolve_data(hammer_cursor_t cursor); +int hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid); + hammer_record_t hammer_alloc_mem_record(hammer_inode_t ip); void hammer_rel_mem_record(struct hammer_record **recordp); void hammer_free_mem_record(hammer_record_t record); -int hammer_cursor_up(hammer_cursor_t cursor); +int hammer_cursor_up(hammer_cursor_t cursor, int nonblock); int hammer_cursor_toroot(hammer_cursor_t cursor); int hammer_cursor_down(hammer_cursor_t cursor); @@ -450,6 +455,7 @@ int64_t hammer_directory_namekey(void *name, int len); int hammer_init_cursor_hmp(hammer_cursor_t cursor, hammer_mount_t hmp); int hammer_init_cursor_ip(hammer_cursor_t cursor, hammer_inode_t ip); + void hammer_done_cursor(hammer_cursor_t cursor); void hammer_mem_done(hammer_cursor_t cursor); @@ -507,8 +513,6 @@ void *hammer_alloc_data(struct hammer_cluster *cluster, int32_t bytes, int *errorp, struct hammer_buffer **bufferp); void *hammer_alloc_record(struct hammer_cluster *cluster, int *errorp, struct hammer_buffer **bufferp); -void hammer_free_btree_ptr(struct hammer_buffer *buffer, - hammer_node_ondisk_t node); void hammer_free_data_ptr(struct hammer_buffer *buffer, void *data, int bytes); void hammer_free_record_ptr(struct hammer_buffer *buffer, diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c index 7e5f6098e2..ae5b2d34ba 100644 --- a/sys/vfs/hammer/hammer_btree.c +++ b/sys/vfs/hammer/hammer_btree.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.8 2007/11/27 07:48:52 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.9 2007/11/30 00:16:56 dillon Exp $ */ /* @@ -91,7 +91,7 @@ static int btree_search(hammer_cursor_t cursor, int flags); static int btree_split_internal(hammer_cursor_t cursor); static int btree_split_leaf(hammer_cursor_t cursor); -static int btree_remove(hammer_node_t node, int index); +static int btree_remove(hammer_cursor_t cursor); static int btree_set_parent(hammer_node_t node, hammer_btree_elm_t elm); #if 0 static int btree_rebalance(hammer_cursor_t cursor); @@ -155,9 +155,12 @@ hammer_btree_iterate(hammer_cursor_t cursor) * * XXX this could be optimized by storing the information in * the parent reference. + * + * XXX we can lose the node lock temporarily, this could mess + * up our scan. */ if (cursor->index == node->count) { - error = hammer_cursor_up(cursor); + error = hammer_cursor_up(cursor, 0); if (error) break; node = cursor->node->ondisk; @@ -430,14 +433,14 @@ hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm) * The cursor is positioned such that the current element is the one * to be deleted. * - * The caller must call hammer_btree_lookup() with the HAMMER_CURSOR_DELETE - * flag set and that call must return 0 before this function can be - * called. + * On return the cursor will be positioned after the deleted element and + * MAY point to an internal node. It will be suitable for the continuation + * of an iteration but not for an insertion or deletion. * - * It is possible that we will be asked to delete the last element in a - * leaf. This case only occurs if the downward search was unable to - * rebalance us, which in turn can occur if our parent has inter-cluster - * elements. So the 0-element case for a leaf is allowed. + * Deletions will attempt to partially rebalance the B-Tree in an upward + * direction. It is possible to end up with empty leafs. An empty internal + * node is impossible (worst case: it has one element pointing to an empty + * leaf). */ int hammer_btree_delete(hammer_cursor_t cursor) @@ -475,6 +478,7 @@ hammer_btree_delete(hammer_cursor_t cursor) (ondisk->count - i - 1) * sizeof(ondisk->elms[0])); } --ondisk->count; + hammer_modify_node(node); if (cursor->parent != NULL) { /* * Adjust parent's notion of the leaf's count. subtree_count @@ -491,26 +495,20 @@ hammer_btree_delete(hammer_cursor_t cursor) } /* - * If the leaf is empty try to remove the subtree reference - * in at (parent, parent_index). This will unbalance the - * tree. + * It is possible, but not desireable, to stop here. If the element + * count drops to 0 (which is allowed for a leaf), try recursively + * remove the B-Tree node. * - * Note that internal nodes must have at least one element - * so their boundary information is properly laid out. If - * we would cause our parent to become empty we try to - * recurse up the tree, but if that doesn't work we just - * leave the tree with an empty leaf. + * XXX rebalancing calls would go here too. + * + * This may reposition the cursor at one of the parent's of the + * current node. */ if (ondisk->count == 0) { - error = btree_remove(cursor->parent, cursor->parent_index); - if (error == 0) { - hammer_free_btree(node->cluster, node->node_offset); - } else if (error == EAGAIN) { - hammer_modify_node(node); + error = btree_remove(cursor); + if (error == EAGAIN) error = 0; - } /* else a real error occured XXX */ } else { - hammer_modify_node(node); error = 0; } return(error); @@ -566,7 +564,7 @@ btree_search(hammer_cursor_t cursor, int flags) error = hammer_cursor_toroot(cursor); if (error) goto done; - error = hammer_cursor_up(cursor); + error = hammer_cursor_up(cursor, 0); if (error) goto done; cluster = cursor->node->cluster; @@ -578,7 +576,7 @@ btree_search(hammer_cursor_t cursor, int flags) */ while (hammer_btree_cmp(&cursor->key_beg, cursor->left_bound) < 0 || hammer_btree_cmp(&cursor->key_beg, cursor->right_bound) >= 0) { - error = hammer_cursor_up(cursor); + error = hammer_cursor_up(cursor, 0); if (error) goto done; } @@ -607,7 +605,7 @@ btree_search(hammer_cursor_t cursor, int flags) break; if (cursor->parent->ondisk->count != HAMMER_BTREE_INT_ELMS) break; - error = hammer_cursor_up(cursor); + error = hammer_cursor_up(cursor, 0); /* cluster and node are now may become stale */ if (error) goto done; @@ -635,7 +633,7 @@ btree_search(hammer_cursor_t cursor, int flags) if (cursor->parent == NULL) break; KKASSERT(cursor->node->ondisk->count != 0); - error = hammer_cursor_up(cursor); + error = hammer_cursor_up(cursor, 0); /* cluster and node are now may become stale */ if (error) goto done; @@ -886,6 +884,7 @@ btree_split_internal(hammer_cursor_t cursor) made_root = 0; parent = cursor->parent; parent_index = cursor->parent_index; + KKASSERT(parent->cluster == node->cluster); } /* @@ -905,7 +904,7 @@ btree_split_internal(hammer_cursor_t cursor) if (new_node == NULL) { if (made_root) { hammer_unlock(&parent->lock); - hammer_free_btree(node->cluster, parent->node_offset); + hammer_free_btree(parent->cluster, parent->node_offset); hammer_rel_node(parent); } return(error); @@ -1072,6 +1071,7 @@ btree_split_leaf(hammer_cursor_t cursor) made_root = 0; parent = cursor->parent; parent_index = cursor->parent_index; + KKASSERT(parent->cluster == leaf->cluster); } /* @@ -1090,7 +1090,7 @@ btree_split_leaf(hammer_cursor_t cursor) if (new_leaf == NULL) { if (made_root) { hammer_unlock(&parent->lock); - hammer_free_btree(leaf->cluster, parent->node_offset); + hammer_free_btree(parent->cluster, parent->node_offset); hammer_rel_node(parent); } return(error); @@ -1188,79 +1188,131 @@ btree_split_leaf(hammer_cursor_t cursor) } /* - * Remove the element at (node, index). If the internal node would become - * empty passively recurse up the tree. + * Attempt to remove the empty B-Tree node at (cursor->node). Returns 0 + * on success, EAGAIN if we could not acquire the necessary locks, or some + * other error. * - * A locked internal node is passed to this function, the node remains - * locked on return. Leaf nodes cannot be passed to this function. + * On return the cursor may end up pointing at an internal node, suitable + * for further iteration but not for insertion or deletion. * - * Returns EAGAIN if we were unable to acquire the needed locks. The caller - * does not deal with the empty leaf until determines whether this recursion - * has succeeded or not. + * cursor->node may be an internal node or a leaf node. */ int -btree_remove(hammer_node_t node, int index) +btree_remove(hammer_cursor_t cursor) { hammer_node_ondisk_t ondisk; + hammer_btree_elm_t elm; + hammer_node_t save; + hammer_node_t node; hammer_node_t parent; int error; - - ondisk = node->ondisk; - KKASSERT(ondisk->count > 0); + int i; /* - * Remove the element, shifting remaining elements left one. - * Note that our move must include the right-boundary element. + * If we are at the root of the root cluster there is nothing to + * remove, but an internal node at the root of a cluster is not + * allowed to be empty so convert it to a leaf node. */ - if (ondisk->count != 1) { - bcopy(&ondisk->elms[index+1], &ondisk->elms[index], - (ondisk->count - index) * sizeof(ondisk->elms[0])); - --ondisk->count; - hammer_modify_node(node); + if (cursor->parent == NULL) { + ondisk = cursor->node->ondisk; + KKASSERT(ondisk->parent == 0); + ondisk->type = HAMMER_BTREE_TYPE_LEAF; + ondisk->count = 0; + hammer_modify_node(cursor->node); + kprintf("EMPTY ROOT OF ROOT CLUSTER -> LEAF\n"); return(0); } /* - * Internal nodes cannot drop to 0 elements, so remove the node - * from ITS parent. If the node is the root node, convert it to - * an empty leaf node (which can drop to 0 elements). + * Retain a reference to cursor->node, ex-lock again (2 locks now) + * so we do not lose the lock when we cursor around. */ - if (ondisk->parent == 0) { - ondisk->count = 0; - ondisk->type = HAMMER_BTREE_TYPE_LEAF; - hammer_modify_node(node); - return(0); - } + save = cursor->node; + hammer_ref_node(save); + hammer_lock_ex(&save->lock); /* - * Try to remove the node from its parent. Return EAGAIN if we - * cannot. + * We need to be able to lock the parent of the parent. Do this + * non-blocking and return EAGAIN if the lock cannot be acquired. + * non-blocking is required in order to avoid a deadlock. + * + * After we cursor up, parent is moved to node and the new parent + * is the parent of the parent. */ - parent = hammer_get_node(node->cluster, ondisk->parent, &error); - if (hammer_lock_ex_try(&parent->lock)) { - hammer_rel_node(parent); - return(EAGAIN); + error = hammer_cursor_up(cursor, 1); + if (error) { + kprintf("BTREE_REMOVE: Cannot lock parent\n"); + hammer_unlock(&save->lock); + hammer_rel_node(save); + return(error); } - ondisk = parent->ondisk; - for (index = 0; index < ondisk->count; ++index) { - if (ondisk->elms[index].internal.subtree_offset == - node->node_offset) { - break; + + /* + * At this point we want to remove the element at (node, index), + * which is now the (original) parent pointing to the saved node. + * Removing the element allows us to then free the node it was + * pointing to. + * + * However, an internal node is not allowed to have 0 elements, so + * if the count would drop to 0 we have to recurse. It is possible + * for the recursion to fail. + * + * NOTE: The cursor is in an indeterminant position after recursing, + * but will still be suitable for an iteration. + */ + node = cursor->node; + KKASSERT(node->ondisk->count > 0); + if (node->ondisk->count == 1) { + error = btree_remove(cursor); + if (error == 0) { + kprintf("BTREE_REMOVE: Successful!\n"); + hammer_flush_node(save); + hammer_free_btree(save->cluster, save->node_offset); + } else { + kprintf("BTREE_REMOVE: Recursion failed %d\n", error); } + hammer_unlock(&save->lock); + hammer_rel_node(save); + return(error); } - if (index == ondisk->count) { - kprintf("btree_remove: lost parent linkage to node\n"); - error = EIO; - } else { - error = btree_remove(parent, index); - if (error == 0) { - hammer_free_btree(node->cluster, node->node_offset); - /* NOTE: node can be reallocated at any time now */ + + /* + * Remove the element at (node, index) and adjust the parent's + * subtree_count. + */ + kprintf("BTREE_REMOVE: Removing element %d\n", cursor->index); + ondisk = node->ondisk; + i = cursor->index; + bcopy(&ondisk->elms[i+1], &ondisk->elms[i], + (ondisk->count - i) * sizeof(ondisk->elms[0])); + --ondisk->count; + hammer_modify_node(cursor->node); + + /* + * Adjust the parent-parent's (now parent) reference to the parent + * (now node). + */ + if ((parent = cursor->parent) != NULL) { + elm = &parent->ondisk->elms[cursor->parent_index]; + if (elm->internal.subtree_count != ondisk->count) { + elm->internal.subtree_count = ondisk->count; + hammer_modify_node(parent); + } + if (elm->subtree_type != HAMMER_BTREE_TYPE_CLUSTER && + elm->subtree_type != ondisk->type) { + elm->subtree_type = ondisk->type; + hammer_modify_node(parent); } } - hammer_unlock(&parent->lock); - hammer_rel_node(parent); - return (error); + + /* + * Free the saved node. + */ + hammer_flush_node(save); + hammer_free_btree(save->cluster, save->node_offset); + hammer_unlock(&save->lock); + hammer_rel_node(save); + return(0); } /* diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c index cacb0b8e51..a0b4f31622 100644 --- a/sys/vfs/hammer/hammer_cursor.c +++ b/sys/vfs/hammer/hammer_cursor.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.4 2007/11/26 21:38:37 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.5 2007/11/30 00:16:56 dillon Exp $ */ /* @@ -132,6 +132,63 @@ hammer_done_cursor(hammer_cursor_t cursor) cursor->right_bound = NULL; } +/* + * Acquire the parent B-Tree node of the specified node, returning a + * referenced but unlocked node. NULL can be returned with *errorp == 0 + * if node is the root node of the root cluster. + */ +static +hammer_node_t +hammer_get_parent_node(hammer_node_t node, int *errorp) +{ + hammer_cluster_t cluster; + hammer_node_t parent; + + cluster = node->cluster; + if (node->ondisk->parent) { + /* + * Local parent + */ + parent = hammer_get_node(cluster, node->ondisk->parent, errorp); + } else if (cluster->ondisk->clu_btree_parent_vol_no >= 0) { + /* + * At cluster root, locate node in parent cluster + */ + hammer_cluster_ondisk_t ondisk; + hammer_cluster_t pcluster; + hammer_volume_t pvolume; + int32_t clu_no; + int32_t vol_no; + + ondisk = cluster->ondisk; + vol_no = ondisk->clu_btree_parent_vol_no; + clu_no = ondisk->clu_btree_parent_clu_no; + + /* + * Acquire the node from (volume, cluster, offset) + */ + pvolume = hammer_get_volume(cluster->volume->hmp, vol_no, + errorp); + if (*errorp) + return (NULL); + pcluster = hammer_get_cluster(pvolume, clu_no, errorp, 0); + hammer_rel_volume(pvolume, 0); + if (*errorp) + return (NULL); + parent = hammer_get_node(pcluster, + ondisk->clu_btree_parent_offset, + errorp); + hammer_rel_cluster(pcluster, 0); + } else { + /* + * At root of root cluster, there is no parent. + */ + parent = NULL; + *errorp = 0; + } + return(parent); +} + /* * Load the parent of cursor->node into cursor->parent. There are several * cases. (1) The parent is in the current cluster. (2) We are at the @@ -266,16 +323,37 @@ hammer_load_cursor_parent_cluster(hammer_cursor_t cursor) return(0); } - /* * Cursor up to our parent node. Return ENOENT if we are at the root of * the root cluster. + * + * If doing a nonblocking cursor-up and we are unable to acquire the lock, + * the cursor remains unchanged. */ int -hammer_cursor_up(hammer_cursor_t cursor) +hammer_cursor_up(hammer_cursor_t cursor, int nonblock) { + hammer_node_t save; int error; + /* + * If asked to do this non-blocking acquire a lock on the parent + * now, before we mess with the cursor. + */ + if (nonblock) { + save = hammer_get_parent_node(cursor->parent, &error); + if (error) + return(error); + if (save) { + if (hammer_lock_ex_try(&save->lock) != 0) { + hammer_rel_node(save); + return(EAGAIN); + } + } + } else { + save = NULL; + } + /* * Set the node to its parent. If the parent is NULL we are at * the root of the root cluster and return ENOENT. @@ -292,6 +370,10 @@ hammer_cursor_up(hammer_cursor_t cursor) } else { error = hammer_load_cursor_parent(cursor); } + if (save) { + hammer_unlock(&save->lock); + hammer_rel_node(save); + } return(error); } @@ -314,7 +396,7 @@ hammer_cursor_toroot(hammer_cursor_t cursor) * Parent is root of cluster? */ if (cursor->parent->ondisk->parent == 0) - return (hammer_cursor_up(cursor)); + return (hammer_cursor_up(cursor, 0)); /* * Ok, reload the cursor with the root of the cluster, then diff --git a/sys/vfs/hammer/hammer_cursor.h b/sys/vfs/hammer/hammer_cursor.h index 28b6928687..29a36afb7a 100644 --- a/sys/vfs/hammer/hammer_cursor.h +++ b/sys/vfs/hammer/hammer_cursor.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.3 2007/11/20 22:55:40 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.4 2007/11/30 00:16:56 dillon Exp $ */ /* @@ -108,4 +108,5 @@ typedef struct hammer_cursor *hammer_cursor_t; #define HAMMER_CURSOR_ATEMEM 0x0200 #define HAMMER_CURSOR_DISKEOF 0x0400 #define HAMMER_CURSOR_MEMEOF 0x0800 +#define HAMMER_CURSOR_DELBTREE 0x1000 /* ip_delete from b-tree */ diff --git a/sys/vfs/hammer/hammer_disk.h b/sys/vfs/hammer/hammer_disk.h index dd5309951a..f688dbe99c 100644 --- a/sys/vfs/hammer/hammer_disk.h +++ b/sys/vfs/hammer/hammer_disk.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.10 2007/11/27 07:48:52 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.11 2007/11/30 00:16:56 dillon Exp $ */ #ifndef _SYS_UUID_H_ @@ -547,6 +547,8 @@ typedef union hammer_record_ondisk *hammer_record_ondisk_t; ((HAMMER_BUFSIZE - sizeof(struct hammer_fsbuf_head) - 32) / \ sizeof(union hammer_record_ondisk)) +#define HAMMER_RECORD_SIZE (64+32) + struct hammer_fsbuf_recs { struct hammer_fsbuf_head head; char unused[32]; diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index 336f5a8fb9..0249a7570e 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.9 2007/11/27 07:48:52 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.10 2007/11/30 00:16:56 dillon Exp $ */ #include "hammer.h" @@ -326,10 +326,11 @@ hammer_update_inode(hammer_inode_t ip) /* * Locate the record on-disk and mark it as deleted. Both the B-Tree - * node and the record must be marked deleted. Adjusting delete_tid - * does not effect the element position in the B-Tree. + * node and the record must be marked deleted. The record may or + * may not be physically deleted, depending on the retention policy. * - * If the inode is already deleted on-disk we have nothing to do. + * If the inode has already been deleted on-disk we have nothing + * to do. * * XXX Update the inode record and data in-place if the retention * policy allows it. @@ -350,11 +351,9 @@ hammer_update_inode(hammer_inode_t ip) error = hammer_btree_lookup(&cursor); if (error == 0) { - cursor.record->base.base.delete_tid = ip->last_tid; - cursor.node->ondisk->elms[cursor.index].leaf.base.delete_tid = ip->last_tid; - hammer_modify_buffer(cursor.record_buffer); - hammer_modify_node(cursor.node); - ip->flags |= HAMMER_INODE_DELONDISK; + error = hammer_ip_delete_record(&cursor, ip->last_tid); + if (error == 0) + ip->flags |= HAMMER_INODE_DELONDISK; } hammer_cache_node(cursor.node, &ip->cache); hammer_done_cursor(&cursor); diff --git a/sys/vfs/hammer/hammer_mount.h b/sys/vfs/hammer/hammer_mount.h index a51f9700bd..51c82f074e 100644 --- a/sys/vfs/hammer/hammer_mount.h +++ b/sys/vfs/hammer/hammer_mount.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_mount.h,v 1.2 2007/11/27 07:48:52 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_mount.h,v 1.3 2007/11/30 00:16:56 dillon Exp $ */ #ifndef _SYS_TYPES_H_ @@ -48,7 +48,11 @@ struct hammer_mount_info { const char **volumes; /* array of pointers to device names */ int nvolumes; /* number of devices */ + int hflags; /* extended hammer mount flags */ + int unused01; u_int64_t asof; /* asof - HAMMER_MAX_TID is current */ - u_int64_t reserved[16]; + u_int64_t reserved[15]; }; +#define HMNT_NOHISTORY 0x00000001 + diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c index 306284bd8b..0b6dbddad6 100644 --- a/sys/vfs/hammer/hammer_object.c +++ b/sys/vfs/hammer/hammer_object.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.7 2007/11/27 07:48:52 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.8 2007/11/30 00:16:56 dillon Exp $ */ #include "hammer.h" @@ -376,30 +376,7 @@ hammer_ip_del_directory(struct hammer_transaction *trans, { int error; - if (cursor->record == &cursor->iprec->rec) { - /* - * The directory entry was in-memory, just scrap the - * record. - */ - kprintf("del directory mem record\n"); - hammer_free_mem_record(cursor->iprec); - error = 0; - } else { - /* - * The directory entry was on-disk, mark the record and - * B-Tree entry as deleted. The B-Tree entry does not - * have to be reindexed because a 'current' delete transid - * will wind up in the same position as the live record. - */ - KKASSERT(ip->flags & HAMMER_INODE_ONDISK); - error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD); - if (error == 0) { - cursor->node->ondisk->elms[cursor->index].base.delete_tid = trans->tid; - cursor->record->base.base.delete_tid = trans->tid; - hammer_modify_node(cursor->node); - hammer_modify_buffer(cursor->record_buffer); - } - } + error = hammer_ip_delete_record(cursor, trans->tid); /* * One less link. The file may still be open in the OS even after @@ -687,6 +664,7 @@ hammer_ip_first(hammer_cursor_t cursor, struct hammer_inode *ip) /* * Clean up fields and setup for merged scan */ + cursor->flags &= ~HAMMER_CURSOR_DELBTREE; cursor->flags |= HAMMER_CURSOR_ATEDISK | HAMMER_CURSOR_ATEMEM; cursor->flags |= HAMMER_CURSOR_DISKEOF | HAMMER_CURSOR_MEMEOF; if (cursor->iprec) @@ -754,15 +732,22 @@ hammer_ip_next(hammer_cursor_t cursor) * Load the current on-disk and in-memory record. If we ate any * records we have to get the next one. * + * If we deleted the last on-disk record we had scanned ATEDISK will + * be clear and DELBTREE will be set, forcing a call to iterate. The + * fact that ATEDISK is clear causes iterate to re-test the 'current' + * element. If ATEDISK is set, iterate will skip the 'current' + * element. + * * Get the next on-disk record */ - if (cursor->flags & HAMMER_CURSOR_ATEDISK) { + if (cursor->flags & (HAMMER_CURSOR_ATEDISK|HAMMER_CURSOR_DELBTREE)) { if ((cursor->flags & HAMMER_CURSOR_DISKEOF) == 0) { error = hammer_btree_iterate(cursor); if (error == 0) cursor->flags &= ~HAMMER_CURSOR_ATEDISK; else - cursor->flags |= HAMMER_CURSOR_DISKEOF; + cursor->flags |= HAMMER_CURSOR_DISKEOF | + HAMMER_CURSOR_ATEDISK; } } @@ -891,21 +876,26 @@ hammer_ip_delete_range(hammer_transaction_t trans, hammer_inode_t ip, cursor.key_beg.delete_tid = 0; cursor.key_beg.obj_type = 0; - /* - * The key in the B-Tree is (base+bytes), so the first possible - * matching key is ran_beg + 1. - */ - cursor.key_beg.key = ran_beg + 1; cursor.key_end = cursor.key_beg; if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) { + cursor.key_beg.key = ran_beg; cursor.key_beg.rec_type = HAMMER_RECTYPE_DB; cursor.key_end.rec_type = HAMMER_RECTYPE_DB; cursor.key_end.key = ran_end; isregfile = 0; } else { + /* + * The key in the B-Tree is (base+bytes), so the first possible + * matching key is ran_beg + 1. + */ + int64_t tmp64; + + cursor.key_beg.key = ran_beg + 1; cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; cursor.key_end.rec_type = HAMMER_RECTYPE_DATA; - if (ran_end + MAXPHYS + 1 < ran_end) + + tmp64 = ran_end + MAXPHYS + 1; /* work around GCC-4 bug */ + if (tmp64 < ran_end) cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL; else cursor.key_end.key = ran_end + MAXPHYS + 1; @@ -929,7 +919,9 @@ hammer_ip_delete_range(hammer_transaction_t trans, hammer_inode_t ip, * of the last byte of the record (base + len - 1), NOT the * base offset. */ + kprintf("delete_range rec_type %02x\n", base->rec_type); if (base->rec_type == HAMMER_RECTYPE_DATA) { + kprintf("delete_range loop key %016llx\n", base->key - rec->base.data_len); off = base->key - rec->base.data_len; /* * Check the left edge case. We currently do not @@ -948,8 +940,12 @@ hammer_ip_delete_range(hammer_transaction_t trans, hammer_inode_t ip, * base->key is exclusive of the right edge while * ran_end is inclusive of the right edge. The * (key - data_len) left boundary is inclusive. + * + * XXX theory-check this test at some point, are + * we missing a + 1 somewhere? Note that ran_end + * could overflow. */ - if (base->key > ran_end + 1) { + if (base->key > ran_end) { if (base->key - rec->base.data_len > ran_end) { kprintf("right edge OOB\n"); break; @@ -959,17 +955,15 @@ hammer_ip_delete_range(hammer_transaction_t trans, hammer_inode_t ip, } /* - * Mark the record and B-Tree entry as deleted + * Mark the record and B-Tree entry as deleted. This will + * also physically delete the B-Tree entry, record, and + * data if the retention policy dictates. The function + * will set HAMMER_CURSOR_DELBTREE which hammer_ip_next() + * uses to perform a fixup. */ - if (cursor.record == &cursor.iprec->rec) { - hammer_free_mem_record(cursor.iprec); - } else { - cursor.node->ondisk-> - elms[cursor.index].base.delete_tid = trans->tid; - cursor.record->base.base.delete_tid = trans->tid; - hammer_modify_node(cursor.node); - hammer_modify_buffer(cursor.record_buffer); - } + error = hammer_ip_delete_record(&cursor, trans->tid); + if (error) + break; error = hammer_ip_next(&cursor); } hammer_done_cursor(&cursor); @@ -978,3 +972,81 @@ hammer_ip_delete_range(hammer_transaction_t trans, hammer_inode_t ip, return(error); } +/* + * Delete the record at the current cursor + */ +int +hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid) +{ + hammer_btree_elm_t elm; + hammer_mount_t hmp; + int error; + + /* + * In-memory (unsynchronized) records can simply be freed. + */ + cursor->flags &= ~HAMMER_CURSOR_DELBTREE; + if (cursor->record == &cursor->iprec->rec) { + hammer_free_mem_record(cursor->iprec); + return(0); + } + + /* + * On-disk records are marked as deleted by updating their delete_tid. + */ + error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD); + elm = NULL; + hmp = cursor->node->cluster->volume->hmp; + + if (error == 0) { + elm = &cursor->node->ondisk->elms[cursor->index]; + cursor->record->base.base.delete_tid = tid; + elm->leaf.base.delete_tid = tid; + hammer_modify_buffer(cursor->record_buffer); + hammer_modify_node(cursor->node); + } + + /* + * If we were mounted with the nohistory option, we physically + * delete the record. + */ + if (error == 0 && (hmp->hflags & HMNT_NOHISTORY)) { + int32_t rec_offset; + int32_t data_offset; + int32_t data_len; + hammer_cluster_t cluster; + + rec_offset = elm->leaf.rec_offset; + data_offset = elm->leaf.data_offset; + data_len = elm->leaf.data_len; + kprintf("hammer_ip_delete_record: %08x %08x/%d\n", + rec_offset, data_offset, data_len); + cluster = cursor->node->cluster; + hammer_ref_cluster(cluster); + + error = hammer_btree_delete(cursor); + if (error == 0) { + /* + * This forces a fixup for the iteration because + * the cursor is now either sitting at the 'next' + * element or sitting at the end of a leaf. + */ + if ((cursor->flags & HAMMER_CURSOR_DISKEOF) == 0) { + cursor->flags |= HAMMER_CURSOR_DELBTREE; + cursor->flags &= ~HAMMER_CURSOR_ATEDISK; + } + hammer_free_record(cluster, rec_offset); + if (data_offset - rec_offset < 0 || + data_offset - rec_offset >= HAMMER_RECORD_SIZE) { + hammer_free_data(cluster, data_offset,data_len); + } + } + hammer_rel_cluster(cluster, 0); + if (error) { + kprintf("hammer_ip_delete_record: unable to physically delete the record!\n"); + error = 0; + } + } + return(error); +} + diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c index 6f66efccc7..51f68b7ec6 100644 --- a/sys/vfs/hammer/hammer_ondisk.c +++ b/sys/vfs/hammer/hammer_ondisk.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.8 2007/11/27 07:48:52 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.9 2007/11/30 00:16:56 dillon Exp $ */ /* * Manage HAMMER's on-disk structures. These routines are primarily @@ -1579,24 +1579,6 @@ hammer_alloc_record(hammer_cluster_t cluster, return(item); } -/* - * Free HAMMER elements based on either a hammer_buffer and element pointer - * or a cluster-relative byte offset. - */ -void -hammer_free_btree_ptr(hammer_buffer_t buffer, hammer_node_ondisk_t node) -{ - int32_t elm_no; - hammer_alist_t live; - - elm_no = node - &buffer->ondisk->btree.nodes[0]; - KKASSERT(elm_no >= 0 && elm_no < HAMMER_BTREE_NODES); - elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS; - live = &buffer->cluster->alist_btree; - hammer_alist_free(live, elm_no, 1); - hammer_modify_cluster(buffer->cluster); -} - void hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes) { diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c index de325fde82..2d7b82cd92 100644 --- a/sys/vfs/hammer/hammer_vfsops.c +++ b/sys/vfs/hammer/hammer_vfsops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.7 2007/11/27 07:48:52 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.8 2007/11/30 00:16:56 dillon Exp $ */ #include @@ -103,19 +103,41 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, /* * Interal mount data structure */ - hmp = kmalloc(sizeof(*hmp), M_HAMMER, M_WAITOK | M_ZERO); - mp->mnt_data = (qaddr_t)hmp; - hmp->mp = mp; + if (mp->mnt_flag & MNT_UPDATE) { + hmp = (void *)mp->mnt_data; + KKASSERT(hmp != NULL); + } else { + hmp = kmalloc(sizeof(*hmp), M_HAMMER, M_WAITOK | M_ZERO); + mp->mnt_data = (qaddr_t)hmp; + hmp->mp = mp; + hmp->zbuf = kmalloc(HAMMER_BUFSIZE, M_HAMMER, M_WAITOK|M_ZERO); + hmp->namekey_iterator = mycpu->gd_time_seconds; + } + hmp->hflags = info.hflags; if (info.asof) { mp->mnt_flag |= MNT_RDONLY; hmp->asof = info.asof; } else { hmp->asof = HAMMER_MAX_TID; } - hmp->zbuf = kmalloc(HAMMER_BUFSIZE, M_HAMMER, M_WAITOK | M_ZERO); - hmp->namekey_iterator = mycpu->gd_time_seconds; + + /* + * Re-open read-write if originally read-only, or vise-versa XXX + */ + if (mp->mnt_flag & MNT_UPDATE) { + if (hmp->ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { + kprintf("HAMMER read-write -> read-only XXX\n"); + hmp->ronly = 1; + } else if (hmp->ronly && (mp->mnt_flag & MNT_RDONLY) == 0) { + kprintf("HAMMER read-only -> read-write XXX\n"); + hmp->ronly = 0; + } + return(0); + } + RB_INIT(&hmp->rb_vols_root); RB_INIT(&hmp->rb_inos_root); + hmp->ronly = ((mp->mnt_flag & MNT_RDONLY) != 0); /* * Load volumes diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index e79898778a..1ae9964961 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.8 2007/11/27 07:48:52 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.9 2007/11/30 00:16:56 dillon Exp $ */ #include @@ -1213,6 +1213,7 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap) struct buf *bp; int64_t rec_offset; int64_t ran_end; + int64_t tmp64; int error; int boff; int roff; @@ -1243,7 +1244,8 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap) ran_end = bio->bio_offset + bp->b_bufsize; cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; cursor.key_end.rec_type = HAMMER_RECTYPE_DATA; - if (ran_end + MAXPHYS + 1 < ran_end) + tmp64 = ran_end + MAXPHYS + 1; /* work-around GCC-4 bug */ + if (tmp64 < ran_end) cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL; else cursor.key_end.key = ran_end + MAXPHYS + 1; -- 2.41.0