From: Matthew Dillon Date: Mon, 7 Jul 2008 03:49:51 +0000 (+0000) Subject: HAMMER 60F/Many: Mirroring X-Git-Tag: v2.0.1~211 X-Git-Url: https://gitweb.dragonflybsd.org/dragonfly.git/commitdiff_plain/a56cb01252f6dac3d110d607b5deff341d3c5910 HAMMER 60F/Many: Mirroring * Properly propagate mirror_tid when splitting B-Tree nodes. * Arrange for the fsid reported as st_dev to be based on the shared_uuid, so the same value is reported on the slaves as on the master. Munge the fsid based on the asof timestamp to try to make snapshots look different from current filesystems, so programs like 'diff' don't get confused. This allows one to run (tar cf - directory@@ | md5) on a slave and get the same answer as he got on the master. Note, however, that the tar cannot include the root directory of the PFS or master because the root directory inode is not mirrored (only files and directories underneath it), and will mess up the MD5. * Properly update vol0_next_tid when writing to a mirroring slave. * Attempt to regenerate the inode for the root PFS when doing a name lookup based on the current latest snapshot. This may need more work. --- diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index ed5f47fbdf..ea2f7219e0 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.104 2008/07/07 00:24:31 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.105 2008/07/07 03:49:50 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS @@ -178,6 +178,7 @@ struct hammer_pseudofs_inmem { RB_ENTRY(hammer_pseudofs_inmem) rb_node; struct hammer_lock lock; u_int32_t localization; + udev_t fsid_udev; struct hammer_pseudofs_data pfsd; }; @@ -677,7 +678,6 @@ struct hammer_mount { u_int check_interrupt; uuid_t fsid; - udev_t fsid_udev; struct hammer_io_list volu_list; /* dirty undo buffers */ struct hammer_io_list undo_list; /* dirty undo buffers */ struct hammer_io_list data_list; /* dirty data buffers */ @@ -1091,6 +1091,8 @@ int hammer_crc_test_volume(hammer_volume_ondisk_t ondisk); int hammer_crc_test_btree(hammer_node_ondisk_t ondisk); int hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf); void hkprintf(const char *ctl, ...); +udev_t hammer_fsid_to_udev(uuid_t *uuid); + int hammer_blocksize(int64_t file_offset); int64_t hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2); diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c index 25ee834b22..cd6ff2b927 100644 --- a/sys/vfs/hammer/hammer_btree.c +++ b/sys/vfs/hammer/hammer_btree.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.64 2008/07/07 00:24:31 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.65 2008/07/07 03:49:50 dillon Exp $ */ /* @@ -1389,6 +1389,7 @@ btree_split_internal(hammer_cursor_t cursor) ondisk = parent->ondisk; ondisk->count = 1; ondisk->parent = 0; + ondisk->mirror_tid = node->ondisk->mirror_tid; ondisk->type = HAMMER_BTREE_TYPE_INTERNAL; ondisk->elms[0].base = hmp->root_btree_beg; ondisk->elms[0].base.btype = node->ondisk->type; @@ -1443,6 +1444,7 @@ btree_split_internal(hammer_cursor_t cursor) new_node->ondisk->count = ondisk->count - split; new_node->ondisk->parent = parent->node_offset; new_node->ondisk->type = HAMMER_BTREE_TYPE_INTERNAL; + new_node->ondisk->mirror_tid = ondisk->mirror_tid; KKASSERT(ondisk->type == new_node->ondisk->type); hammer_cursor_split_node(node, new_node, split); @@ -1470,6 +1472,7 @@ btree_split_internal(hammer_cursor_t cursor) parent_elm->internal.base = elm->base; /* separator P */ parent_elm->internal.base.btype = new_node->ondisk->type; parent_elm->internal.subtree_offset = new_node->node_offset; + parent_elm->internal.mirror_tid = new_node->ondisk->mirror_tid; ++ondisk->count; hammer_modify_node_done(parent); hammer_cursor_inserted_element(parent, parent_index + 1); @@ -1622,6 +1625,7 @@ btree_split_leaf(hammer_cursor_t cursor) ondisk = parent->ondisk; ondisk->count = 1; ondisk->parent = 0; + ondisk->mirror_tid = leaf->ondisk->mirror_tid; ondisk->type = HAMMER_BTREE_TYPE_INTERNAL; ondisk->elms[0].base = hmp->root_btree_beg; ondisk->elms[0].base.btype = leaf->ondisk->type; @@ -1672,6 +1676,7 @@ btree_split_leaf(hammer_cursor_t cursor) new_leaf->ondisk->count = ondisk->count - split; new_leaf->ondisk->parent = parent->node_offset; new_leaf->ondisk->type = HAMMER_BTREE_TYPE_LEAF; + new_leaf->ondisk->mirror_tid = ondisk->mirror_tid; KKASSERT(ondisk->type == new_leaf->ondisk->type); hammer_modify_node_done(new_leaf); hammer_cursor_split_node(leaf, new_leaf, split); 
@@ -1703,6 +1708,7 @@ btree_split_leaf(hammer_cursor_t cursor) hammer_make_separator(&elm[-1].base, &elm[0].base, &parent_elm->base); parent_elm->internal.base.btype = new_leaf->ondisk->type; parent_elm->internal.subtree_offset = new_leaf->node_offset; + parent_elm->internal.mirror_tid = new_leaf->ondisk->mirror_tid; mid_boundary = &parent_elm->base; ++ondisk->count; hammer_modify_node_done(parent); @@ -2155,6 +2161,7 @@ hammer_btree_do_propagation(hammer_cursor_t cursor, hammer_inode_t ip, * re-locked. */ mirror_tid = cursor->node->ondisk->mirror_tid; + KKASSERT(mirror_tid != 0); ncursor = kmalloc(sizeof(*ncursor), M_HAMMER, M_WAITOK | M_ZERO); hammer_dup_cursor(cursor, ncursor); error = hammer_btree_mirror_propagate(ncursor, mirror_tid); diff --git a/sys/vfs/hammer/hammer_disk.h b/sys/vfs/hammer/hammer_disk.h index fc1696ad57..e927dc4576 100644 --- a/sys/vfs/hammer/hammer_disk.h +++ b/sys/vfs/hammer/hammer_disk.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.46 2008/07/07 00:24:31 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.47 2008/07/07 03:49:50 dillon Exp $ */ #ifndef VFS_HAMMER_DISK_H_ @@ -663,11 +663,9 @@ struct hammer_symlink_data { * the node to be used as a mirroring master or slave. * * When operating as a slave CD's into the node automatically become read-only - * and as-of sync_beg_tid. Synchronization runs must complete to - * sync_end_tid before it can be cycled into sync_beg_tid. No pruning can - * occur beyond sync_beg_tid. + * and as-of sync_end_tid. * - * When operating as a master the read PFSD info sets sync_beg_tid to + * When operating as a master the read PFSD info sets sync_end_tid to * the most recently flushed TID. 
* * sync_low_tid is not yet used but will represent the highest pruning @@ -675,8 +673,8 @@ struct hammer_symlink_data { */ struct hammer_pseudofs_data { hammer_tid_t sync_low_tid; /* full history beyond this point */ - hammer_tid_t sync_beg_tid; /* last completed sync (snapshot pt) */ - hammer_tid_t sync_end_tid; /* currently running sync end pt */ + hammer_tid_t sync_beg_tid; /* earliest tid w/ full history avail */ + hammer_tid_t sync_end_tid; /* current synchronization point */ u_int64_t sync_beg_ts; /* real-time of last completed sync */ u_int64_t sync_end_ts; /* initiation of current sync cycle */ uuid_t shared_uuid; /* shared uuid (match required) */ diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index d4b42b8634..c0a53d897b 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.94 2008/07/07 00:24:31 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.95 2008/07/07 03:49:50 dillon Exp $ */ #include "hammer.h" @@ -670,6 +670,8 @@ retry: pfsm = kmalloc(sizeof(*pfsm), M_HAMMER, M_WAITOK | M_ZERO); pfsm->localization = ip->obj_localization; + pfsm->pfsd.unique_uuid = trans->rootvol->ondisk->vol_fsid; + pfsm->pfsd.shared_uuid = pfsm->pfsd.unique_uuid; hammer_init_cursor(trans, &cursor, NULL, NULL); cursor.key_beg.localization = ip->obj_localization + @@ -699,6 +701,7 @@ retry: hammer_done_cursor(&cursor); if (error == 0) { + pfsm->fsid_udev = hammer_fsid_to_udev(&pfsm->pfsd.shared_uuid); hammer_ref(&pfsm->lock); if (RB_INSERT(hammer_pfs_rb_tree, &hmp->rb_pfsm_root, pfsm)) { kfree(pfsm, M_HAMMER); @@ -713,8 +716,6 @@ retry: if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) { ip->flags |= HAMMER_INODE_RO; ip->flags |= HAMMER_INODE_PFSD; - if (ip->obj_asof > pfsm->pfsd.sync_beg_tid) - ip->obj_asof = pfsm->pfsd.sync_beg_tid; } else if 
(pfsm->pfsd.master_id >= 0) { ip->flags |= HAMMER_INODE_PFSD; } @@ -739,6 +740,7 @@ hammer_save_pseudofs(hammer_transaction_t trans, hammer_inode_t ip) retry: pfsm = ip->pfsm; + pfsm->fsid_udev = hammer_fsid_to_udev(&pfsm->pfsd.shared_uuid); hammer_init_cursor(trans, &cursor, &ip->cache[1], ip); cursor.key_beg.localization = ip->obj_localization + HAMMER_LOCALIZE_MISC; diff --git a/sys/vfs/hammer/hammer_mirror.c b/sys/vfs/hammer/hammer_mirror.c index a9dbe9effd..8cdd83ecbc 100644 --- a/sys/vfs/hammer/hammer_mirror.c +++ b/sys/vfs/hammer/hammer_mirror.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.7 2008/07/07 00:24:31 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.8 2008/07/07 03:49:51 dillon Exp $ */ /* * HAMMER mirroring ioctls - serialize and deserialize modifications made @@ -381,6 +381,7 @@ hammer_mirror_write(hammer_cursor_t cursor, struct hammer_ioc_mrecord *mrec, hammer_transaction_t trans; hammer_buffer_t data_buffer; hammer_off_t ndata_offset; + hammer_tid_t high_tid; void *ndata; int error; int doprop; @@ -454,6 +455,20 @@ hammer_mirror_write(hammer_cursor_t cursor, struct hammer_ioc_mrecord *mrec, ++trans->hmp->rootvol->ondisk->vol0_stat_inodes; hammer_modify_volume_done(trans->rootvol); } + + /* + * vol0_next_tid must track the highest TID stored in the filesystem. + * We do not need to generate undo for this update. 
+ */ + high_tid = mrec->leaf.base.create_tid; + if (high_tid < mrec->leaf.base.delete_tid) + high_tid = mrec->leaf.base.delete_tid; + if (trans->rootvol->ondisk->vol0_next_tid < high_tid) { + hammer_modify_volume(trans, trans->rootvol, NULL, 0); + trans->rootvol->ondisk->vol0_next_tid = high_tid; + hammer_modify_volume_done(trans->rootvol); + } + if (error == 0 && doprop) hammer_btree_do_propagation(cursor, ip, &mrec->leaf); diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c index 1ce3aa7fb4..78cf32c1eb 100644 --- a/sys/vfs/hammer/hammer_ondisk.c +++ b/sys/vfs/hammer/hammer_ondisk.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.65 2008/07/05 18:59:27 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.66 2008/07/07 03:49:51 dillon Exp $ */ /* * Manage HAMMER's on-disk structures. These routines are primarily @@ -208,7 +208,6 @@ hammer_install_volume(struct hammer_mount *hmp, const char *volname) brelse(bp); bp = NULL; } - hmp->fsid_udev = dev2udev(vn_todev(volume->devvp)); hmp->mp->mnt_stat.f_blocks += ondisk->vol0_stat_bigblocks * (HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE); hmp->mp->mnt_vstat.f_blocks += ondisk->vol0_stat_bigblocks * diff --git a/sys/vfs/hammer/hammer_subs.c b/sys/vfs/hammer/hammer_subs.c index e1abe632bb..c1dbb3d5bd 100644 --- a/sys/vfs/hammer/hammer_subs.c +++ b/sys/vfs/hammer/hammer_subs.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.30 2008/07/05 18:59:28 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.31 2008/07/07 03:49:51 dillon Exp $ */ /* * HAMMER structural locking @@ -590,3 +590,12 @@ hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2) file_offset1, file_offset2); } +udev_t +hammer_fsid_to_udev(uuid_t *uuid) +{ + u_int32_t crc; + + crc = crc32(uuid, sizeof(*uuid)); + return((udev_t)crc); +} + diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index 00183a58f1..ca1d6259d4 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.81 2008/07/07 00:24:31 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.82 2008/07/07 03:49:51 dillon Exp $ */ #include @@ -648,13 +648,23 @@ hammer_vop_getattr(struct vop_getattr_args *ap) struct hammer_inode *ip = VTOI(ap->a_vp); struct vattr *vap = ap->a_vap; - vap->va_fsid = ip->hmp->fsid_udev; - /* - * XXX munge the device if we are in a pseudo-fs, so user utilities - * do not think its the same 'filesystem'. + /* + * We want the fsid to be different when accessing a filesystem + * with different as-of's so programs like diff don't think + * the files are the same. + * + * We also want the fsid to be the same when comparing snapshots, + * or when comparing mirrors (which might be backed by different + * physical devices). HAMMER fsids are based on the PFS's + * shared_uuid field. + * + * XXX there is a chance of collision here. The va_fsid reported + * by stat is different from the more involved fsid used in the + * mount structure. 
*/ - if (ip->obj_localization) - vap->va_fsid += ip->obj_localization; + vap->va_fsid = ip->pfsm->fsid_udev ^ (u_int32_t)ip->obj_asof ^ + (u_int32_t)(ip->obj_asof >> 32); + vap->va_fileid = ip->ino_leaf.base.obj_id; vap->va_mode = ip->ino_data.mode; vap->va_nlink = ip->ino_data.nlinks; @@ -733,6 +743,7 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap) int i; int nlen; int flags; + int ispfs; int64_t obj_id; u_int32_t localization; @@ -746,6 +757,7 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap) asof = dip->obj_asof; nlen = ncp->nc_nlen; flags = dip->flags; + ispfs = 0; hammer_simple_transaction(&trans, dip->hmp); @@ -823,6 +835,13 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap) if (nlen == cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF && bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) { obj_id = cursor.data->entry.obj_id; + + /* + * Force relookups whenever a PFS root is + * accessed. + */ + if (obj_id == HAMMER_OBJID_ROOT) + ispfs = 1; localization = cursor.data->entry.localization; break; } @@ -834,6 +853,15 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap) ip = hammer_get_inode(&trans, dip, obj_id, asof, localization, flags, &error); + if (ispfs && asof > ip->pfsm->pfsd.sync_end_tid) { + asof = ip->pfsm->pfsd.sync_end_tid; + hammer_rel_inode(ip, 0); + ip = hammer_get_inode(&trans, dip, obj_id, + asof, localization, + flags, &error); + } + + if (error == 0) { error = hammer_get_vnode(ip, &vp); hammer_rel_inode(ip, 0); @@ -843,6 +871,8 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap) if (error == 0) { vn_unlock(vp); cache_setvp(ap->a_nch, vp); + if (ispfs) + cache_settimeout(ap->a_nch, 0); vrele(vp); } } else if (error == ENOENT) {