HAMMER 60F/Many: Mirroring
author Matthew Dillon <dillon@dragonflybsd.org>
Mon, 7 Jul 2008 03:49:51 +0000 (03:49 +0000)
committer Matthew Dillon <dillon@dragonflybsd.org>
Mon, 7 Jul 2008 03:49:51 +0000 (03:49 +0000)
* Properly propagate mirror_tid when splitting B-Tree nodes.

* Arrange for the fsid reported as st_dev to be based on the shared_uuid,
  so the same value is reported on the slaves as on the master.

  Munge the fsid based on the asof timestamp to try to make snapshots look
  different from current filesystems, so programs like 'diff' don't get
  confused.

  This allows one to run (tar cf - directory@@<timestamp> | md5) on a
  slave and get the same answer as on the master.  Note, however, that
  the tar cannot include the root directory of the PFS or master because
  the root directory inode is not mirrored (only the files and directories
  underneath it are), and including it will mess up the MD5.

* Properly update vol0_next_tid when writing to a mirroring slave.

* Attempt to regenerate the inode for the root PFS when doing a name lookup
  based on the current latest snapshot.  This may need more work.

sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_btree.c
sys/vfs/hammer/hammer_disk.h
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_mirror.c
sys/vfs/hammer/hammer_ondisk.c
sys/vfs/hammer/hammer_subs.c
sys/vfs/hammer/hammer_vnops.c

index ed5f47f..ea2f721 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.104 2008/07/07 00:24:31 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.105 2008/07/07 03:49:50 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -178,6 +178,7 @@ struct hammer_pseudofs_inmem {
        RB_ENTRY(hammer_pseudofs_inmem) rb_node;
        struct hammer_lock      lock;
        u_int32_t               localization;
+       udev_t                  fsid_udev;
        struct hammer_pseudofs_data pfsd;
 };
 
@@ -677,7 +678,6 @@ struct hammer_mount {
 
        u_int   check_interrupt;
        uuid_t  fsid;
-       udev_t  fsid_udev;
        struct hammer_io_list volu_list;        /* dirty undo buffers */
        struct hammer_io_list undo_list;        /* dirty undo buffers */
        struct hammer_io_list data_list;        /* dirty data buffers */
@@ -1091,6 +1091,8 @@ int hammer_crc_test_volume(hammer_volume_ondisk_t ondisk);
 int hammer_crc_test_btree(hammer_node_ondisk_t ondisk);
 int hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf);
 void hkprintf(const char *ctl, ...);
+udev_t hammer_fsid_to_udev(uuid_t *uuid);
+
 
 int hammer_blocksize(int64_t file_offset);
 int64_t hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2);
index 25ee834..cd6ff2b 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.64 2008/07/07 00:24:31 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.65 2008/07/07 03:49:50 dillon Exp $
  */
 
 /*
@@ -1389,6 +1389,7 @@ btree_split_internal(hammer_cursor_t cursor)
                ondisk = parent->ondisk;
                ondisk->count = 1;
                ondisk->parent = 0;
+               ondisk->mirror_tid = node->ondisk->mirror_tid;
                ondisk->type = HAMMER_BTREE_TYPE_INTERNAL;
                ondisk->elms[0].base = hmp->root_btree_beg;
                ondisk->elms[0].base.btype = node->ondisk->type;
@@ -1443,6 +1444,7 @@ btree_split_internal(hammer_cursor_t cursor)
        new_node->ondisk->count = ondisk->count - split;
        new_node->ondisk->parent = parent->node_offset;
        new_node->ondisk->type = HAMMER_BTREE_TYPE_INTERNAL;
+       new_node->ondisk->mirror_tid = ondisk->mirror_tid;
        KKASSERT(ondisk->type == new_node->ondisk->type);
        hammer_cursor_split_node(node, new_node, split);
 
@@ -1470,6 +1472,7 @@ btree_split_internal(hammer_cursor_t cursor)
        parent_elm->internal.base = elm->base;  /* separator P */
        parent_elm->internal.base.btype = new_node->ondisk->type;
        parent_elm->internal.subtree_offset = new_node->node_offset;
+       parent_elm->internal.mirror_tid = new_node->ondisk->mirror_tid;
        ++ondisk->count;
        hammer_modify_node_done(parent);
        hammer_cursor_inserted_element(parent, parent_index + 1);
@@ -1622,6 +1625,7 @@ btree_split_leaf(hammer_cursor_t cursor)
                ondisk = parent->ondisk;
                ondisk->count = 1;
                ondisk->parent = 0;
+               ondisk->mirror_tid = leaf->ondisk->mirror_tid;
                ondisk->type = HAMMER_BTREE_TYPE_INTERNAL;
                ondisk->elms[0].base = hmp->root_btree_beg;
                ondisk->elms[0].base.btype = leaf->ondisk->type;
@@ -1672,6 +1676,7 @@ btree_split_leaf(hammer_cursor_t cursor)
        new_leaf->ondisk->count = ondisk->count - split;
        new_leaf->ondisk->parent = parent->node_offset;
        new_leaf->ondisk->type = HAMMER_BTREE_TYPE_LEAF;
+       new_leaf->ondisk->mirror_tid = ondisk->mirror_tid;
        KKASSERT(ondisk->type == new_leaf->ondisk->type);
        hammer_modify_node_done(new_leaf);
        hammer_cursor_split_node(leaf, new_leaf, split);
@@ -1703,6 +1708,7 @@ btree_split_leaf(hammer_cursor_t cursor)
        hammer_make_separator(&elm[-1].base, &elm[0].base, &parent_elm->base);
        parent_elm->internal.base.btype = new_leaf->ondisk->type;
        parent_elm->internal.subtree_offset = new_leaf->node_offset;
+       parent_elm->internal.mirror_tid = new_leaf->ondisk->mirror_tid;
        mid_boundary = &parent_elm->base;
        ++ondisk->count;
        hammer_modify_node_done(parent);
@@ -2155,6 +2161,7 @@ hammer_btree_do_propagation(hammer_cursor_t cursor, hammer_inode_t ip,
         * re-locked.
         */
        mirror_tid = cursor->node->ondisk->mirror_tid;
+       KKASSERT(mirror_tid != 0);
        ncursor = kmalloc(sizeof(*ncursor), M_HAMMER, M_WAITOK | M_ZERO);
        hammer_dup_cursor(cursor, ncursor);
        error = hammer_btree_mirror_propagate(ncursor, mirror_tid);
index fc1696a..e927dc4 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.46 2008/07/07 00:24:31 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.47 2008/07/07 03:49:50 dillon Exp $
  */
 
 #ifndef VFS_HAMMER_DISK_H_
@@ -663,11 +663,9 @@ struct hammer_symlink_data {
  * the node to be used as a mirroring master or slave.
  *
  * When operating as a slave CD's into the node automatically become read-only
- * and as-of sync_beg_tid.  Synchronization runs must complete to
- * sync_end_tid before it can be cycled into sync_beg_tid.  No pruning can
- * occur beyond sync_beg_tid.
+ * and as-of sync_end_tid.
  *
- * When operating as a master the read PFSD info sets sync_beg_tid to
+ * When operating as a master the read PFSD info sets sync_end_tid to
  * the most recently flushed TID.
  *
  * sync_low_tid is not yet used but will represent the highest pruning
@@ -675,8 +673,8 @@ struct hammer_symlink_data {
  */
 struct hammer_pseudofs_data {
        hammer_tid_t    sync_low_tid;   /* full history beyond this point */
-       hammer_tid_t    sync_beg_tid;   /* last completed sync (snapshot pt) */
-       hammer_tid_t    sync_end_tid;   /* currently running sync end pt */
+       hammer_tid_t    sync_beg_tid;   /* earliest tid w/ full history avail */
+       hammer_tid_t    sync_end_tid;   /* current synchronization point */
        u_int64_t       sync_beg_ts;    /* real-time of last completed sync */
        u_int64_t       sync_end_ts;    /* initiation of current sync cycle */
        uuid_t          shared_uuid;    /* shared uuid (match required) */
index d4b42b8..c0a53d8 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.94 2008/07/07 00:24:31 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.95 2008/07/07 03:49:50 dillon Exp $
  */
 
 #include "hammer.h"
@@ -670,6 +670,8 @@ retry:
 
        pfsm = kmalloc(sizeof(*pfsm), M_HAMMER, M_WAITOK | M_ZERO);
        pfsm->localization = ip->obj_localization;
+       pfsm->pfsd.unique_uuid = trans->rootvol->ondisk->vol_fsid;
+       pfsm->pfsd.shared_uuid = pfsm->pfsd.unique_uuid;
 
        hammer_init_cursor(trans, &cursor, NULL, NULL);
        cursor.key_beg.localization = ip->obj_localization +
@@ -699,6 +701,7 @@ retry:
        hammer_done_cursor(&cursor);
 
        if (error == 0) {
+               pfsm->fsid_udev = hammer_fsid_to_udev(&pfsm->pfsd.shared_uuid);
                hammer_ref(&pfsm->lock);
                if (RB_INSERT(hammer_pfs_rb_tree, &hmp->rb_pfsm_root, pfsm)) {
                        kfree(pfsm, M_HAMMER);
@@ -713,8 +716,6 @@ retry:
                if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) {
                        ip->flags |= HAMMER_INODE_RO;
                        ip->flags |= HAMMER_INODE_PFSD;
-                       if (ip->obj_asof > pfsm->pfsd.sync_beg_tid)
-                               ip->obj_asof = pfsm->pfsd.sync_beg_tid;
                } else if (pfsm->pfsd.master_id >= 0) {
                        ip->flags |= HAMMER_INODE_PFSD;
                }
@@ -739,6 +740,7 @@ hammer_save_pseudofs(hammer_transaction_t trans, hammer_inode_t ip)
 
 retry:
        pfsm = ip->pfsm;
+       pfsm->fsid_udev = hammer_fsid_to_udev(&pfsm->pfsd.shared_uuid);
        hammer_init_cursor(trans, &cursor, &ip->cache[1], ip);
        cursor.key_beg.localization = ip->obj_localization +
                                      HAMMER_LOCALIZE_MISC;
index a9dbe9e..8cdd83e 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.7 2008/07/07 00:24:31 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.8 2008/07/07 03:49:51 dillon Exp $
  */
 /*
  * HAMMER mirroring ioctls - serialize and deserialize modifications made
@@ -381,6 +381,7 @@ hammer_mirror_write(hammer_cursor_t cursor, struct hammer_ioc_mrecord *mrec,
        hammer_transaction_t trans;
        hammer_buffer_t data_buffer;
        hammer_off_t ndata_offset;
+       hammer_tid_t high_tid;
        void *ndata;
        int error;
        int doprop;
@@ -454,6 +455,20 @@ hammer_mirror_write(hammer_cursor_t cursor, struct hammer_ioc_mrecord *mrec,
                ++trans->hmp->rootvol->ondisk->vol0_stat_inodes;
                hammer_modify_volume_done(trans->rootvol);
        }
+
+       /*
+        * vol0_next_tid must track the highest TID stored in the filesystem.
+        * We do not need to generate undo for this update.
+        */
+       high_tid = mrec->leaf.base.create_tid;
+       if (high_tid < mrec->leaf.base.delete_tid)
+               high_tid = mrec->leaf.base.delete_tid;
+       if (trans->rootvol->ondisk->vol0_next_tid < high_tid) {
+               hammer_modify_volume(trans, trans->rootvol, NULL, 0);
+               trans->rootvol->ondisk->vol0_next_tid = high_tid;
+               hammer_modify_volume_done(trans->rootvol);
+       }
+
        if (error == 0 && doprop)
                hammer_btree_do_propagation(cursor, ip, &mrec->leaf);
 
index 1ce3aa7..78cf32c 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.65 2008/07/05 18:59:27 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.66 2008/07/07 03:49:51 dillon Exp $
  */
 /*
  * Manage HAMMER's on-disk structures.  These routines are primarily
@@ -208,7 +208,6 @@ hammer_install_volume(struct hammer_mount *hmp, const char *volname)
                        brelse(bp);
                        bp = NULL;
                }
-               hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
                hmp->mp->mnt_stat.f_blocks += ondisk->vol0_stat_bigblocks *
                        (HAMMER_LARGEBLOCK_SIZE / HAMMER_BUFSIZE);
                hmp->mp->mnt_vstat.f_blocks += ondisk->vol0_stat_bigblocks *
index e1abe63..c1dbb3d 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.30 2008/07/05 18:59:28 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.31 2008/07/07 03:49:51 dillon Exp $
  */
 /*
  * HAMMER structural locking
@@ -590,3 +590,12 @@ hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
              file_offset1, file_offset2);
 }
 
+udev_t
+hammer_fsid_to_udev(uuid_t *uuid)  /* derive a udev_t from a uuid via crc32 */
+{
+       u_int32_t crc;
+
+       crc = crc32(uuid, sizeof(*uuid));  /* checksum over the whole uuid_t object */
+       return((udev_t)crc);  /* same uuid always yields the same value; NOTE(review): distinct uuids can presumably collide */
+}
+
index 00183a5..ca1d625 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.81 2008/07/07 00:24:31 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.82 2008/07/07 03:49:51 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -648,13 +648,23 @@ hammer_vop_getattr(struct vop_getattr_args *ap)
        struct hammer_inode *ip = VTOI(ap->a_vp);
        struct vattr *vap = ap->a_vap;
 
-       vap->va_fsid = ip->hmp->fsid_udev;
-       /* 
-        * XXX munge the device if we are in a pseudo-fs, so user utilities
-        * do not think its the same 'filesystem'.
+       /*
+        * We want the fsid to be different when accessing a filesystem
+        * with different as-of's so programs like diff don't think
+        * the files are the same.
+        *
+        * We also want the fsid to be the same when comparing the same
+        * snapshot across mirrors (which might be backed by different
+        * physical devices).  HAMMER fsids are based on the PFS's
+        * shared_uuid field.
+        *
+        * XXX there is a chance of collision here.  The va_fsid reported
+        * by stat is different from the more involved fsid used in the
+        * mount structure.
         */
-       if (ip->obj_localization)
-               vap->va_fsid += ip->obj_localization;
+       vap->va_fsid = ip->pfsm->fsid_udev ^ (u_int32_t)ip->obj_asof ^
+                      (u_int32_t)(ip->obj_asof >> 32);
+
        vap->va_fileid = ip->ino_leaf.base.obj_id;
        vap->va_mode = ip->ino_data.mode;
        vap->va_nlink = ip->ino_data.nlinks;
@@ -733,6 +743,7 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap)
        int i;
        int nlen;
        int flags;
+       int ispfs;
        int64_t obj_id;
        u_int32_t localization;
 
@@ -746,6 +757,7 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap)
        asof = dip->obj_asof;
        nlen = ncp->nc_nlen;
        flags = dip->flags;
+       ispfs = 0;
 
        hammer_simple_transaction(&trans, dip->hmp);
 
@@ -823,6 +835,13 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap)
                        if (nlen == cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF &&
                            bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) {
                                obj_id = cursor.data->entry.obj_id;
+
+                               /*
+                                * Force relookups whenever a PFS root is
+                                * accessed.
+                                */
+                               if (obj_id == HAMMER_OBJID_ROOT)
+                                       ispfs = 1;
                                localization = cursor.data->entry.localization;
                                break;
                        }
@@ -834,6 +853,15 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap)
                ip = hammer_get_inode(&trans, dip, obj_id,
                                      asof, localization,
                                      flags, &error);
+               if (ispfs && asof > ip->pfsm->pfsd.sync_end_tid) {
+                       asof = ip->pfsm->pfsd.sync_end_tid;
+                       hammer_rel_inode(ip, 0);
+                       ip = hammer_get_inode(&trans, dip, obj_id,
+                                             asof, localization,
+                                             flags, &error);
+               }
+
+
                if (error == 0) {
                        error = hammer_get_vnode(ip, &vp);
                        hammer_rel_inode(ip, 0);
@@ -843,6 +871,8 @@ hammer_vop_nresolve(struct vop_nresolve_args *ap)
                if (error == 0) {
                        vn_unlock(vp);
                        cache_setvp(ap->a_nch, vp);
+                       if (ispfs)
+                               cache_settimeout(ap->a_nch, 0);
                        vrele(vp);
                }
        } else if (error == ENOENT) {