HAMMER 61D/Many: Mirroring features
author Matthew Dillon <dillon@dragonflybsd.org>
Sat, 12 Jul 2008 02:47:39 +0000 (02:47 +0000)
committer Matthew Dillon <dillon@dragonflybsd.org>
Sat, 12 Jul 2008 02:47:39 +0000 (02:47 +0000)
* Split PFS ioctls into their own source file.

* Add additional PFS/mirroring directives:  pfs-upgrade, pfs-downgrade, and
  finish implementing pfs-destroy.  (Yes, that means you can change the
  master/slave mode for a PFS now).

* Consolidate some of the B-Tree deletion code.

* Fix another sync_lock deadlock.

13 files changed:
sys/conf/files
sys/vfs/hammer/Makefile
sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_disk.h
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_ioctl.c
sys/vfs/hammer/hammer_ioctl.h
sys/vfs/hammer/hammer_mirror.c
sys/vfs/hammer/hammer_object.c
sys/vfs/hammer/hammer_pfs.c [new file with mode: 0644]
sys/vfs/hammer/hammer_prune.c
sys/vfs/hammer/hammer_reblock.c
sys/vfs/hammer/hammer_vnops.c

index 1a15fcf..027f645 100644 (file)
@@ -1,5 +1,5 @@
 # $FreeBSD: src/sys/conf/files,v 1.340.2.137 2003/06/04 17:10:30 sam Exp $
-# $DragonFly: src/sys/conf/files,v 1.222 2008/07/07 22:02:09 nant Exp $
+# $DragonFly: src/sys/conf/files,v 1.223 2008/07/12 02:47:38 dillon Exp $
 #
 # The long compile-with and dependency lines are required because of
 # limitations in config: backslash-newline doesn't work in strings, and
@@ -1254,6 +1254,7 @@ vfs/hammer/hammer_ioctl.c optional hammer
 vfs/hammer/hammer_mirror.c     optional hammer
 vfs/hammer/hammer_object.c     optional hammer
 vfs/hammer/hammer_ondisk.c     optional hammer
+vfs/hammer/hammer_pfs.c                optional hammer
 vfs/hammer/hammer_prune.c      optional hammer
 vfs/hammer/hammer_reblock.c    optional hammer
 vfs/hammer/hammer_recover.c    optional hammer
index 82f7605..f16616c 100644 (file)
@@ -1,5 +1,5 @@
 #
-# $DragonFly: src/sys/vfs/hammer/Makefile,v 1.12 2008/06/26 04:06:22 dillon Exp $
+# $DragonFly: src/sys/vfs/hammer/Makefile,v 1.13 2008/07/12 02:47:39 dillon Exp $
 
 KMOD=  hammer
 SRCS=  hammer_vfsops.c hammer_vnops.c hammer_inode.c \
@@ -7,7 +7,8 @@ SRCS=   hammer_vfsops.c hammer_vnops.c hammer_inode.c \
        hammer_cursor.c hammer_btree.c hammer_transaction.c \
        hammer_object.c hammer_recover.c hammer_ioctl.c \
        hammer_blockmap.c hammer_freemap.c hammer_undo.c \
-       hammer_reblock.c hammer_flusher.c hammer_mirror.c
+       hammer_reblock.c hammer_flusher.c hammer_mirror.c \
+       hammer_pfs.c
 
 NOMAN=
 
index 0023c7e..d29ab91 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.112 2008/07/11 05:44:23 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.113 2008/07/12 02:47:39 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -179,12 +179,15 @@ struct hammer_pseudofs_inmem {
        struct hammer_lock      lock;
        u_int32_t               localization;
        hammer_tid_t            create_tid;
+       int                     flags;
        udev_t                  fsid_udev;
        struct hammer_pseudofs_data pfsd;
 };
 
 typedef struct hammer_pseudofs_inmem *hammer_pseudofs_inmem_t;
 
+#define HAMMER_PFSM_DELETED    0x0001
+
 /*
  * Cache object ids.  A fixed number of objid cache structures are
  * created to reserve object id's for newly created files in multiples
@@ -804,7 +807,8 @@ int hammer_ip_resolve_data(hammer_cursor_t cursor);
 int    hammer_ip_delete_record(hammer_cursor_t cursor, hammer_inode_t ip,
                        hammer_tid_t tid);
 int    hammer_delete_at_cursor(hammer_cursor_t cursor, int delete_flags,
-                       int64_t *stat_bytes);
+                       hammer_tid_t delete_tid, u_int32_t delete_ts,
+                       int track, int64_t *stat_bytes);
 int    hammer_ip_check_directory_empty(hammer_transaction_t trans,
                        hammer_inode_t ip);
 int    hammer_sync_hmp(hammer_mount_t hmp, int waitfor);
@@ -1035,6 +1039,7 @@ int  hammer_mkroot_pseudofs(hammer_transaction_t trans, struct ucred *cred,
                        hammer_pseudofs_inmem_t pfsm);
 int  hammer_save_pseudofs(hammer_transaction_t trans,
                        hammer_pseudofs_inmem_t pfsm);
+int  hammer_unload_pseudofs(hammer_transaction_t trans, u_int32_t localization);
 void hammer_rel_pseudofs(hammer_mount_t hmp, hammer_pseudofs_inmem_t pfsm);
 int hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag,
                        struct ucred *cred);
@@ -1076,6 +1081,12 @@ int hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                        struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs);
 int hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                         struct hammer_ioc_pseudofs_rw *pfs);
+int hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
+                        struct hammer_ioc_pseudofs_rw *pfs);
+int hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
+                        struct hammer_ioc_pseudofs_rw *pfs);
+int hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
+                        struct hammer_ioc_pseudofs_rw *pfs);
 
 int hammer_signal_check(hammer_mount_t hmp);
 
index 847e37b..1f6a1f0 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.49 2008/07/10 04:44:33 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.50 2008/07/12 02:47:39 dillon Exp $
  */
 
 #ifndef VFS_HAMMER_DISK_H_
@@ -698,6 +698,7 @@ struct hammer_pseudofs_data {
 typedef struct hammer_pseudofs_data *hammer_pseudofs_data_t;
 
 #define HAMMER_PFSD_SLAVE      0x00000001
+#define HAMMER_PFSD_DELETED    0x80000000
 
 /*
  * Rollup various structures embedded as record data
@@ -706,6 +707,7 @@ union hammer_data_ondisk {
        struct hammer_entry_data entry;
        struct hammer_inode_data inode;
        struct hammer_symlink_data symlink;
+       struct hammer_pseudofs_data pfsd;
 };
 
 typedef union hammer_data_ondisk *hammer_data_ondisk_t;
index 0f3ec74..41ed72b 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.99 2008/07/11 01:22:29 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.100 2008/07/12 02:47:39 dillon Exp $
  */
 
 #include "hammer.h"
@@ -115,6 +115,21 @@ hammer_inode_info_cmp_all_history(hammer_inode_t ip, void *data)
        return(0);
 }
 
+/*
+ * RB_SCAN comparator used by hammer_unload_pseudofs() to locate all inodes
+ * associated with a particular PFS; data points at the localization sought.
+ */
+static int
+hammer_inode_pfs_cmp(hammer_inode_t ip, void *data)
+{
+       u_int32_t localization = *(u_int32_t *)data;    /* target value */
+       if (ip->obj_localization > localization)
+               return(1);      /* inode's localization is above the target */
+       if (ip->obj_localization < localization)
+               return(-1);     /* inode's localization is below the target */
+       return(0);              /* exact match */
+}
+
 /*
  * RB-Tree support for pseudofs structures
  */
@@ -713,10 +728,15 @@ retry:
        if (*errorp == 0) {
                *errorp = hammer_ip_resolve_data(&cursor);
                if (*errorp == 0) {
-                       bytes = cursor.leaf->data_len;
-                       if (bytes > sizeof(pfsm->pfsd))
-                               bytes = sizeof(pfsm->pfsd);
-                       bcopy(cursor.data, &pfsm->pfsd, bytes);
+                       if (cursor.data->pfsd.mirror_flags &
+                           HAMMER_PFSD_DELETED) {
+                               *errorp = ENOENT;
+                       } else {
+                               bytes = cursor.leaf->data_len;
+                               if (bytes > sizeof(pfsm->pfsd))
+                                       bytes = sizeof(pfsm->pfsd);
+                               bcopy(cursor.data, &pfsm->pfsd, bytes);
+                       }
                }
        }
        hammer_done_cursor(&cursor);
@@ -823,6 +843,48 @@ hammer_mkroot_pseudofs(hammer_transaction_t trans, struct ucred *cred,
        return(error);
 }
 
+/*
+ * Unload any vnodes & inodes associated with a PFS, return ENOTEMPTY
+ * if any remain in use.  The scan callback returns -1 to stop the scan.
+ */
+static
+int
+hammer_unload_pseudofs_callback(hammer_inode_t ip, void *data)  /* data is not used */
+{
+       int res;
+
+       hammer_ref(&ip->lock);          /* hold our own ref during the check */
+       if (ip->lock.refs == 2 && ip->vp)       /* NOTE(review): presumably our ref + the vnode's -- confirm */
+               vclean_unlocked(ip->vp);
+       if (ip->lock.refs == 1 && ip->vp == NULL)       /* only our ref left, no vnode attached */
+               res = 0;
+       else
+               res = -1;       /* stop, someone is using the inode */
+       hammer_rel_inode(ip, 0);        /* drop the ref taken above */
+       return(res);
+}
+
+int    /* 0 if the last scan pass was clean, otherwise ENOTEMPTY */
+hammer_unload_pseudofs(hammer_transaction_t trans, u_int32_t localization)
+{
+       int res;
+       int try;
+
+       for (try = res = 0; try < 4; ++try) {   /* at most four scan passes */
+               res = hammer_ino_rb_tree_RB_SCAN(&trans->hmp->rb_inos_root,
+                                          hammer_inode_pfs_cmp,
+                                          hammer_unload_pseudofs_callback,
+                                          &localization);      /* passed to the callbacks as data */
+               if (res == 0 && try > 1)        /* a clean pass is only accepted from the third pass on */
+                       break;
+               hammer_flusher_sync(trans->hmp);        /* sync the flusher, then rescan */
+       }
+       if (res != 0)
+               res = ENOTEMPTY;        /* any non-zero scan result is reported as ENOTEMPTY */
+       return(res);
+}
+
+
 /*
  * Release a reference on a PFS
  */
@@ -2085,11 +2147,14 @@ done:
         * buffers.  Otherwise a buffer cache deadlock can occur when
         * doing things like creating tens of thousands of tiny files.
         *
-        * The finalization lock is already being held by virtue of the
-        * flusher calling us.
+        * We must release our cursor lock to avoid a 3-way deadlock
+        * due to the exclusive sync lock the finalizer must get.
         */
-        if (hammer_flusher_meta_limit(hmp))
+        if (hammer_flusher_meta_limit(hmp)) {
+               hammer_unlock_cursor(cursor, 0);
                 hammer_flusher_finalize(trans, 0);
+               hammer_lock_cursor(cursor, 0);
+       }
 
        return(error);
 }
index 5af7cb2..f3b3d4e 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.26 2008/07/09 10:29:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.27 2008/07/12 02:47:39 dillon Exp $
  */
 
 #include "hammer.h"
@@ -83,6 +83,24 @@ hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag,
                                    (struct hammer_ioc_pseudofs_rw *)data);
                }
                break;
+       case HAMMERIOC_UPG_PSEUDOFS:
+               if (error == 0) {
+                       error = hammer_ioc_upgrade_pseudofs(&trans, ip, 
+                                   (struct hammer_ioc_pseudofs_rw *)data);
+               }
+               break;
+       case HAMMERIOC_DGD_PSEUDOFS:
+               if (error == 0) {
+                       error = hammer_ioc_downgrade_pseudofs(&trans, ip,
+                                   (struct hammer_ioc_pseudofs_rw *)data);
+               }
+               break;
+       case HAMMERIOC_RMR_PSEUDOFS:
+               if (error == 0) {
+                       error = hammer_ioc_destroy_pseudofs(&trans, ip,
+                                   (struct hammer_ioc_pseudofs_rw *)data);
+               }
+               break;
        case HAMMERIOC_MIRROR_READ:
                if (error == 0) {
                        error = hammer_ioc_mirror_read(&trans, ip,
index ad2c2aa..6783621 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.h,v 1.19 2008/07/10 04:44:33 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.h,v 1.20 2008/07/12 02:47:39 dillon Exp $
  */
 /*
  * HAMMER ioctl's.  This file can be #included from userland
@@ -318,6 +318,9 @@ typedef union hammer_ioc_mrecord_any *hammer_ioc_mrecord_any_t;
 #define HAMMERIOC_GET_PSEUDOFS _IOWR('h',6,struct hammer_ioc_pseudofs_rw)
 #define HAMMERIOC_MIRROR_READ  _IOWR('h',7,struct hammer_ioc_mirror_rw)
 #define HAMMERIOC_MIRROR_WRITE _IOWR('h',8,struct hammer_ioc_mirror_rw)
+#define HAMMERIOC_UPG_PSEUDOFS _IOWR('h',9,struct hammer_ioc_pseudofs_rw)
+#define HAMMERIOC_DGD_PSEUDOFS _IOWR('h',10,struct hammer_ioc_pseudofs_rw)
+#define HAMMERIOC_RMR_PSEUDOFS _IOWR('h',11,struct hammer_ioc_pseudofs_rw)
 
 #endif
 
index 74daef4..cc4c8a7 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.12 2008/07/11 05:44:23 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.13 2008/07/12 02:47:39 dillon Exp $
  */
 /*
  * HAMMER mirroring ioctls - serialize and deserialize modifications made
@@ -60,7 +60,7 @@ static int hammer_ioc_mirror_write_skip(hammer_cursor_t cursor,
                                struct hammer_ioc_mrecord_skip *mrec,
                                struct hammer_ioc_mirror_rw *mirror,
                                u_int32_t localization);
-static int hammer_mirror_delete_at_cursor(hammer_cursor_t cursor,
+static int hammer_mirror_delete_to(hammer_cursor_t cursor,
                                struct hammer_ioc_mirror_rw *mirror);
 static int hammer_mirror_localize_data(hammer_data_ondisk_t data,
                                hammer_btree_leaf_elm_t leaf);
@@ -488,19 +488,7 @@ hammer_ioc_mirror_write_skip(hammer_cursor_t cursor,
         */
        cursor->key_end = mrec->skip_beg;
        cursor->flags |= HAMMER_CURSOR_BACKEND;
-
-       error = hammer_btree_iterate(cursor);
-       while (error == 0) {
-               error = hammer_mirror_delete_at_cursor(cursor, mirror);
-               if (error == 0)
-                       error = hammer_btree_iterate(cursor);
-       }
-
-       /*
-        * ENOENT just means we hit the end of our iteration.
-        */
-       if (error == ENOENT)
-               error = 0;
+       error = hammer_mirror_delete_to(cursor, mirror);
 
        /*
         * Now skip past the skip (which is the whole point point of
@@ -566,15 +554,7 @@ hammer_ioc_mirror_write_rec(hammer_cursor_t cursor,
        cursor->key_end = mrec->leaf.base;
        cursor->flags &= ~HAMMER_CURSOR_END_INCLUSIVE;
        cursor->flags |= HAMMER_CURSOR_BACKEND;
-
-       error = hammer_btree_iterate(cursor);
-       while (error == 0) {
-               error = hammer_mirror_delete_at_cursor(cursor, mirror);
-               if (error == 0)
-                       error = hammer_btree_iterate(cursor);
-       }
-       if (error == ENOENT)
-               error = 0;
+       error = hammer_mirror_delete_to(cursor, mirror);
 
        /*
         * Locate the record.
@@ -647,14 +627,7 @@ hammer_ioc_mirror_write_pass(hammer_cursor_t cursor,
        cursor->flags &= ~HAMMER_CURSOR_END_INCLUSIVE;
        cursor->flags |= HAMMER_CURSOR_BACKEND;
 
-       error = hammer_btree_iterate(cursor);
-       while (error == 0) {
-               error = hammer_mirror_delete_at_cursor(cursor, mirror);
-               if (error == 0)
-                       error = hammer_btree_iterate(cursor);
-       }
-       if (error == ENOENT)
-               error = 0;
+       error = hammer_mirror_delete_to(cursor, mirror);
 
        /*
         * Locate the record and get past it by setting ATEDISK.
@@ -679,52 +652,37 @@ hammer_ioc_mirror_write_pass(hammer_cursor_t cursor,
  * As part of the mirror write we iterate across swaths of records
  * on the target which no longer exist on the source, and mark them
  * deleted.
+ *
+ * The caller has indexed the cursor and set up key_end.  We iterate
+ * through to key_end.
  */
 static
 int
-hammer_mirror_delete_at_cursor(hammer_cursor_t cursor,
-                              struct hammer_ioc_mirror_rw *mirror)
+hammer_mirror_delete_to(hammer_cursor_t cursor,
+                      struct hammer_ioc_mirror_rw *mirror)
 {
-       hammer_transaction_t trans;
-       hammer_btree_elm_t elm;
+       hammer_btree_leaf_elm_t elm;
        int error;
 
-       if ((error = hammer_cursor_upgrade(cursor)) != 0)
-               return(error);
-
-       elm = &cursor->node->ondisk->elms[cursor->index];
-       KKASSERT(elm->leaf.base.btype == HAMMER_BTREE_TYPE_RECORD);
-
-       trans = cursor->trans;
-       hammer_sync_lock_sh(trans);
-
-       if (elm->leaf.base.delete_tid == 0) {
-               /*
-                * We don't know when the originator deleted the element
-                * because it was destroyed, tid_end works.
-                */
-               KKASSERT(elm->base.create_tid < mirror->tid_end);
-               hammer_modify_node(trans, cursor->node, elm, sizeof(*elm));
-               elm->base.delete_tid = mirror->tid_end;
-               elm->leaf.delete_ts = time_second;
-               hammer_modify_node_done(cursor->node);
-
-               /*
-                * Track a count of active inodes.
-                */
-               if (elm->base.obj_type == HAMMER_RECTYPE_INODE) {
-                       hammer_modify_volume_field(trans,
-                                                  trans->rootvol,
-                                                  vol0_stat_inodes);
-                       --trans->hmp->rootvol->ondisk->vol0_stat_inodes;
-                       hammer_modify_volume_done(trans->rootvol);
+       error = hammer_btree_iterate(cursor);
+       while (error == 0) {
+               elm = &cursor->node->ondisk->elms[cursor->index].leaf;
+               KKASSERT(elm->base.btype == HAMMER_BTREE_TYPE_RECORD);
+               if (elm->base.delete_tid == 0) {
+                       error = hammer_delete_at_cursor(cursor,
+                                                       HAMMER_DELETE_ADJUST,
+                                                       mirror->tid_end,
+                                                       time_second,
+                                                       1, NULL);
+                       if (error == 0)
+                               cursor->flags |= HAMMER_CURSOR_ATEDISK;
                }
+               if (error == 0)
+                       error = hammer_btree_iterate(cursor);
        }
-       hammer_sync_unlock(trans);
-
-       cursor->flags |= HAMMER_CURSOR_ATEDISK;
-
-       return(0);
+       if (error == ENOENT)
+               error = 0;
+       return(error);
 }
 
 /*
@@ -748,55 +706,31 @@ hammer_mirror_check(hammer_cursor_t cursor, struct hammer_ioc_mrecord_rec *mrec)
 }
 
 /*
- * Update a record in-place.  Only the delete_tid can change.
+ * Update a record in-place.  Only the delete_tid can change, and
+ * only from zero to non-zero.
  */
 static
 int
 hammer_mirror_update(hammer_cursor_t cursor,
                     struct hammer_ioc_mrecord_rec *mrec)
 {
-       hammer_transaction_t trans;
-       hammer_btree_leaf_elm_t elm;
        int error;
 
-       if ((error = hammer_cursor_upgrade(cursor)) != 0)
-               return(error);
-
-       elm = cursor->leaf;
-       trans = cursor->trans;
-
-       if (mrec->leaf.base.delete_tid == 0) {
-               kprintf("mirror_write: object %016llx:%016llx deleted on "
-                       "target, not deleted on source\n",
-                       elm->base.obj_id, elm->base.key);
-               return(0);
-       }
-       hammer_sync_lock_sh(trans);
-
-       KKASSERT(elm->base.create_tid < mrec->leaf.base.delete_tid);
-       hammer_modify_node(trans, cursor->node, elm, sizeof(*elm));
-       elm->base.delete_tid = mrec->leaf.base.delete_tid;
-       elm->delete_ts = mrec->leaf.delete_ts;
-       hammer_modify_node_done(cursor->node);
-
        /*
-        * Cursor is left on the current element, we want to skip it now.
+        * This case shouldn't occur.
         */
-       cursor->flags |= HAMMER_CURSOR_ATEDISK;
+       if (mrec->leaf.base.delete_tid == 0)
+               return(0);
 
        /*
-        * Track a count of active inodes.
+        * Mark the record deleted on the mirror target.
         */
-       if (elm->base.obj_type == HAMMER_RECTYPE_INODE) {
-               hammer_modify_volume_field(trans,
-                                          trans->rootvol,
-                                          vol0_stat_inodes);
-               --trans->hmp->rootvol->ondisk->vol0_stat_inodes;
-               hammer_modify_volume_done(trans->rootvol);
-       }
-       hammer_sync_unlock(trans);
-
-       return(0);
+       error = hammer_delete_at_cursor(cursor, HAMMER_DELETE_ADJUST,
+                                       mrec->leaf.base.delete_tid,
+                                       mrec->leaf.delete_ts,
+                                       1, NULL);
+       cursor->flags |= HAMMER_CURSOR_ATEDISK;
+       return(error);
 }
 
 /*
@@ -876,8 +810,9 @@ hammer_mirror_write(hammer_cursor_t cursor,
        /*
         * Track a count of active inodes.
         */
-       if (error == 0 && mrec->leaf.base.delete_tid == 0 &&
-           mrec->leaf.base.obj_type == HAMMER_RECTYPE_INODE) {
+       if (error == 0 &&
+           mrec->leaf.base.rec_type == HAMMER_RECTYPE_INODE &&
+           mrec->leaf.base.delete_tid == 0) {
                hammer_modify_volume_field(trans,
                                           trans->rootvol,
                                           vol0_stat_inodes);
@@ -942,96 +877,3 @@ hammer_mirror_localize_data(hammer_data_ondisk_t data,
        return(0);
 }
 
-/*
- * Auto-detect the pseudofs.
- */
-static
-void
-hammer_mirror_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip)
-{
-       if (pfs->pfs_id == -1)
-               pfs->pfs_id = (int)(ip->obj_localization >> 16);
-}
-
-/*
- * Get mirroring/pseudo-fs information
- */
-int
-hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
-                       struct hammer_ioc_pseudofs_rw *pfs)
-{
-       hammer_pseudofs_inmem_t pfsm;
-       u_int32_t localization;
-       int error;
-
-       hammer_mirror_autodetect(pfs, ip);
-       if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS)
-               return(EINVAL);
-       localization = (u_int32_t)pfs->pfs_id << 16;
-       pfs->bytes = sizeof(struct hammer_pseudofs_data);
-       pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;
-
-       pfsm = hammer_load_pseudofs(trans, localization, &error);
-       if (error) {
-               hammer_rel_pseudofs(trans->hmp, pfsm);
-               return(error);
-       }
-
-       /*
-        * If the PFS is a master the sync tid is set by normal operation
-        * rather then the mirroring code, and will always track the
-        * real HAMMER filesystem.
-        */
-       if (pfsm->pfsd.master_id >= 0)
-               pfsm->pfsd.sync_end_tid = trans->rootvol->ondisk->vol0_next_tid;
-
-       /*
-        * Copy out to userland.
-        */
-       error = 0;
-       if (pfs->ondisk && error == 0)
-               error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd));
-       hammer_rel_pseudofs(trans->hmp, pfsm);
-       return(error);
-}
-
-/*
- * Set mirroring/pseudo-fs information
- */
-int
-hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
-                       struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
-{
-       hammer_pseudofs_inmem_t pfsm;
-       int error;
-       u_int32_t localization;
-
-       error = 0;
-       hammer_mirror_autodetect(pfs, ip);
-       if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS)
-               error = EINVAL;
-       if (pfs->bytes != sizeof(pfsm->pfsd))
-               error = EINVAL;
-       if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
-               error = EINVAL;
-       if (error == 0 && pfs->ondisk) {
-               /*
-                * Load the PFS so we can modify our in-core copy.
-                */
-               localization = (u_int32_t)pfs->pfs_id << 16;
-               pfsm = hammer_load_pseudofs(trans, localization, &error);
-               error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));
-
-               /*
-                * Save it back, create a root inode if we are in master
-                * mode and no root exists.
-                */
-               if (error == 0)
-                       error = hammer_mkroot_pseudofs(trans, cred, pfsm);
-               if (error == 0)
-                       error = hammer_save_pseudofs(trans, pfsm);
-               hammer_rel_pseudofs(trans->hmp, pfsm);
-       }
-       return(error);
-}
-
index e7bb5ae..3a10ae8 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.86 2008/07/11 01:22:29 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.87 2008/07/12 02:47:39 dillon Exp $
  */
 
 #include "hammer.h"
@@ -1925,6 +1925,9 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_inode_t ip,
         * On-disk records are marked as deleted by updating their delete_tid.
         * This does not effect their position in the B-Tree (which is based
         * on their create_tid).
+        *
+        * Frontend B-Tree operations track inodes so we tell 
+        * hammer_delete_at_cursor() not to.
         */
        error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_LEAF);
        elm = NULL;
@@ -1933,7 +1936,9 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_inode_t ip,
                error = hammer_delete_at_cursor(
                                cursor,
                                HAMMER_DELETE_ADJUST | hammer_nohistory(ip),
-                               NULL);
+                               cursor->trans->tid,
+                               cursor->trans->time32,
+                               0, NULL);
        }
        return(error);
 }
@@ -1944,12 +1949,17 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_inode_t ip,
  *
  * The cursor must be properly positioned for an iteration on return but
  * may be pointing at an internal element.
+ *
+ * An element can be un-deleted by passing a delete_tid of 0 with
+ * HAMMER_DELETE_ADJUST.
  */
 int
 hammer_delete_at_cursor(hammer_cursor_t cursor, int delete_flags,
-                       int64_t *stat_bytes)
+                       hammer_tid_t delete_tid, u_int32_t delete_ts,
+                       int track, int64_t *stat_bytes)
 {
        struct hammer_btree_leaf_elm save_leaf;
+       hammer_transaction_t trans;
        hammer_btree_leaf_elm_t leaf;
        hammer_node_t node;
        hammer_btree_elm_t elm;
@@ -1957,31 +1967,42 @@ hammer_delete_at_cursor(hammer_cursor_t cursor, int delete_flags,
        int32_t data_len;
        u_int16_t rec_type;
        int error;
+       int icount;
        int doprop;
 
        error = hammer_cursor_upgrade(cursor);
        if (error)
                return(error);
 
+       trans = cursor->trans;
        node = cursor->node;
        elm = &node->ondisk->elms[cursor->index];
        leaf = &elm->leaf;
        KKASSERT(elm->base.btype == HAMMER_BTREE_TYPE_RECORD);
 
+       hammer_sync_lock_sh(trans);
+       doprop = 0;
+       icount = 0;
+
        /*
         * Adjust the delete_tid.  Update the mirror_tid propagation field
         * as well.
         */
-       hammer_sync_lock_sh(cursor->trans);
-       doprop = 0;
        if (delete_flags & HAMMER_DELETE_ADJUST) {
-               hammer_modify_node(cursor->trans, node, elm, sizeof(*elm));
-               elm->leaf.base.delete_tid = cursor->trans->tid;
-               elm->leaf.delete_ts = cursor->trans->time32;
+               if (elm->base.rec_type == HAMMER_RECTYPE_INODE) {
+                       if (elm->leaf.base.delete_tid == 0 && delete_tid)
+                               icount = -1;
+                       if (elm->leaf.base.delete_tid && delete_tid == 0)
+                               icount = 1;
+               }
+
+               hammer_modify_node(trans, node, elm, sizeof(*elm));
+               elm->leaf.base.delete_tid = delete_tid;
+               elm->leaf.delete_ts = delete_ts;
                hammer_modify_node_done(node);
 
                if (elm->leaf.base.delete_tid > node->ondisk->mirror_tid) {
-                       hammer_modify_node_field(cursor->trans, node, mirror_tid);
+                       hammer_modify_node_field(trans, node, mirror_tid);
                        node->ondisk->mirror_tid = elm->leaf.base.delete_tid;
                        hammer_modify_node_done(node);
                        doprop = 1;
@@ -2021,6 +2042,10 @@ hammer_delete_at_cursor(hammer_cursor_t cursor, int delete_flags,
                        save_leaf = elm->leaf;
                        leaf = &save_leaf;
                }
+               if (elm->base.rec_type == HAMMER_RECTYPE_INODE &&
+                   elm->leaf.base.delete_tid == 0) {
+                       icount = -1;
+               }
 
                error = hammer_btree_delete(cursor);
                if (error == 0) {
@@ -2039,7 +2064,7 @@ hammer_delete_at_cursor(hammer_cursor_t cursor, int delete_flags,
                        case HAMMER_ZONE_LARGE_DATA:
                        case HAMMER_ZONE_SMALL_DATA:
                        case HAMMER_ZONE_META:
-                               hammer_blockmap_free(cursor->trans,
+                               hammer_blockmap_free(trans,
                                                     data_offset, data_len);
                                break;
                        default:
@@ -2048,18 +2073,42 @@ hammer_delete_at_cursor(hammer_cursor_t cursor, int delete_flags,
                }
        }
 
+       /*
+        * Track inode count and next_tid.  This is used by the mirroring
+        * and PFS code.  icount can be negative, zero, or positive.
+        */
+       if (error == 0 && track) {
+               if (icount) {
+                       hammer_modify_volume_field(trans, trans->rootvol,
+                                                  vol0_stat_inodes);
+                       trans->rootvol->ondisk->vol0_stat_inodes += icount;
+                       hammer_modify_volume_done(trans->rootvol);
+               }
+               if (trans->rootvol->ondisk->vol0_next_tid < delete_tid) {
+                       hammer_modify_volume(trans, trans->rootvol, NULL, 0);
+                       trans->rootvol->ondisk->vol0_next_tid = delete_tid;
+                       hammer_modify_volume_done(trans->rootvol);
+               }
+       }
+
        /*
         * mirror_tid propagation occurs if the node's mirror_tid had to be
         * updated while adjusting the delete_tid.
         *
         * This occurs when deleting even in nohistory mode, but does not
         * occur when pruning an already-deleted node.
+        *
+        * cursor->ip is NULL when called from the pruning, mirroring,
+        * and pfs code.  If non-NULL propagation will be conditionalized
+        * on whether the PFS is in no-history mode or not.
         */
        if (doprop) {
-               KKASSERT(cursor->ip != NULL);
-               hammer_btree_do_propagation(cursor, cursor->ip->pfsm, leaf);
+               if (cursor->ip)
+                       hammer_btree_do_propagation(cursor, cursor->ip->pfsm, leaf);
+               else
+                       hammer_btree_do_propagation(cursor, NULL, leaf);
        }
-       hammer_sync_unlock(cursor->trans);
+       hammer_sync_unlock(trans);
        return (error);
 }
 
diff --git a/sys/vfs/hammer/hammer_pfs.c b/sys/vfs/hammer/hammer_pfs.c
new file mode 100644 (file)
index 0000000..cf4f28e
--- /dev/null
@@ -0,0 +1,410 @@
+/*
+ * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
+ * 
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@backplane.com>
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * 
+ * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.1 2008/07/12 02:47:39 dillon Exp $
+ */
+/*
+ * HAMMER PFS ioctls - Manage pseudo-fs configurations
+ */
+
+#include "hammer.h"
+
+static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs,
+                               hammer_inode_t ip);
+static int hammer_pfs_rollback(hammer_transaction_t trans,
+                               hammer_pseudofs_inmem_t pfsm,
+                               hammer_tid_t trunc_tid);
+static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor,
+                               hammer_tid_t trunc_tid);
+
+/*
+ * Retrieve the mirroring/pseudo-fs configuration for a PFS.
+ *
+ * The request is validated by hammer_pfs_autodetect(), which also fills
+ * in pfs->pfs_id from the ioctl inode when the caller passed -1.
+ * pfs->bytes and pfs->version are then set, and if pfs->ondisk is
+ * non-NULL the in-memory PFS data is copied out to it.
+ *
+ * Returns 0 on success, otherwise the error from validation, from
+ * loading the PFS, or from copyout().
+ *
+ * NOTE: The ip used for ioctl is not necessarily related to the PFS
+ */
+int
+hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
+                       struct hammer_ioc_pseudofs_rw *pfs)
+{
+       hammer_pseudofs_inmem_t pfsm;
+       int error;
+
+       error = hammer_pfs_autodetect(pfs, ip);
+       if (error)
+               return(error);
+       pfs->bytes = sizeof(struct hammer_pseudofs_data);
+       pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;
+
+       pfsm = hammer_load_pseudofs(trans, (u_int32_t)pfs->pfs_id << 16,
+                                   &error);
+       if (error == 0) {
+               /*
+                * If the PFS is a master the sync tid is set by normal
+                * operation rather than the mirroring code, and will
+                * always track the real HAMMER filesystem.
+                */
+               if (pfsm->pfsd.master_id >= 0) {
+                       pfsm->pfsd.sync_end_tid =
+                               trans->rootvol->ondisk->vol0_next_tid;
+               }
+
+               /*
+                * Copy out to userland, but only when the caller
+                * supplied a destination buffer.
+                */
+               if (pfs->ondisk) {
+                       error = copyout(&pfsm->pfsd, pfs->ondisk,
+                                       sizeof(pfsm->pfsd));
+               }
+       }
+
+       /*
+        * The reference is dropped on both the success and failure paths.
+        */
+       hammer_rel_pseudofs(trans->hmp, pfsm);
+       return(error);
+}
+
+/*
+ * Set mirroring/pseudo-fs information
+ *
+ * The request is validated by hammer_pfs_autodetect() and must carry
+ * HAMMER_IOC_PSEUDOFS_VERSION in pfs->version, otherwise EINVAL is
+ * returned.  Nothing is modified unless pfs->ondisk is supplied, in
+ * which case the new PFS data is copied in from userland over the
+ * in-memory copy and saved.
+ *
+ * Returns 0 on success, otherwise the error from validation, copyin(),
+ * hammer_mkroot_pseudofs() or hammer_save_pseudofs().
+ *
+ * NOTE: The ip used for ioctl is not necessarily related to the PFS
+ */
+int
+hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
+                       struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
+{
+       hammer_pseudofs_inmem_t pfsm;
+       u_int32_t localization;
+       int error;
+
+       if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
+               return(error);
+       if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
+               return(EINVAL);
+       if (pfs->ondisk == NULL)
+               return(0);
+       localization = (u_int32_t)pfs->pfs_id << 16;
+
+       /*
+        * Load the PFS so we can modify our in-core copy.  Ignore
+        * ENOENT errors.
+        *
+        * NOTE(review): the load error is overwritten by the copyin()
+        * result below, so every load error is discarded, not only
+        * ENOENT.  Confirm that is acceptable.
+        */
+       pfsm = hammer_load_pseudofs(trans, localization, &error);
+       error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));
+
+       /*
+        * Save it back, create a root inode if we are in master
+        * mode and no root exists.
+        */
+       if (error == 0)
+               error = hammer_mkroot_pseudofs(trans, cred, pfsm);
+       if (error == 0)
+               error = hammer_save_pseudofs(trans, pfsm);
+       hammer_rel_pseudofs(trans->hmp, pfsm);
+       return(error);
+}
+
+/*
+ * Upgrade a slave to a master
+ *
+ * Clearing the SLAVE flag is fairly easy, but we must first physically
+ * undo any partial syncs for transaction ids > sync_end_tid.
+ * Effectively, we must do a partial rollback of the PFS.
+ *
+ * A PFS that is not flagged as a slave is not modified; EINVAL is
+ * returned for it when it has no master id (master_id < 0).  An EINTR
+ * result is reported to the caller by setting HAMMER_IOC_HEAD_INTR in
+ * pfs->head.flags and returning 0.
+ *
+ * NOTE: The ip used for ioctl is not necessarily related to the PFS
+ */
+int
+hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
+                       struct hammer_ioc_pseudofs_rw *pfs)
+{
+       hammer_pseudofs_inmem_t pfsm;
+       u_int32_t localization;
+       int is_slave;
+       int error;
+
+       error = hammer_pfs_autodetect(pfs, ip);
+       if (error)
+               return(error);
+       localization = (u_int32_t)pfs->pfs_id << 16;
+
+       /*
+        * Unload any cached in-memory copy before loading the PFS again.
+        */
+       error = hammer_unload_pseudofs(trans, localization);
+       if (error)
+               return(error);
+
+       pfsm = hammer_load_pseudofs(trans, localization, &error);
+       is_slave = ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0);
+
+       /*
+        * A master id must be set when upgrading
+        */
+       if (!is_slave && pfsm->pfsd.master_id < 0)
+               error = EINVAL;
+
+       /*
+        * Remove everything past sync_end_tid, then drop the SLAVE flag
+        * and write the PFS data back.
+        */
+       if (error == 0 && is_slave) {
+               error = hammer_pfs_rollback(trans, pfsm,
+                                           pfsm->pfsd.sync_end_tid + 1);
+               if (error == 0) {
+                       pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE;
+                       error = hammer_save_pseudofs(trans, pfsm);
+               }
+       }
+       hammer_rel_pseudofs(trans->hmp, pfsm);
+
+       if (error == EINTR) {
+               pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
+               error = 0;
+       }
+       return (error);
+}
+
+/*
+ * Downgrade a master to a slave
+ *
+ * All this has to do is set the SLAVE flag and save the PFS data; a PFS
+ * that is already flagged as a slave is left untouched.  The master_id
+ * is left intact.
+ *
+ * We also leave sync_end_tid intact... the field is not used in master
+ * mode (vol0_next_tid overrides it), but if someone switches to master
+ * mode accidentally and then back to slave mode we don't want it to
+ * change.  Eventually it will be used as the cross-synchronization TID
+ * in multi-master mode, and we don't want to mess with it for that
+ * feature either.
+ *
+ * NOTE: The ip used for ioctl is not necessarily related to the PFS
+ */
+int
+hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
+                       struct hammer_ioc_pseudofs_rw *pfs)
+{
+       hammer_pseudofs_inmem_t pfsm;
+       u_int32_t localization;
+       int error;
+
+       error = hammer_pfs_autodetect(pfs, ip);
+       if (error)
+               return(error);
+       localization = (u_int32_t)pfs->pfs_id << 16;
+
+       /*
+        * Unload any cached in-memory copy before loading the PFS again.
+        */
+       error = hammer_unload_pseudofs(trans, localization);
+       if (error)
+               return(error);
+
+       pfsm = hammer_load_pseudofs(trans, localization, &error);
+       if (error == 0 &&
+           (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
+               pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
+               error = hammer_save_pseudofs(trans, pfsm);
+       }
+       hammer_rel_pseudofs(trans->hmp, pfsm);
+       return (error);
+}
+
+/*
+ * Destroy a PFS
+ *
+ * A PFS is destroyed by rolling it back to transaction id 0, which scans
+ * and deletes all of its records in the B-Tree, and then flagging it
+ * HAMMER_PFSD_DELETED.  The hammer utility will delete the softlink in
+ * the primary filesystem.
+ *
+ * An EINTR result is reported to the caller by setting
+ * HAMMER_IOC_HEAD_INTR in pfs->head.flags and returning 0.
+ *
+ * NOTE: The ip used for ioctl is not necessarily related to the PFS
+ */
+int
+hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
+                       struct hammer_ioc_pseudofs_rw *pfs)
+{
+       hammer_pseudofs_inmem_t pfsm;
+       u_int32_t localization;
+       int error;
+
+       error = hammer_pfs_autodetect(pfs, ip);
+       if (error)
+               return(error);
+       localization = (u_int32_t)pfs->pfs_id << 16;
+
+       /*
+        * Unload any cached in-memory copy before loading the PFS again.
+        */
+       error = hammer_unload_pseudofs(trans, localization);
+       if (error)
+               return(error);
+
+       /*
+        * Each step only runs if every previous step succeeded.
+        */
+       pfsm = hammer_load_pseudofs(trans, localization, &error);
+       if (error == 0)
+               error = hammer_pfs_rollback(trans, pfsm, 0);
+       if (error == 0) {
+               pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED;
+               error = hammer_save_pseudofs(trans, pfsm);
+       }
+       hammer_rel_pseudofs(trans->hmp, pfsm);
+
+       if (error == EINTR) {
+               pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
+               error = 0;
+       }
+       return(error);
+}
+
+/*
+ * Auto-detect the pseudofs and do basic bounds checking.
+ *
+ * A pfs_id of -1 is replaced with the PFS id taken from the upper bits
+ * of the ioctl inode's obj_localization.  Returns EINVAL if the
+ * resulting id is outside [0, HAMMER_MAX_PFS) or if pfs->bytes is
+ * smaller than struct hammer_pseudofs_data, otherwise 0.
+ */
+static
+int
+hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip)
+{
+       if (pfs->pfs_id == -1)
+               pfs->pfs_id = (int)(ip->obj_localization >> 16);
+
+       if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS ||
+           pfs->bytes < sizeof(struct hammer_pseudofs_data)) {
+               return(EINVAL);
+       }
+       return(0);
+}
+
+/*
+ * Rollback the specified PFS to (trunc_tid - 1), removing everything
+ * greater or equal to trunc_tid.  The PFS must not have been in no-mirror
+ * mode or the MIRROR_FILTERED scan will not work properly.
+ *
+ * This is typically used to remove any partial syncs when upgrading a
+ * slave to a master.  It can theoretically also be used to rollback
+ * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
+ * PRUNED, and to points that are older only if they are on a retained
+ * (pruning softlink) boundary.
+ *
+ * Rollbacks destroy information.  If you don't mind inode numbers changing
+ * a better way would be to cpdup a snapshot back onto the master.
+ *
+ * The scan covers the PFS's whole key range (inclusive of the end key)
+ * and hands each leaf element to hammer_pfs_delete_at_cursor().  key_cur
+ * records the last leaf element visited; if the scan fails with EDEADLK
+ * the cursor is torn down and the scan restarts from key_cur.  ENOENT
+ * from the B-Tree iteration marks the normal end of the scan.
+ *
+ * Returns 0 on success, otherwise the first error encountered.
+ */
+static
+int
+hammer_pfs_rollback(hammer_transaction_t trans,
+                   hammer_pseudofs_inmem_t pfsm,
+                   hammer_tid_t trunc_tid)
+{
+       struct hammer_cmirror cmirror;
+       struct hammer_cursor cursor;
+       struct hammer_base_elm key_cur;
+       int error;
+
+       bzero(&cmirror, sizeof(cmirror));
+       bzero(&key_cur, sizeof(key_cur));
+       key_cur.localization = HAMMER_MIN_LOCALIZATION + pfsm->localization;
+       key_cur.obj_id = HAMMER_MIN_OBJID;
+       key_cur.key = HAMMER_MIN_KEY;
+       key_cur.create_tid = 1;
+       key_cur.rec_type = HAMMER_MIN_RECTYPE;
+
+retry:
+       error = hammer_init_cursor(trans, &cursor, NULL, NULL);
+       if (error) {
+               hammer_done_cursor(&cursor);
+               goto failed;
+       }
+       cursor.key_beg = key_cur;
+       cursor.key_end.localization = HAMMER_MAX_LOCALIZATION +
+                                     pfsm->localization;
+       cursor.key_end.obj_id = HAMMER_MAX_OBJID;
+       cursor.key_end.key = HAMMER_MAX_KEY;
+       cursor.key_end.create_tid = HAMMER_MAX_TID;
+       cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
+
+       cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
+       cursor.flags |= HAMMER_CURSOR_BACKEND;
+
+       /*
+        * Do an optimized scan of only records created or modified
+        * >= trunc_tid, so we can fix up those records.  We must
+        * still check the TIDs but this greatly reduces the size of
+        * the scan.
+        */
+       cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
+       cursor.cmirror = &cmirror;
+       cmirror.mirror_tid = trunc_tid;
+
+       error = hammer_btree_first(&cursor);
+       while (error == 0) {
+               /*
+                * Abort the rollback if hammer_signal_check() reports an
+                * error (presumably a pending signal; the ioctl callers
+                * translate EINTR into HAMMER_IOC_HEAD_INTR).  The loop
+                * condition already guarantees error == 0 here, so the
+                * outer test below is redundant but harmless.
+                */
+               if (error == 0) {
+                       error = hammer_signal_check(trans->hmp);
+                       if (error)
+                               break;
+               }
+
+               /*
+                * We only care about leafs.  Internal nodes can be returned
+                * in mirror-filtered mode (they are used to generate SKIP
+                * mrecords), but we don't need them for this code.
+                *
+                * key_cur is saved before the element is processed so an
+                * EDEADLK retry resumes at this element.
+                */
+               if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
+                       key_cur = cursor.node->ondisk->elms[cursor.index].base;
+                       error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid);
+               }
+
+               if (error == 0)
+                       error = hammer_btree_iterate(&cursor);
+       }
+       if (error == ENOENT)
+               error = 0;
+       hammer_done_cursor(&cursor);
+       if (error == EDEADLK)
+               goto retry;
+failed:
+       return(error);
+}
+
+/*
+ * Helper function - perform rollback on a B-Tree element given trunc_tid.
+ *
+ * If create_tid >= trunc_tid the record is physically destroyed.
+ * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
+ * Otherwise the element predates trunc_tid and is left alone.
+ *
+ * The cursor must be positioned at a leaf element.  Returns 0 if nothing
+ * had to be done, otherwise the result of hammer_delete_at_cursor().
+ */
+static
+int
+hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
+{
+       hammer_btree_leaf_elm_t elm;
+       hammer_transaction_t trans;
+
+       elm = &cursor->node->ondisk->elms[cursor->index].leaf;
+       trans = cursor->trans;
+
+       /*
+        * The '1' passed to hammer_delete_at_cursor() presumably enables
+        * inode-count/next_tid tracking (the pruning code passes 0 and
+        * notes it does not need tracking) - TODO confirm against the
+        * prototype.  No byte statistics are collected (NULL).
+        */
+       if (elm->base.create_tid >= trunc_tid) {
+               return(hammer_delete_at_cursor(
+                               cursor, HAMMER_DELETE_DESTROY,
+                               trans->tid, trans->time32,
+                               1, NULL));
+       }
+       if (elm->base.delete_tid >= trunc_tid) {
+               return(hammer_delete_at_cursor(
+                               cursor, HAMMER_DELETE_ADJUST,
+                               0, 0,
+                               1, NULL));
+       }
+       return(0);
+}
+
index d92696e..609ab66 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_prune.c,v 1.14 2008/07/11 05:44:23 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_prune.c,v 1.15 2008/07/12 02:47:39 dillon Exp $
  */
 
 #include "hammer.h"
@@ -78,7 +78,8 @@ hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip,
        if ((prune->head.flags & HAMMER_IOC_PRUNE_ALL) && prune->nelms)
                return(EINVAL);
 
-       prune->key_cur.localization = prune->key_end.localization +
+       prune->key_cur.localization = (prune->key_end.localization &
+                                       HAMMER_LOCALIZE_MASK) +
                                      ip->obj_localization;
        prune->key_cur.obj_id = prune->key_end.obj_id;
        prune->key_cur.key = HAMMER_MAX_KEY;
@@ -104,7 +105,8 @@ retry:
                hammer_done_cursor(&cursor);
                goto failed;
        }
-       cursor.key_beg.localization = prune->key_beg.localization +
+       cursor.key_beg.localization = (prune->key_beg.localization &
+                                       HAMMER_LOCALIZE_MASK) +
                                      ip->obj_localization;
        cursor.key_beg.obj_id = prune->key_beg.obj_id;
        cursor.key_beg.key = HAMMER_MIN_KEY;
@@ -125,11 +127,15 @@ retry:
        cursor.flags |= HAMMER_CURSOR_BACKEND;
 
        /*
-        * This flag allows the B-Tree code to clean up loose ends.
+        * This flag allows the B-Tree code to clean up loose ends.  At
+        * the moment (XXX) it also means we have to hold the sync lock
+        * through the iteration.
         */
        cursor.flags |= HAMMER_CURSOR_PRUNING;
 
+       hammer_sync_lock_sh(trans);
        error = hammer_btree_last(&cursor);
+       hammer_sync_unlock(trans);
 
        while (error == 0) {
                /*
@@ -167,13 +173,18 @@ retry:
                         * Acquiring the sync lock guarantees that the
                         * operation will not cross a synchronization
                         * boundary (see the flusher).
+                        *
+                        * We don't need to track inodes or next_tid when
+                        * we are destroying deleted records.
                         */
                        isdir = (elm->base.rec_type == HAMMER_RECTYPE_DIRENTRY);
 
                        hammer_sync_lock_sh(trans);
                        error = hammer_delete_at_cursor(&cursor,
                                                        HAMMER_DELETE_DESTROY,
-                                                       &prune->stat_bytes);
+                                                       cursor.trans->tid,
+                                                       cursor.trans->time32,
+                                                       0, &prune->stat_bytes);
                        hammer_sync_unlock(trans);
                        if (error)
                                break;
@@ -210,7 +221,9 @@ retry:
                        hammer_lock_cursor(&cursor, 0);
                        seq = hammer_flusher_async(trans->hmp);
                }
+               hammer_sync_lock_sh(trans);
                error = hammer_btree_iterate_reverse(&cursor);
+               hammer_sync_unlock(trans);
        }
        if (error == ENOENT)
                error = 0;
index 8ae2a3e..f8ddf69 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.27 2008/07/11 05:44:23 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.28 2008/07/12 02:47:39 dillon Exp $
  */
 /*
  * HAMMER reblocker - This code frees up fragmented physical space
@@ -75,6 +75,7 @@ hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip,
                return(EINVAL);
 
        reblock->key_cur = reblock->key_beg;
+       reblock->key_cur.localization &= HAMMER_LOCALIZE_MASK;
        reblock->key_cur.localization += ip->obj_localization;
 
        checkspace_count = 0;
@@ -93,7 +94,8 @@ retry:
        cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
        cursor.key_beg.obj_type = 0;
 
-       cursor.key_end.localization = reblock->key_end.localization +
+       cursor.key_end.localization = (reblock->key_end.localization &
+                                       HAMMER_LOCALIZE_MASK) +
                                      ip->obj_localization;
        cursor.key_end.obj_id = reblock->key_end.obj_id;
        cursor.key_end.key = HAMMER_MAX_KEY;
index 98bdcc1..8b4eb35 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.86 2008/07/11 05:44:23 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.87 2008/07/12 02:47:39 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -1349,7 +1349,9 @@ hammer_vop_readlink(struct vop_readlink_args *ap)
        /*
         * Shortcut if the symlink data was stuffed into ino_data.
         *
-        * Also expand special "@@PFS%05d" softlinks.
+        * Also expand special "@@PFS%05d" softlinks (expansion only
+        * occurs for non-historical (current) accesses made from the
+        * primary filesystem).
         */
        if (ip->ino_data.size <= HAMMER_INODE_BASESYMLEN) {
                char *ptr;
@@ -1357,7 +1359,10 @@ hammer_vop_readlink(struct vop_readlink_args *ap)
 
                ptr = ip->ino_data.ext.symlink;
                bytes = (int)ip->ino_data.size;
-               if (bytes == 10 && strncmp(ptr, "@@PFS", 5) == 0) {
+               if (bytes == 10 &&
+                   ip->obj_asof == HAMMER_MAX_TID &&
+                   ip->obj_localization == 0 &&
+                   strncmp(ptr, "@@PFS", 5) == 0) {
                        hammer_simple_transaction(&trans, ip->hmp);
                        bcopy(ptr + 5, buf, 5);
                        buf[5] = 0;