kernel - Fix excessive mbuf use in nfs_realign()
[dragonfly.git] / sys / vfs / hammer / hammer_pfs.c
index cf4f28e..6699a53 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.1 2008/07/12 02:47:39 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.5 2008/07/31 04:42:04 dillon Exp $
  */
 /*
  * HAMMER PFS ioctls - Manage pseudo-fs configurations
@@ -74,11 +74,15 @@ hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
 
        /*
         * If the PFS is a master the sync tid is set by normal operation
-        * rather then the mirroring code, and will always track the
+        * rather than the mirroring code, and will always track the
         * real HAMMER filesystem.
+        *
+        * We use flush_tid1, which is the highest fully committed TID.
+        * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
+        * caught up to it yet so a crash will roll us back to flush_tid1.
         */
-       if (pfsm->pfsd.master_id >= 0)
-               pfsm->pfsd.sync_end_tid = trans->rootvol->ondisk->vol0_next_tid;
+       if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0)
+               pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1;
 
        /*
         * Copy out to userland.
@@ -121,11 +125,21 @@ hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                /*
                 * Save it back, create a root inode if we are in master
                 * mode and no root exists.
+                *
+                * We do not create root inodes for slaves, the root inode
+                * must be mirrored from the master.
                 */
-               if (error == 0)
+               if (error == 0 &&
+                   (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
                        error = hammer_mkroot_pseudofs(trans, cred, pfsm);
+               }
                if (error == 0)
                        error = hammer_save_pseudofs(trans, pfsm);
+
+
+               /*
+                * Wake up anyone waiting for a TID update for this PFS
+                */
+               wakeup(&pfsm->pfsd.sync_end_tid);
                hammer_rel_pseudofs(trans->hmp, pfsm);
        }
        return(error);
@@ -158,10 +172,6 @@ hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
         * A master id must be set when upgrading
         */
        pfsm = hammer_load_pseudofs(trans, localization, &error);
-       if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0 &&
-           pfsm->pfsd.master_id < 0) {
-               error = EINVAL;
-       }
        if (error == 0) {
                if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) {
                        error = hammer_pfs_rollback(trans, pfsm,
@@ -183,15 +193,12 @@ hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
 /*
  * Downgrade a master to a slave
  *
- * This is really easy to do, just set the SLAVE flag.  The master_id is
- * left intact.
+ * This is really easy to do, just set the SLAVE flag and update sync_end_tid.
  *
- * We also leave sync_end_tid intact... the field is not used in master
- * mode (vol0_next_tid overrides it), but if someone switches to master
- * mode accidently and then back to slave mode we don't want it to change.  
- * Eventually it will be used as the cross-synchronization TID in
- * multi-master mode, and we don't want to mess with it for that feature
- * either.
+ * We previously did not update sync_end_tid in consideration for a slave
+ * upgraded to a master and then downgraded again, but this completely breaks
+ * the case where one starts with a master and then downgrades to a slave,
+ * then upgrades again.
  *
  * NOTE: The ip used for ioctl is not necessarily related to the PFS
  */
@@ -199,6 +206,7 @@ int
 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                        struct hammer_ioc_pseudofs_rw *pfs)
 {
+       hammer_mount_t hmp = trans->hmp;
        hammer_pseudofs_inmem_t pfsm;
        u_int32_t localization;
        int error;
@@ -213,6 +221,8 @@ hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
        if (error == 0) {
                if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
                        pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
+                       if (pfsm->pfsd.sync_end_tid < hmp->flush_tid1)
+                               pfsm->pfsd.sync_end_tid = hmp->flush_tid1;
                        error = hammer_save_pseudofs(trans, pfsm);
                }
        }
@@ -260,6 +270,48 @@ hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
        return(error);
 }
 
+/*
+ * Block until the PFS has synced past the TID supplied by userland
+ * (pfsd.sync_end_tid), or until interrupted by a signal.
+ */
+int
+hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
+                        struct hammer_ioc_pseudofs_rw *pfs)
+{
+       hammer_pseudofs_inmem_t pfsm;
+       struct hammer_pseudofs_data pfsd;       /* userland-requested TID */
+       u_int32_t localization;
+       hammer_tid_t tid;
+       void *waitp;
+       int error;
+
+       if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
+               return(error);
+       localization = (u_int32_t)pfs->pfs_id << 16;
+
+       if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0)
+               return(error);
+
+       pfsm = hammer_load_pseudofs(trans, localization, &error);
+       if (error == 0) {
+               if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) {
+                       tid = pfsm->pfsd.sync_end_tid;  /* TID mirrored so far */
+                       waitp = &pfsm->pfsd.sync_end_tid;
+               } else {
+                       tid = trans->hmp->flush_tid1;   /* last committed TID */
+                       waitp = &trans->hmp->flush_tid1;
+               }
+               if (tid <= pfsd.sync_end_tid)
+                       error = tsleep(waitp, PCATCH, "hmrmwt", 0);
+       }
+       hammer_rel_pseudofs(trans->hmp, pfsm);
+       if (error == EINTR) {   /* signal: report via ioctl head, not errno */
+               pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
+               error = 0;
+       }
+       return(error);
+}
+
+
 /*
  * Auto-detect the pseudofs and do basic bounds checking.
  */
@@ -302,6 +354,7 @@ hammer_pfs_rollback(hammer_transaction_t trans,
        struct hammer_cursor cursor;
        struct hammer_base_elm key_cur;
        int error;
+       int seq;
 
        bzero(&cmirror, sizeof(cmirror));
        bzero(&key_cur, sizeof(key_cur));
@@ -311,6 +364,8 @@ hammer_pfs_rollback(hammer_transaction_t trans,
        key_cur.create_tid = 1;
        key_cur.rec_type = HAMMER_MIN_RECTYPE;
 
+       seq = trans->hmp->flusher.act;
+
 retry:
        error = hammer_init_cursor(trans, &cursor, NULL, NULL);
        if (error) {
@@ -353,12 +408,23 @@ retry:
                 * We only care about leafs.  Internal nodes can be returned
                 * in mirror-filtered mode (they are used to generate SKIP
                 * mrecords), but we don't need them for this code.
+                *
+                * WARNING: See warnings in hammer_unlock_cursor() function.
                 */
+               cursor.flags |= HAMMER_CURSOR_ATEDISK;
                if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
                        key_cur = cursor.node->ondisk->elms[cursor.index].base;
                        error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid);
                }
 
+               while (hammer_flusher_meta_halflimit(trans->hmp) ||
+                      hammer_flusher_undo_exhausted(trans, 2)) {
+                       hammer_unlock_cursor(&cursor);
+                       hammer_flusher_wait(trans->hmp, seq);
+                       hammer_lock_cursor(&cursor);
+                       seq = hammer_flusher_async_one(trans->hmp);
+               }
+
                if (error == 0)
                        error = hammer_btree_iterate(&cursor);
        }