HAMMER 61E/Many: Stabilization, Performance
author Matthew Dillon <dillon@dragonflybsd.org>
Sun, 13 Jul 2008 09:32:48 +0000 (09:32 +0000)
committer Matthew Dillon <dillon@dragonflybsd.org>
Sun, 13 Jul 2008 09:32:48 +0000 (09:32 +0000)
* PERFORMANCE: hammer_sync_inode() was generating a new transaction id
  for each inode, causing hammer_btree_do_propagation() to have to
  modify B-Tree nodes all the way to root on a per-file basis when
  syncing a rm -rf.

  Change the code to use the flusher's transaction id so all inodes
  bundled into the same flush group use the same transaction id.

* BUG FIX:  The reblocker was able to blow out the buffer cache with
  dirty data buffers.  Even though HAMMER allows these buffers to be
  flushed to the disk at any time by the kernel, calls to bwillwrite()
  are still needed to prevent a buffer cache deadlock.

sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_btree.c
sys/vfs/hammer/hammer_flusher.c
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_object.c
sys/vfs/hammer/hammer_reblock.c

index 4c8d74a..0fee1f1 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.115 2008/07/13 01:12:41 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.116 2008/07/13 09:32:48 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -1032,7 +1032,7 @@ void hammer_rel_inode(hammer_inode_t ip, int flush);
 int hammer_reload_inode(hammer_inode_t ip, void *arg __unused);
 int hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2);
 
-int hammer_sync_inode(hammer_inode_t ip);
+int hammer_sync_inode(hammer_transaction_t trans, hammer_inode_t ip);
 void hammer_test_inode(hammer_inode_t ip);
 void hammer_inode_unloadable_check(hammer_inode_t ip, int getvp);
 
index 3966dad..93eecbd 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.70 2008/07/11 01:22:29 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.71 2008/07/13 09:32:48 dillon Exp $
  */
 
 /*
@@ -2292,6 +2292,12 @@ hammer_btree_mirror_propagate(hammer_cursor_t cursor, hammer_tid_t mirror_tid)
                                   sizeof(elm->mirror_tid));
                elm->mirror_tid = mirror_tid;
                hammer_modify_node_done(node);
+               if (hammer_debug_general & 0x0002) {
+                       kprintf("mirror_propagate: propagate "
+                               "%016llx @%016llx:%d\n",
+                               mirror_tid, node->node_offset, cursor->index);
+               }
+
 
                /*
                 * Adjust the node's mirror_tid aggregator
@@ -2301,6 +2307,11 @@ hammer_btree_mirror_propagate(hammer_cursor_t cursor, hammer_tid_t mirror_tid)
                hammer_modify_node_field(cursor->trans, node, mirror_tid);
                node->ondisk->mirror_tid = mirror_tid;
                hammer_modify_node_done(node);
+               if (hammer_debug_general & 0x0002) {
+                       kprintf("mirror_propagate: propagate "
+                               "%016llx @%016llx\n",
+                               mirror_tid, node->node_offset);
+               }
        }
        if (error == ENOENT)
                error = 0;
index d614c1b..aad7b3c 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.38 2008/07/13 01:12:41 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.39 2008/07/13 09:32:48 dillon Exp $
  */
 /*
  * HAMMER dependancy flusher thread
@@ -264,6 +264,8 @@ hammer_flusher_flush(hammer_mount_t hmp)
                                flg->total_count, flg->refs);
                }
                hammer_start_transaction_fls(&hmp->flusher.trans, hmp);
+               if (hammer_debug_general & 0x0001)
+                       kprintf("T");
 
                /*
                 * If the previous flush cycle just about exhausted our
@@ -455,7 +457,7 @@ hammer_flusher_flush_inode(hammer_inode_t ip, hammer_transaction_t trans)
        int error;
 
        hammer_flusher_clean_loose_ios(hmp);
-       error = hammer_sync_inode(ip);
+       error = hammer_sync_inode(trans, ip);
        if (error != EWOULDBLOCK)
                ip->error = error;
        hammer_flush_inode_done(ip);
index cb5452a..05e1c0f 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.101 2008/07/12 23:04:50 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.102 2008/07/13 09:32:48 dillon Exp $
  */
 
 #include "hammer.h"
@@ -2206,9 +2206,8 @@ done:
  * XXX error handling
  */
 int
-hammer_sync_inode(hammer_inode_t ip)
+hammer_sync_inode(hammer_transaction_t trans, hammer_inode_t ip)
 {
-       struct hammer_transaction trans;
        struct hammer_cursor cursor;
        hammer_node_t tmp_node;
        hammer_record_t depend;
@@ -2219,8 +2218,7 @@ hammer_sync_inode(hammer_inode_t ip)
        if ((ip->sync_flags & HAMMER_INODE_MODMASK) == 0)
                return(0);
 
-       hammer_start_transaction_fls(&trans, ip->hmp);
-       error = hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);
+       error = hammer_init_cursor(trans, &cursor, &ip->cache[1], ip);
        if (error)
                goto done;
 
@@ -2405,24 +2403,24 @@ hammer_sync_inode(hammer_inode_t ip)
                         * copy of the inode record.  The DELETED flag handles
                         * this, do not set RDIRTY.
                         */
-                       ip->ino_leaf.base.delete_tid = trans.tid;
-                       ip->sync_ino_leaf.base.delete_tid = trans.tid;
-                       ip->ino_leaf.delete_ts = trans.time32;
-                       ip->sync_ino_leaf.delete_ts = trans.time32;
+                       ip->ino_leaf.base.delete_tid = trans->tid;
+                       ip->sync_ino_leaf.base.delete_tid = trans->tid;
+                       ip->ino_leaf.delete_ts = trans->time32;
+                       ip->sync_ino_leaf.delete_ts = trans->time32;
 
 
                        /*
                         * Adjust the inode count in the volume header
                         */
-                       hammer_sync_lock_sh(&trans);
+                       hammer_sync_lock_sh(trans);
                        if (ip->flags & HAMMER_INODE_ONDISK) {
-                               hammer_modify_volume_field(&trans,
-                                                          trans.rootvol,
+                               hammer_modify_volume_field(trans,
+                                                          trans->rootvol,
                                                           vol0_stat_inodes);
                                --ip->hmp->rootvol->ondisk->vol0_stat_inodes;
-                               hammer_modify_volume_done(trans.rootvol);
+                               hammer_modify_volume_done(trans->rootvol);
                        }
-                       hammer_sync_unlock(&trans);
+                       hammer_sync_unlock(trans);
                } else {
                        Debugger("hammer_ip_delete_clean errored");
                }
@@ -2486,10 +2484,10 @@ defer_buffer_flush:
                 * Also set the create_tid in both the frontend and backend
                 * copy of the inode record.
                 */
-               ip->ino_leaf.base.create_tid = trans.tid;
-               ip->ino_leaf.create_ts = trans.time32;
-               ip->sync_ino_leaf.base.create_tid = trans.tid;
-               ip->sync_ino_leaf.create_ts = trans.time32;
+               ip->ino_leaf.base.create_tid = trans->tid;
+               ip->ino_leaf.create_ts = trans->time32;
+               ip->sync_ino_leaf.base.create_tid = trans->tid;
+               ip->sync_ino_leaf.create_ts = trans->time32;
                ip->sync_flags |= HAMMER_INODE_DDIRTY;
                break;
        }
@@ -2521,7 +2519,6 @@ done:
         * do not improperly reuse it.
         */
        hammer_done_cursor(&cursor);
-       hammer_done_transaction(&trans);
        return(error);
 }
 
index a7e733f..f0988e8 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.88 2008/07/12 23:04:50 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.89 2008/07/13 09:32:48 dillon Exp $
  */
 
 #include "hammer.h"
@@ -1054,6 +1054,7 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record)
        if (record->type == HAMMER_MEM_RECORD_DEL) {
                error = hammer_btree_lookup(cursor);
                if (error == 0) {
+                       /* XXX iprec? */
                        error = hammer_ip_delete_record(cursor, record->ip,
                                                        trans->tid);
                        if (error == 0) {
@@ -1989,7 +1990,7 @@ hammer_delete_at_cursor(hammer_cursor_t cursor, int delete_flags,
 
        /*
         * Adjust the delete_tid.  Update the mirror_tid propagation field
-        * as well.
+        * as well.  delete_tid can be 0 (undelete -- used by mirroring).
         */
        if (delete_flags & HAMMER_DELETE_ADJUST) {
                if (elm->base.rec_type == HAMMER_RECTYPE_INODE) {
@@ -2009,6 +2010,12 @@ hammer_delete_at_cursor(hammer_cursor_t cursor, int delete_flags,
                        node->ondisk->mirror_tid = elm->leaf.base.delete_tid;
                        hammer_modify_node_done(node);
                        doprop = 1;
+                       if (hammer_debug_general & 0x0002) {
+                               kprintf("delete_at_cursor: propagate %016llx"
+                                       " @%016llx\n",
+                                       elm->leaf.base.delete_tid,
+                                       node->node_offset);
+                       }
                }
 
                /*
index 2b3418f..c62892c 100644 (file)
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.30 2008/07/13 01:12:41 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.31 2008/07/13 09:32:48 dillon Exp $
  */
 /*
  * HAMMER reblocker - This code frees up fragmented physical space
@@ -145,6 +145,18 @@ retry:
                        seq = hammer_flusher_async(trans->hmp, NULL);
                }
 
+               /*
+                * We allocate data buffers, which atm we don't track
+                * dirty levels for because we allow the kernel to write
+                * them.  But if we allocate too many we can still deadlock
+                * the buffer cache.
+                */
+               if (bd_heatup()) {
+                       hammer_unlock_cursor(&cursor, 0);
+                       bwillwrite(HAMMER_BUFSIZE);
+                       hammer_lock_cursor(&cursor, 0);
+               }
+
                /*
                 * Acquiring the sync_lock prevents the operation from
                 * crossing a synchronization boundary.