HAMMER 61A/Many: Stabilization
author     Matthew Dillon <dillon@dragonflybsd.org>
           Thu, 10 Jul 2008 21:23:58 +0000 (21:23 +0000)
committer  Matthew Dillon <dillon@dragonflybsd.org>
           Thu, 10 Jul 2008 21:23:58 +0000 (21:23 +0000)
* Fix a buffer exhaustion issue.  When creating large numbers of empty files,
  a single inode sync of the directory can exhaust the buffer cache because
  too little else becomes dirty to force a flush.

  Put a check in the record-syncing code and do a partial finalization when
  necessary to avoid deadlocking the buffer cache (a toy model of the check
  is sketched below).

* Fix a panic caused by a missing call to hammer_cursor_deleted_element().
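
  Below is a minimal toy model of that check (plain userland C, not the
  kernel code; META_LIMIT and every function name here are illustrative
  stand-ins for hammer_flusher_meta_limit(), hammer_flusher_finalize()
  and hammer_sync_record_callback()).  It shows why re-checking the
  meta-data limit after every synced record keeps the number of dirty
  buffers bounded even when nothing else forces a flush:

  #include <stdio.h>

  #define META_LIMIT 1000         /* stand-in for the real meta-data limit */

  static int dirty_meta_buffers;

  /* cf. hammer_flusher_meta_limit() */
  static int
  meta_limit_reached(void)
  {
          return (dirty_meta_buffers >= META_LIMIT);
  }

  /* cf. hammer_flusher_finalize(trans, 0): write the dirty buffers out */
  static void
  partial_finalize(void)
  {
          printf("partial finalize: flushing %d dirty buffers\n",
                 dirty_meta_buffers);
          dirty_meta_buffers = 0;
  }

  /* cf. hammer_sync_record_callback(): sync one directory entry record */
  static void
  sync_record(void)
  {
          ++dirty_meta_buffers;           /* the sync dirties meta-data */
          if (meta_limit_reached())
                  partial_finalize();     /* bound the buildup mid-flush */
  }

  int
  main(void)
  {
          int i;

          for (i = 0; i < 50000; ++i)     /* e.g. creating 50000 empty files */
                  sync_record();
          printf("dirty buffers left over: %d\n", dirty_meta_buffers);
          return (0);
  }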

Reported-by: Michael Neumann <mneumann@ntecs.de>,
     Gergo Szakal <bastyaelvtars@gmail.com>

sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_btree.c
sys/vfs/hammer/hammer_flusher.c
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_vfsops.c

diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h
index ea1070d..110530d 100644
--- a/sys/vfs/hammer/hammer.h
+++ b/sys/vfs/hammer/hammer.h
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.109 2008/07/10 04:44:33 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.110 2008/07/10 21:23:58 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -1086,6 +1086,7 @@ void hammer_flusher_async(hammer_mount_t hmp);
 int  hammer_flusher_meta_limit(hammer_mount_t hmp);
 int  hammer_flusher_undo_exhausted(hammer_transaction_t trans, int quarter);
 void hammer_flusher_clean_loose_ios(hammer_mount_t hmp);
+void hammer_flusher_finalize(hammer_transaction_t trans, int final);
 
 int hammer_recover(hammer_mount_t hmp, hammer_volume_t rootvol);
 void hammer_recover_flush_buffers(hammer_mount_t hmp,
diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c
index 495702f..6fb8d1f 100644
--- a/sys/vfs/hammer/hammer_btree.c
+++ b/sys/vfs/hammer/hammer_btree.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.68 2008/07/10 04:44:33 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.69 2008/07/10 21:23:58 dillon Exp $
  */
 
 /*
@@ -2185,6 +2185,7 @@ btree_remove(hammer_cursor_t cursor)
                      (ondisk->count - cursor->parent_index) * esize);
                --ondisk->count;
                hammer_modify_node_done(parent);
+               hammer_cursor_deleted_element(parent, cursor->parent_index);
                hammer_flush_node(node);
                hammer_delete_node(cursor->trans, node);
 
diff --git a/sys/vfs/hammer/hammer_flusher.c b/sys/vfs/hammer/hammer_flusher.c
index 653d539..ca77e75 100644
--- a/sys/vfs/hammer/hammer_flusher.c
+++ b/sys/vfs/hammer/hammer_flusher.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.33 2008/07/07 00:24:31 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.34 2008/07/10 21:23:58 dillon Exp $
  */
 /*
  * HAMMER dependancy flusher thread
@@ -47,7 +47,6 @@ static void hammer_flusher_slave_thread(void *arg);
 static void hammer_flusher_flush(hammer_mount_t hmp);
 static void hammer_flusher_flush_inode(hammer_inode_t ip,
                                        hammer_transaction_t trans);
-static void hammer_flusher_finalize(hammer_transaction_t trans, int final);
 
 /*
  * Support structures for the flusher threads.
@@ -296,11 +295,8 @@ hammer_flusher_flush(hammer_mount_t hmp)
         * before actually digging into a new cycle, or the new cycle will
         * not have sufficient undo space.
         */
-       if (hammer_flusher_undo_exhausted(&hmp->flusher.trans, 3)) {
-               hammer_lock_ex(&hmp->flusher.finalize_lock);
+       if (hammer_flusher_undo_exhausted(&hmp->flusher.trans, 3))
                hammer_flusher_finalize(&hmp->flusher.trans, 0);
-               hammer_unlock(&hmp->flusher.finalize_lock);
-       }
 
        /*
         * Start work threads.
@@ -366,20 +362,10 @@ hammer_flusher_flush_inode(hammer_inode_t ip, hammer_transaction_t trans)
        while (hmp->flusher.finalize_want)
                tsleep(&hmp->flusher.finalize_want, 0, "hmrsxx", 0);
        if (hammer_flusher_undo_exhausted(trans, 1)) {
-               hmp->flusher.finalize_want = 1;
-               hammer_lock_ex(&hmp->flusher.finalize_lock);
                kprintf("HAMMER: Warning: UNDO area too small!\n");
                hammer_flusher_finalize(trans, 1);
-               hammer_unlock(&hmp->flusher.finalize_lock);
-               hmp->flusher.finalize_want = 0;
-               wakeup(&hmp->flusher.finalize_want);
        } else if (hammer_flusher_meta_limit(trans->hmp)) {
-               hmp->flusher.finalize_want = 1;
-               hammer_lock_ex(&hmp->flusher.finalize_lock);
                hammer_flusher_finalize(trans, 0);
-               hammer_unlock(&hmp->flusher.finalize_lock);
-               hmp->flusher.finalize_want = 0;
-               wakeup(&hmp->flusher.finalize_want);
        }
 }
 
@@ -417,8 +403,11 @@ hammer_flusher_undo_exhausted(hammer_transaction_t trans, int quarter)
  * If this is the last finalization in a flush group we also synchronize
  * our cached blockmap and set hmp->flusher_undo_start and our cached undo
  * fifo first_offset so the next flush resets the FIFO pointers.
+ *
+ * If this is not final it is being called because too many dirty meta-data
+ * buffers have built up and must be flushed with UNDO synchronization to
+ * avoid a buffer cache deadlock.
  */
-static
 void
 hammer_flusher_finalize(hammer_transaction_t trans, int final)
 {
@@ -432,6 +421,21 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final)
        hmp = trans->hmp;
        root_volume = trans->rootvol;
 
+       /*
+        * Exclusively lock the flusher.  This guarantees that all dirty
+        * buffers will be idled (have a mod-count of 0).
+        */
+       ++hmp->flusher.finalize_want;
+       hammer_lock_ex(&hmp->flusher.finalize_lock);
+
+       /*
+        * If this isn't the final sync several threads may have hit the
+        * meta-limit at the same time and raced.  Only sync if we really
+        * have to, after acquiring the lock.
+        */
+       if (final == 0 && !hammer_flusher_meta_limit(hmp))
+               goto done;
+
        /*
         * Flush data buffers.  This can occur asynchronously and at any
         * time.  We must interlock against the frontend direct-data write
@@ -573,6 +577,11 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final)
        }
 
        hammer_sync_unlock(trans);
+
+done:
+       hammer_unlock(&hmp->flusher.finalize_lock);
+       if (--hmp->flusher.finalize_want == 0)
+               wakeup(&hmp->flusher.finalize_want);
 }
 
 /*
diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c
index e264db0..a1b158b 100644
--- a/sys/vfs/hammer/hammer_inode.c
+++ b/sys/vfs/hammer/hammer_inode.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.97 2008/07/10 04:44:33 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.98 2008/07/10 21:23:58 dillon Exp $
  */
 
 #include "hammer.h"
@@ -1939,6 +1939,7 @@ hammer_sync_record_callback(hammer_record_t record, void *data)
 {
        hammer_cursor_t cursor = data;
        hammer_transaction_t trans = cursor->trans;
+       hammer_mount_t hmp = trans->hmp;
        int error;
 
        /*
@@ -2072,6 +2073,21 @@ hammer_sync_record_callback(hammer_record_t record, void *data)
        }
 done:
        hammer_flush_record_done(record, error);
+
+       /*
+        * Do partial finalization if we have built up too many dirty
+        * buffers.  Otherwise a buffer cache deadlock can occur when
+        * doing things like creating tens of thousands of tiny files.
+        *
+        * The finalization lock is already being held by virtue of the
+        * flusher calling us.
+        */
+        if (hammer_flusher_meta_limit(hmp)) {
+               hammer_unlock(&hmp->flusher.finalize_lock);
+                hammer_flusher_finalize(trans, 0);
+               hammer_lock_sh(&hmp->flusher.finalize_lock);
+       }
+
        return(error);
 }
 
@@ -2223,11 +2239,7 @@ hammer_sync_inode(hammer_inode_t ip)
 
        /*
         * Now sync related records.  These will typically be directory
-        * entries or delete-on-disk records.
-        *
-        * Not all records will be flushed, but clear XDIRTY anyway.  We
-        * will set it again in the frontend hammer_flush_inode_done() 
-        * if records remain.
+        * entries, records tracking direct-writes, or delete-on-disk records.
         */
        if (error == 0) {
                tmp_error = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c
index 1a3a504..1bfbc98 100644
--- a/sys/vfs/hammer/hammer_vfsops.c
+++ b/sys/vfs/hammer/hammer_vfsops.c
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.60 2008/07/07 00:24:31 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.61 2008/07/10 21:23:58 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -760,7 +760,6 @@ hammer_vfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
         * unlocked while we manipulate the related vnode to avoid a
         * deadlock.
         */
-       kprintf("localization %08x\n", localization);
        ip = hammer_get_inode(&trans, NULL, info.obj_id,
                              info.obj_asof, localization, 0, &error);
        if (ip == NULL) {