HAMMER 53H/Many: Performance tuning, bug fixes
author Matthew Dillon <dillon@dragonflybsd.org>
Tue, 10 Jun 2008 22:30:21 +0000 (22:30 +0000)
committer Matthew Dillon <dillon@dragonflybsd.org>
Tue, 10 Jun 2008 22:30:21 +0000 (22:30 +0000)
* CHANGE THE ON-MEDIA B-TREE STRUCTURE.  The number of elements per node has
  been increased from 16 to 64.  The intent is to reduce the number of seeks
  required under heavy random-access loads.

* Add a shortcut to the B-Tree node scanning code (requires more testing).
  Instead of scanning linearly, we do a power-of-2 narrowing search, as
  sketched below.
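
  The narrowing loop (excerpted, lightly annotated, from the hammer_btree.c
  hunk below) halves the candidate range until at most four elements remain,
  then returns the lower bound so the existing linear scan can finish:

    static int
    hammer_btree_search_node(hammer_base_elm_t elm, hammer_node_ondisk_t node)
    {
            int b = 0;              /* lower bound of candidate range */
            int s = node->count;    /* upper bound */
            int i;
            int r;

            while (s - b > 4) {
                    i = b + (s - b) / 2;
                    r = hammer_btree_cmp(elm, &node->elms[i].leaf.base);
                    if (r <= 1)
                            s = i;  /* candidate: narrow the upper bound */
                    else
                            b = i;  /* elm sorts well past elms[i] */
            }
            return(b);
    }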

* Only do clustered reads for DATA types.  Do not cluster meta-data (aka
  B-Tree) I/O.  Note that the inode data structure is considered to be
  a DATA type.  Reduce the cluster read size from 256K to 64K to avoid
  blowing out the buffer cache.
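
  The dispatch in hammer_io_read() now looks like this (excerpted from the
  hammer_io.c hunk below); HAMMER_CLUSTER_SIZE is the new 64K window and
  HAMMER_BUFSIZE a single 16K hammer buffer:

    switch(io->type) {
    case HAMMER_STRUCTURE_DATA_BUFFER:
            /* data: read-ahead up to HAMMER_CLUSTER_BUFS buffers */
            error = cluster_read(devvp, limit, io->offset,
                                 HAMMER_BUFSIZE,
                                 HAMMER_CLUSTER_SIZE,
                                 HAMMER_CLUSTER_BUFS, &io->bp);
            break;
    default:
            /* meta-data (B-Tree): plain single-buffer read */
            error = bread(devvp, io->offset, HAMMER_BUFSIZE, &io->bp);
            break;
    }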

* Augment hammer locks so that a normal lock blockage can be distinguished
  from one that is recovering from a deadlock.
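
  Existing callers keep the old API through an inline wrapper that supplies
  a generic wait identifier, while the deadlock-recovery paths pass their
  own (from the hammer.h and hammer_cursor.c hunks below):

    static __inline void
    hammer_lock_ex(struct hammer_lock *lock)
    {
            hammer_lock_ex_ident(lock, "hmrlck");   /* normal blockage */
    }

    /* hammer_done_cursor() deadlock recovery identifies itself instead: */
    hammer_lock_ex_ident(&cursor->deadlk_node->lock, "hmrdlk");
    hammer_wait_mem_record_ident(cursor->deadlk_rec, "hmmdlk");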

* Change the slave work threads for the flusher to pull their work off a
  single queue.  This fixes an issue where one slave work thread would
  sometimes get a disproportionate percentage of the work and the
  master thread then had to wait for it to finish while the other work
  threads were twiddling their thumbs.
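
  The slaves now compete for inodes directly on the shared flush_list, so
  the load balances itself across however many threads the master wakes up
  (from the hammer_flusher.c hunk below):

    while ((ip = TAILQ_FIRST(&hmp->flush_list)) != NULL) {
            /* only take inodes belonging to the active flush group */
            if (ip->flush_group != hmp->flusher.act)
                    break;
            TAILQ_REMOVE(&hmp->flush_list, ip, flush_entry);
            hammer_flusher_flush_inode(ip, &hmp->flusher.trans);
    }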

* Adjust the wait reclaims code to solve a long-standing performance issue.
  The flusher could get so far behind that the system's buffer cache buffers
  would no longer have any locality of reference to what was being flushed,
  causing a massive drop in performance.
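
  The throttle now works off absolute thresholds.  A condensed sketch of
  the new hammer_inode_waitreclaims() bands (see the hammer_inode.c hunk
  below for the full loop): below HAMMER_RECLAIM_MID (2000) the flusher is
  merely kicked; between MID and HAMMER_RECLAIM_MAX (3000) the caller also
  sleeps for a delay that ramps linearly up to one second; above MAX it
  blocks until the flusher drains the backlog.

    if (hmp->inode_reclaims < HAMMER_RECLAIM_MID) {
            hammer_flusher_async(hmp);      /* kick the flusher, don't wait */
    } else if (hmp->inode_reclaims < HAMMER_RECLAIM_MAX) {
            /* delay ramps from 1 tick to hz across the MID..MAX band */
            delay = (hmp->inode_reclaims - HAMMER_RECLAIM_MID) *
                    hz / (HAMMER_RECLAIM_MAX - HAMMER_RECLAIM_MID);
            if (delay == 0)
                    delay = 1;
            hammer_flusher_async(hmp);
            tsleep(&delay, 0, "hmitik", delay);
    }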

* Do not queue a dirty inode to the flusher unconditionally in the strategy
  write code.  Only do it if system resources appear to be stressed.
  The inode will get flushed when the filesystem syncs.
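
  The strategy-write path now only signals the flusher when reservations
  indicate real pressure, globally or per-inode (from the hammer_vnops.c
  hunk below):

    if (hmp->rsv_recs > hammer_limit_recs &&
        ip->rsv_recs > hammer_limit_irecs / 10) {
            /* global record reservations are stressed */
            hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
    } else if (ip->rsv_recs > hammer_limit_irecs) {
            /* this one inode holds too many reservations */
            hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
    }
    /* otherwise leave the inode dirty; the filesystem sync flushes it */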

* Code cleanup.

* Fix a bug reported by Antonio Huete Jimenez related to 0-length writes
  not working properly.
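
  The old range check in hammer_vop_write() was
  (uio->uio_offset < 0 || uio->uio_offset + uio->uio_resid <= 0), which
  returned EFBIG for a 0-length write at offset 0 since 0 + 0 <= 0.  The
  reworked check (from the hammer_vnops.c hunk below) only rejects the
  sum when there is actually something to write:

    if (uio->uio_offset < 0) {
            hammer_done_transaction(&trans);
            return (EFBIG);
    }
    base_offset = uio->uio_offset + uio->uio_resid; /* gcc-4 workaround */
    if (uio->uio_resid > 0 && base_offset <= 0) {
            hammer_done_transaction(&trans);
            return (EFBIG);
    }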

13 files changed:
sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_btree.c
sys/vfs/hammer/hammer_btree.h
sys/vfs/hammer/hammer_cursor.c
sys/vfs/hammer/hammer_flusher.c
sys/vfs/hammer/hammer_inode.c
sys/vfs/hammer/hammer_io.c
sys/vfs/hammer/hammer_object.c
sys/vfs/hammer/hammer_ondisk.c
sys/vfs/hammer/hammer_recover.c
sys/vfs/hammer/hammer_subs.c
sys/vfs/hammer/hammer_vfsops.c
sys/vfs/hammer/hammer_vnops.c

diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h
index 8b93794..3e019b0 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.79 2008/06/10 08:51:01 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.80 2008/06/10 22:30:21 dillon Exp $
  */
 /*
  * This header file contains structures used internally by the HAMMERFS
@@ -270,8 +270,8 @@ typedef struct hammer_inode *hammer_inode_t;
 #define HAMMER_FLUSH_RECURSION 0x0002
 
 #define HAMMER_RECLAIM_MIN     1000    /* absolute value */
-#define HAMMER_RECLAIM_SLOPCT  20      /* percent of total hammer inodes */
-#define HAMMER_RECLAIM_MAXPCT  50      /* percent of total hammer inodes */
+#define HAMMER_RECLAIM_MID     2000    /* absolute value */
+#define HAMMER_RECLAIM_MAX     3000    /* absolute value */
 
 /*
  * Structure used to represent an unsynchronized record in-memory.  These
@@ -408,6 +408,13 @@ struct hammer_io {
 
 typedef struct hammer_io *hammer_io_t;
 
+#define HAMMER_CLUSTER_SIZE    (64 * 1024)
+#if HAMMER_CLUSTER_SIZE > MAXBSIZE
+#undef  HAMMER_CLUSTER_SIZE
+#define HAMMER_CLUSTER_SIZE    MAXBSIZE
+#endif
+#define HAMMER_CLUSTER_BUFS    (HAMMER_CLUSTER_SIZE / HAMMER_BUFSIZE)
+
 /*
  * In-memory volume representing on-disk buffer
  */
@@ -566,17 +573,7 @@ struct hammer_undo {
 
 typedef struct hammer_undo *hammer_undo_t;
 
-/*
- * Support structures for the flusher threads.
- */
-struct hammer_flusher_info {
-       struct hammer_mount *hmp;
-       TAILQ_HEAD(, hammer_inode) work_list;
-       thread_t        td;
-       int             running;
-};
-
-typedef struct hammer_flusher_info *hammer_flusher_info_t;
+struct hammer_flusher_info;
 
 struct hammer_flusher {
        int             signal;         /* flusher thread sequencer */
@@ -623,6 +620,7 @@ struct hammer_mount {
 
        int     inode_reclaims; /* inodes pending reclaim by flusher */
        int     count_inodes;   /* total number of inodes */
+       int     count_iqueued;  /* inodes queued to flusher */
 
        struct hammer_flusher flusher;
 
@@ -687,6 +685,7 @@ extern int hammer_debug_recover;
 extern int hammer_debug_recover_faults;
 extern int hammer_debug_write_release;
 extern int hammer_count_inodes;
+extern int hammer_count_iqueued;
 extern int hammer_count_reclaiming;
 extern int hammer_count_records;
 extern int hammer_count_record_datas;
@@ -696,6 +695,7 @@ extern int hammer_count_nodes;
 extern int hammer_count_dirtybufs;
 extern int hammer_count_reservations;
 extern int hammer_limit_dirtybufs;
+extern int hammer_limit_iqueued;
 extern int hammer_limit_irecs;
 extern int hammer_limit_recs;
 extern int hammer_bio_count;
@@ -712,8 +712,6 @@ struct hammer_inode *hammer_get_inode(hammer_transaction_t trans,
                        int *errorp);
 void   hammer_put_inode(struct hammer_inode *ip);
 void   hammer_put_inode_ref(struct hammer_inode *ip);
-void   hammer_inode_waitreclaims(hammer_mount_t hmp);
-void   hammer_inode_wakereclaims(hammer_mount_t hmp);
 
 int    hammer_unload_volume(hammer_volume_t volume, void *data __unused);
 int    hammer_adjust_volume_mode(hammer_volume_t volume, void *data __unused);
@@ -737,7 +735,7 @@ int hammer_queue_inodes_flusher(hammer_mount_t hmp, int waitfor);
 hammer_record_t
        hammer_alloc_mem_record(hammer_inode_t ip, int data_len);
 void   hammer_flush_record_done(hammer_record_t record, int error);
-void   hammer_wait_mem_record(hammer_record_t record);
+void   hammer_wait_mem_record_ident(hammer_record_t record, const char *ident);
 void   hammer_rel_mem_record(hammer_record_t record);
 
 int    hammer_cursor_up(hammer_cursor_t cursor);
@@ -747,7 +745,7 @@ int hammer_cursor_upgrade(hammer_cursor_t cursor);
 void   hammer_cursor_downgrade(hammer_cursor_t cursor);
 int    hammer_cursor_seek(hammer_cursor_t cursor, hammer_node_t node,
                        int index);
-void   hammer_lock_ex(struct hammer_lock *lock);
+void   hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident);
 int    hammer_lock_ex_try(struct hammer_lock *lock);
 void   hammer_lock_sh(struct hammer_lock *lock);
 int    hammer_lock_sh_try(struct hammer_lock *lock);
@@ -902,10 +900,12 @@ int  hammer_create_inode(struct hammer_transaction *trans, struct vattr *vap,
                        struct hammer_inode **ipp);
 void hammer_rel_inode(hammer_inode_t ip, int flush);
 int hammer_reload_inode(hammer_inode_t ip, void *arg __unused);
+int hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2);
 
 int hammer_sync_inode(hammer_inode_t ip);
 void hammer_test_inode(hammer_inode_t ip);
 void hammer_inode_unloadable_check(hammer_inode_t ip, int getvp);
+void hammer_inode_waitreclaims(hammer_mount_t hmp);
 
 int  hammer_ip_add_directory(struct hammer_transaction *trans,
                        hammer_inode_t dip, struct namecache *ncp,
@@ -939,6 +939,7 @@ void hammer_io_inval(hammer_volume_t volume, hammer_off_t zone2_offset);
 void hammer_io_release(struct hammer_io *io, int flush);
 void hammer_io_flush(struct hammer_io *io);
 void hammer_io_waitdep(struct hammer_io *io);
+void hammer_io_wait_all(hammer_mount_t hmp, const char *ident);
 int hammer_io_direct_read(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf,
                          struct bio *bio);
 int hammer_io_direct_write(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf,
@@ -983,6 +984,18 @@ void hkprintf(const char *ctl, ...);
 #endif
 
 static __inline void
+hammer_wait_mem_record(hammer_record_t record)
+{
+       hammer_wait_mem_record_ident(record, "hmmwai");
+}
+
+static __inline void
+hammer_lock_ex(struct hammer_lock *lock)
+{
+       hammer_lock_ex_ident(lock, "hmrlck");
+}
+
+static __inline void
 hammer_modify_node_noundo(hammer_transaction_t trans, hammer_node_t node)
 {
        hammer_modify_buffer(trans, node->buffer, NULL, 0);
diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c
index 722b8e5..e54b11f 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.50 2008/06/07 07:41:51 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.51 2008/06/10 22:30:21 dillon Exp $
  */
 
 /*
@@ -83,6 +83,8 @@
 #include <sys/buf2.h>
 
 static int btree_search(hammer_cursor_t cursor, int flags);
+static int hammer_btree_search_node(hammer_base_elm_t elm,
+                       hammer_node_ondisk_t node);
 static int btree_split_internal(hammer_cursor_t cursor);
 static int btree_split_leaf(hammer_cursor_t cursor);
 static int btree_remove(hammer_cursor_t cursor);
@@ -942,7 +944,13 @@ btree_search(hammer_cursor_t cursor, int flags)
                                cursor->node->node_offset,
                                node->count);
                }
-               for (i = 0; i <= node->count; ++i) {
+
+               /*
+                * Try to shortcut the search before dropping into the
+                * linear loop.  Locate the first node where r <= 1.
+                */
+               i = hammer_btree_search_node(&cursor->key_beg, node);
+               while (i <= node->count) {
                        elm = &node->elms[i];
                        r = hammer_btree_cmp(&cursor->key_beg, &elm->base);
                        if (hammer_debug_btree > 2) {
@@ -956,6 +964,7 @@ btree_search(hammer_cursor_t cursor, int flags)
                                cursor->create_check = elm->base.create_tid - 1;
                                cursor->flags |= HAMMER_CURSOR_CREATE_CHECK;
                        }
+                       ++i;
                }
                if (hammer_debug_btree) {
                        kprintf("SEARCH-I preI=%d/%d r=%d\n",
@@ -1140,7 +1149,12 @@ btree_search(hammer_cursor_t cursor, int flags)
                        node->count);
        }
 
-       for (i = 0; i < node->count; ++i) {
+       /*
+        * Try to shortcut the search before dropping into the
+        * linear loop.  Locate the first node where r <= 1.
+        */
+       i = hammer_btree_search_node(&cursor->key_beg, node);
+       while (i < node->count) {
                elm = &node->elms[i];
 
                r = hammer_btree_cmp(&cursor->key_beg, &elm->leaf.base);
@@ -1158,8 +1172,10 @@ btree_search(hammer_cursor_t cursor, int flags)
 
                if (r < 0)
                        goto failed;
-               if (r > 1)
+               if (r > 1) {
+                       ++i;
                        continue;
+               }
 
                /*
                 * Check our as-of timestamp against the element.
@@ -1167,12 +1183,15 @@ btree_search(hammer_cursor_t cursor, int flags)
                if (flags & HAMMER_CURSOR_ASOF) {
                        if (hammer_btree_chkts(cursor->asof,
                                               &node->elms[i].base) != 0) {
+                               ++i;
                                continue;
                        }
                        /* success */
                } else {
-                       if (r > 0)      /* can only be +1 */
+                       if (r > 0) {    /* can only be +1 */
+                               ++i;
                                continue;
+                       }
                        /* success */
                }
                cursor->index = i;
@@ -1228,6 +1247,36 @@ done:
        return(error);
 }
 
+/*
+ * Heuristic search for the first element whose comparison is <= 1.  May
+ * return an index whose compare result is > 1 but may only return an index
+ * whose compare result is <= 1 if it is the first element with that result.
+ */
+static int
+hammer_btree_search_node(hammer_base_elm_t elm, hammer_node_ondisk_t node)
+{
+       int b;
+       int s;
+       int i;
+       int r;
+
+       /*
+        * Don't bother if the node does not have very many elements
+        */
+       b = 0;
+       s = node->count;
+       while (s - b > 4) {
+               i = b + (s - b) / 2;
+               r = hammer_btree_cmp(elm, &node->elms[i].leaf.base);
+               if (r <= 1) {
+                       s = i;
+               } else {
+                       b = i;
+               }
+       }
+       return(b);
+}
+
 
 /************************************************************************
  *                        SPLITTING AND MERGING                        *
diff --git a/sys/vfs/hammer/hammer_btree.h b/sys/vfs/hammer/hammer_btree.h
index 2065a0e..72054b6 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.16 2008/05/18 01:48:50 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.17 2008/06/10 22:30:21 dillon Exp $
  */
 
 /*
@@ -164,15 +164,20 @@ union hammer_btree_elm {
 typedef union hammer_btree_elm *hammer_btree_elm_t;
 
 /*
- * B-Tree node (normal or meta)        (16x64 = 1K structure)
+ * B-Tree node (normal or meta)        (64x64 = 4K structure)
  *
- * Each node contains 15 elements.  The last element for an internal node
+ * Each node contains 63 elements.  The last element for an internal node
  * is the right-boundary so internal nodes have one fewer logical element
  * than leaf nodes.
  *
  * 'count' always refers to the number of elements and is non-inclusive of
  * the right-hand boundary for an internal node.
  *
+ * The use of a fairly large radix is designed to reduce the number of
+ * discrete disk accesses required to locate something.  Keep in mind
+ * that nodes are allocated out of 16K hammer buffers so supported values
+ * are (256-1), (128-1), (64-1), (32-1), or (16-1).
+ *
  * NOTE: The node head for an internal node does not contain the subtype
  * (The B-Tree node type for the nodes referenced by its elements). 
  * Instead, each element specifies the subtype (elm->base.subtype).
@@ -183,7 +188,7 @@ typedef union hammer_btree_elm *hammer_btree_elm_t;
  * reserved for left/right leaf linkage fields, flags, and other future
  * features.
  */
-#define HAMMER_BTREE_LEAF_ELMS 15
+#define HAMMER_BTREE_LEAF_ELMS 63
 #define HAMMER_BTREE_INT_ELMS  (HAMMER_BTREE_LEAF_ELMS - 1)
 
 /*
diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c
index 36e17e0..7096d95 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.26 2008/05/13 20:46:55 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.27 2008/06/10 22:30:21 dillon Exp $
  */
 
 /*
@@ -203,13 +203,13 @@ hammer_done_cursor(hammer_cursor_t cursor)
         * lock/unlock to wait for the deadlock condition to clear.
         */
        if (cursor->deadlk_node) {
-               hammer_lock_ex(&cursor->deadlk_node->lock);
+               hammer_lock_ex_ident(&cursor->deadlk_node->lock, "hmrdlk");
                hammer_unlock(&cursor->deadlk_node->lock);
                hammer_rel_node(cursor->deadlk_node);
                cursor->deadlk_node = NULL;
        }
        if (cursor->deadlk_rec) {
-               hammer_wait_mem_record(cursor->deadlk_rec);
+               hammer_wait_mem_record_ident(cursor->deadlk_rec, "hmmdlk");
                hammer_rel_mem_record(cursor->deadlk_rec);
                cursor->deadlk_rec = NULL;
        }
diff --git a/sys/vfs/hammer/hammer_flusher.c b/sys/vfs/hammer/hammer_flusher.c
index fda28b6..ad46677 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.23 2008/06/10 08:51:01 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.24 2008/06/10 22:30:21 dillon Exp $
  */
 /*
  * HAMMER dependency flusher thread
@@ -51,7 +51,17 @@ static void hammer_flusher_flush_inode(hammer_inode_t ip,
 static int hammer_must_finalize_undo(hammer_mount_t hmp);
 static void hammer_flusher_finalize(hammer_transaction_t trans, int final);
 
-#define HAMMER_FLUSHER_IMMEDIATE       16
+/*
+ * Support structures for the flusher threads.
+ */
+struct hammer_flusher_info {
+       struct hammer_mount *hmp;
+       thread_t        td;
+       int             startit;
+       TAILQ_HEAD(,hammer_inode) work_list;
+};
+
+typedef struct hammer_flusher_info *hammer_flusher_info_t;
 
 void
 hammer_flusher_sync(hammer_mount_t hmp)
@@ -123,9 +133,9 @@ hammer_flusher_destroy(hammer_mount_t hmp)
         */
        for (i = 0; i < HAMMER_MAX_FLUSHERS; ++i) {
                if ((info = hmp->flusher.info[i]) != NULL) {
-                       KKASSERT(info->running == 0);
-                       info->running = -1;
-                       wakeup(&info->running);
+                       KKASSERT(info->startit == 0);
+                       info->startit = -1;
+                       wakeup(&info->startit);
                        while (info->td) {
                                tsleep(&info->td, 0, "hmrwwc", 0);
                        }
@@ -137,6 +147,10 @@ hammer_flusher_destroy(hammer_mount_t hmp)
        KKASSERT(hmp->flusher.count == 0);
 }
 
+/*
+ * The master flusher thread manages the flusher sequence id and
+ * synchronization with the slave work threads.
+ */
 static void
 hammer_flusher_master_thread(void *arg)
 {
@@ -177,6 +191,10 @@ hammer_flusher_master_thread(void *arg)
        lwkt_exit();
 }
 
+/*
+ * The slave flusher thread pulls work off the master flush_list until no
+ * work is left.
+ */
 static void
 hammer_flusher_slave_thread(void *arg)
 {
@@ -188,15 +206,17 @@ hammer_flusher_slave_thread(void *arg)
        hmp = info->hmp;
 
        for (;;) {
-               while (info->running == 0)
-                       tsleep(&info->running, 0, "hmrssw", 0);
-               if (info->running < 0)
+               while (info->startit == 0)
+                       tsleep(&info->startit, 0, "hmrssw", 0);
+               if (info->startit < 0)
                        break;
-               while ((ip = TAILQ_FIRST(&info->work_list)) != NULL) {
-                       TAILQ_REMOVE(&info->work_list, ip, flush_entry);
+               info->startit = 0;
+               while ((ip = TAILQ_FIRST(&hmp->flush_list)) != NULL) {
+                       if (ip->flush_group != hmp->flusher.act)
+                               break;
+                       TAILQ_REMOVE(&hmp->flush_list, ip, flush_entry);
                        hammer_flusher_flush_inode(ip, &hmp->flusher.trans);
                }
-               info->running = 0;
                if (--hmp->flusher.running == 0)
                        wakeup(&hmp->flusher.running);
        }
@@ -233,30 +253,30 @@ static void
 hammer_flusher_flush(hammer_mount_t hmp)
 {
        hammer_flusher_info_t info;
-       hammer_inode_t ip;
        hammer_reserve_t resv;
        int i;
+       int n;
+
+       hammer_start_transaction_fls(&hmp->flusher.trans, hmp);
 
        /*
-        * Flush the inodes
+        * Start work threads.
         */
-       hammer_start_transaction_fls(&hmp->flusher.trans, hmp);
        i = 0;
-       while ((ip = TAILQ_FIRST(&hmp->flush_list)) != NULL) {
-               if (ip->flush_group != hmp->flusher.act)
-                       break;
-               TAILQ_REMOVE(&hmp->flush_list, ip, flush_entry);
-               info = hmp->flusher.info[i];
-               TAILQ_INSERT_TAIL(&info->work_list, ip, flush_entry);
-               if (info->running == 0) {
-                       ++hmp->flusher.running;
-                       info->running = 1;
-                       wakeup(&info->running);
+       n = hmp->count_iqueued / 64;
+       if (TAILQ_FIRST(&hmp->flush_list)) {
+               for (i = 0; i <= hmp->count_iqueued / 64; ++i) {
+                       if (i == HAMMER_MAX_FLUSHERS ||
+                           hmp->flusher.info[i] == NULL) {
+                               break;
+                       }
+                       info = hmp->flusher.info[i];
+                       if (info->startit == 0) {
+                               ++hmp->flusher.running;
+                               info->startit = 1;
+                               wakeup(&info->startit);
+                       }
                }
-               /*hammer_flusher_flush_inode(ip, &trans);*/
-               ++i;
-               if (i == HAMMER_MAX_FLUSHERS || hmp->flusher.info[i] == NULL)
-                       i = 0;
        }
        while (hmp->flusher.running)
                tsleep(&hmp->flusher.running, 0, "hmrfcc", 0);
@@ -410,10 +430,7 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final)
        /*
         * Wait for I/Os to complete
         */
-       crit_enter();
-       while (hmp->io_running_count)
-               tsleep(&hmp->io_running_count, 0, "hmrfl1", 0);
-       crit_exit();
+       hammer_io_wait_all(hmp, "hmrfl1");
 
        /*
         * Update the on-disk volume header with new UNDO FIFO end position
@@ -448,10 +465,7 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final)
        /*
         * Wait for I/Os to complete
         */
-       crit_enter();
-       while (hmp->io_running_count)
-               tsleep(&hmp->io_running_count, 0, "hmrfl2", 0);
-       crit_exit();
+       hammer_io_wait_all(hmp, "hmrfl2");
 
        /*
         * Flush meta-data.  The meta-data will be undone if we crash
diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c
index 25000d5..f3338f3 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.69 2008/06/10 08:51:01 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.70 2008/06/10 22:30:21 dillon Exp $
  */
 
 #include "hammer.h"
 #include <sys/buf.h>
 #include <sys/buf2.h>
 
-static int hammer_unload_inode(struct hammer_inode *ip);
-static void hammer_flush_inode_core(hammer_inode_t ip, int flags);
-static int hammer_setup_child_callback(hammer_record_t rec, void *data);
-static int hammer_setup_parent_inodes(hammer_record_t record);
+static int     hammer_unload_inode(struct hammer_inode *ip);
+static void    hammer_flush_inode_core(hammer_inode_t ip, int flags);
+static int     hammer_setup_child_callback(hammer_record_t rec, void *data);
+static int     hammer_setup_parent_inodes(hammer_record_t record);
+static void    hammer_inode_wakereclaims(hammer_mount_t hmp);
 
 #ifdef DEBUG_TRUNCATE
 extern struct hammer_inode *HammerTruncIp;
@@ -110,14 +111,10 @@ hammer_vop_reclaim(struct vop_reclaim_args *ap)
                        ++hammer_count_reclaiming;
                        ++hmp->inode_reclaims;
                        ip->flags |= HAMMER_INODE_RECLAIM;
+                       if (curproc)
+                               hammer_inode_waitreclaims(hmp);
                }
                hammer_rel_inode(ip, 1);
-
-               /*
-                * Do not let too many reclaimed inodes build up.
-                * 
-                */
-               hammer_inode_waitreclaims(hmp);
        }
        return(0);
 }
@@ -245,17 +242,6 @@ loop:
                return(ip);
        }
 
-#if 0
-        /*
-        * Impose a slow-down if HAMMER is heavily backlogged on cleaning
-        * out reclaimed inodes.
-         */
-        if (hmp->inode_reclaims > HAMMER_RECLAIM_MIN &&
-           trans->type != HAMMER_TRANS_FLS) {
-                hammer_inode_waitreclaims(hmp);
-       }
-#endif
-
        /*
         * Allocate a new inode structure and deal with races later.
         */
@@ -747,13 +733,13 @@ hammer_unload_inode(struct hammer_inode *ip)
                hammer_clear_objid(ip);
        --hammer_count_inodes;
        --hmp->count_inodes;
-       if (hmp->flags & HAMMER_MOUNT_WAITIMAX)
-               hammer_inode_wakereclaims(hmp);
 
        if (ip->flags & HAMMER_INODE_RECLAIM) {
                --hammer_count_reclaiming;
                --hmp->inode_reclaims;
                ip->flags &= ~HAMMER_INODE_RECLAIM;
+               if (hmp->flags & HAMMER_MOUNT_WAITIMAX)
+                       hammer_inode_wakereclaims(hmp);
        }
        kfree(ip, M_HAMMER);
 
@@ -1027,6 +1013,8 @@ hammer_flush_inode_core(hammer_inode_t ip, int flags)
        ip->flush_state = HAMMER_FST_FLUSH;
        ip->flush_group = ip->hmp->flusher.next;
        ++ip->hmp->flusher.group_lock;
+       ++ip->hmp->count_iqueued;
+       ++hammer_count_iqueued;
 
        /*
         * We need to be able to vfsync/truncate from the backend.
@@ -1057,6 +1045,10 @@ hammer_flush_inode_core(hammer_inode_t ip, int flags)
        if (go_count == 0) {
                if ((ip->flags & HAMMER_INODE_MODMASK_NOXDIRTY) == 0) {
                        ip->flags |= HAMMER_INODE_REFLUSH;
+
+                       --ip->hmp->count_iqueued;
+                       --hammer_count_iqueued;
+
                        ip->flush_state = HAMMER_FST_SETUP;
                        if (ip->flags & HAMMER_INODE_VHELD) {
                                ip->flags &= ~HAMMER_INODE_VHELD;
@@ -1243,10 +1235,13 @@ hammer_wait_inode(hammer_inode_t ip)
 void
 hammer_flush_inode_done(hammer_inode_t ip)
 {
-       int dorel = 0;
+       hammer_mount_t hmp;
+       int dorel;
 
        KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
 
+       hmp = ip->hmp;
+
        /*
         * Merge left-over flags back into the frontend and fix the state.
         */
@@ -1266,7 +1261,7 @@ hammer_flush_inode_done(hammer_inode_t ip)
        if (ip->vp && RB_ROOT(&ip->vp->v_rbdirty_tree)) {
                ip->flags |= HAMMER_INODE_BUFS;
        } else {
-               ip->hmp->rsv_databufs -= ip->rsv_databufs;
+               hmp->rsv_databufs -= ip->rsv_databufs;
                ip->rsv_databufs = 0;
        }
 
@@ -1296,8 +1291,12 @@ hammer_flush_inode_done(hammer_inode_t ip)
                dorel = 1;
        } else {
                ip->flush_state = HAMMER_FST_SETUP;
+               dorel = 0;
        }
 
+       --hmp->count_iqueued;
+       --hammer_count_iqueued;
+
        /*
         * Clean up the vnode ref
         */
@@ -1326,7 +1325,7 @@ hammer_flush_inode_done(hammer_inode_t ip)
        if ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
            (ip->flags & HAMMER_INODE_RSV_INODES)) {
                ip->flags &= ~HAMMER_INODE_RSV_INODES;
-               --ip->hmp->rsv_inodes;
+               --hmp->rsv_inodes;
        }
 
        /*
@@ -1557,41 +1556,6 @@ hammer_sync_inode(hammer_inode_t ip)
                ip->sync_flags |= HAMMER_INODE_DDIRTY;
        }
 
-#if 0
-       /*
-        * XXX DISABLED FOR NOW.  With the new reservation support
-        * we cannot resync pending data without confusing the hell
-        * out of the in-memory record tree.
-        */
-       /*
-        * Queue up as many dirty buffers as we can then set a flag to
-        * cause any further BIOs to go to the alternative queue.
-        */
-       if (ip->flags & HAMMER_INODE_VHELD)
-               error = vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL);
-       ip->flags |= HAMMER_INODE_WRITE_ALT;
-
-       /*
-        * The buffer cache may contain dirty buffers beyond the inode
-        * state we copied from the frontend to the backend.  Because
-        * we are syncing our buffer cache on the backend, resync
-        * the truncation point and the file size so we don't wipe out
-        * any data.
-        *
-        * Syncing the buffer cache on the frontend has serious problems
-        * because it prevents us from passively queueing dirty inodes
-        * to the backend (the BIO's could stall indefinitely).
-        */
-       if (ip->flags & HAMMER_INODE_TRUNCATED) {
-               ip->sync_trunc_off = ip->trunc_off;
-               ip->sync_flags |= HAMMER_INODE_TRUNCATED;
-       }
-       if (ip->sync_ino_data.size != ip->ino_data.size) {
-               ip->sync_ino_data.size = ip->ino_data.size;
-               ip->sync_flags |= HAMMER_INODE_DDIRTY;
-       }
-#endif
-
        /*
         * If there is a truncation queued, destroy any data past the (aligned)
         * truncation point.  Userland will have dealt with the buffer
@@ -1852,35 +1816,33 @@ hammer_test_inode(hammer_inode_t ip)
 }
 
 /*
- * When a HAMMER inode is reclaimed it may have to be queued to the backend
- * for its final sync to disk.  Programs like blogbench can cause the backlog
- * to grow indefinitely.  Put a cap on the number of inodes we allow to be
- * in this state by giving the flusher time to drain.
+ * We need to slow down user processes if we get too large a backlog of
+ * inodes in the flusher.  Even though the frontend can theoretically
+ * get way, way ahead of the flusher, if we let it do that the flusher
+ * will have no buffer cache locality of reference and will have to re-read
+ * everything a second time, causing performance to drop precipitously.
+ *
+ * Reclaims are especially sensitive to this effect because the kernel has
+ * already abandoned the related vnode.
  */
+
 void
 hammer_inode_waitreclaims(hammer_mount_t hmp)
 {
-       int count;
        int delay;
-       int minpt;
-       int maxpt;
 
        while (hmp->inode_reclaims > HAMMER_RECLAIM_MIN) {
-               count = hmp->count_inodes - hmp->inode_reclaims;
-               if (count < 100)
-                       count = 100;
-               minpt = count * HAMMER_RECLAIM_SLOPCT / 100;
-               maxpt = count * HAMMER_RECLAIM_MAXPCT / 100;
-
-               if (hmp->inode_reclaims < minpt)
+               if (hmp->inode_reclaims < HAMMER_RECLAIM_MID) {
+                       hammer_flusher_async(hmp);
                        break;
-               if (hmp->inode_reclaims < maxpt) {
-                       delay = (hmp->inode_reclaims - minpt) * hz /
-                               (maxpt - minpt);
+               }
+               if (hmp->inode_reclaims < HAMMER_RECLAIM_MAX) {
+                       delay = (hmp->inode_reclaims - HAMMER_RECLAIM_MID) *
+                               hz / (HAMMER_RECLAIM_MAX - HAMMER_RECLAIM_MID);
                        if (delay == 0)
                                delay = 1;
                        hammer_flusher_async(hmp);
-                       tsleep(&count, 0, "hmitik", delay);
+                       tsleep(&delay, 0, "hmitik", delay);
                        break;
                }
                hmp->flags |= HAMMER_MOUNT_WAITIMAX;
@@ -1892,13 +1854,8 @@ hammer_inode_waitreclaims(hammer_mount_t hmp)
 void
 hammer_inode_wakereclaims(hammer_mount_t hmp)
 {
-       int maxpt;
-
-       if ((hmp->flags & HAMMER_MOUNT_WAITIMAX) == 0)
-               return;
-       maxpt = hmp->count_inodes * HAMMER_RECLAIM_MAXPCT / 100;
-       if (hmp->inode_reclaims <= HAMMER_RECLAIM_MIN ||
-           hmp->inode_reclaims < maxpt) {
+       if ((hmp->flags & HAMMER_MOUNT_WAITIMAX) &&
+           hmp->inode_reclaims < HAMMER_RECLAIM_MAX) {
                hmp->flags &= ~HAMMER_MOUNT_WAITIMAX;
                wakeup(&hmp->inode_reclaims);
        }
diff --git a/sys/vfs/hammer/hammer_io.c b/sys/vfs/hammer/hammer_io.c
index e037591..6e464df 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.37 2008/06/10 00:40:31 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.38 2008/06/10 22:30:21 dillon Exp $
  */
 /*
  * IO Primitives and buffer cache management
@@ -82,6 +82,7 @@ hammer_io_disassociate(hammer_io_structure_t iou, int elseit)
        struct buf *bp = iou->io.bp;
 
        KKASSERT(iou->io.modified == 0);
+       KKASSERT(LIST_FIRST(&bp->b_dep) == (void *)iou);
        buf_dep_init(bp);
        iou->io.bp = NULL;
 
@@ -141,15 +142,34 @@ hammer_io_wait(hammer_io_t io)
        }
 }
 
+/*
+ * Wait for all hammer_io-initiated write I/O's to complete.  This is not
+ * supposed to count direct I/O's but some can leak through (for
+ * non-full-sized direct I/Os).
+ */
+void
+hammer_io_wait_all(hammer_mount_t hmp, const char *ident)
+{
+       crit_enter();
+       while (hmp->io_running_count)
+               tsleep(&hmp->io_running_count, 0, ident, 0);
+       crit_exit();
+}
+
 #define HAMMER_MAXRA   4
 
 /*
  * Load bp for a HAMMER structure.  The io must be exclusively locked by
  * the caller.
  *
- * Generally speaking HAMMER assumes either an optimized layout or that
- * typical access patterns will be close to the original layout when the
- * information was written.  For this reason we try to cluster all reads.
+ * Generally speaking, HAMMER assumes that data is laid out fairly linearly
+ * and will cluster reads.  Conversely, meta-data buffers (aka B-Tree nodes)
+ * may be dispersed due to the way the B-Tree insertion mechanism works, so
+ * we only do single-buffer reads to avoid blowing out the buffer cache.
+ *
+ * Note that clustering occurs at the device layer, not the logical layer.
+ * If the buffers do not apply to the current operation they may apply to
+ * some other operation.
  */
 int
 hammer_io_read(struct vnode *devvp, struct hammer_io *io, hammer_off_t limit)
@@ -158,16 +178,22 @@ hammer_io_read(struct vnode *devvp, struct hammer_io *io, hammer_off_t limit)
        int   error;
 
        if ((bp = io->bp) == NULL) {
-#if 1
-               error = cluster_read(devvp, limit, io->offset,
-                                    HAMMER_BUFSIZE, MAXBSIZE, 16, &io->bp);
-#else
-               error = bread(devvp, io->offset, HAMMER_BUFSIZE, &io->bp);
-#endif
-
+               switch(io->type) {
+               case HAMMER_STRUCTURE_DATA_BUFFER:
+                       error = cluster_read(devvp, limit, io->offset,
+                                            HAMMER_BUFSIZE,
+                                            HAMMER_CLUSTER_SIZE,
+                                            HAMMER_CLUSTER_BUFS, &io->bp);
+                       break;
+               default:
+                       error = bread(devvp, io->offset, HAMMER_BUFSIZE,
+                                     &io->bp);
+                       break;
+               }
                if (error == 0) {
                        bp = io->bp;
                        bp->b_ops = &hammer_bioops;
+                       KKASSERT(LIST_FIRST(&bp->b_dep) == NULL);
                        LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
                        BUF_KERNPROC(bp);
                }
@@ -201,6 +227,7 @@ hammer_io_new(struct vnode *devvp, struct hammer_io *io)
                io->bp = getblk(devvp, io->offset, HAMMER_BUFSIZE, 0, 0);
                bp = io->bp;
                bp->b_ops = &hammer_bioops;
+               KKASSERT(LIST_FIRST(&bp->b_dep) == NULL);
                LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
                io->released = 0;
                KKASSERT(io->running == 0);
@@ -348,13 +375,19 @@ hammer_io_release(struct hammer_io *io, int flush)
                }
        } else {
                /*
-                * A released buffer may have been locked when the kernel
-                * tried to deallocate it while HAMMER still had references
-                * on the hammer_buffer.  We must unlock the buffer or
-                * it will just rot.
+                * A released buffer is passively associated with our
+                * hammer_io structure.  The kernel cannot destroy it
+                * without making a bioops call.  If the kernel (B_LOCKED)
+                * or we (reclaim) requested that the buffer be destroyed
+                * we destroy it, otherwise we do a quick get/release to
+                * reset its position in the kernel's LRU list.
+                *
+                * Leaving the buffer passively associated allows us to
+                * use the kernel's LRU buffer flushing mechanisms rather
+                * than rolling our own.
                 */
                crit_enter();
-               if (io->running == 0 && (bp->b_flags & B_LOCKED)) {
+               if (io->running == 0) {
                        regetblk(bp);
                        if ((bp->b_flags & B_LOCKED) || io->reclaim) {
                                io->released = 0;
diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c
index 7c8c417..afb8e31 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.64 2008/06/10 00:40:31 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.65 2008/06/10 22:30:21 dillon Exp $
  */
 
 #include "hammer.h"
@@ -298,11 +298,11 @@ hammer_alloc_mem_record(hammer_inode_t ip, int data_len)
 }
 
 void
-hammer_wait_mem_record(hammer_record_t record)
+hammer_wait_mem_record_ident(hammer_record_t record, const char *ident)
 {
        while (record->flush_state == HAMMER_FST_FLUSH) {
                record->flags |= HAMMER_RECF_WANTED;
-               tsleep(record, 0, "hmrrc2", 0);
+               tsleep(record, 0, ident, 0);
        }
 }
 
diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c
index d9b4c80..6a1e772 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.52 2008/06/10 00:40:31 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.53 2008/06/10 22:30:21 dillon Exp $
  */
 /*
  * Manage HAMMER's on-disk structures.  These routines are primarily
@@ -53,7 +53,7 @@ static int hammer_load_node(hammer_node_t node, int isnew);
 /*
  * Red-Black tree support for various structures
  */
-static int
+int
 hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
 {
        if (ip1->obj_id < ip2->obj_id)
diff --git a/sys/vfs/hammer/hammer_recover.c b/sys/vfs/hammer/hammer_recover.c
index e421294..828cc1d 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.22 2008/06/09 04:19:10 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.23 2008/06/10 22:30:21 dillon Exp $
  */
 
 #include "hammer.h"
@@ -430,12 +430,23 @@ static int hammer_recover_flush_buffer_callback(hammer_buffer_t, void *);
 void
 hammer_recover_flush_buffers(hammer_mount_t hmp, hammer_volume_t root_volume)
 {
+       /*
+        * Flush the buffers out asynchronously, wait for all the I/O to
+        * complete, then do it again to destroy the buffer cache buffer
+        * so it doesn't alias something later on.
+        */
+       RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
+               hammer_recover_flush_buffer_callback, NULL);
+       hammer_io_wait_all(hmp, "hmrrcw");
        RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
                hammer_recover_flush_buffer_callback, NULL);
 
        RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
                hammer_recover_flush_volume_callback, root_volume);
 
+       /*
+        * Finally, deal with the volume header.
+        */
        if (root_volume->io.recovered) {
                crit_enter();
                while (hmp->io_running_count)
@@ -467,8 +478,13 @@ hammer_recover_flush_buffer_callback(hammer_buffer_t buffer, void *data)
 {
        if (buffer->io.recovered) {
                buffer->io.recovered = 0;
+               buffer->io.reclaim = 1;
                hammer_io_flush(&buffer->io);
-               hammer_rel_buffer(buffer, 2);
+               hammer_rel_buffer(buffer, 0);
+       } else {
+               hammer_ref(&buffer->io.lock);
+               buffer->io.reclaim = 1;
+               hammer_rel_buffer(buffer, 1);
        }
        return(0);
 }
diff --git a/sys/vfs/hammer/hammer_subs.c b/sys/vfs/hammer/hammer_subs.c
index 2801c25..d775806 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.23 2008/06/07 07:41:51 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.24 2008/06/10 22:30:21 dillon Exp $
  */
 /*
  * HAMMER structural locking
@@ -41,7 +41,7 @@
 #include <sys/dirent.h>
 
 void
-hammer_lock_ex(struct hammer_lock *lock)
+hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
 {
        thread_t td = curthread;
 
@@ -55,7 +55,7 @@ hammer_lock_ex(struct hammer_lock *lock)
                                        lock->locktd);
                        }
                        ++hammer_contention_count;
-                       tsleep(lock, 0, "hmrlck", 0);
+                       tsleep(lock, 0, ident, 0);
                        if (hammer_debug_locks)
                                kprintf("hammer_lock_ex: try again\n");
                }
diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c
index 5706497..ecb716b 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.43 2008/06/10 05:06:20 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.44 2008/06/10 22:30:21 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -58,6 +58,7 @@ int hammer_debug_recover;             /* -1 will disable, +1 will force */
 int hammer_debug_recover_faults;
 int hammer_debug_write_release;                /* if 1 release buffer on strategy */
 int hammer_count_inodes;
+int hammer_count_iqueued;
 int hammer_count_reclaiming;
 int hammer_count_records;
 int hammer_count_record_datas;
@@ -71,6 +72,7 @@ int hammer_stats_record_iterations;
 int hammer_limit_dirtybufs;            /* per-mount */
 int hammer_limit_irecs;                        /* per-inode */
 int hammer_limit_recs;                 /* as a whole XXX */
+int hammer_limit_iqueued;              /* per-mount */
 int hammer_bio_count;
 int64_t hammer_contention_count;
 int64_t hammer_zone_limit;
@@ -103,9 +105,13 @@ SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_irecs, CTLFLAG_RW,
           &hammer_limit_irecs, 0, "");
 SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_recs, CTLFLAG_RW,
           &hammer_limit_recs, 0, "");
+SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_iqueued, CTLFLAG_RW,
+          &hammer_limit_iqueued, 0, "");
 
 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_inodes, CTLFLAG_RD,
           &hammer_count_inodes, 0, "");
+SYSCTL_INT(_vfs_hammer, OID_AUTO, count_iqueued, CTLFLAG_RD,
+          &hammer_count_iqueued, 0, "");
 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_reclaiming, CTLFLAG_RD,
           &hammer_count_reclaiming, 0, "");
 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_records, CTLFLAG_RD,
@@ -174,14 +180,16 @@ static int
 hammer_vfs_init(struct vfsconf *conf)
 {
        if (hammer_limit_irecs == 0)
-               hammer_limit_irecs = nbuf;
+               hammer_limit_irecs = nbuf / 10;
        if (hammer_limit_recs == 0)             /* XXX TODO */
-               hammer_limit_recs = hammer_limit_irecs * 4;
+               hammer_limit_recs = nbuf / 3;
        if (hammer_limit_dirtybufs == 0) {
                hammer_limit_dirtybufs = hidirtybuffers / 2;
                if (hammer_limit_dirtybufs < 100)
                        hammer_limit_dirtybufs = 100;
        }
+       if (hammer_limit_iqueued == 0)
+               hammer_limit_iqueued = desiredvnodes / 5;
        return(0);
 }
 
diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c
index 911c13d..7607ab2 100644
@@ -31,7 +31,7 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * 
- * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.64 2008/06/10 08:06:28 dillon Exp $
+ * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.65 2008/06/10 22:30:21 dillon Exp $
  */
 
 #include <sys/param.h>
@@ -178,8 +178,8 @@ hammer_vop_fsync(struct vop_fsync_args *ap)
 {
        hammer_inode_t ip = VTOI(ap->a_vp);
 
-       hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
        vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
+       hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
        if (ap->a_waitfor == MNT_WAIT)
                hammer_wait_inode(ip);
        return (ip->error);
@@ -233,11 +233,9 @@ hammer_vop_read(struct vop_read_args *ap)
                if (n > ip->ino_data.size - uio->uio_offset)
                        n = (int)(ip->ino_data.size - uio->uio_offset);
                error = uiomove((char *)bp->b_data + offset, n, uio);
-               if (error) {
-                       bqrelse(bp);
-                       break;
-               }
                bqrelse(bp);
+               if (error)
+                       break;
        }
        if ((ip->flags & HAMMER_INODE_RO) == 0 &&
            (ip->hmp->mp->mnt_flag & MNT_NOATIME) == 0) {
@@ -287,9 +285,17 @@ hammer_vop_write(struct vop_write_args *ap)
                uio->uio_offset = ip->ino_data.size;
 
        /*
-        * Check for illegal write offsets.  Valid range is 0...2^63-1
+        * Check for illegal write offsets.  Valid range is 0...2^63-1.
+        *
+        * NOTE: the base_offset assignment is required to work around what
+        * I consider to be a GCC-4 optimization bug.
         */
-       if (uio->uio_offset < 0 || uio->uio_offset + uio->uio_resid <= 0) {
+       if (uio->uio_offset < 0) {
+               hammer_done_transaction(&trans);
+               return (EFBIG);
+       }
+       base_offset = uio->uio_offset + uio->uio_resid; /* work around gcc-4 */
+       if (uio->uio_resid > 0 && base_offset <= 0) {
                hammer_done_transaction(&trans);
                return (EFBIG);
        }
@@ -2031,6 +2037,7 @@ int
 hammer_vop_strategy_write(struct vop_strategy_args *ap)
 {
        hammer_record_t record;
+       hammer_mount_t hmp;
        hammer_inode_t ip;
        struct bio *bio;
        struct buf *bp;
@@ -2040,6 +2047,7 @@ hammer_vop_strategy_write(struct vop_strategy_args *ap)
        bio = ap->a_bio;
        bp = bio->bio_buf;
        ip = ap->a_vp->v_data;
+       hmp = ip->hmp;
 
        if (ip->flags & HAMMER_INODE_RO) {
                bp->b_error = EROFS;
@@ -2085,12 +2093,14 @@ hammer_vop_strategy_write(struct vop_strategy_args *ap)
        record = hammer_ip_add_bulk(ip, bio->bio_offset, bp->b_data,
                                    bytes, &error);
        if (record) {
-               hammer_io_direct_write(ip->hmp, &record->leaf, bio);
+               hammer_io_direct_write(hmp, &record->leaf, bio);
                hammer_rel_mem_record(record);
-               if (ip->rsv_recs > hammer_limit_irecs / 2)
+               if (hmp->rsv_recs > hammer_limit_recs &&
+                   ip->rsv_recs > hammer_limit_irecs / 10) {
                        hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
-               else
-                       hammer_flush_inode(ip, 0);
+               } else if (ip->rsv_recs > hammer_limit_irecs) {
+                       hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
+               }
        } else {
                bp->b_error = error;
                bp->b_flags |= B_ERROR;
@@ -2257,7 +2267,8 @@ retry:
                        if (ip->vp)
                                cache_inval_vp(ip->vp, CINV_DESTROY);
                }
-               hammer_rel_inode(ip, 0);
+               if (ip)
+                       hammer_rel_inode(ip, 0);
        } else {
                hammer_done_cursor(&cursor);
        }