HAMMER - Massively improve performance
author: Matthew Dillon <dillon@apollo.backplane.com>
Thu, 20 Aug 2009 06:30:42 +0000 (23:30 -0700)
committer: Matthew Dillon <dillon@apollo.backplane.com>
Thu, 20 Aug 2009 06:30:42 +0000 (23:30 -0700)
* Now that write pipelining is controlled by ip->rsv_recs and now
  that we have write bursting in the BIOQ subsystem, we do not
  actually want to use bawrite() or cluster_write() because the bio
  could end up waiting for the disk for a long time, and it is locked
  for that entire period.  So go back to using bdwrite().

  This seriously improves HAMMER's ability to access files that were
  just written without having to wait for I/O to complete.

* Reclaim (destroy) buffer cache buffers related to UNDO data, except
  for the UNDO block we will be needing again for the next flush.  We
  don't need to keep old undo buffers in the cache!  This improves
  HAMMER's memory footprint.

sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_flusher.c
sys/vfs/hammer/hammer_io.c
sys/vfs/hammer/hammer_ondisk.c
sys/vfs/hammer/hammer_recover.c
sys/vfs/hammer/hammer_undo.c
sys/vfs/hammer/hammer_vnops.c

index 501750c..2e18d70 100644 (file)
@@ -1111,6 +1111,7 @@ hammer_off_t hammer_undo_lookup(hammer_mount_t hmp, hammer_off_t bmap_off,
 int64_t hammer_undo_used(hammer_transaction_t trans);
 int64_t hammer_undo_space(hammer_transaction_t trans);
 int64_t hammer_undo_max(hammer_mount_t hmp);
+int hammer_undo_reclaim(hammer_io_t io);
 
 void hammer_start_transaction(struct hammer_transaction *trans,
                              struct hammer_mount *hmp);
@@ -1176,7 +1177,7 @@ int hammer_io_read(struct vnode *devvp, struct hammer_io *io,
 int hammer_io_new(struct vnode *devvp, struct hammer_io *io);
 int hammer_io_inval(hammer_volume_t volume, hammer_off_t zone2_offset);
 struct buf *hammer_io_release(struct hammer_io *io, int flush);
-void hammer_io_flush(struct hammer_io *io);
+void hammer_io_flush(struct hammer_io *io, int reclaim);
 void hammer_io_wait(struct hammer_io *io);
 void hammer_io_waitdep(struct hammer_io *io);
 void hammer_io_wait_all(hammer_mount_t hmp, const char *ident);
index 28cbe3b..bdc9a6e 100644 (file)
@@ -600,7 +600,7 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final)
                hammer_ref(&io->lock);
                hammer_io_write_interlock(io);
                KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
-               hammer_io_flush(io);
+               hammer_io_flush(io, 0);
                hammer_io_done_interlock(io);
                hammer_rel_buffer((hammer_buffer_t)io, 0);
                ++count;
@@ -642,7 +642,7 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final)
                        ++hammer_count_refedbufs;
                hammer_ref(&io->lock);
                KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
-               hammer_io_flush(io);
+               hammer_io_flush(io, hammer_undo_reclaim(io));
                hammer_rel_buffer((hammer_buffer_t)io, 0);
                ++count;
        }
@@ -697,7 +697,7 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final)
                        root_volume->ondisk->vol0_next_tid = trans->tid;
                hammer_crc_set_volume(root_volume->ondisk);
                hammer_modify_volume_done(root_volume);
-               hammer_io_flush(&root_volume->io);
+               hammer_io_flush(&root_volume->io, 0);
        }
 
        /*
@@ -726,7 +726,7 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final)
                        ++hammer_count_refedbufs;
                hammer_ref(&io->lock);
                KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
-               hammer_io_flush(io);
+               hammer_io_flush(io, 0);
                hammer_rel_buffer((hammer_buffer_t)io, 0);
                ++count;
        }
index a472879..bc37433 100644 (file)
@@ -338,12 +338,14 @@ hammer_io_release(struct hammer_io *io, int flush)
         */
        if (io->modified) {
                if (flush) {
-                       hammer_io_flush(io);
+                       hammer_io_flush(io, 0);
                } else if (bp->b_flags & B_LOCKED) {
                        switch(io->type) {
                        case HAMMER_STRUCTURE_DATA_BUFFER:
+                               hammer_io_flush(io, 0);
+                               break;
                        case HAMMER_STRUCTURE_UNDO_BUFFER:
-                               hammer_io_flush(io);
+                               hammer_io_flush(io, hammer_undo_reclaim(io));
                                break;
                        default:
                                break;
@@ -464,7 +466,7 @@ hammer_io_release(struct hammer_io *io, int flush)
  * potentially modified buffer out.
  */
 void
-hammer_io_flush(struct hammer_io *io)
+hammer_io_flush(struct hammer_io *io, int reclaim)
 {
        struct buf *bp;
 
@@ -496,6 +498,14 @@ hammer_io_flush(struct hammer_io *io)
        }
        io->released = 1;
 
+       if (reclaim) {
+               io->reclaim = 1;
+               if ((bp->b_flags & B_LOCKED) == 0) {
+                       bp->b_flags |= B_LOCKED;
+                       ++hammer_count_io_locked;
+               }
+       }
+
        /*
         * Acquire exclusive access to the bp and then clear the modified
         * state of the buffer prior to issuing I/O to interlock any
index 0a84669..90ecfcd 100644 (file)
@@ -693,7 +693,7 @@ hammer_sync_buffers(hammer_mount_t hmp, hammer_off_t base_offset, int bytes)
                                hammer_io_wait(&buffer->io);
                                if (buffer->io.modified) {
                                        hammer_io_write_interlock(&buffer->io);
-                                       hammer_io_flush(&buffer->io);
+                                       hammer_io_flush(&buffer->io, 0);
                                        hammer_io_done_interlock(&buffer->io);
                                        hammer_io_wait(&buffer->io);
                                }
index 8db61e2..8cbe323 100644 (file)
@@ -500,7 +500,7 @@ hammer_recover_flush_buffers(hammer_mount_t hmp, hammer_volume_t root_volume,
                        tsleep(&hmp->io_running_space, 0, "hmrflx", 0);
                crit_exit();
                root_volume->io.recovered = 0;
-               hammer_io_flush(&root_volume->io);
+               hammer_io_flush(&root_volume->io, 0);
                hammer_rel_volume(root_volume, 0);
        }
 }
@@ -520,7 +520,7 @@ hammer_recover_flush_volume_callback(hammer_volume_t volume, void *data)
        if (volume->io.recovered && volume != root_volume) {
                volume->io.recovered = 0;
                if (root_volume != NULL)
-                       hammer_io_flush(&volume->io);
+                       hammer_io_flush(&volume->io, 0);
                else
                        hammer_io_clear_modify(&volume->io, 1);
                hammer_rel_volume(volume, 0);
@@ -540,7 +540,7 @@ hammer_recover_flush_buffer_callback(hammer_buffer_t buffer, void *data)
                if (final < 0)
                        hammer_io_clear_modify(&buffer->io, 1);
                else
-                       hammer_io_flush(&buffer->io);
+                       hammer_io_flush(&buffer->io, 0);
                hammer_rel_buffer(buffer, 0);
        } else {
                KKASSERT(buffer->io.lock.refs == 0);
index d9d097c..52bbe01 100644 (file)
@@ -310,6 +310,24 @@ hammer_undo_max(hammer_mount_t hmp)
        return(max_bytes);
 }
 
+/*
+ * Returns 1 if the undo buffer should be reclaimed on release.  The
+ * only undo buffer we do NOT want to reclaim is the one at the current
+ * append offset.
+ */
+int
+hammer_undo_reclaim(hammer_io_t io)
+{
+       hammer_blockmap_t undomap;
+       hammer_off_t next_offset;
+
+       undomap = &io->hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
+       next_offset = undomap->next_offset & ~HAMMER_BUFMASK64;
+       if (((struct hammer_buffer *)io)->zoneX_offset == next_offset)
+               return(0);
+       return(1);
+}
+
 static int
 hammer_und_rb_compare(hammer_undo_t node1, hammer_undo_t node2)
 {
index f7d3209..b3f72f7 100644 (file)
@@ -627,13 +627,20 @@ hammer_vop_write(struct vop_write_args *ap)
                 * WARNING!  blksize is variable.  cluster_write() is
                 * expected to not blow up if it encounters buffers that
                 * do not match the passed blksize.
+                *
+                * NOTE!  Hammer shouldn't need to bawrite()/cluster_write().
+                *        The ip->rsv_recs check should burst-flush the data.
+                *        If we queue it immediately the buf could be left
+                *        locked on the device queue for a very long time.
                 */
                bp->b_flags |= B_AGE;
                if (ap->a_ioflag & IO_SYNC) {
                        bwrite(bp);
                } else if (ap->a_ioflag & IO_DIRECT) {
                        bawrite(bp);
-               } else if (offset + n == blksize) {
+               } else {
+#if 0
+               if (offset + n == blksize) {
                        if (hammer_cluster_enable == 0 ||
                            (ap->a_vp->v_mount->mnt_flag & MNT_NOCLUSTERW)) {
                                bawrite(bp);
@@ -642,6 +649,7 @@ hammer_vop_write(struct vop_write_args *ap)
                                              blksize, seqcount);
                        }
                } else {
+#endif
                        bdwrite(bp);
                }
        }