kernel - Fix cluster_read random I/O heuristic
author: Matthew Dillon <dillon@apollo.backplane.com>
Sun, 28 Mar 2010 18:23:04 +0000 (11:23 -0700)
committer: Matthew Dillon <dillon@apollo.backplane.com>
Sun, 28 Mar 2010 18:23:04 +0000 (11:23 -0700)
* cluster_read was improperly accounting for the seqcount in the random
  I/O heuristic: it failed to reduce the count in the read-ahead loop,
  which led to near-maximal sequential read-ahead even for random I/O.

  Properly reduce the seqcount both for the initial buffer and in the
  read-ahead loop, which has the effect of terminating the loop early,
  or not running it at all, when the I/O is random.

* HAMMER was not scaling the seqcount for the heuristic to BKVASIZE; it
  was dividing the UIO size by the HAMMER block size instead.

* Thanks to Jan for spending the time required to bisect and track down the
  problem!

Reported-by: Jan Lentfer <Jan.Lentfer@web.de>
sys/kern/vfs_cluster.c
sys/vfs/hammer/hammer_vnops.c

index 9e24291..54ad63b 100644 (file)
@@ -139,7 +139,8 @@ cluster_read(struct vnode *vp, off_t filesize, off_t loffset,
                /*
                 * Not sequential, do not do any read-ahead
                 */
-               if (seqcount == 0 || maxra == 0)
+               seqcount -= (bp->b_bufsize + BKVASIZE - 1) / BKVASIZE;
+               if (seqcount <= 0 || maxra == 0)
                        return 0;
 
                /*
@@ -237,12 +238,13 @@ single_block_read:
        if (bp) {
 #if defined(CLUSTERDEBUG)
                if (rcluster)
-                       kprintf("S(%lld,%d,%d) ",
+                       kprintf("S(%lld,%d,%d)\n",
                            bp->b_loffset, bp->b_bcount, seqcount);
 #endif
                if ((bp->b_flags & B_CLUSTER) == 0)
                        vfs_busy_pages(vp, bp);
                bp->b_flags &= ~(B_ERROR|B_INVAL);
+               seqcount -= (bp->b_bufsize + BKVASIZE - 1) / BKVASIZE;
                vn_strategy(vp, &bp->b_bio1);
                error = 0;
                /* bp invalid now */
@@ -257,7 +259,7 @@ single_block_read:
         * will do device-readahead irrespective of what the blocks
         * represent.
         */
-       while (!error && seqcount && maxra > 0 &&
+       while (!error && seqcount > 0 && maxra > 0 &&
               loffset + blksize <= filesize) {
                int nblksread;
                int ntoread;
@@ -306,18 +308,19 @@ single_block_read:
                } else {
                        rbp->b_bio2.bio_offset = doffset;
                }
+               seqcount -= (rbp->b_bufsize + BKVASIZE - 1) / BKVASIZE;
 #if defined(CLUSTERDEBUG)
                if (rcluster) {
                        if (bp)
-                               kprintf("A+(%lld,%d,%lld,%d) ",
+                               kprintf("A+(%lld,%d,%lld,%d) ra=%d\n",
                                    rbp->b_loffset, rbp->b_bcount,
                                    rbp->b_loffset - origoffset,
-                                   seqcount);
+                                   seqcount, maxra);
                        else
-                               kprintf("A(%lld,%d,%lld,%d) ",
+                               kprintf("A-(%lld,%d,%lld,%d) ra=%d\n",
                                    rbp->b_loffset, rbp->b_bcount,
                                    rbp->b_loffset - origoffset,
-                                   seqcount);
+                                   seqcount, maxra);
                }
 #endif
                rbp->b_flags &= ~(B_ERROR|B_INVAL);
index f34e8aa..d5fe582 100644 (file)
@@ -329,8 +329,8 @@ hammer_vop_read(struct vop_read_args *ap)
         * Allow the UIO's size to override the sequential heuristic.
         */
        blksize = hammer_blocksize(uio->uio_offset);
-       seqcount = (uio->uio_resid + (blksize - 1)) / blksize;
-       ioseqcount = ap->a_ioflag >> 16;
+       seqcount = (uio->uio_resid + (BKVASIZE - 1)) / BKVASIZE;
+       ioseqcount = (ap->a_ioflag >> 16);
        if (seqcount < ioseqcount)
                seqcount = ioseqcount;