HAMMER VFS - More cluster_read() fixes
authorMatthew Dillon <dillon@apollo.backplane.com>
Sun, 22 Aug 2010 04:20:04 +0000 (21:20 -0700)
committerMatthew Dillon <dillon@apollo.backplane.com>
Sun, 22 Aug 2010 04:20:04 +0000 (21:20 -0700)
* The recent cluster limiting code was improperly using raw block device
  offsets to calculate the largeblock mask used to prevent clustered I/Os
  from crossing a large-block boundary.

  raw block device offsets are not necessarily largeblock aligned, so
  this calculation failed to properly limit cluster_read()s and the
  result was an occassional clustered read would cross-over into an
  incompatible largeblock and later cause an overlapping buffer panic.

* Calculate the proper limit in the hammer_ondisk.c module and refactor
  hammer_io_read().

Reported-by: swildner
sys/vfs/hammer/hammer.h
sys/vfs/hammer/hammer_io.c
sys/vfs/hammer/hammer_ondisk.c

index c719bdf..76914c5 100644 (file)
@@ -1329,8 +1329,7 @@ int hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag,
 
 void hammer_io_init(hammer_io_t io, hammer_volume_t volume,
                        enum hammer_io_type type);
-int hammer_io_read(struct vnode *devvp, struct hammer_io *io,
-                       hammer_off_t limit);
+int hammer_io_read(struct vnode *devvp, struct hammer_io *io, int limit);
 void hammer_io_advance(struct hammer_io *io);
 int hammer_io_new(struct vnode *devvp, struct hammer_io *io);
 int hammer_io_inval(hammer_volume_t volume, hammer_off_t zone2_offset);
index 0664346..a959a83 100644 (file)
@@ -76,53 +76,6 @@ hammer_io_init(hammer_io_t io, hammer_volume_t volume, enum hammer_io_type type)
 }
 
 /*
- * Determine if an io can be clustered for the storage cdev.  We have to
- * be careful to avoid creating overlapping buffers.
- *
- * (1) Any clustering is limited to within a largeblock, since going into
- *     an adjacent largeblock will change the zone.
- *
- * (2) The large-data zone can contain mixed buffer sizes.  Other zones
- *     contain only HAMMER_BUFSIZE sized buffer sizes (16K).
- */
-static int
-hammer_io_clusterable(hammer_io_t io, hammer_off_t *limitp)
-{
-       hammer_buffer_t buffer;
-       hammer_off_t eoz;
-
-       /*
-        * Can't cluster non hammer_buffer_t's
-        */
-       if (io->type != HAMMER_STRUCTURE_DATA_BUFFER &&
-           io->type != HAMMER_STRUCTURE_META_BUFFER &&
-           io->type != HAMMER_STRUCTURE_UNDO_BUFFER) {
-               return(0);
-       }
-
-       /*
-        * We cannot cluster the large-data zone.  This primarily targets
-        * the reblocker.  The normal file handling code will still cluster
-        * file reads via file vnodes.
-        */
-       buffer = (void *)io;
-       if ((buffer->zoneX_offset & HAMMER_OFF_ZONE_MASK) ==
-           HAMMER_ZONE_LARGE_DATA) {
-               return(0);
-       }
-
-       /*
-        * Do not allow the cluster operation to cross a largeblock
-        * boundary.
-        */
-       eoz = (io->offset + HAMMER_LARGEBLOCK_SIZE64 - 1) &
-               ~HAMMER_LARGEBLOCK_MASK64;
-       if (*limitp > eoz)
-               *limitp = eoz;
-       return(1);
-}
-
-/*
  * Helper routine to disassociate a buffer cache buffer from an I/O
  * structure.  The buffer is unlocked and marked appropriate for reclamation.
  *
@@ -278,24 +231,24 @@ hammer_io_notmeta(hammer_buffer_t buffer)
  * the caller.
  *
  * This routine is mostly used on meta-data and small-data blocks.  Generally
- * speaking HAMMER assumes some locality of reference and will cluster 
- * a 64K read.
+ * speaking HAMMER assumes some locality of reference and will cluster.
  *
- * Note that the clustering which occurs here is clustering within the
- * block device... typically meta-data and small-file data.  Regular
- * file clustering is different and handled in hammer_vnops.c
+ * Note that the caller (hammer_ondisk.c) may place further restrictions
+ * on clusterability via the limit (in bytes).  Typically large-data
+ * zones cannot be clustered due to their mixed buffer sizes.  This is
+ * not an issue since such clustering occurs in hammer_vnops at the
+ * regular file layer, whereas this is the buffered block device layer.
  */
 int
-hammer_io_read(struct vnode *devvp, struct hammer_io *io, hammer_off_t limit)
+hammer_io_read(struct vnode *devvp, struct hammer_io *io, int limit)
 {
        struct buf *bp;
        int   error;
 
        if ((bp = io->bp) == NULL) {
                hammer_count_io_running_read += io->bytes;
-               if (hammer_cluster_enable &&
-                   hammer_io_clusterable(io, &limit)) {
-                       error = cluster_read(devvp, limit,
+               if (hammer_cluster_enable && limit > io->bytes) {
+                       error = cluster_read(devvp, io->offset + limit,
                                             io->offset, io->bytes,
                                             HAMMER_CLUSTER_SIZE,
                                             HAMMER_CLUSTER_SIZE,
index 71b916d..694e85f 100644 (file)
@@ -446,7 +446,7 @@ hammer_load_volume(hammer_volume_t volume)
 
        if (volume->ondisk == NULL) {
                error = hammer_io_read(volume->devvp, &volume->io,
-                                      volume->maxraw_off);
+                                      HAMMER_BUFSIZE);
                if (error == 0) {
                        volume->ondisk = (void *)volume->io.bp->b_data;
                         hammer_ref_interlock_done(&volume->io.lock);
@@ -808,11 +808,30 @@ hammer_load_buffer(hammer_buffer_t buffer, int isnew)
        }
 
        if (buffer->ondisk == NULL) {
+               /*
+                * Issue the read or generate a new buffer.  When reading
+                * the limit argument controls any read-ahead clustering
+                * hammer_io_read() is allowed to do.
+                *
+                * We cannot read-ahead in the large-data zone and we cannot
+                * cross a largeblock boundary as the next largeblock might
+                * use a different buffer size.
+                */
                if (isnew) {
                        error = hammer_io_new(volume->devvp, &buffer->io);
+               } else if ((buffer->zoneX_offset & HAMMER_OFF_ZONE_MASK) ==
+                          HAMMER_ZONE_LARGE_DATA) {
+                       error = hammer_io_read(volume->devvp, &buffer->io,
+                                              buffer->io.bytes);
                } else {
+                       hammer_off_t limit;
+
+                       limit = (buffer->zone2_offset +
+                                HAMMER_LARGEBLOCK_MASK64) &
+                               ~HAMMER_LARGEBLOCK_MASK64;
+                       limit -= buffer->zone2_offset;
                        error = hammer_io_read(volume->devvp, &buffer->io,
-                                              volume->maxraw_off);
+                                              limit);
                }
                if (error == 0)
                        buffer->ondisk = (void *)buffer->io.bp->b_data;