kernel - dm - Add support for BUF_CMD_FLUSH
author Matthew Dillon <dillon@apollo.backplane.com>
Sat, 14 Aug 2010 19:04:52 +0000 (12:04 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Sat, 14 Aug 2010 19:04:52 +0000 (12:04 -0700)
* Change the nestiobuf_*() code to count the number of sub-bios issued
  instead of counting the resid.  This allows the nestiobuf_*() API to
  be used for BUF_CMD_FLUSH.

* Change the nestiobuf_*() API.  The nestiobuf_setup() function is now
  nestiobuf_add().  Add nestiobuf_init(mbio) and nestiobuf_start(mbio)
  wrappers around the adds.

* Explicitly check for allowed BUF_CMD_* commands in device-mapper.c

* Add a bypass to device-mapper.c to cause certain commands to be sent
  to all tbl's (BUF_CMD_FLUSH).

* Add explicit support for BUF_CMD_FLUSH to dm_target_stripe.c

* Change b_resid loading semantics to load upon completion of the
  I/O.

sys/dev/disk/dm/device-mapper.c
sys/dev/disk/dm/dm_target_stripe.c
sys/dev/disk/dm/dm_target_zero.c
sys/kern/vfs_bio.c
sys/sys/buf.h

index bc77449..38e40ba 100644 (file)
@@ -382,6 +382,7 @@ dmstrategy(struct dev_strategy_args *ap)
        cdev_t dev = ap->a_head.a_dev;
        struct bio *bio = ap->a_bio;
        struct buf *bp = bio->bio_buf;
+       int bypass;
 
        dm_dev_t *dmv;
        dm_table_t  *tbl;
@@ -410,8 +411,27 @@ dmstrategy(struct dev_strategy_args *ap)
                return 0;
        } 
 
-       if (bounds_check_with_mediasize(bio, DEV_BSIZE,
-           dm_table_size(&dmv->table_head)) <= 0) {
+       switch(bp->b_cmd) {
+       case BUF_CMD_READ:
+       case BUF_CMD_WRITE:
+       case BUF_CMD_FREEBLKS:
+               bypass = 0;
+               break;
+       case BUF_CMD_FLUSH:
+               bypass = 1;
+               KKASSERT(buf_len == 0);
+               break;
+       default:
+               dm_dev_unbusy(dmv);
+               bp->b_error = EIO;
+               bp->b_resid = bp->b_bcount;
+               biodone(bio);
+               return 0;
+       }
+
+       if (bypass == 0 &&
+           bounds_check_with_mediasize(bio, DEV_BSIZE,
+                                       dm_table_size(&dmv->table_head)) <= 0) {
                dm_dev_unbusy(dmv);
                bp->b_resid = bp->b_bcount;
                biodone(bio);
@@ -421,25 +441,22 @@ dmstrategy(struct dev_strategy_args *ap)
        /* Select active table */
        tbl = dm_table_get_entry(&dmv->table_head, DM_TABLE_ACTIVE);
 
-        /* Nested buffers count down to zero therefore I have
-           to set bp->b_resid to maximal value. */
-       bp->b_resid = bp->b_bcount;
+       nestiobuf_init(bio);
 
        /*
         * Find out what tables I want to select.
         */
-       SLIST_FOREACH(table_en, tbl, next)
-       {
-               /* I need need number of bytes not blocks. */
-               table_start = table_en->start * DEV_BSIZE;
+       SLIST_FOREACH(table_en, tbl, next) {
                /*
-                * I have to sub 1 from table_en->length to prevent
-                * off by one error
+                * I need need number of bytes not blocks.
                 */
-               table_end = table_start + (table_en->length)* DEV_BSIZE;
+               table_start = table_en->start * DEV_BSIZE;
+               table_end = table_start + (table_en->length) * DEV_BSIZE;
 
+               /*
+                * Calculate the start and end
+                */
                start = MAX(table_start, buf_start);
-
                end = MIN(table_end, buf_start + buf_len);
 
                aprint_debug("----------------------------------------\n");
@@ -453,25 +470,26 @@ dmstrategy(struct dev_strategy_args *ap)
                     PRIu64"\n", start, end);
                aprint_debug("\n----------------------------------------\n");
 
-               if (start < end) {
-                       /* create nested buffer  */
+               if (bypass) {
                        nestbuf = getpbuf(NULL);
 
-                       nestiobuf_setup(bio, nestbuf, start - buf_start,
-                           (end - start));
-
+                       nestiobuf_add(bio, nestbuf, 0, 0);
+                       nestbuf->b_bio1.bio_offset = 0;
+                       table_en->target->strategy(table_en, nestbuf);
+               } else if (start < end) {
+                       nestbuf = getpbuf(NULL);
+                       nestiobuf_add(bio, nestbuf,
+                                     start - buf_start, (end - start));
                        issued_len += end - start;
 
-                       /* I need number of bytes. */
                        nestbuf->b_bio1.bio_offset = (start - table_start);
-
                        table_en->target->strategy(table_en, nestbuf);
                }
        }
 
        if (issued_len < buf_len)
-               nestiobuf_done(bio, buf_len - issued_len, EINVAL);
-
+               nestiobuf_error(bio, EINVAL);
+       nestiobuf_start(bio);
        dm_table_release(&dmv->table_head, DM_TABLE_ACTIVE);
        dm_dev_unbusy(dmv);
 
index b850be1..44ac46d 100644 (file)
@@ -232,9 +232,9 @@ dm_target_stripe_strategy(dm_table_entry_t *table_en, struct buf *bp)
        struct bio *bio = &bp->b_bio1;
        struct buf *nestbuf;
        uint64_t blkno, blkoff;
-       uint64_t stripe, stripe_blknr;
+       uint64_t stripe, blknr;
        uint32_t stripe_off, stripe_rest, num_blks, issue_blks;
-       int stripe_devnr;
+       int devnr;
 
        tsc = table_en->target_config;
        if (tsc == NULL)
@@ -243,46 +243,71 @@ dm_target_stripe_strategy(dm_table_entry_t *table_en, struct buf *bp)
        /* calculate extent of request */
        KKASSERT(bp->b_resid % DEV_BSIZE == 0);
 
-       blkno = bp->b_bio1.bio_offset / DEV_BSIZE;
-       blkoff = 0;
-       num_blks = bp->b_resid / DEV_BSIZE;
-
-       for (;;) {
-               /* blockno to strip piece nr */
-               stripe = blkno / tsc->stripe_chunksize;
-               stripe_off = blkno % tsc->stripe_chunksize;
-
-               /* where we are inside the strip */
-               stripe_devnr = stripe % tsc->stripe_num;
-               stripe_blknr = stripe / tsc->stripe_num;
-
-               /* how much is left before we hit a boundary */
-               stripe_rest = tsc->stripe_chunksize - stripe_off;
-
-               /* issue this piece on stripe `stripe' */
-               issue_blks = MIN(stripe_rest, num_blks);
-               nestbuf = getpbuf(NULL);
-
-               nestiobuf_setup(bio, nestbuf, blkoff, issue_blks * DEV_BSIZE);
-
-               /* I need number of bytes. */
-               nestbuf->b_bio1.bio_offset =
-                       stripe_blknr * tsc->stripe_chunksize + stripe_off;
-               nestbuf->b_bio1.bio_offset +=
-                       tsc->stripe_devs[stripe_devnr].offset;
-               nestbuf->b_bio1.bio_offset *= DEV_BSIZE;
-
-               vn_strategy(tsc->stripe_devs[stripe_devnr].pdev->pdev_vnode,
-                           &nestbuf->b_bio1);
-
-               blkno += issue_blks;
-               blkoff += issue_blks * DEV_BSIZE;
-               num_blks -= issue_blks;
-
-               if (num_blks <= 0)
-                       break;
+       switch(bp->b_cmd) {
+       case BUF_CMD_READ:
+       case BUF_CMD_WRITE:
+       case BUF_CMD_FREEBLKS:
+               /*
+                * Loop through to individual operations
+                */
+               blkno = bp->b_bio1.bio_offset / DEV_BSIZE;
+               blkoff = 0;
+               num_blks = bp->b_resid / DEV_BSIZE;
+               nestiobuf_init(bio);
+
+               while (num_blks > 0) {
+                       /* blockno to strip piece nr */
+                       stripe = blkno / tsc->stripe_chunksize;
+                       stripe_off = blkno % tsc->stripe_chunksize;
+
+                       /* where we are inside the strip */
+                       devnr = stripe % tsc->stripe_num;
+                       blknr = stripe / tsc->stripe_num;
+
+                       /* how much is left before we hit a boundary */
+                       stripe_rest = tsc->stripe_chunksize - stripe_off;
+
+                       /* issue this piece on stripe `stripe' */
+                       issue_blks = MIN(stripe_rest, num_blks);
+                       nestbuf = getpbuf(NULL);
+
+                       nestiobuf_add(bio, nestbuf, blkoff,
+                                       issue_blks * DEV_BSIZE);
+
+                       /* I need number of bytes. */
+                       nestbuf->b_bio1.bio_offset =
+                               blknr * tsc->stripe_chunksize + stripe_off;
+                       nestbuf->b_bio1.bio_offset +=
+                               tsc->stripe_devs[devnr].offset;
+                       nestbuf->b_bio1.bio_offset *= DEV_BSIZE;
+
+                       vn_strategy(tsc->stripe_devs[devnr].pdev->pdev_vnode,
+                                   &nestbuf->b_bio1);
+
+                       blkno += issue_blks;
+                       blkoff += issue_blks * DEV_BSIZE;
+                       num_blks -= issue_blks;
+               }
+               nestiobuf_start(bio);
+               break;
+       case BUF_CMD_FLUSH:
+               nestiobuf_init(bio);
+               for (devnr = 0; devnr < tsc->stripe_num; ++devnr) {
+                       nestbuf = getpbuf(NULL);
+
+                       nestiobuf_add(bio, nestbuf, 0, 0);
+                       nestbuf->b_bio1.bio_offset = 0;
+                       vn_strategy(tsc->stripe_devs[devnr].pdev->pdev_vnode,
+                                   &nestbuf->b_bio1);
+               }
+               nestiobuf_start(bio);
+               break;
+       default:
+               bp->b_flags |= B_ERROR;
+               bp->b_error = EIO;
+               biodone(bio);
+               break;
        }
-
        return 0;
 }
 
index 5e41abc..eb0b199 100644 (file)
@@ -132,9 +132,7 @@ dm_target_zero_strategy(dm_table_entry_t * table_en, struct buf * bp)
        /* printf("Zero target read function called %d!!\n", bp->b_bcount); */
 
        memset(bp->b_data, 0, bp->b_bcount);
-       bp->b_resid = 0;        /* nestiobuf_done wants b_resid = 0 to be sure
-                                * that there is no other io to done  */
-
+       bp->b_resid = 0;
        biodone(&bp->b_bio1);
 
        return 0;
index 13b128b..c6bf4e3 100644 (file)
@@ -4551,28 +4551,81 @@ nestiobuf_done(struct bio *mbio, int donebytes, int error)
 
        mbp = mbio->bio_buf;    
 
-       /* If this buf didn't do anything, we are done. */
-       if (donebytes == 0)
-               return;
-
-       KKASSERT(mbp->b_resid >= donebytes);
+       KKASSERT((int)mbio->bio_driver_info > 0);
 
-       /* If an error occured, propagate it to the master buffer */
-       if (error)
+       /*
+        * If an error occured, propagate it to the master buffer.
+        *
+        * Several biodone()s may wind up running concurrently so
+        * use an atomic op to adjust b_flags.
+        */
+       if (error) {
                mbp->b_error = error;
+               atomic_set_int(&mbp->b_flags, B_ERROR);
+       }
 
        /*
         * Decrement the master buf b_resid according to our donebytes, and
         * also check if this is the last missing bit for the whole nestio
         * mess to complete. If so, call biodone() on the master buf mbp.
         */
-       if (atomic_fetchadd_int(&mbp->b_resid, -donebytes) == donebytes) {
+       if (atomic_fetchadd_int((int *)&mbio->bio_driver_info, -1) == 1) {
+               mbp->b_resid = 0;
                biodone(mbio);
        }
 }
 
 /*
- * nestiobuf_setup: setup a "nested" buffer.
+ * Initialize a nestiobuf for use.  Set an initial count of 1 to prevent
+ * the mbio from being biodone()'d while we are still adding sub-bios to
+ * it.
+ */
+void
+nestiobuf_init(struct bio *bio)
+{
+       bio->bio_driver_info = (void *)1;
+}
+
+/*
+ * The BIOs added to the nestedio have already been started, remove the
+ * count that placeheld our mbio and biodone() it if the count would
+ * transition to 0.
+ */
+void
+nestiobuf_start(struct bio *mbio)
+{
+       struct buf *mbp = mbio->bio_buf;
+
+       /*
+        * Drop the placeholder count set by nestiobuf_init().  If that was
+        * the last outstanding count, no sub-bio remains pending, so load
+        * b_resid and call biodone() on the master buf mbp here.
+        */
+       if (atomic_fetchadd_int((int *)&mbio->bio_driver_info, -1) == 1) {
+               if (mbp->b_flags & B_ERROR)
+                       mbp->b_resid = mbp->b_bcount;
+               else
+                       mbp->b_resid = 0;
+               biodone(mbio);
+       }
+}
+
+/*
+ * Set an intermediate error prior to calling nestiobuf_start()
+ */
+void
+nestiobuf_error(struct bio *mbio, int error)
+{
+       struct buf *mbp = mbio->bio_buf;
+
+       if (error) {
+               mbp->b_error = error;
+               atomic_set_int(&mbp->b_flags, B_ERROR);
+       }
+}
+
+/*
+ * nestiobuf_add: setup a "nested" buffer.
  *
  * => 'mbp' is a "master" buffer which is being divided into sub pieces.
  * => 'bp' should be a buffer allocated by getiobuf.
@@ -4580,13 +4633,15 @@ nestiobuf_done(struct bio *mbio, int donebytes, int error)
  * => 'size' is a size in bytes of this nested buffer.
  */
 void
-nestiobuf_setup(struct bio *bio, struct buf *bp, int offset, size_t size)
+nestiobuf_add(struct bio *mbio, struct buf *bp, int offset, size_t size)
 {
-       struct buf *mbp = bio->bio_buf;
+       struct buf *mbp = mbio->bio_buf;
        struct vnode *vp = mbp->b_vp;
 
        KKASSERT(mbp->b_bcount >= offset + size);
 
+       atomic_add_int((int *)&mbio->bio_driver_info, 1);
+
        /* kernel needs to own the lock for it to be released in biodone */
        BUF_KERNPROC(bp);
        bp->b_vp = vp;
@@ -4597,7 +4652,7 @@ nestiobuf_setup(struct bio *bio, struct buf *bp, int offset, size_t size)
        bp->b_bufsize = bp->b_bcount;
 
        bp->b_bio1.bio_track = NULL;
-       bp->b_bio1.bio_caller_info1.ptr = bio;
+       bp->b_bio1.bio_caller_info1.ptr = mbio;
 }
 
 /*
index ea87d42..3678d83 100644 (file)
@@ -452,7 +452,10 @@ void       bio_ops_sync(struct mount *mp);
 void   vm_hold_free_pages(struct buf *bp, vm_offset_t from, vm_offset_t to);
 void   vm_hold_load_pages(struct buf *bp, vm_offset_t from, vm_offset_t to);
 void   nestiobuf_done(struct bio *mbio, int donebytes, int error);
-void   nestiobuf_setup(struct bio *bio, struct buf *bp, int offset, size_t size);
+void   nestiobuf_init(struct bio *mbio);
+void   nestiobuf_add(struct bio *mbio, struct buf *bp, int off, size_t size);
+void   nestiobuf_start(struct bio *mbio);
+void   nestiobuf_error(struct bio *mbio, int error);
 #endif /* _KERNEL */
 #endif /* _KERNEL || _KERNEL_STRUCTURES */
 #endif /* !_SYS_BUF_H_ */