hammer2 - Fix critical bulkfree bug, refactor hammer2_io
author Matthew Dillon <dillon@apollo.backplane.com>
Tue, 29 Aug 2017 21:55:42 +0000 (14:55 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Tue, 29 Aug 2017 21:55:42 +0000 (14:55 -0700)
* The bulkfree scan code was corrupting storage elements allocated beyond
  the 512GB mark due to not resetting the bulk duplicate subtree detection
  heuristic for each new storage range being evaluated.

  Fixed by zeroing out the heuristic in the ranging loop.

* Refactor hammer2_io.c.  This module was originally designed for completely
  asynchronous operation (with callbacks), but the backend chain code was
  subsequently redesigned to use kernel threads for each target device.  I/O
  operations in the kernel threads were issued synchronously.

  Remove all the asynchronous support code, which is roughly 50% of the module.
  Get rid of the IOCB mechanism entirely.  Simplify the hammer2_io structure
  and flags interactions and simplify the API.

* Add some temporary debugging to the unmount path.

sys/vfs/hammer2/hammer2.h
sys/vfs/hammer2/hammer2_bulkfree.c
sys/vfs/hammer2/hammer2_flush.c
sys/vfs/hammer2/hammer2_freemap.c
sys/vfs/hammer2/hammer2_io.c
sys/vfs/hammer2/hammer2_strategy.c
sys/vfs/hammer2/hammer2_vfsops.c

index 32e42ae..d07e0e2 100644 (file)
 #include "hammer2_ioctl.h"
 
 struct hammer2_io;
-struct hammer2_iocb;
 struct hammer2_chain;
 struct hammer2_cluster;
 struct hammer2_inode;
@@ -240,7 +239,6 @@ typedef uint32_t hammer2_xid_t;
 RB_HEAD(hammer2_chain_tree, hammer2_chain);
 TAILQ_HEAD(h2_flush_list, hammer2_chain);
 TAILQ_HEAD(h2_core_list, hammer2_chain);
-TAILQ_HEAD(h2_iocb_list, hammer2_iocb);
 
 #define CHAIN_CORE_DELETE_BMAP_ENTRIES \
        (HAMMER2_PBUFSIZE / sizeof(hammer2_blockref_t) / sizeof(uint32_t))
@@ -261,34 +259,6 @@ typedef struct hammer2_chain_core hammer2_chain_core_t;
 
 RB_HEAD(hammer2_io_tree, hammer2_io);
 
-/*
- * IOCB - IO callback (into chain, cluster, or manual request)
- */
-struct hammer2_iocb {
-       TAILQ_ENTRY(hammer2_iocb) entry;
-       void (*callback)(struct hammer2_iocb *iocb);
-       struct hammer2_io       *dio;
-       struct hammer2_chain    *chain;
-       void                    *ptr;
-       off_t                   lbase;
-       int                     lsize;
-       uint32_t                flags;
-       int                     error;
-       int                     btype;
-};
-
-typedef struct hammer2_iocb hammer2_iocb_t;
-
-#define HAMMER2_IOCB_INTERLOCK 0x00000001
-#define HAMMER2_IOCB_ONQ       0x00000002
-#define HAMMER2_IOCB_DONE      0x00000004
-#define HAMMER2_IOCB_INPROG    0x00000008
-#define HAMMER2_IOCB_UNUSED10  0x00000010
-#define HAMMER2_IOCB_QUICK     0x00010000
-#define HAMMER2_IOCB_ZERO      0x00020000
-#define HAMMER2_IOCB_READ      0x00040000
-#define HAMMER2_IOCB_WAKEUP    0x00080000
-
 /*
  * DIO - Management structure wrapping system buffer cache.
  *
@@ -298,8 +268,6 @@ typedef struct hammer2_iocb hammer2_iocb_t;
  */
 struct hammer2_io {
        RB_ENTRY(hammer2_io) rbnode;    /* indexed by device offset */
-       struct h2_iocb_list iocbq;
-       struct spinlock spin;
        struct hammer2_dev *hmp;
        struct buf      *bp;
        off_t           pbase;
@@ -308,6 +276,8 @@ struct hammer2_io {
        int             act;            /* activity */
        int             btype;          /* approximate BREF_TYPE_* */
        int             ticks;
+       int             error;
+       int             unused01;
        uint64_t        dedup_valid;    /* valid for dedup operation */
        uint64_t        dedup_alloc;    /* allocated / de-dupable */
 };
@@ -613,7 +583,6 @@ struct hammer2_cluster {
        int                     nchains;
        int                     error;          /* error code valid on lock */
        int                     focus_index;
-       hammer2_iocb_t          iocb;
        hammer2_chain_t         *focus;         /* current focus (or mod) */
        hammer2_cluster_item_t  array[HAMMER2_MAXCLUSTER];
 };
@@ -1538,35 +1507,27 @@ void hammer2_io_putblk(hammer2_io_t **diop);
 void hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes);
 void hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree);
 char *hammer2_io_data(hammer2_io_t *dio, off_t lbase);
-hammer2_io_t *hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, int lsize,
-                               int notgood);
-void hammer2_io_getblk(hammer2_dev_t *hmp, off_t lbase, int lsize,
-                               hammer2_iocb_t *iocb);
+hammer2_io_t *hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase,
+                               int lsize, int op);
 void hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref);
 void hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
                                hammer2_off_t data_off, u_int bytes);
 void hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off,
                                u_int bytes);
-void hammer2_io_complete(hammer2_iocb_t *iocb);
 void hammer2_io_callback(struct bio *bio);
-void hammer2_iocb_wait(hammer2_iocb_t *iocb);
 int hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
                                hammer2_io_t **diop);
 int hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
                                hammer2_io_t **diop);
-void hammer2_io_newq(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize);
 int hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
                                hammer2_io_t **diop);
+hammer2_io_t *hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, int lsize);
 void hammer2_io_bawrite(hammer2_io_t **diop);
 void hammer2_io_bdwrite(hammer2_io_t **diop);
 int hammer2_io_bwrite(hammer2_io_t **diop);
-int hammer2_io_isdirty(hammer2_io_t *dio);
 void hammer2_io_setdirty(hammer2_io_t *dio);
 void hammer2_io_brelse(hammer2_io_t **diop);
 void hammer2_io_bqrelse(hammer2_io_t **diop);
-int hammer2_io_crc_good(hammer2_chain_t *chain, uint64_t *maskp);
-void hammer2_io_crc_setmask(hammer2_io_t *dio, uint64_t mask);
-void hammer2_io_crc_clrmask(hammer2_io_t *dio, uint64_t mask);
 
 /*
  * hammer2_thread.c
index b457208..2c7aba9 100644 (file)
@@ -122,17 +122,18 @@ hammer2_bulk_scan(hammer2_chain_t *parent,
         * imploding.
         */
        while ((doabort & HAMMER2_BULK_ABORT) == 0 &&
-              hammer2_chain_scan(parent, &chain, &bref, &first,
-                                 &cache_index,
-                                 HAMMER2_LOOKUP_NODATA |
-                                 HAMMER2_LOOKUP_SHARED) != NULL) {
+               hammer2_chain_scan(parent, &chain, &bref, &first,
+                                  &cache_index,
+                                  HAMMER2_LOOKUP_NODATA |
+                                  HAMMER2_LOOKUP_SHARED) != NULL) {
                /*
                 * Process bref, chain is only non-NULL if the bref
                 * might be recursable (its possible that we sometimes get
                 * a non-NULL chain where the bref cannot be recursed).
                 */
 #if 0
-               kprintf("SCAN %016jx\n", bref.data_off);
+               kprintf("SCAN %p/%p %016jx.%02x\n",
+                       parent, chain, bref.data_off, bref.type);
                int xerr = tsleep(&info->pri, PCATCH, "slp", hz / 10);
                if (xerr == EINTR || xerr == ERESTART) {
                        doabort |= HAMMER2_BULK_ABORT;
@@ -251,7 +252,7 @@ static int h2_bulkfree_callback(hammer2_bulkfree_info_t *cbinfo,
 static void h2_bulkfree_sync(hammer2_bulkfree_info_t *cbinfo);
 static void h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t *cbinfo,
                        hammer2_off_t data_off, hammer2_bmap_data_t *live,
-                       hammer2_bmap_data_t *bmap);
+                       hammer2_bmap_data_t *bmap, hammer2_key_t alloc_base);
 
 void
 hammer2_bulkfree_init(hammer2_dev_t *hmp)
@@ -361,8 +362,16 @@ hammer2_bulkfree_pass(hammer2_dev_t *hmp, hammer2_chain_t *vchain,
                /*
                 * We have enough ram to represent (incr) bytes of storage.
                 * Each 64KB of ram represents 2GB of storage.
+                *
+                * We must also clean out our de-duplication heuristic for
+                * each (incr) bytes of storage, otherwise we wind up not
+                * scanning meta-data for later areas of storage because
+                * they had already been scanned in earlier areas of storage.
+                * Since the ranging is different, we have to restart
+                * the dedup heuristic too.
                 */
                cbinfo_bmap_init(&cbinfo, size);
+               bzero(cbinfo.dedup, sizeof(*cbinfo.dedup));
                incr = size / HAMMER2_FREEMAP_LEVELN_PSIZE *
                       HAMMER2_FREEMAP_LEVEL1_SIZE;
                if (hmp->voldata.volu_size - cbinfo.sbase < incr)
@@ -797,7 +806,10 @@ h2_bulkfree_sync(hammer2_bulkfree_info_t *cbinfo)
                hammer2_chain_modify(live_chain, cbinfo->mtid, 0, 0);
                live = &live_chain->data->bmdata[bmapindex];
 
-               h2_bulkfree_sync_adjust(cbinfo, data_off, live, bmap);
+               h2_bulkfree_sync_adjust(cbinfo, data_off, live, bmap,
+                                       live_chain->bref.key +
+                                       bmapindex *
+                                       HAMMER2_FREEMAP_LEVEL0_SIZE);
 next:
                data_off += HAMMER2_FREEMAP_LEVEL0_SIZE;
                ++bmap;
@@ -819,7 +831,7 @@ static
 void
 h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t *cbinfo,
                        hammer2_off_t data_off, hammer2_bmap_data_t *live,
-                       hammer2_bmap_data_t *bmap)
+                       hammer2_bmap_data_t *bmap, hammer2_key_t alloc_base)
 {
        int bindex;
        int scount;
@@ -937,6 +949,10 @@ h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t *cbinfo,
                /*
                 * Completely empty, reset entire segment
                 */
+#if 0
+               kprintf("hammer2: cleanseg %016jx.%04x (%d)\n",
+                       alloc_base, live->class, live->avail);
+#endif
                live->avail = HAMMER2_FREEMAP_LEVEL0_SIZE;
                live->class = 0;
                live->linear = 0;
index 66589c4..30987e3 100644 (file)
@@ -1303,6 +1303,8 @@ hammer2_inode_xop_flush(hammer2_thread_t *thr, hammer2_xop_t *arg)
        vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY);
        error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0);
        vn_unlock(hmp->devvp);
+       if (error)
+               kprintf("error %d cannot sync %s\n", error, hmp->devrepname);
 
        /*
         * The flush code sets CHAIN_VOLUMESYNC to indicate that the
index 92e52a9..097b364 100644 (file)
@@ -670,9 +670,12 @@ success:
 #endif
 
                if ((bmap->bitmapq[i] & pbmmask) == 0) {
-                       hammer2_io_newq(hmp, HAMMER2_BREF_TYPE_FREEMAP_LEAF,
+                       hammer2_io_t *dio;
+
+                       hammer2_io_newnz(hmp, class >> 8,
                                        (*basep + (offset & ~pmask)) |
-                                       pradix, psize);
+                                       pradix, psize, &dio);
+                       hammer2_io_putblk(&dio);
                }
        }
 
index 80a19d5..68e38ec 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
+ * Copyright (c) 2013-2017 The DragonFly Project.  All rights reserved.
  *
  * This code is derived from software contributed to The DragonFly Project
  * by Matthew Dillon <dillon@dragonflybsd.org>
 
 #include "hammer2.h"
 
+#define HAMMER2_DOP_READ       1
+#define HAMMER2_DOP_NEW                2
+#define HAMMER2_DOP_NEWNZ      3
+#define HAMMER2_DOP_READQ      4
+
 /*
  * Implements an abstraction layer for synchronous and asynchronous
  * buffered device I/O.  Can be used as an OS-abstraction but the main
@@ -65,6 +70,7 @@ struct hammer2_cleanupcb_info {
        int     count;
 };
 
+#if 0
 static __inline
 uint64_t
 hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
@@ -87,10 +93,7 @@ hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
 
        return mask;
 }
-
-#define HAMMER2_GETBLK_GOOD    0
-#define HAMMER2_GETBLK_QUEUED  1
-#define HAMMER2_GETBLK_OWNED   2
+#endif
 
 /*
  * Returns the DIO corresponding to the data|radix, creating it if necessary.
@@ -101,13 +104,14 @@ hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
 static __inline
 hammer2_io_t *
 hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
-                int createit)
+                int createit, int *isgoodp)
 {
        hammer2_io_t *dio;
        hammer2_io_t *xio;
        hammer2_key_t lbase;
        hammer2_key_t pbase;
        hammer2_key_t pmask;
+       uint64_t refs;
        int lsize;
        int psize;
 
@@ -122,6 +126,7 @@ hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
                        pbase, lbase, lsize, pmask);
        }
        KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
+       *isgoodp = 0;
 
        /*
         * Access/Allocate the DIO, bump dio->refs to prevent destruction.
@@ -129,32 +134,34 @@ hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
        hammer2_spin_sh(&hmp->io_spin);
        dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
        if (dio) {
-               if ((atomic_fetchadd_64(&dio->refs, 1) &
-                    HAMMER2_DIO_MASK) == 0) {
+               refs = atomic_fetchadd_64(&dio->refs, 1);
+               if ((refs & HAMMER2_DIO_MASK) == 0) {
                        atomic_add_int(&dio->hmp->iofree_count, -1);
                }
+               if (refs & HAMMER2_DIO_GOOD)
+                       *isgoodp = 1;
                hammer2_spin_unsh(&hmp->io_spin);
        } else if (createit) {
+               refs = 0;
                hammer2_spin_unsh(&hmp->io_spin);
                dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
                dio->hmp = hmp;
                dio->pbase = pbase;
                dio->psize = psize;
                dio->btype = btype;
-               dio->refs = 1;
+               dio->refs = refs + 1;
                dio->act = 5;
-               hammer2_spin_init(&dio->spin, "h2dio");
-               TAILQ_INIT(&dio->iocbq);
                hammer2_spin_ex(&hmp->io_spin);
                xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
                if (xio == NULL) {
                        atomic_add_int(&hammer2_dio_count, 1);
                        hammer2_spin_unex(&hmp->io_spin);
                } else {
-                       if ((atomic_fetchadd_64(&xio->refs, 1) &
-                            HAMMER2_DIO_MASK) == 0) {
+                       refs = atomic_fetchadd_64(&xio->refs, 1);
+                       if ((refs & HAMMER2_DIO_MASK) == 0)
                                atomic_add_int(&xio->hmp->iofree_count, -1);
-                       }
+                       if (refs & HAMMER2_DIO_GOOD)
+                               *isgoodp = 1;
                        hammer2_spin_unex(&hmp->io_spin);
                        kfree(dio, M_HAMMER2);
                        dio = xio;
@@ -171,359 +178,182 @@ hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
 }
 
 /*
- * Allocate/Locate the requested dio, reference it, issue or queue iocb.
+ * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
+ * a buffer.  If set the buffer already exists and is good to go.
  */
-void
-hammer2_io_getblk(hammer2_dev_t *hmp, off_t lbase, int lsize,
-                 hammer2_iocb_t *iocb)
+hammer2_io_t *
+hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize, int op)
 {
        hammer2_io_t *dio;
-       uint64_t refs;
+       off_t peof;
+       uint64_t orefs;
+       uint64_t nrefs;
+       int isgood;
+       int error;
+       int hce;
 
        KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
-       dio = hammer2_io_alloc(hmp, lbase, iocb->btype, 1);
 
-       iocb->dio = dio;
+       if (op == HAMMER2_DOP_READQ) {
+               dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
+               if (dio == NULL)
+                       return NULL;
+               op = HAMMER2_DOP_READ;
+       } else {
+               dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
+       }
 
        for (;;) {
-               refs = dio->refs;
+               orefs = dio->refs;
                cpu_ccfence();
 
                /*
-                * Issue the iocb immediately if the buffer is already good.
-                * Once set GOOD cannot be cleared until refs drops to 0.
-                *
-                * lfence required because dio's are not interlocked for
-                * the DIO_GOOD test.
+                * Buffer is already good, handle the op and return.
                 */
-               if (refs & HAMMER2_DIO_GOOD) {
-                       cpu_lfence();
-                       iocb->callback(iocb);
-                       break;
+               if (orefs & HAMMER2_DIO_GOOD) {
+                       if (isgood == 0)
+                               cpu_mfence();
+
+                       switch(op) {
+                       case HAMMER2_DOP_NEW:
+                               bzero(hammer2_io_data(dio, lbase), lsize);
+                               /* fall through */
+                       case HAMMER2_DOP_NEWNZ:
+                               atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
+                               break;
+                       case HAMMER2_DOP_READ:
+                       default:
+                               /* nothing to do */
+                               break;
+                       }
+                       return (dio);
                }
 
                /*
-                * Try to own the DIO by setting INPROG so we can issue
-                * I/O on it.
+                * Try to own the DIO
                 */
-               if (refs & HAMMER2_DIO_INPROG) {
-                       /*
-                        * If DIO_INPROG is already set then set WAITING and
-                        * queue the iocb.
-                        */
-                       hammer2_spin_ex(&dio->spin);
-                       if (atomic_cmpset_64(&dio->refs, refs,
-                                             refs | HAMMER2_DIO_WAITING)) {
-                               iocb->flags |= HAMMER2_IOCB_ONQ |
-                                              HAMMER2_IOCB_INPROG;
-                               TAILQ_INSERT_TAIL(&dio->iocbq, iocb, entry);
-                               hammer2_spin_unex(&dio->spin);
-                               break;
+               if (orefs & HAMMER2_DIO_INPROG) {
+                       nrefs = orefs | HAMMER2_DIO_WAITING;
+                       tsleep_interlock(dio, 0);
+                       if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
+                               tsleep(dio, PINTERLOCKED, "h2dio", hz);
                        }
-                       hammer2_spin_unex(&dio->spin);
                        /* retry */
                } else {
-                       /*
-                        * If DIO_INPROG is not set then set it and issue the
-                        * callback immediately to start I/O.
-                        */
-                       if (atomic_cmpset_64(&dio->refs, refs,
-                                             refs | HAMMER2_DIO_INPROG)) {
-                               iocb->flags |= HAMMER2_IOCB_INPROG;
-                               iocb->callback(iocb);
+                       nrefs = orefs | HAMMER2_DIO_INPROG;
+                       if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
                                break;
                        }
-                       /* retry */
                }
-               /* retry */
        }
-}
-
-/*
- * Quickly obtain a good DIO buffer, return NULL if the system no longer
- * caches the data.
- */
-hammer2_io_t *
-hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, int lsize, int notgood)
-{
-       hammer2_iocb_t iocb;
-       hammer2_io_t *dio;
-       struct buf *bp;
-       off_t pbase;
-       off_t pmask;
-       int psize = HAMMER2_PBUFSIZE;
-       uint64_t orefs;
-       uint64_t nrefs;
-
-       pmask = ~(hammer2_off_t)(psize - 1);
-
-       KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
-       lbase &= ~HAMMER2_OFF_MASK_RADIX;
-       pbase = lbase & pmask;
-       if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
-               kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
-                       pbase, lbase, lsize, pmask);
-       }
-       KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
 
        /*
-        * Access/Allocate the DIO, bump dio->refs to prevent destruction.
+        * We break to here if GOOD is not set and we acquired INPROG for
+        * the I/O.
         */
-       hammer2_spin_sh(&hmp->io_spin);
-       dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
-       if (dio == NULL) {
-               hammer2_spin_unsh(&hmp->io_spin);
-               return NULL;
-       }
-
-       if ((atomic_fetchadd_64(&dio->refs, 1) & HAMMER2_DIO_MASK) == 0)
-               atomic_add_int(&dio->hmp->iofree_count, -1);
-       hammer2_spin_unsh(&hmp->io_spin);
-
-       dio->ticks = ticks;
-       if (dio->act < 10)
-               ++dio->act;             /* SMP race ok */
-
-       /*
-        * Obtain/validate the buffer.  Do NOT issue I/O.  Discard if
-        * the system does not have the data already cached.
-        */
-       nrefs = (uint64_t)-1;
-       for (;;) {
-               orefs = dio->refs;
-               cpu_ccfence();
-
-               /*
-                * Issue the iocb immediately if the buffer is already good.
-                * Once set GOOD cannot be cleared until refs drops to 0.
-                *
-                * lfence required because dio is not interlockedf for
-                * the DIO_GOOD test.
-                */
-               if (orefs & HAMMER2_DIO_GOOD) {
-                       cpu_lfence();
-                       break;
-               }
-
-               /*
-                * Try to own the DIO by setting INPROG so we can issue
-                * I/O on it.  INPROG might already be set, in which case
-                * there is no way we can do this non-blocking so we punt.
-                */
-               if ((orefs & HAMMER2_DIO_INPROG))
+       KKASSERT(dio->bp == NULL);
+
+       error = 0;
+       if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
+           dio->psize == lsize) {
+               switch(op) {
+               case HAMMER2_DOP_NEW:
+               case HAMMER2_DOP_NEWNZ:
+                       dio->bp = getblk(dio->hmp->devvp,
+                                        dio->pbase, dio->psize,
+                                        0, 0);
+                       if (op == HAMMER2_DOP_NEW)
+                               bzero(dio->bp->b_data, dio->psize);
+                       atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
                        break;
-               nrefs = orefs | HAMMER2_DIO_INPROG;
-               if (atomic_cmpset_64(&dio->refs, orefs, nrefs) == 0)
-                       continue;
-
-               /*
-                * We own DIO_INPROG, try to set DIO_GOOD.
-                *
-                * If (notgood) specified caller just wants the dio and doesn't
-                * care about the buffer a whole lot.  However, if the buffer
-                * is good (or dirty), we still want to return it.
-                *
-                * Otherwise we are trying to resolve a dedup and bread()
-                * is expected to always be better than building a new buffer
-                * that will be written.  Use bread() for better determinism
-                * than getblk().
-                */
-               bp = dio->bp;
-               dio->bp = NULL;
-               if (bp == NULL) {
-                       if (notgood)
-                               bp = getblk(hmp->devvp, dio->pbase,
-                                           dio->psize, 0, 0);
-                       else
-                               bread(hmp->devvp, dio->pbase, dio->psize, &bp);
-               }
-
-               /*
-                * System buffer must also have remained cached.
-                */
-               if (bp) {
-                       if ((bp->b_flags & B_ERROR) == 0 &&
-                           (bp->b_flags & B_CACHE)) {
-                               dio->bp = bp;   /* assign BEFORE setting flag */
-                               atomic_set_64(&dio->refs, HAMMER2_DIO_GOOD);
+               case HAMMER2_DOP_READ:
+               default:
+                       if ((hce = hammer2_cluster_read) > 0) {
+                               /*
+                                * Synchronous cluster I/O for now.
+                                */
+                               peof = (dio->pbase + HAMMER2_SEGMASK64) &
+                                      ~HAMMER2_SEGMASK64;
+                               error = cluster_read(dio->hmp->devvp,
+                                                    peof, dio->pbase,
+                                                    dio->psize,
+                                                    dio->psize,
+                                                    HAMMER2_PBUFSIZE*hce,
+                                                    &dio->bp);
                        } else {
-                               bqrelse(bp);
-                               bp = NULL;
+                               error = bread(dio->hmp->devvp, dio->pbase,
+                                             dio->psize, &dio->bp);
                        }
                }
-
-               /*
-                * Clear DIO_INPROG.
-                *
-                * This is actually a bit complicated, see
-                * hammer2_io_complete() for more information.
-                */
-               iocb.dio = dio;
-               iocb.flags = HAMMER2_IOCB_INPROG;
-               hammer2_io_complete(&iocb);
-               break;
-       }
-
-       /*
-        * Only return the dio if its buffer is good.  If notgood != 0,
-        * we return the buffer regardless (so ephermal dedup bits can be
-        * cleared).
-        */
-       if (notgood == 0 && (dio->refs & HAMMER2_DIO_GOOD) == 0) {
-               hammer2_io_putblk(&dio);
-       }
-       return dio;
-}
-
-/*
- * The originator of the iocb is finished with it.
- *
- * WARNING: iocb may be partially initialized with only iocb->dio and
- *         iocb->flags.
- */
-void
-hammer2_io_complete(hammer2_iocb_t *iocb)
-{
-       hammer2_io_t *dio = iocb->dio;
-       hammer2_iocb_t *cbtmp;
-       uint64_t orefs;
-       uint64_t nrefs;
-       uint32_t oflags;
-       uint32_t nflags;
-
-       /*
-        * If IOCB_INPROG was not set completion is synchronous due to the
-        * buffer already being good.  We can simply set IOCB_DONE and return.
-        *
-        * In this situation DIO_INPROG is not set and we have no visibility
-        * on dio->bp.  We should not try to mess with dio->bp because another
-        * thread may be finishing up its processing.  dio->bp should already
-        * be set to BUF_KERNPROC()!
-        */
-       if ((iocb->flags & HAMMER2_IOCB_INPROG) == 0) {
-               atomic_set_int(&iocb->flags, HAMMER2_IOCB_DONE);
-               return;
-       }
-
-       /*
-        * The iocb was queued, obtained DIO_INPROG, and its callback was
-        * made.  The callback is now complete.  We still own DIO_INPROG.
-        *
-        * We can set DIO_GOOD if no error occurred, which gives certain
-        * stability guarantees to dio->bp and allows other accessors to
-        * short-cut access.  DIO_GOOD cannot be cleared until the last
-        * ref is dropped.
-        */
-       KKASSERT(dio->refs & HAMMER2_DIO_INPROG);
-       if (dio->bp) {
-               BUF_KERNPROC(dio->bp);
-               if ((dio->bp->b_flags & B_ERROR) == 0) {
-                       KKASSERT(dio->bp->b_flags & B_CACHE);
-                       atomic_set_64(&dio->refs, HAMMER2_DIO_GOOD);
+       } else {
+               if ((hce = hammer2_cluster_read) > 0) {
+                       /*
+                        * Synchronous cluster I/O for now.
+                        */
+                       peof = (dio->pbase + HAMMER2_SEGMASK64) &
+                              ~HAMMER2_SEGMASK64;
+                       error = cluster_read(dio->hmp->devvp,
+                                            peof, dio->pbase,
+                                            dio->psize,
+                                            dio->psize,
+                                            HAMMER2_PBUFSIZE*hce,
+                                            &dio->bp);
+               } else {
+                       error = bread(dio->hmp->devvp, dio->pbase,
+                                     dio->psize, &dio->bp);
                }
-       }
-
-       /*
-        * Clean up the dio before marking the iocb as being done.  If another
-        * iocb is pending we chain to it while leaving DIO_INPROG set (it
-        * will call io completion and presumably clear DIO_INPROG).
-        *
-        * Otherwise if no other iocbs are pending we clear DIO_INPROG before
-        * finishing up the cbio.  This means that DIO_INPROG is cleared at
-        * the end of the chain before ANY of the cbios are marked done.
-        *
-        * NOTE: The TAILQ is not stable until the spin-lock is held.
-        */
-       for (;;) {
-               orefs = dio->refs;
-               nrefs = orefs & ~(HAMMER2_DIO_WAITING | HAMMER2_DIO_INPROG);
-
-               if (orefs & HAMMER2_DIO_WAITING) {
-                       hammer2_spin_ex(&dio->spin);
-                       cbtmp = TAILQ_FIRST(&dio->iocbq);
-                       if (cbtmp) {
-                               /*
-                                * NOTE: flags not adjusted in this case.
-                                *       Flags will be adjusted by the last
-                                *       iocb.
-                                */
-                               TAILQ_REMOVE(&dio->iocbq, cbtmp, entry);
-                               hammer2_spin_unex(&dio->spin);
-                               cbtmp->callback(cbtmp); /* chained */
+               if (dio->bp) {
+                       switch(op) {
+                       case HAMMER2_DOP_NEW:
+                               bzero(hammer2_io_data(dio, lbase), lsize);
+                               /* fall through */
+                       case HAMMER2_DOP_NEWNZ:
+                               atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
                                break;
-                       } else if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
-                               hammer2_spin_unex(&dio->spin);
+                       case HAMMER2_DOP_READ:
+                       default:
                                break;
                        }
-                       hammer2_spin_unex(&dio->spin);
-                       /* retry */
-               } else if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
-                       break;
-               } /* else retry */
-               /* retry */
+               }
        }
 
+       if (dio->bp)
+               BUF_KERNPROC(dio->bp);
+       dio->error = error;
+
        /*
-        * Mark the iocb as done and wakeup any waiters.  This is done after
-        * all iocb chains have been called back and after DIO_INPROG has been
-        * cleared.  This avoids races against ref count drops by the waiting
-        * threads (a hard but not impossible SMP race) which might result in
-        * a 1->0 transition of the refs while DIO_INPROG is still set.
+        * Clear INPROG and WAITING, set GOOD on success, wake up any waiters.
         */
        for (;;) {
-               oflags = iocb->flags;
+               orefs = dio->refs;
                cpu_ccfence();
-               nflags = oflags;
-               nflags &= ~(HAMMER2_IOCB_WAKEUP | HAMMER2_IOCB_INPROG);
-               nflags |= HAMMER2_IOCB_DONE;
-
-               if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
-                       if (oflags & HAMMER2_IOCB_WAKEUP)
-                               wakeup(iocb);
-                       /* SMP: iocb is now stale */
+               nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
+               if (error == 0)
+                       nrefs |= HAMMER2_DIO_GOOD;
+               if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
+                       if (orefs & HAMMER2_DIO_WAITING)
+                               wakeup(dio);
                        break;
                }
-               /* retry */
+               cpu_pause();
        }
-       iocb = NULL;
 
-}
-
-/*
- * Wait for an iocb's I/O to finish.
- */
-void
-hammer2_iocb_wait(hammer2_iocb_t *iocb)
-{
-       uint32_t oflags;
-       uint32_t nflags;
-
-       for (;;) {
-               oflags = iocb->flags;
-               cpu_ccfence();
-               nflags = oflags | HAMMER2_IOCB_WAKEUP;
-               if (oflags & HAMMER2_IOCB_DONE)
-                       break;
-               tsleep_interlock(iocb, 0);
-               if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
-                       tsleep(iocb, PINTERLOCKED, "h2iocb", hz);
-               }
-       }
+       /* XXX error handling */
 
+       return dio;
 }
 
 /*
  * Release our ref on *diop.
  *
- * On the last ref we must atomically clear DIO_GOOD and set DIO_INPROG,
- * then dispose of the underlying buffer.
+ * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
+ * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
  */
 void
 hammer2_io_putblk(hammer2_io_t **diop)
 {
        hammer2_dev_t *hmp;
        hammer2_io_t *dio;
-       hammer2_iocb_t iocb;
        struct buf *bp;
        off_t peof;
        off_t pbase;
@@ -541,41 +371,42 @@ hammer2_io_putblk(hammer2_io_t **diop)
        /*
         * Drop refs.
         *
-        * On the 1->0 transition clear flags and set INPROG.
-        *
-        * On the 1->0 transition if INPROG is already set, another thread
-        * is in lastdrop and we can just return after the transition.
-        *
-        * On any other transition we can generally just return.
+        * On the 1->0 transition clear GOOD and DIRTY, set INPROG, and break
+        * (sleeping first if INPROG is already set).  Otherwise just return.
         */
        for (;;) {
                orefs = dio->refs;
                cpu_ccfence();
-               nrefs = orefs - 1;
 
                if ((orefs & HAMMER2_DIO_MASK) == 1 &&
                    (orefs & HAMMER2_DIO_INPROG) == 0) {
                        /*
                         * Lastdrop case, INPROG can be set.
                         */
+                       nrefs = orefs - 1;
                        nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
                        nrefs |= HAMMER2_DIO_INPROG;
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
                                break;
                } else if ((orefs & HAMMER2_DIO_MASK) == 1) {
                        /*
-                        * Lastdrop case, INPROG already set.
+                        * Lastdrop case, INPROG already set.  We must
+                        * wait for INPROG to clear.
                         */
+                       nrefs = orefs | HAMMER2_DIO_WAITING;
+                       tsleep_interlock(dio, 0);
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
-                               atomic_add_int(&hmp->iofree_count, 1);
-                               return;
+                               tsleep(dio, PINTERLOCKED, "h2dio", hz);
                        }
+                       /* retry */
                } else {
                        /*
                         * Normal drop case.
                         */
+                       nrefs = orefs - 1;
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
                                return;
+                       /* retry */
                }
                cpu_pause();
                /* retry */
@@ -596,16 +427,10 @@ hammer2_io_putblk(hammer2_io_t **diop)
        bp = dio->bp;
        dio->bp = NULL;
 
-       if (orefs & HAMMER2_DIO_GOOD) {
-               KKASSERT(bp != NULL);
-#if 0
-               if (hammer2_inval_enable &&
-                   (orefs & HAMMER2_DIO_INVALBITS) == HAMMER2_DIO_INVALBITS) {
-                       ++hammer2_iod_invals;
-                       bp->b_flags |= B_INVAL | B_RELBUF;
-                       brelse(bp);
-               } else
-#endif
+       if ((orefs & HAMMER2_DIO_GOOD) && bp) {
+               /*
+                * Non-errored disposal of bp
+                */
                if (orefs & HAMMER2_DIO_DIRTY) {
                        int hce;
 
@@ -634,37 +459,33 @@ hammer2_io_putblk(hammer2_io_t **diop)
                        bqrelse(bp);
                }
        } else if (bp) {
-#if 0
-               if (hammer2_inval_enable &&
-                   (orefs & HAMMER2_DIO_INVALBITS) == HAMMER2_DIO_INVALBITS) {
-                       ++hammer2_iod_invals;
-                       bp->b_flags |= B_INVAL | B_RELBUF;
-                       brelse(bp);
-               } else
-#endif
-               if (orefs & HAMMER2_DIO_DIRTY) {
-                       dio_write_stats_update(dio, bp);
-                       bdwrite(bp);
-               } else {
-                       bqrelse(bp);
-               }
+               /*
+                * Errored disposal of bp
+                */
+               brelse(bp);
        }
 
        /*
-        * The instant we call io_complete dio is a free agent again and
-        * can be ripped out from under us.
-        *
-        * we can cleanup our final DIO_INPROG by simulating an iocb
-        * completion.
+        * Update iofree_count before disposing of the dio
         */
-       hmp = dio->hmp;                         /* extract fields */
+       hmp = dio->hmp;
        atomic_add_int(&hmp->iofree_count, 1);
-       cpu_ccfence();
 
-       iocb.dio = dio;
-       iocb.flags = HAMMER2_IOCB_INPROG;
-       hammer2_io_complete(&iocb);
-       dio = NULL;                             /* dio stale */
+       /*
+        * Clear INPROG, GOOD, and WAITING
+        */
+       for (;;) {
+               orefs = dio->refs;
+               cpu_ccfence();
+               nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
+                                 HAMMER2_DIO_WAITING);
+               if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
+                       if (orefs & HAMMER2_DIO_WAITING)
+                               wakeup(dio);
+                       break;
+               }
+               cpu_pause();
+       }
 
        /*
         * We cache free buffers so re-use cases can use a shared lock, but
@@ -734,6 +555,11 @@ hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
                RB_REMOVE(hammer2_io_tree, tree, dio);
                KKASSERT(dio->bp == NULL &&
                    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
+               if (dio->refs & HAMMER2_DIO_DIRTY) {
+                       kprintf("hammer2_io_cleanup: Dirty buffer "
+                               "%016jx/%d (bp=%p)\n",
+                               dio->pbase, dio->psize, dio->bp);
+               }
                kfree(dio, M_HAMMER2);
                atomic_add_int(&hammer2_dio_count, -1);
                atomic_add_int(&hmp->iofree_count, -1);
@@ -756,304 +582,37 @@ hammer2_io_data(hammer2_io_t *dio, off_t lbase)
        return(bp->b_data + off);
 }
 
-#if 0
-/*
- * Keep track of good CRCs in dio->good_crc_mask. XXX needs to be done
- * in the chain structure, but chain structure needs to be persistent as
- * well on refs=0 and it isn't.
- */
-int
-hammer2_io_crc_good(hammer2_chain_t *chain, uint64_t *maskp)
-{
-       hammer2_io_t *dio;
-       uint64_t mask;
-
-       if ((dio = chain->dio) != NULL && chain->bytes >= 1024) {
-               mask = hammer2_io_mask(dio, chain->bref.data_off, chain->bytes);
-               *maskp = mask;
-               if ((dio->crc_good_mask & mask) == mask)
-                       return 1;
-               return 0;
-       }
-       *maskp = 0;
-
-       return 0;
-}
-
-void
-hammer2_io_crc_setmask(hammer2_io_t *dio, uint64_t mask)
-{
-       if (dio) {
-               if (sizeof(long) == 8) {
-                       atomic_set_long(&dio->crc_good_mask, mask);
-               } else {
-#if _BYTE_ORDER == _LITTLE_ENDIAN
-                       atomic_set_int(&((int *)&dio->crc_good_mask)[0],
-                                       (uint32_t)mask);
-                       atomic_set_int(&((int *)&dio->crc_good_mask)[1],
-                                       (uint32_t)(mask >> 32));
-#else
-                       atomic_set_int(&((int *)&dio->crc_good_mask)[0],
-                                       (uint32_t)(mask >> 32));
-                       atomic_set_int(&((int *)&dio->crc_good_mask)[1],
-                                       (uint32_t)mask);
-#endif
-               }
-       }
-}
-
-void
-hammer2_io_crc_clrmask(hammer2_io_t *dio, uint64_t mask)
-{
-       if (dio) {
-               if (sizeof(long) == 8) {
-                       atomic_clear_long(&dio->crc_good_mask, mask);
-               } else {
-#if _BYTE_ORDER == _LITTLE_ENDIAN
-                       atomic_clear_int(&((int *)&dio->crc_good_mask)[0],
-                                       (uint32_t)mask);
-                       atomic_clear_int(&((int *)&dio->crc_good_mask)[1],
-                                       (uint32_t)(mask >> 32));
-#else
-                       atomic_clear_int(&((int *)&dio->crc_good_mask)[0],
-                                       (uint32_t)(mask >> 32));
-                       atomic_clear_int(&((int *)&dio->crc_good_mask)[1],
-                                       (uint32_t)mask);
-#endif
-               }
-       }
-}
-#endif
-
-/*
- * Helpers for hammer2_io_new*() functions
- */
-static
-void
-hammer2_iocb_new_callback(hammer2_iocb_t *iocb)
-{
-       hammer2_io_t *dio = iocb->dio;
-       int gbctl = (iocb->flags & HAMMER2_IOCB_QUICK) ? GETBLK_NOWAIT : 0;
-
-       /*
-        * If IOCB_INPROG is not set the dio already has a good buffer and we
-        * can't mess with it other than zero the requested range.
-        *
-        * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
-        * do what needs to be done with dio->bp.
-        */
-       if (iocb->flags & HAMMER2_IOCB_INPROG) {
-               if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
-                       if (iocb->lsize == dio->psize) {
-                               /*
-                                * Fully covered buffer, try to optimize to
-                                * avoid any I/O.  We might already have the
-                                * buffer due to iocb chaining.
-                                */
-                               if (dio->bp == NULL) {
-                                       dio->bp = getblk(dio->hmp->devvp,
-                                                        dio->pbase, dio->psize,
-                                                        gbctl, 0);
-                               }
-                               if (dio->bp) {
-                                       vfs_bio_clrbuf(dio->bp);
-                                       dio->bp->b_flags |= B_CACHE;
-                               }
-                       } else if (iocb->flags & HAMMER2_IOCB_QUICK) {
-                               /*
-                                * Partial buffer, quick mode.  Do nothing.
-                                * Do not instantiate the buffer or try to
-                                * mark it B_CACHE because other portions of
-                                * the buffer might have to be read by other
-                                * accessors.
-                                */
-                       } else if (dio->bp == NULL ||
-                                  (dio->bp->b_flags & B_CACHE) == 0) {
-                               /*
-                                * Partial buffer, normal mode, requires
-                                * read-before-write.  Chain the read.
-                                *
-                                * We might already have the buffer due to
-                                * iocb chaining.  XXX unclear if we really
-                                * need to write/release it and reacquire
-                                * in that case.
-                                *
-                                * QUEUE ASYNC I/O, IOCB IS NOT YET COMPLETE.
-                                */
-                               if (dio->bp) {
-                                       if (dio->refs & HAMMER2_DIO_DIRTY) {
-                                               dio_write_stats_update(dio,
-                                                                      dio->bp);
-                                               bdwrite(dio->bp);
-                                       } else {
-                                               bqrelse(dio->bp);
-                                       }
-                                       dio->bp = NULL;
-                               }
-                               atomic_set_int(&iocb->flags, HAMMER2_IOCB_READ);
-                               breadcb(dio->hmp->devvp,
-                                       dio->pbase, dio->psize,
-                                       hammer2_io_callback, iocb);
-                               return;
-                       } /* else buffer is good */
-               } /* else callback from breadcb is complete */
-       }
-       if (dio->bp) {
-               if (iocb->flags & HAMMER2_IOCB_ZERO)
-                       bzero(hammer2_io_data(dio, iocb->lbase), iocb->lsize);
-               atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
-       }
-       hammer2_io_complete(iocb);
-}
-
-static
-int
-_hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
-               hammer2_io_t **diop, int flags)
-{
-       hammer2_iocb_t iocb;
-
-       iocb.callback = hammer2_iocb_new_callback;
-       iocb.chain = NULL;
-       iocb.ptr = NULL;
-       iocb.lbase = lbase;
-       iocb.lsize = lsize;
-       iocb.flags = flags;
-       iocb.btype = btype;
-       iocb.error = 0;
-       hammer2_io_getblk(hmp, lbase, lsize, &iocb);
-       if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
-               hammer2_iocb_wait(&iocb);
-       *diop = iocb.dio;
-
-       return (iocb.error);
-}
-
 int
 hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
               hammer2_io_t **diop)
 {
-       return(_hammer2_io_new(hmp, btype, lbase, lsize,
-                              diop, HAMMER2_IOCB_ZERO));
+       *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
+       return ((*diop)->error);
 }
 
 int
 hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
                 hammer2_io_t **diop)
 {
-       return(_hammer2_io_new(hmp, btype, lbase, lsize, diop, 0));
-}
-
-/*
- * This is called from the freemap to pre-validate a full-sized buffer
- * whos contents we don't care about, in order to prevent an unnecessary
- * read-before-write.
- */
-void
-hammer2_io_newq(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize)
-{
-       hammer2_io_t *dio = NULL;
-
-       _hammer2_io_new(hmp, btype, lbase, lsize, &dio, HAMMER2_IOCB_QUICK);
-       hammer2_io_bqrelse(&dio);
-}
-
-static
-void
-hammer2_iocb_bread_callback(hammer2_iocb_t *iocb)
-{
-       hammer2_io_t *dio = iocb->dio;
-       off_t peof;
-       int error;
-
-       /*
-        * If IOCB_INPROG is not set the dio already has a good buffer and we
-        * can't mess with it other than zero the requested range.
-        *
-        * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
-        * do what needs to be done with dio->bp.
-        */
-       if (iocb->flags & HAMMER2_IOCB_INPROG) {
-               int hce;
-
-               if (dio->bp && (dio->bp->b_flags & B_CACHE)) {
-                       /*
-                        * Already good, likely due to being chained from
-                        * another iocb.
-                        */
-                       error = 0;
-               } else if ((hce = hammer2_cluster_read) > 0) {
-                       /*
-                        * Synchronous cluster I/O for now.
-                        */
-                       if (dio->bp) {
-                               bqrelse(dio->bp);
-                               dio->bp = NULL;
-                       }
-                       peof = (dio->pbase + HAMMER2_SEGMASK64) &
-                              ~HAMMER2_SEGMASK64;
-                       error = cluster_read(dio->hmp->devvp, peof, dio->pbase,
-                                            dio->psize,
-                                            dio->psize, HAMMER2_PBUFSIZE*hce,
-                                            &dio->bp);
-               } else {
-                       /*
-                        * Synchronous I/O for now.
-                        */
-                       if (dio->bp) {
-                               bqrelse(dio->bp);
-                               dio->bp = NULL;
-                       }
-                       error = bread(dio->hmp->devvp, dio->pbase,
-                                     dio->psize, &dio->bp);
-               }
-               if (error) {
-                       brelse(dio->bp);
-                       dio->bp = NULL;
-               }
-       }
-       hammer2_io_complete(iocb);
+       *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
+       return ((*diop)->error);
 }
 
 int
 hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
                hammer2_io_t **diop)
 {
-       hammer2_iocb_t iocb;
-
-       iocb.callback = hammer2_iocb_bread_callback;
-       iocb.chain = NULL;
-       iocb.ptr = NULL;
-       iocb.lbase = lbase;
-       iocb.lsize = lsize;
-       iocb.btype = btype;
-       iocb.flags = 0;
-       iocb.error = 0;
-       hammer2_io_getblk(hmp, lbase, lsize, &iocb);
-       if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
-               hammer2_iocb_wait(&iocb);
-       *diop = iocb.dio;
-
-       return (iocb.error);
+       *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_READ);
+       return ((*diop)->error);
 }
 
-/*
- * System buf/bio async callback extracts the iocb and chains
- * to the iocb callback.
- */
-void
-hammer2_io_callback(struct bio *bio)
+hammer2_io_t *
+hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, int lsize)
 {
-       struct buf *dbp = bio->bio_buf;
-       hammer2_iocb_t *iocb = bio->bio_caller_info1.ptr;
        hammer2_io_t *dio;
 
-       dio = iocb->dio;
-       if ((bio->bio_flags & BIO_DONE) == 0)
-               bpdone(dbp, 0);
-       bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
-       dio->bp = bio->bio_buf;
-       iocb->callback(iocb);
+       dio = hammer2_io_getblk(hmp, 0, lbase, lsize, HAMMER2_DOP_READQ);
+       return dio;
 }
 
 void
@@ -1118,12 +677,6 @@ hammer2_io_bqrelse(hammer2_io_t **diop)
        hammer2_io_putblk(diop);
 }
 
-int
-hammer2_io_isdirty(hammer2_io_t *dio)
-{
-       return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
-}
-
 /*
  * Set dedup validation bits in a DIO.  We do not need the buffer cache
  * buffer for this.  This must be done concurrent with setting bits in
@@ -1135,8 +688,9 @@ hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
        hammer2_io_t *dio;
        uint64_t mask;
        int lsize;
+       int isgood;
 
-       dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1);
+       dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
        lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
        mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
        atomic_clear_64(&dio->dedup_valid, mask);
@@ -1156,12 +710,13 @@ hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
 {
        hammer2_io_t *dio;
        uint64_t mask;
+       int isgood;
 
        if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
                return;
        if (btype != HAMMER2_BREF_TYPE_DATA)
                return;
-       dio = hammer2_io_alloc(hmp, data_off, btype, 0);
+       dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
        if (dio) {
                if (data_off < dio->pbase ||
                    (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
@@ -1185,8 +740,10 @@ void
 hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
 {
        hammer2_io_t *dio;
+       int isgood;
 
-       dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA, 0);
+       dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
+                              0, &isgood);
        if (dio) {
                KASSERT((dio->dedup_alloc &
                          hammer2_dedup_mask(dio, data_off, bytes)) == 0,
index 6903f58..c015297 100644 (file)
@@ -1554,7 +1554,7 @@ hammer2_dedup_lookup(hammer2_dev_t *hmp, char **datap, int pblksize)
                        continue;
                if ((1 << (int)(off & HAMMER2_OFF_MASK_RADIX)) != pblksize)
                        continue;
-               dio = hammer2_io_getquick(hmp, off, pblksize, 0);
+               dio = hammer2_io_getquick(hmp, off, pblksize);
                if (dio) {
                        dtmp = hammer2_io_data(dio, off),
                        mask = hammer2_dedup_mask(dio, off, pblksize);
index 76f69db..c89ac72 100644 (file)
@@ -1691,7 +1691,12 @@ again:
        if ((devvp = hmp->devvp) != NULL) {
                ronly = hmp->ronly;
                vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
+               kprintf("hammer2_unmount(A): devvp %s rbdirty %p ronly=%d\n",
+                       hmp->devrepname, RB_ROOT(&devvp->v_rbdirty_tree),
+                       ronly);
                vinvalbuf(devvp, (ronly ? 0 : V_SAVE), 0, 0);
+               kprintf("hammer2_unmount(B): devvp %s rbdirty %p\n",
+                       hmp->devrepname, RB_ROOT(&devvp->v_rbdirty_tree));
                hmp->devvp = NULL;
                VOP_CLOSE(devvp, (ronly ? FREAD : FREAD|FWRITE), NULL);
                vn_unlock(devvp);